grammar.rb revision 324c4644fee44b9898524c09511bd33c3f12e2df
1#!/usr/bin/ruby
2# encoding: utf-8
3
4require 'antlr3'
5require 'antlr3/test/core-extensions'
6require 'antlr3/test/call-stack'
7
# Make a top-level +MD5+ constant available regardless of the Ruby version.
#
# The previous check only recognised Ruby 1.9 and fell back to the stand-alone
# 'md5' library otherwise; that library no longer ships with Ruby 2.0 and
# later, so loading this file raised LoadError there. Prefer 'digest/md5' and
# only fall back to the legacy library when it cannot be loaded.
begin
  require 'digest/md5'
  MD5 = Digest::MD5 unless defined?( MD5 )
rescue LoadError
  require 'md5' # legacy stand-alone library, which defines MD5 itself
end
14
15module ANTLR3
16module Test
# Mix-in for objects that produce target files out of other files.
#
# The including object must respond to +target_files+; #stale? compares
# those targets against the dependency list kept here.
module DependantFile
  # Paths that seed the dependency list of every including object.
  GLOBAL_DEPENDENCIES = []

  attr_accessor :path, :force
  alias force? force

  # Lazily created list of dependency paths, initialised with a copy of
  # GLOBAL_DEPENDENCIES.
  def dependencies
    @dependencies ||= GLOBAL_DEPENDENCIES.clone
  end

  # Expands +path+ and records it as a dependency when it names an existing
  # regular file. The expanded path is returned whether or not it was recorded.
  def depends_on( path )
    full_path = File.expand_path( path.to_s )
    dependencies.push( full_path ) if File.file?( full_path )
    full_path
  end

  # True when +force+ is set, when any target file is missing, or when any
  # dependency has a later modification time than any target.
  def stale?
    return( true ) if force
    target_files.any? do |target|
      !File.file?( target ) ||
        dependencies.any? { |dependency| test( ?>, dependency, target ) }
    end
  end
end # module DependantFile
41
42class Grammar
43  include DependantFile
44
45  GRAMMAR_TYPES = %w(lexer parser tree combined)
46  TYPE_TO_CLASS = { 
47    'lexer'  => 'Lexer',
48    'parser' => 'Parser',
49    'tree'   => 'TreeParser'
50  }
51  CLASS_TO_TYPE = TYPE_TO_CLASS.invert
52
53  def self.global_dependency( path )
54    path = File.expand_path path.to_s
55    GLOBAL_DEPENDENCIES << path if test( ?f, path )
56    return path
57  end
58  
59  def self.inline( source, *args )
60    InlineGrammar.new( source, *args )
61  end
62  
63  ##################################################################
64  ######## CONSTRUCTOR #############################################
65  ##################################################################
66  def initialize( path, options = {} )
67    @path = path.to_s
68    @source = File.read( @path )
69    @output_directory = options.fetch( :output_directory, '.' )
70    @verbose = options.fetch( :verbose, $VERBOSE )
71    study
72    build_dependencies
73    
74    yield( self ) if block_given?
75  end
76  
77  ##################################################################
78  ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
79  ##################################################################
80  attr_reader :type, :name, :source
81  attr_accessor :output_directory, :verbose
82  
83  def lexer_class_name
84    self.name + "::Lexer"
85  end
86  
87  def lexer_file_name
88    if lexer? then base = name
89    elsif combined? then base = name + 'Lexer'
90    else return( nil )
91    end
92    return( base + '.rb' )
93  end
94  
95  def parser_class_name
96    name + "::Parser"
97  end
98  
99  def parser_file_name
100    if parser? then base = name
101    elsif combined? then base = name + 'Parser'
102    else return( nil )
103    end
104    return( base + '.rb' )
105  end
106  
107  def tree_parser_class_name
108    name + "::TreeParser"
109  end
110
111  def tree_parser_file_name
112    tree? and name + '.rb'
113  end
114  
115  def has_lexer?
116    @type == 'combined' || @type == 'lexer'
117  end
118  
119  def has_parser?
120    @type == 'combined' || @type == 'parser'
121  end
122  
123  def lexer?
124    @type == "lexer"
125  end
126  
127  def parser?
128    @type == "parser"
129  end
130  
131  def tree?
132    @type == "tree"
133  end
134  
135  alias has_tree? tree?
136  
137  def combined?
138    @type == "combined"
139  end
140  
141  def target_files( include_imports = true )
142    targets = []
143    
144    for target_type in %w(lexer parser tree_parser)
145      target_name = self.send( :"#{ target_type }_file_name" ) and
146        targets.push( output_directory / target_name )
147    end
148    
149    targets.concat( imported_target_files ) if include_imports
150    return targets
151  end
152  
153  def imports
154    @source.scan( /^\s*import\s+(\w+)\s*;/ ).
155      tap { |list| list.flatten! }
156  end
157  
158  def imported_target_files
159    imports.map! do |delegate|
160      output_directory / "#{ @name }_#{ delegate }.rb"
161    end
162  end
163
164  ##################################################################
165  ##### COMMAND METHODS ############################################
166  ##################################################################
167  def compile( options = {} )
168    if options[ :force ] or stale?
169      compile!( options )
170    end
171  end
172  
173  def compile!( options = {} )
174    command = build_command( options )
175    
176    blab( command )
177    output = IO.popen( command ) do |pipe|
178      pipe.read
179    end
180    
181    case status = $?.exitstatus
182    when 0, 130
183      post_compile( options )
184    else compilation_failure!( command, status, output )
185    end
186    
187    return target_files
188  end
189  
190  def clean!
191    deleted = []
192    for target in target_files
193      if test( ?f, target )
194        File.delete( target )
195        deleted << target
196      end
197    end
198    return deleted
199  end
200  
201  def inspect
202    sprintf( "grammar %s (%s)", @name, @path )
203  end
204  
205private
206  
207  def post_compile( options )
208    # do nothing for now
209  end
210  
211  def blab( string, *args )
212    $stderr.printf( string + "\n", *args ) if @verbose
213  end
214  
215  def default_antlr_jar
216    ENV[ 'ANTLR_JAR' ] || ANTLR3.antlr_jar
217  end
218  
219  def compilation_failure!( command, status, output )
220    for f in target_files
221      test( ?f, f ) and File.delete( f )
222    end
223    raise CompilationFailure.new( self, command, status, output )
224  end
225
226  def build_dependencies
227    depends_on( @path )
228    
229    if @source =~ /tokenVocab\s*=\s*(\S+)\s*;/
230      foreign_grammar_name = $1
231      token_file = output_directory / foreign_grammar_name + '.tokens'
232      grammar_file = File.dirname( path ) / foreign_grammar_name << '.g'
233      depends_on( token_file )
234      depends_on( grammar_file )
235    end    
236  end
237  
238  def shell_escape( token )
239    token = token.to_s.dup
240    token.empty? and return "''"
241    token.gsub!( /([^A-Za-z0-9_\-.,:\/@\n])/n, '\\\1' )
242    token.gsub!( /\n/, "'\n'" )
243    return token
244  end
245  
246  def build_command( options )
247    parts = %w(java)
248    jar_path = options.fetch( :antlr_jar, default_antlr_jar )
249    parts.push( '-cp', jar_path )
250    parts << 'org.antlr.Tool'
251    parts.push( '-fo', output_directory )
252    options[ :profile ] and parts << '-profile'
253    options[ :debug ]   and parts << '-debug'
254    options[ :trace ]   and parts << '-trace'
255    options[ :debug_st ] and parts << '-XdbgST'
256    parts << File.expand_path( @path )
257    parts.map! { |part| shell_escape( part ) }.join( ' ' ) << ' 2>&1'
258  end
259  
260  def study
261    @source =~ /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/ or
262      raise Grammar::FormatError[ source, path ]
263    @name = $2
264    @type = $1 || 'combined'
265  end
266end # class Grammar
267
268class Grammar::InlineGrammar < Grammar
269  attr_accessor :host_file, :host_line
270  
271  def initialize( source, options = {} )
272    host = call_stack.find { |call| call.file != __FILE__ }
273    
274    @host_file = File.expand_path( options[ :file ] || host.file )
275    @host_line = ( options[ :line ] || host.line )
276    @output_directory = options.fetch( :output_directory, File.dirname( @host_file ) )
277    @verbose = options.fetch( :verbose, $VERBOSE )
278    
279    @source = source.to_s.fixed_indent( 0 )
280    @source.strip!
281    
282    study
283    write_to_disk
284    build_dependencies
285    
286    yield( self ) if block_given?
287  end
288  
289  def output_directory
290    @output_directory and return @output_directory
291    File.basename( @host_file )
292  end
293  
294  def path=( v )
295    previous, @path = @path, v.to_s
296    previous == @path or write_to_disk
297  end
298  
299  def inspect
300    sprintf( 'inline grammar %s (%s:%s)', name, @host_file, @host_line )
301  end
302  
303private
304  
305  def write_to_disk
306    @path ||= output_directory / @name + '.g'
307    test( ?d, output_directory ) or Dir.mkdir( output_directory )
308    unless test( ?f, @path ) and MD5.digest( @source ) == MD5.digest( File.read( @path ) )
309      open( @path, 'w' ) { |f| f.write( @source ) }
310    end
311  end
312end # class Grammar::InlineGrammar
313
# Raised when the ANTLR tool exits with a failure status.
#
# Carries the grammar being compiled, the command line that was run, its exit
# status and the captured output with ANTLR-internal Java stack frames removed.
class Grammar::CompilationFailure < StandardError
  # Matches lines of a Java stack trace that point into ANTLR's own classes.
  JAVA_TRACE = /^(org\.)?antlr\.\S+\(\S+\.java:\d+\)\s*/
  attr_reader :grammar, :command, :status, :output
  
  # grammar:: the grammar whose compilation failed
  # command:: the command line that was executed
  # status::  the exit status of the command
  # output::  everything the command printed
  def initialize( grammar, command, status, output )
    # previously never assigned, so the +grammar+ reader always returned nil
    @grammar = grammar
    @command = command
    @status = status
    @output = output.to_s.gsub( JAVA_TRACE, '' )
    
    message = <<-END.here_indent! % [ command, status, grammar, @output ]
    | command ``%s'' failed with status %s
    | %p
    | ~ ~ ~ command output ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    | %s
    END
    
    super( message.chomp! || message )
  end
end # error Grammar::CompilationFailure
333
# Raised when grammar source lacks a recognisable "grammar Name;" declaration.
class Grammar::FormatError < StandardError
  attr_reader :file, :source
  
  # Shorthand constructor so the error can be raised as FormatError[ source, file ].
  def self.[]( *args )
    new( *args )
  end
  
  # source:: the offending grammar text
  # file::   path the text came from, or nil for an inline grammar
  #
  # The message consists of a headline, the source between two 80-column
  # rules, and the pattern the declaration was expected to match.
  def initialize( source, file = nil )
    @file = file
    @source = source
    
    # both headlines end in a newline; the file variant used to run straight
    # into the rule that follows it
    headline =
      if file.nil? then "bad inline grammar source:\n"
      else "bad grammar source in file %p\n" % [ @file ]
      end
    rule = ( '-' * 80 ) + "\n"
    
    listing = @source.to_s
    listing += "\n" unless listing.empty? or listing =~ /\n\z/
    
    # the pattern is single-quoted on purpose: in a double-quoted literal
    # "\s" is a space and "\S" is a plain "S", which garbled the message
    super( [
      headline, rule, listing, rule,
      "could not locate a grammar name and type declaration matching\n",
      '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'
    ].join )
  end
end # error Grammar::FormatError
365
366end
367end
368