#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3'
require 'set'
require 'rake'
require 'rake/tasklib'
require 'shellwords'

module ANTLR3

=begin rdoc ANTLR3::CompileTask

A rake task-generating utility for compiling ANTLR grammar files.
This is a general-purpose utility -- the grammars do not have to
target Ruby output; it handles all known ANTLR language targets.

  require 'antlr3/task'
  
  ANTLR3::CompileTask.define(
    :name => 'grammars', :output_directory => 'lib/parsers'
  ) do | t |
    t.grammar_set( 'antlr/MainParser.g', 'antlr/MainTree.g' )
    
    t.grammar_set( 'antlr/Template.g' ) do | gram |
      gram.output_directory = 'lib/parsers/template'
      gram.debug = true
    end
  end

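Defining a CompileTask creates two rake tasks under the chosen name: a
+compile+ task that runs the ANTLR Tool over any grammar whose generated
files are missing or out of date, and a +clobber+ task that removes the
ANTLR-generated source. With the +:name+ setting shown above, they might
be invoked as:

  rake grammars:compile   # generate recognizer source code from the grammars
  rake grammars:clobber   # delete all ANTLR-generated source code
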
TODO: finish documentation

=end

class CompileTask < Rake::TaskLib
  attr_reader :grammar_sets, :options
  attr_accessor :name
  
  def self.define( *grammar_files )
    lib = new( *grammar_files )
    block_given? and yield( lib )
    lib.define
    return( lib )
  end
  
  def initialize( *grammar_files )
    grammar_files = [ grammar_files ].flatten!
    options = Hash === grammar_files.last ? grammar_files.pop : {}
    @grammar_sets = []
    @name = options.fetch( :name, 'antlr-grammars' )
    @options = options
    @namespace = Rake.application.current_scope
    grammar_files.empty? or grammar_set( grammar_files )
  end
  
  def target_files
    @grammar_sets.inject( [] ) do | list, set |
      list.concat( set.target_files )
    end
  end
  
  def grammar_set( *grammar_files )
    grammar_files = [ grammar_files ].flatten!
    options = @options.merge(
      Hash === grammar_files.last ? grammar_files.pop : {}
    )
    set = GrammarSet.new( grammar_files, options )
    block_given? and yield( set )
    @grammar_sets << set
    return( set )
  end
  
  def compile_task
    full_name = ( @namespace + [ @name, 'compile' ] ).join( ':' )
    Rake::Task[ full_name ]
  end
  
  def compile!
    compile_task.invoke
  end
  
  def clobber_task
    full_name = ( @namespace + [ @name, 'clobber' ] ).join( ':' )
    Rake::Task[ full_name ]
  end
  
  def clobber!
    clobber_task.invoke
  end
  
  def define
    namespace( @name ) do
      desc( "trash all ANTLR-generated source code" )
      task( 'clobber' ) do
        for set in @grammar_sets
          set.clean
        end
      end
      
      for set in @grammar_sets
        set.define_tasks
      end
      
      desc( "compile ANTLR grammars" )
      task( 'compile' => target_files )
    end
  end
  

#class CompileTask::GrammarSet
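# A GrammarSet bundles one or more grammar files that share a common set of
# compile options (output directory, ANTLR jar, debug/trace/profile flags,
# and extra java / Tool arguments) and knows how to build the corresponding
# `java org.antlr.Tool' command line for each grammar.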
class GrammarSet
  include Rake::DSL if defined?( Rake::DSL )  # rake >= 0.9 keeps file / sh / rm in Rake::DSL instead of Object
  
  attr_accessor :antlr_jar, :debug,
                :trace, :profile, :compile_options,
                :java_options
  attr_reader :load_path, :grammars
  attr_writer :output_directory
  
  def initialize( grammar_files, options = {} )
    # settle the output directory first so it can be searched for
    # token vocabularies and imported grammars below
    @output_directory = ( options[ :output_directory ] || '.' ).to_s
    
    @load_path = grammar_files.map { | f | File.dirname( f ) }
    @load_path.push( '.', @output_directory )
    
    if extra_load = options[ :load_path ]
      @load_path.unshift( *[ extra_load ].flatten )
    end
    @load_path.uniq!
    
    @grammars = grammar_files.map do | file |
      GrammarFile.new( self, file )
    end
    
    @antlr_jar = options.fetch( :antlr_jar, ANTLR3.antlr_jar )
    @debug = options.fetch( :debug, false )
    @trace = options.fetch( :trace, false )
    @profile = options.fetch( :profile, false )
    @compile_options =
      case opts = options[ :compile_options ]
      when Array then opts
      else Shellwords.shellwords( opts.to_s )
      end
    @java_options =
      case opts = options[ :java_options ]
      when Array then opts
      else Shellwords.shellwords( opts.to_s )
      end
  end
  
  def target_files
    @grammars.map { | gram | gram.target_files }.flatten
  end
  
  def output_directory
    @output_directory || '.'
  end
  
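  # Defines rake file tasks for each grammar in the set. Every grammar's
  # generated files depend on the ANTLR jar and, when the grammar declares
  # a tokenVocab, on the .tokens file provided by that vocabulary (if one
  # can be located on the load path).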
  def define_tasks
    file( @antlr_jar )
    
    for grammar in @grammars
      deps = [ @antlr_jar ]
      if  vocab = grammar.token_vocab and
          tfile = find_tokens_file( vocab, grammar )
        file( tfile )
        deps << tfile
      end
      grammar.define_tasks( deps )
    end
  end
  
  def clean
    for grammar in @grammars
      grammar.clean
    end
    if test( ?d, output_directory ) and ( Dir.entries( output_directory ) - %w( . .. ) ).empty?
      rmdir( output_directory )
    end
  end
  
  def find_tokens_file( vocab, grammar )
    gram = @grammars.find { | g | g.name == vocab } and
      return( gram.tokens_file )
    file = locate( "#{ vocab }.tokens" ) and return( file )
    warn( Util.tidy( <<-END, true ) )
    | unable to locate .tokens file `#{ vocab }' referenced in #{ grammar.path }
    | -- ignoring dependency
    END
    return( nil )
  end
  
  def locate( file_name )
    dir = @load_path.find do | d |
      File.file?( File.join( d, file_name ) )
    end
    dir and return( File.join( dir, file_name ) )
  end
  
  def compile( grammar )
    dir = output_directory
    test( ?d, dir ) or FileUtils.mkpath( dir )
    sh( build_command( grammar ) )
  end
  
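  # Assembles the shell command used to compile +grammar+. For illustration,
  # with default options and a hypothetical jar path, a grammar at
  # antlr/Main.g would yield a command along the lines of:
  #
  #   java -cp /path/to/antlr3.jar org.antlr.Tool -fo . antlr/Main.g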
  def build_command( grammar )
    parts = [ 'java', '-cp', @antlr_jar ]
    parts.concat( @java_options )
    parts << 'org.antlr.Tool' << '-fo' << output_directory
    parts << '-debug' if @debug
    parts << '-profile' if @profile
    parts << '-trace' if @trace
    parts.concat( @compile_options )
    parts << grammar.path
    return parts.map! { | t | escape( t ) }.join( ' ' )
  end
  
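  # Escapes a single token for use in the shell command. Characters outside
  # a small safe set are backslash-escaped; for example, a path such as
  # `build dir/Main.g' would come out as `build\ dir/Main.g'.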
  def escape( token )
    token = token.to_s.dup
    token.empty? and return( %('') )
    token.gsub!( /([^A-Za-z0-9_\-.,:\/@\n])/n, "\\\\\\1" )
    token.gsub!( /\n/, "'\n'" )
    return( token )
  end
  
end

class GrammarFile
  include Rake::DSL if defined?( Rake::DSL )  # for file / touch / rm on rake >= 0.9
  
  LANGUAGES = {
    "ActionScript" => [ ".as" ],
    "CSharp2" => [ ".cs" ],
    "C" => [ ".c", ".h" ],
    "ObjC" => [ ".m", ".h" ],
    "CSharp3" => [ ".cs" ],
    "Cpp" => [ ".cpp", ".h" ],
    "Ruby" => [ ".rb" ],
    "Java" => [ ".java" ],
    "JavaScript" => [ ".js" ],
    "Python" => [ ".py" ],
    "Delphi" => [ ".pas" ],
    "Perl5" => [ ".pm" ]
  }.freeze
  GRAMMAR_TYPES = %w(lexer parser tree combined)
  
  ##################################################################
  ######## CONSTRUCTOR #############################################
  ##################################################################
  
  def initialize( group, path, options = {} )
    @group = group
    @path = path.to_s
    @imports = []
    @language = 'Java'
    @token_vocab = nil
    @tasks_defined = false
    @extra_dependencies = []
    if extra = options[ :extra_dependencies ]
      extra = [ extra ].flatten
      @extra_dependencies.concat( extra )
    end
    
    study
    yield( self ) if block_given?
    fetch_imports
  end
  
  ##################################################################
  ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
  ##################################################################
  attr_reader :type, :name, :language, :source,
              :token_vocab, :imports, :imported_grammars,
              :path, :group
  
  for attr in [ :output_directory, :load_path, :antlr_jar ]
    class_eval( <<-END )
      def #{ attr }
        @group.#{ attr }
      end
    END
  end
  
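  # Generated-file helpers: for a combined grammar named Main targeting
  # Ruby, lexer_files returns [ "<output_directory>/MainLexer.rb" ] and
  # parser_files returns [ "<output_directory>/MainParser.rb" ]; lexer-only
  # and parser-only grammars use the bare grammar name, and inapplicable
  # helpers return []. (File names shown are illustrative.)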
  def lexer_files
    if lexer? then base = @name
    elsif combined? then base = @name + 'Lexer'
    else return( [] )
    end
    return( file_names( base ) )
  end
  
  def parser_files
    if parser? then base = @name
    elsif combined? then base = @name + 'Parser'
    else return( [] )
    end
    return( file_names( base ) )
  end
  
  def tree_parser_files
    return( tree? ? file_names( @name ) : [] )
  end
  
  def file_names( base )
    LANGUAGES.fetch( @language ).map do | ext |
      File.join( output_directory, base + ext )
    end
  end
  
  for type in GRAMMAR_TYPES
    class_eval( <<-END )
      def #{ type }?
        @type == #{ type.inspect }
      end
    END
  end
  
  def delegate_files( delegate_suffix )
    file_names( "#{ name }_#{ delegate_suffix }" )
  end
  
  def tokens_file
    File.join( output_directory, name + '.tokens' )
  end
  
  def target_files( all = true )
    targets = [ tokens_file ]
    
    for target_type in %w( lexer parser tree_parser )
      for file in self.send( :"#{ target_type }_files" )
        targets << file
      end
    end
    
    if all
      for grammar in @imported_grammars
        targets.concat( grammar.target_files )
      end
    end
    
    return targets
  end
  
  def update
    touch( @path )
  end
  
  def all_imported_files
    imported_files = []
    for grammar in @imported_grammars
      imported_files.push( grammar.path, *grammar.all_imported_files )
    end
    return imported_files
  end
  
  def clean
    deleted = []
    for target in target_files
      if test( ?f, target )
        rm( target )
        deleted << target
      end
    end
    
    for grammar in @imported_grammars
      deleted.concat( grammar.clean )
    end
    
    return deleted
  end
  
  def define_tasks( shared_depends )
    unless @tasks_defined
      depends = [ @path, *all_imported_files ]
      for f in depends
        file( f )
      end
      depends = shared_depends + depends
      
      target_files.each do | target |
        file( target => ( depends - [ target ] ) ) do   # prevents recursive .tokens file dependencies
          @group.compile( self )
        end
      end
      
      @tasks_defined = true
    end
  end
  
private
  
  def fetch_imports
    @imported_grammars = @imports.map do | imp |
      file = group.locate( "#{ imp }.g" ) or raise( Util.tidy( <<-END ) )
      | #{ @path }: unable to locate imported grammar file #{ imp }.g
      | search directories ( @load_path ):
      |   - #{ load_path.join( "\n  - " ) }
      END
      Imported.new( self, file )
    end
  end
  
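  # Extracts the grammar's name, type, target language, and token vocabulary
  # by scanning its source. For instance, a grammar beginning with
  #
  #   grammar Main;
  #   options { language = Ruby; tokenVocab = MainLexer; }
  #
  # would be recorded as a combined grammar named `Main' with @language set
  # to 'Ruby' and @token_vocab to 'MainLexer' (header shown for illustration
  # only).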
  def study
    @source = File.read( @path )
    @source =~ /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/ or
      raise FormatError[ @source, @path ]
    @name = $2
    @type = $1 || 'combined'
    if @source =~ /^\s*options\s*\{(.*?)\}/m
      option_block = $1
      if option_block =~ /\s*language\s*=\s*(\S+)\s*;/
        @language = $1
        LANGUAGES.has_key?( @language ) or
          raise( FormatError, "Unknown ANTLR target language: %p" % @language )
      end
      option_block =~ /\s*tokenVocab\s*=\s*(\S+)\s*;/ and
        @token_vocab = $1
    end
    
    @source.scan( /^\s*import\s+(\w+\s*(?:,\s*\w+\s*)*);/ ) do
      list = $1.strip
      @imports.concat( list.split( /\s*,\s*/ ) )
    end
  end
end # class GrammarFile

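# Represents a grammar pulled in via an `import' statement inside another
# grammar file. Its load path, output directory, and ANTLR jar are delegated
# to the importing (owner) grammar, and its generated files are named
# through the owner's delegate_files.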
class GrammarFile::Imported < GrammarFile
  def initialize( owner, path )
    @owner = owner
    @path = path.to_s
    @imports = []
    @language = 'Java'
    @token_vocab = nil
    study
    fetch_imports
  end
  
  for attr in [ :load_path, :output_directory, :antlr_jar, :verbose, :group ]
    class_eval( <<-END )
      def #{ attr }
        @owner.#{ attr }
      end
    END
  end
  
  def delegate_files( suffix )
    @owner.delegate_files( "#{ @name }_#{ suffix }" )
  end
  
  def target_files
    targets = [ tokens_file ]
    targets.concat( @owner.delegate_files( @name ) )
    return( targets )
  end
end

class GrammarFile::FormatError < StandardError
  attr_reader :file, :source
  
  def self.[]( *args )
    new( *args )
  end
  
  def initialize( source, file = nil )
    @file = file
    @source = source
    message = ''
    if file.nil? # inline grammar source
      message << "bad inline grammar source:\n"
    else
      message << ( "bad grammar source in file %p\n" % @file )
    end
    message << ( "-" * 80 ) << "\n"
    message << @source
    message[ -1 ] == ?\n or message << "\n"
    message << ( "-" * 80 ) << "\n"
    message << "could not locate a grammar name and type declaration matching\n"
    message << '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'
    super( message )
  end
end # class GrammarFile::FormatError
end # class CompileTask
end # module ANTLR3
489