168043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)#!/usr/bin/ruby
268043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)# encoding: utf-8
368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)
468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)require 'antlr3'
568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)require 'antlr3/test/core-extensions'
668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)require 'antlr3/test/call-stack'
703b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
# Make a top-level MD5 constant available on every Ruby version.
# The standalone 'md5' library only exists on Ruby 1.8; later interpreters
# ship Digest::MD5 instead, so testing for 1.9 alone makes the file fail to
# load on anything newer. Prefer Digest::MD5 and fall back to 'md5' only
# when digest/md5 cannot be loaded.
begin
  require 'digest/md5'
  MD5 = Digest::MD5 unless defined?( MD5 )
rescue LoadError
  require 'md5'
end
14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)module ANTLR3
16f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)module Test
# Mixin providing make-style staleness checks.
#
# The including object must respond to +target_files+ with a list of the
# paths it produces; #stale? compares those against the registered
# dependency paths.
module DependantFile
  # Dependency paths shared between includers. Each object copies this list
  # the first time its own #dependencies is requested.
  GLOBAL_DEPENDENCIES = []

  attr_accessor :path, :force
  alias force? force

  # The list of dependency paths for this object, created on first use from
  # a copy of GLOBAL_DEPENDENCIES.
  def dependencies
    @dependencies ||= GLOBAL_DEPENDENCIES.clone
  end

  # Registers +path+ as a dependency, provided it names an existing regular
  # file. Always returns the expanded (absolute) form of +path+, whether or
  # not it was registered.
  def depends_on( path )
    absolute = File.expand_path( path.to_s )
    dependencies.push( absolute ) if File.file?( absolute )
    absolute
  end

  # True when #force is set, when any target file is missing, or when any
  # registered dependency has a later modification time than a target.
  def stale?
    return true if force

    target_files.any? do |target|
      if File.file?( target )
        # Kernel#test with ?> compares modification times (dep newer than target)
        dependencies.any? { |dependency| test( ?>, dependency, target ) }
      else
        true
      end
    end
  end
end # module DependantFile
4168043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)
# Represents an ANTLR grammar file: reads its source, works out the grammar's
# name and type from the declaration, tracks the files it depends on, and
# runs the ANTLR tool to generate the ruby target files when they are stale.
class Grammar
  include DependantFile

  GRAMMAR_TYPES = %w(lexer parser tree combined)
  TYPE_TO_CLASS = { 
    'lexer'  => 'Lexer',
    'parser' => 'Parser',
    'tree'   => 'TreeParser'
  }
  CLASS_TO_TYPE = TYPE_TO_CLASS.invert

  # Adds +path+ to GLOBAL_DEPENDENCIES if it names an existing regular file.
  # Returns the expanded path either way.
  def self.global_dependency( path )
    path = File.expand_path( path.to_s )
    GLOBAL_DEPENDENCIES << path if test( ?f, path )
    return path
  end
  
  # Builds an InlineGrammar from grammar +source+ text. Any block is handed
  # on to InlineGrammar.new, which yields the new grammar to it (previously
  # the block was silently dropped here).
  def self.inline( source, *args, &block )
    InlineGrammar.new( source, *args, &block )
  end
  
  ##################################################################
  ######## CONSTRUCTOR #############################################
  ##################################################################

  # path::    location of the grammar file; it is read immediately
  # options:: +:output_directory+ (default '.') and +:verbose+
  #           (default $VERBOSE)
  #
  # Raises Grammar::FormatError when no grammar declaration is found in the
  # file. Yields the new grammar if a block is given.
  def initialize( path, options = {} )
    @path = path.to_s
    @source = File.read( @path )
    @output_directory = options.fetch( :output_directory, '.' )
    @verbose = options.fetch( :verbose, $VERBOSE )
    study
    build_dependencies
    
    yield( self ) if block_given?
  end
  
  ##################################################################
  ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
  ##################################################################
  attr_reader :type, :name, :source
  attr_accessor :output_directory, :verbose
  
  def lexer_class_name
    name + "::Lexer"
  end
  
  # File name of the generated lexer, or nil when this grammar type does not
  # produce one.
  def lexer_file_name
    if lexer? then name + '.rb'
    elsif combined? then name + 'Lexer.rb'
    end
  end
  
  def parser_class_name
    name + "::Parser"
  end
  
  # File name of the generated parser, or nil when this grammar type does
  # not produce one.
  def parser_file_name
    if parser? then name + '.rb'
    elsif combined? then name + 'Parser.rb'
    end
  end
  
  def tree_parser_class_name
    name + "::TreeParser"
  end

  # File name of the generated tree parser, or nil for other grammar types
  # (nil rather than false, to agree with the other *_file_name methods).
  def tree_parser_file_name
    tree? ? name + '.rb' : nil
  end
  
  def has_lexer?
    @type == 'combined' || @type == 'lexer'
  end
  
  def has_parser?
    @type == 'combined' || @type == 'parser'
  end
  
  def lexer?
    @type == "lexer"
  end
  
  def parser?
    @type == "parser"
  end
  
  def tree?
    @type == "tree"
  end
  
  alias has_tree? tree?
  
  def combined?
    @type == "combined"
  end
  
  # Paths of the files ANTLR is expected to generate for this grammar,
  # located under #output_directory. When +include_imports+ is true the
  # files for imported (delegate) grammars are appended.
  def target_files( include_imports = true )
    targets = []
    
    %w(lexer parser tree_parser).each do |target_type|
      target_name = send( :"#{ target_type }_file_name" )
      # String#/ is not core ruby; presumably the path-join helper from
      # antlr3/test/core-extensions
      targets.push( output_directory / target_name ) if target_name
    end
    
    targets.concat( imported_target_files ) if include_imports
    return targets
  end
  
  # Names of the grammars pulled in through <tt>import Name;</tt> statements
  # in the source.
  def imports
    @source.scan( /^\s*import\s+(\w+)\s*;/ ).flatten
  end
  
  # One target path per imported grammar, named "<name>_<import>.rb".
  def imported_target_files
    imports.map do |delegate|
      output_directory / "#{ @name }_#{ delegate }.rb"
    end
  end

  ##################################################################
  ##### COMMAND METHODS ############################################
  ##################################################################

  # Runs #compile! only when <tt>options[ :force ]</tt> is set or the
  # targets are stale; otherwise does nothing and returns nil.
  def compile( options = {} )
    compile!( options ) if options[ :force ] or stale?
  end
  
  # Runs the ANTLR tool unconditionally and returns #target_files.
  # Raises CompilationFailure (after deleting any generated targets) when
  # the command exits with a status other than 0 or 130.
  def compile!( options = {} )
    command = build_command( options )
    
    blab( command )
    output = IO.popen( command ) { |pipe| pipe.read }
    
    case status = $?.exitstatus
    when 0, 130
      # NOTE(review): 130 is accepted alongside 0; the reason is not visible
      # in this file -- confirm before changing
      post_compile( options )
    else compilation_failure!( command, status, output )
    end
    
    return target_files
  end
  
  # Deletes every existing target file and returns the list of paths that
  # were removed.
  def clean!
    deleted = []
    target_files.each do |target|
      next unless test( ?f, target )
      File.delete( target )
      deleted << target
    end
    return deleted
  end
  
  def inspect
    sprintf( "grammar %s (%s)", @name, @path )
  end
  
private
  
  def post_compile( options )
    # do nothing for now
  end
  
  # Writes a line to $stderr when verbose. +string+ is only treated as a
  # printf format when format arguments are supplied, so a plain message
  # containing '%' (such as a command line with an odd path) is printed
  # verbatim instead of raising.
  def blab( string, *args )
    return unless @verbose
    if args.empty?
      $stderr.print( string, "\n" )
    else
      $stderr.printf( string + "\n", *args )
    end
  end
  
  def default_antlr_jar
    ENV[ 'ANTLR_JAR' ] || ANTLR3.antlr_jar
  end
  
  # Removes any generated target files, then raises CompilationFailure.
  def compilation_failure!( command, status, output )
    clean!
    raise CompilationFailure.new( self, command, status, output )
  end

  # Registers the grammar file itself as a dependency and, when the source
  # sets <tt>tokenVocab = Name;</tt>, the corresponding Name.tokens file in
  # the output directory and Name.g beside this grammar. #depends_on only
  # records files that already exist.
  def build_dependencies
    depends_on( @path )
    
    if @source =~ /tokenVocab\s*=\s*(\S+)\s*;/
      foreign_grammar_name = $1
      token_file = ( output_directory / foreign_grammar_name ) + '.tokens'
      grammar_file = ( File.dirname( path ) / foreign_grammar_name ) + '.g'
      depends_on( token_file )
      depends_on( grammar_file )
    end
  end
  
  # Escapes +token+ for use as a single word on a shell command line:
  # the empty string becomes '', every character outside the safe set gets
  # a backslash in front of it, and newlines are wrapped in single quotes.
  #
  # The block form is used on purpose: the former replacement string
  # '\\\1' is read by gsub as an escaped backslash followed by a literal
  # "1", so "a b" came out as "a\1b" rather than "a\ b". The /n regexp flag
  # was dropped as well; the pattern is pure ASCII and matches any string
  # encoding without it.
  def shell_escape( token )
    token = token.to_s.dup
    return "''" if token.empty?
    token.gsub!( /[^A-Za-z0-9_\-.,:\/@\n]/ ) { |char| "\\" + char }
    token.gsub!( /\n/, "'\n'" )
    return token
  end
  
  # Assembles the shell command that runs org.antlr.Tool on this grammar,
  # with stderr redirected into stdout.
  #
  # Recognized options: +:antlr_jar+ (classpath, defaults to
  # #default_antlr_jar), +:profile+, +:debug+, +:trace+ and +:debug_st+.
  def build_command( options )
    parts = %w(java)
    jar_path = options.fetch( :antlr_jar, default_antlr_jar )
    parts.push( '-cp', jar_path )
    parts << 'org.antlr.Tool'
    parts.push( '-fo', output_directory )
    parts << '-profile' if options[ :profile ]
    parts << '-debug'   if options[ :debug ]
    parts << '-trace'   if options[ :trace ]
    parts << '-XdbgST'  if options[ :debug_st ]
    parts << File.expand_path( @path )
    parts.map { |part| shell_escape( part ) }.join( ' ' ) << ' 2>&1'
  end
  
  # Extracts @name and @type from the grammar declaration in the source.
  # A declaration without a lexer/parser/tree prefix is a 'combined'
  # grammar. Raises Grammar::FormatError when no declaration is found.
  def study
    declaration = /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/.match( @source )
    raise Grammar::FormatError[ source, path ] unless declaration
    @name = declaration[ 2 ]
    @type = declaration[ 1 ] || 'combined'
  end
end # class Grammar
267
# A grammar whose source is given as a string inside a ruby file (the "host"
# file) rather than read from a .g file. The source is written out to a .g
# file so that it can be compiled like any other Grammar.
class Grammar::InlineGrammar < Grammar
  attr_accessor :host_file, :host_line
  
  # source::  grammar text; it is passed through +fixed_indent( 0 )+ (a
  #           helper not defined in this file) and stripped
  # options:: +:file+ / +:line+ override the host location, which otherwise
  #           comes from the first call-stack entry outside this file;
  #           +:output_directory+ defaults to the host file's directory;
  #           +:verbose+ defaults to $VERBOSE
  #
  # Raises Grammar::FormatError when the source has no grammar declaration.
  # Yields the new grammar if a block is given.
  def initialize( source, options = {} )
    host = call_stack.find { |call| call.file != __FILE__ }
    
    @host_file = File.expand_path( options[ :file ] || host.file )
    @host_line = ( options[ :line ] || host.line )
    @output_directory = options.fetch( :output_directory, File.dirname( @host_file ) )
    @verbose = options.fetch( :verbose, $VERBOSE )
    
    @source = source.to_s.fixed_indent( 0 )
    @source.strip!
    
    # the .g file name depends on the grammar name, so study must run
    # before write_to_disk
    study
    write_to_disk
    build_dependencies
    
    yield( self ) if block_given?
  end
  
  # The configured output directory, falling back to the directory that
  # contains the host file. The fallback used to be File.basename, which
  # yields the host file's own name rather than a directory, and disagreed
  # with the default chosen in #initialize.
  def output_directory
    @output_directory || File.dirname( @host_file )
  end
  
  # Changes where the grammar source is stored; the source is written to
  # the new location whenever the path actually changes.
  def path=( v )
    previous, @path = @path, v.to_s
    write_to_disk unless previous == @path
  end
  
  def inspect
    sprintf( 'inline grammar %s (%s:%s)', name, @host_file, @host_line )
  end
  
private
  
  # Saves the grammar source to @path (by default <name>.g in the output
  # directory), creating the output directory when it is missing. The file
  # is left untouched when its content already matches, so its modification
  # time does not change needlessly.
  def write_to_disk
    @path ||= output_directory / @name + '.g'
    Dir.mkdir( output_directory ) unless File.directory?( output_directory )
    
    up_to_date = File.file?( @path ) &&
      MD5.digest( @source ) == MD5.digest( File.read( @path ) )
    
    # File.open instead of Kernel#open: the latter would run a command if
    # the path began with '|'
    File.open( @path, 'w' ) { |f| f.write( @source ) } unless up_to_date
  end
end # class Grammar::InlineGrammar
313
# Raised when the ANTLR tool command for a grammar exits unsuccessfully.
# Carries the grammar, the command line, the exit status and the command's
# output (with ANTLR java stack-trace lines removed).
class Grammar::CompilationFailure < StandardError
  # matches ANTLR's own java stack-trace entries so they can be stripped
  # from the reported output
  JAVA_TRACE = /^(org\.)?antlr\.\S+\(\S+\.java:\d+\)\s*/
  attr_reader :grammar, :command, :status, :output
  
  # grammar:: the Grammar that failed to compile
  # command:: the shell command that was run
  # status::  the command's exit status
  # output::  the text the command printed
  def initialize( grammar, command, status, output )
    # @grammar was never assigned before, leaving the #grammar reader
    # permanently nil
    @grammar = grammar
    @command = command
    @status = status
    @output = output.gsub( JAVA_TRACE, '' )
    
    # here_indent! is not defined in this file; presumably it strips the
    # leading "| " margin from the heredoc lines
    message = <<-END.here_indent! % [ command, status, grammar, @output ]
    | command ``%s'' failed with status %s
    | %p
    | ~ ~ ~ command output ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    | %s
    END
    
    super( message.chomp! || message )
  end
end # error Grammar::CompilationFailure
333
# Raised when grammar source text contains no recognizable grammar name and
# type declaration. The message quotes the offending source in full.
class Grammar::FormatError < StandardError
  # The declaration pattern, as shown to the user. It is single-quoted on
  # purpose: in a double-quoted string "\s" is a space and "\S" is a plain
  # "S", which garbled the pattern in the message.
  EXPECTED_DECLARATION = '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'
  
  attr_reader :file, :source
  
  # Shorthand constructor: <tt>FormatError[ source, file ]</tt>.
  def self.[]( *args )
    new( *args )
  end
  
  # source:: the grammar text that could not be understood
  # file::   path the text came from, or nil for an inline grammar
  def initialize( source, file = nil )
    @file = file
    @source = source
    
    heading =
      if file.nil? # inline
        "bad inline grammar source:\n"
      else
        # the file variant used to lack the trailing newline, gluing the
        # separator line onto the heading
        "bad grammar source in file %p:\n" % [ @file ]
      end
    separator = ( "-" * 80 ) + "\n"
    
    quoted = @source.to_s
    quoted += "\n" unless quoted.end_with?( "\n" )
    
    message = [
      heading, separator, quoted, separator,
      "could not locate a grammar name and type declaration matching\n",
      EXPECTED_DECLARATION
    ].join
    super( message )
  end
end # error Grammar::FormatError
365
366end
367end
368