#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3'
require 'antlr3/test/core-extensions'
require 'antlr3/test/call-stack'

# Digest::MD5 ships with every Ruby version; the legacy top-level `md5'
# library does not exist on Ruby >= 2.0, so always load digest/md5 and only
# define the MD5 shortcut when nothing else has defined it already.
require 'digest/md5'
MD5 = Digest::MD5 unless defined?( MD5 )

module ANTLR3
module Test

# Mixin for objects that are generated from other files.
#
# The including class must provide +target_files+ (the list of generated
# file paths); +stale?+ compares those against the recorded dependencies.
module DependantFile
  attr_accessor :path, :force
  alias force? force

  # Dependency paths copied into every new dependant's dependency list.
  # Intentionally mutable: Grammar.global_dependency appends to it.
  GLOBAL_DEPENDENCIES = []

  # The list of dependency paths for this object, seeded (once) with a copy
  # of GLOBAL_DEPENDENCIES.
  def dependencies
    @dependencies ||= GLOBAL_DEPENDENCIES.clone
  end

  # Registers +path+ as a dependency, but only if it names an existing
  # regular file. Returns the expanded path either way.
  def depends_on( path )
    path = File.expand_path( path.to_s )
    dependencies << path if test( ?f, path )
    return path
  end

  # True when +force+ is set, when any target file is missing, or when any
  # dependency has a newer modification time than a target.
  def stale?
    force and return( true )
    target_files.any? do |target|
      !test( ?f, target ) ||
        dependencies.any? { |dep| test( ?>, dep, target ) }
    end
  end
end # module DependantFile

# Wraps an ANTLR grammar file: extracts its name and type from the source,
# works out which ruby files the ANTLR tool will generate for it, and runs
# the tool when those files are missing or out of date.
#
# NOTE(review): paths are joined with String#/, which is presumably provided
# by antlr3/test/core-extensions -- confirm.
class Grammar
  include DependantFile

  GRAMMAR_TYPES = %w(lexer parser tree combined)
  TYPE_TO_CLASS = {
    'lexer'  => 'Lexer',
    'parser' => 'Parser',
    'tree'   => 'TreeParser'
  }
  CLASS_TO_TYPE = TYPE_TO_CLASS.invert

  # Adds +path+ to the dependencies shared by all grammars created
  # afterwards, provided it names an existing regular file. Returns the
  # expanded path.
  def self.global_dependency( path )
    path = File.expand_path( path.to_s )
    GLOBAL_DEPENDENCIES << path if test( ?f, path )
    return path
  end

  # Builds an InlineGrammar from grammar +source+ text. Any block given is
  # forwarded to the constructor, which yields the new grammar to it.
  def self.inline( source, *args, &block )
    InlineGrammar.new( source, *args, &block )
  end

  ##################################################################
  ######## CONSTRUCTOR #############################################
  ##################################################################

  # Reads the grammar at +path+ and analyses it.
  #
  # Recognised options:
  #   :output_directory  where generated files go (default '.')
  #   :verbose           echo the compile command to stderr (default $VERBOSE)
  #
  # Raises Grammar::FormatError if no grammar declaration is found.
  # Yields the new grammar if a block is given.
  def initialize( path, options = {} )
    @path = path.to_s
    @source = File.read( @path )
    @output_directory = options.fetch( :output_directory, '.' )
    @verbose = options.fetch( :verbose, $VERBOSE )
    study
    build_dependencies

    yield( self ) if block_given?
  end

  ##################################################################
  ######## ATTRIBUTES AND ATTRIBUTE-ISH METHODS ####################
  ##################################################################
  attr_reader :type, :name, :source
  attr_accessor :output_directory, :verbose

  def lexer_class_name
    name + "::Lexer"
  end

  # File name of the generated lexer, or nil if this grammar has none.
  def lexer_file_name
    if lexer? then base = name
    elsif combined? then base = name + 'Lexer'
    else return( nil )
    end
    return( base + '.rb' )
  end

  def parser_class_name
    name + "::Parser"
  end

  # File name of the generated parser, or nil if this grammar has none.
  def parser_file_name
    if parser? then base = name
    elsif combined? then base = name + 'Parser'
    else return( nil )
    end
    return( base + '.rb' )
  end

  def tree_parser_class_name
    name + "::TreeParser"
  end

  # File name of the generated tree parser, or false for non-tree grammars.
  def tree_parser_file_name
    tree? and name + '.rb'
  end

  def has_lexer?
    @type == 'combined' || @type == 'lexer'
  end

  def has_parser?
    @type == 'combined' || @type == 'parser'
  end

  def lexer?
    @type == "lexer"
  end

  def parser?
    @type == "parser"
  end

  def tree?
    @type == "tree"
  end

  alias has_tree? tree?

  def combined?
    @type == "combined"
  end

  # Paths of every file this grammar is expected to generate. The files
  # derived from `import' statements are included unless +include_imports+
  # is false.
  def target_files( include_imports = true )
    targets = []

    %w(lexer parser tree_parser).each do |target_type|
      # the *_file_name methods return nil/false when not applicable
      target_name = send( :"#{ target_type }_file_name" ) or next
      targets.push( output_directory / target_name )
    end

    targets.concat( imported_target_files ) if include_imports
    return targets
  end

  # Names of the grammars pulled in by `import Name;' statements.
  def imports
    @source.scan( /^\s*import\s+(\w+)\s*;/ ).flatten
  end

  # Target paths of the form "<name>_<delegate>.rb", one per import.
  def imported_target_files
    imports.map do |delegate|
      output_directory / "#{ @name }_#{ delegate }.rb"
    end
  end

  ##################################################################
  ##### COMMAND METHODS ############################################
  ##################################################################

  # Compiles the grammar only if options[ :force ] is set or the targets
  # are stale. Returns the target file list when compiled, nil otherwise.
  def compile( options = {} )
    if options[ :force ] or stale?
      compile!( options )
    end
  end

  # Unconditionally runs the ANTLR tool on this grammar.
  #
  # Raises Grammar::CompilationFailure (after deleting any target files)
  # when the command exits with a status other than 0 or 130.
  # Returns the target file list.
  def compile!( options = {} )
    command = build_command( options )

    blab( command )
    output = IO.popen( command ) { |pipe| pipe.read }

    # NOTE(review): 130 is accepted alongside 0; presumably the tool (or an
    # interrupted run) reports it in a case considered harmless -- confirm.
    case status = $?.exitstatus
    when 0, 130
      post_compile( options )
    else compilation_failure!( command, status, output )
    end

    return target_files
  end

  # Deletes every existing target file; returns the paths that were removed.
  def clean!
    deleted = []
    target_files.each do |target|
      if test( ?f, target )
        File.delete( target )
        deleted << target
      end
    end
    return deleted
  end

  def inspect
    sprintf( "grammar %s (%s)", @name, @path )
  end

private

  def post_compile( options )
    # do nothing for now
  end

  # Prints a line to stderr when verbose. +string+ is treated as a format
  # string only when format arguments are supplied, so text that happens to
  # contain `%' (such as a shell command) is printed verbatim.
  def blab( string, *args )
    @verbose or return
    text = args.empty? ? string : ( string % args )
    $stderr.print( text + "\n" )
  end

  def default_antlr_jar
    ENV[ 'ANTLR_JAR' ] || ANTLR3.antlr_jar
  end

  # Removes any (possibly partial) output, then raises CompilationFailure.
  def compilation_failure!( command, status, output )
    target_files.each do |file|
      test( ?f, file ) and File.delete( file )
    end
    raise CompilationFailure.new( self, command, status, output )
  end

  # Records the grammar file itself as a dependency and, when the grammar
  # declares a tokenVocab, the matching .tokens file in the output directory
  # and the .g file next to this grammar.
  def build_dependencies
    depends_on( @path )

    if @source =~ /tokenVocab\s*=\s*(\S+)\s*;/
      foreign_grammar_name = $1
      token_file = output_directory / foreign_grammar_name + '.tokens'
      grammar_file = File.dirname( path ) / foreign_grammar_name + '.g'
      depends_on( token_file )
      depends_on( grammar_file )
    end
  end

  # Escapes +token+ for use as a single shell word: every character outside
  # the safe set is prefixed with a backslash, and newlines are wrapped in
  # single quotes. An empty token becomes ''.
  def shell_escape( token )
    token = token.to_s.dup
    token.empty? and return "''"
    # a block is used instead of a replacement string so that no backslash
    # sequence in the replacement can be misread by gsub
    token.gsub!( /([^A-Za-z0-9_\-.,:\/@\n])/n ) { |char| "\\" + char }
    token.gsub!( /\n/, "'\n'" )
    return token
  end

  # Assembles the shell command line that runs org.antlr.Tool on this
  # grammar, with stderr redirected into stdout.
  #
  # Recognised options: :antlr_jar, :profile, :debug, :trace, :debug_st.
  def build_command( options )
    parts = %w(java)
    # block form: the default jar is only looked up when none was supplied
    jar_path = options.fetch( :antlr_jar ) { default_antlr_jar }
    parts.push( '-cp', jar_path )
    parts << 'org.antlr.Tool'
    parts.push( '-fo', output_directory )
    options[ :profile ]  and parts << '-profile'
    options[ :debug ]    and parts << '-debug'
    options[ :trace ]    and parts << '-trace'
    options[ :debug_st ] and parts << '-XdbgST'
    parts << File.expand_path( @path )
    parts.map { |part| shell_escape( part ) }.join( ' ' ) << ' 2>&1'
  end

  # Extracts @name and @type from the grammar declaration; a declaration
  # without a lexer/parser/tree prefix is a 'combined' grammar.
  # Raises Grammar::FormatError if there is no declaration.
  def study
    @source =~ /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/ or
      raise Grammar::FormatError[ source, path ]
    @name = $2
    @type = $1 || 'combined'
  end
end # class Grammar

# A grammar whose source is given as a string inside a ruby file. The source
# is written out to a .g file so the ANTLR tool can process it.
class Grammar::InlineGrammar < Grammar
  attr_accessor :host_file, :host_line

  # Recognised options:
  #   :file, :line       location of the grammar text (default: the first
  #                      call-stack entry outside of this file)
  #   :output_directory  default: the directory containing the host file
  #   :verbose           default: $VERBOSE
  #
  # Raises Grammar::FormatError if no grammar declaration is found.
  # Yields the new grammar if a block is given.
  def initialize( source, options = {} )
    host = call_stack.find { |call| call.file != __FILE__ }

    @host_file = File.expand_path( options[ :file ] || host.file )
    @host_line = ( options[ :line ] || host.line )
    @output_directory = options.fetch( :output_directory, File.dirname( @host_file ) )
    @verbose = options.fetch( :verbose, $VERBOSE )

    # NOTE(review): fixed_indent is not defined here; presumably a String
    # extension that re-indents the text to the given column -- confirm.
    @source = source.to_s.fixed_indent( 0 )
    @source.strip!

    study
    write_to_disk
    build_dependencies

    yield( self ) if block_given?
  end

  # The configured output directory, falling back to the directory that
  # contains the host file (the same default the constructor uses).
  def output_directory
    @output_directory and return @output_directory
    File.dirname( @host_file )
  end

  # Changes the grammar file location, writing the source to the new path
  # when it differs from the previous one.
  def path=( v )
    previous, @path = @path, v.to_s
    previous == @path or write_to_disk
  end

  def inspect
    sprintf( 'inline grammar %s (%s:%s)', name, @host_file, @host_line )
  end

private

  # Writes the grammar source to @path (default: "<name>.g" in the output
  # directory), creating the output directory if needed. The file is left
  # untouched when its content already matches, so its modification time
  # does not change.
  def write_to_disk
    @path ||= output_directory / @name + '.g'
    test( ?d, output_directory ) or Dir.mkdir( output_directory )
    unless test( ?f, @path ) and MD5.digest( @source ) == MD5.digest( File.read( @path ) )
      File.open( @path, 'w' ) { |f| f.write( @source ) }
    end
  end
end # class Grammar::InlineGrammar

# Raised when the ANTLR tool exits with a failure status.
class Grammar::CompilationFailure < StandardError
  # matches java stack-trace lines originating in antlr packages
  JAVA_TRACE = /^(org\.)?antlr\.\S+\(\S+\.java:\d+\)\s*/
  attr_reader :grammar, :command, :status, :output

  # +grammar+ is the Grammar being compiled, +command+ the shell command
  # that was run, +status+ its exit status and +output+ its captured output
  # (stored with JAVA_TRACE lines removed).
  def initialize( grammar, command, status, output )
    @grammar = grammar
    @command = command
    @status = status
    @output = output.to_s.gsub( JAVA_TRACE, '' )

    # NOTE(review): here_indent! is not defined here; presumably a String
    # extension that strips everything up to the `| ' margin -- confirm.
    message = <<-END.here_indent! % [ command, status, grammar, @output ]
    | command ``%s'' failed with status %s
    | %p
    | ~ ~ ~ command output ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    | %s
    END

    super( message.chomp! || message )
  end
end # error Grammar::CompilationFailure

# Raised when grammar source lacks a recognisable grammar declaration.
class Grammar::FormatError < StandardError
  attr_reader :file, :source

  def self.[]( *args )
    new( *args )
  end

  # +source+ is the offending grammar text; +file+ is the path it was read
  # from, or nil for an inline grammar.
  def initialize( source, file = nil )
    @file = file
    @source = source
    rule = '-' * 80

    message = ''.dup
    if file.nil? # inline
      message << "bad inline grammar source:\n"
    else
      message << ( 'bad grammar source in file %p' % [ @file ] ) << "\n"
    end
    message << rule << "\n"
    message << @source.to_s
    message[ -1, 1 ] == "\n" or message << "\n"
    message << rule << "\n"
    message << "could not locate a grammar name and type declaration matching\n"
    # single-quoted so the pattern is shown verbatim: in a double-quoted
    # string \s would turn into a space and \S into a plain `S'
    message << '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'

    super( message )
  end
end # error Grammar::FormatError

end
end