1/* 2 [The "BSD license"] 3 Copyright (c) 2010 Kyle Yetter 4 All rights reserved. 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions 8 are met: 9 1. Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 2. Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 3. The name of the author may not be used to endorse or promote products 15 derived from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29package org.antlr.codegen; 30 31import org.antlr.Tool; 32import org.antlr.tool.Grammar; 33import org.stringtemplate.v4.AttributeRenderer; 34import org.stringtemplate.v4.ST; 35import org.stringtemplate.v4.STGroup; 36 37import java.io.IOException; 38import java.util.*; 39 40public class RubyTarget extends Target 41{ 42 /** A set of ruby keywords which are used to escape labels and method names 43 * which will cause parse errors in the ruby source 44 */ 45 public static final Set rubyKeywords = 46 new HashSet() { 47 { 48 add( "alias" ); add( "END" ); add( "retry" ); 49 add( "and" ); add( "ensure" ); add( "return" ); 50 add( "BEGIN" ); add( "false" ); add( "self" ); 51 add( "begin" ); add( "for" ); add( "super" ); 52 add( "break" ); add( "if" ); add( "then" ); 53 add( "case" ); add( "in" ); add( "true" ); 54 add( "class" ); add( "module" ); add( "undef" ); 55 add( "def" ); add( "next" ); add( "unless" ); 56 add( "defined?" ); add( "nil" ); add( "until" ); 57 add( "do" ); add( "not" ); add( "when" ); 58 add( "else" ); add( "or" ); add( "while" ); 59 add( "elsif" ); add( "redo" ); add( "yield" ); 60 add( "end" ); add( "rescue" ); 61 } 62 }; 63 64 public static Map<String, Map<String, Object>> sharedActionBlocks = new HashMap<String, Map<String, Object>>(); 65 66 public class RubyRenderer implements AttributeRenderer 67 { 68 protected String[] rubyCharValueEscape = new String[256]; 69 70 public RubyRenderer() { 71 for ( int i = 0; i < 16; i++ ) { 72 rubyCharValueEscape[ i ] = "\\x0" + Integer.toHexString( i ); 73 } 74 for ( int i = 16; i < 32; i++ ) { 75 rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i ); 76 } 77 for ( char i = 32; i < 127; i++ ) { 78 rubyCharValueEscape[ i ] = Character.toString( i ); 79 } 80 for ( int i = 127; i < 256; i++ ) { 81 rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i ); 82 } 83 84 rubyCharValueEscape['\n'] = "\\n"; 85 rubyCharValueEscape['\r'] = "\\r"; 86 rubyCharValueEscape['\t'] = "\\t"; 87 rubyCharValueEscape['\b'] = "\\b"; 88 rubyCharValueEscape['\f'] = "\\f"; 89 rubyCharValueEscape['\\'] = "\\\\"; 90 rubyCharValueEscape['"'] = "\\\""; 91 } 92 93 public String toString( Object o, String formatName, Locale locale ) { 94 if ( formatName==null ) { 95 return o.toString(); 96 } 97 98 String idString = o.toString(); 99 100 if ( idString.isEmpty() ) return idString; 101 102 if ( formatName.equals( "snakecase" ) ) { 103 return snakecase( idString ); 104 } else if ( formatName.equals( "camelcase" ) ) { 105 return camelcase( idString ); 106 } else if ( formatName.equals( "subcamelcase" ) ) { 107 return subcamelcase( idString ); 108 } else if ( formatName.equals( "constant" ) ) { 109 return constantcase( idString ); 110 } else if ( formatName.equals( "platform" ) ) { 111 return platform( idString ); 112 } else if ( formatName.equals( "lexerRule" ) ) { 113 return lexerRule( idString ); 114 } else if ( formatName.equals( "constantPath" ) ) { 115 return constantPath( idString ); 116 } else if ( formatName.equals( "rubyString" ) ) { 117 return rubyString( idString ); 118 } else if ( formatName.equals( "label" ) ) { 119 return label( idString ); 120 } else if ( formatName.equals( "symbol" ) ) { 121 return symbol( idString ); 122 } else { 123 throw new IllegalArgumentException( "Unsupported format name" ); 124 } 125 } 126 127 /** given an input string, which is presumed 128 * to contain a word, which may potentially be camelcased, 129 * and convert it to snake_case underscore style. 130 * 131 * algorithm -- 132 * iterate through the string with a sliding window 3 chars wide 133 * 134 * example -- aGUIWhatNot 135 * c c+1 c+2 action 136 * a G << 'a' << '_' // a lower-upper word edge 137 * G U I << 'g' 138 * U I W << 'w' 139 * I W h << 'i' << '_' // the last character in an acronym run of uppers 140 * W h << 'w' 141 * ... and so on 142 */ 143 private String snakecase( String value ) { 144 StringBuilder output_buffer = new StringBuilder(); 145 int l = value.length(); 146 int cliff = l - 1; 147 char cur; 148 char next; 149 char peek; 150 151 if ( value.isEmpty() ) return value; 152 if ( l == 1 ) return value.toLowerCase(); 153 154 for ( int i = 0; i < cliff; i++ ) { 155 cur = value.charAt( i ); 156 next = value.charAt( i + 1 ); 157 158 if ( Character.isLetter( cur ) ) { 159 output_buffer.append( Character.toLowerCase( cur ) ); 160 161 if ( Character.isDigit( next ) || Character.isWhitespace( next ) ) { 162 output_buffer.append( '_' ); 163 } else if ( Character.isLowerCase( cur ) && Character.isUpperCase( next ) ) { 164 // at camelcase word edge 165 output_buffer.append( '_' ); 166 } else if ( ( i < cliff - 1 ) && Character.isUpperCase( cur ) && Character.isUpperCase( next ) ) { 167 // cur is part of an acronym 168 169 peek = value.charAt( i + 2 ); 170 if ( Character.isLowerCase( peek ) ) { 171 /* if next is the start of word (indicated when peek is lowercase) 172 then the acronym must be completed by appending an underscore */ 173 output_buffer.append( '_' ); 174 } 175 } 176 } else if ( Character.isDigit( cur ) ) { 177 output_buffer.append( cur ); 178 if ( Character.isLetter( next ) ) { 179 output_buffer.append( '_' ); 180 } 181 } else if ( Character.isWhitespace( cur ) ) { 182 // do nothing 183 } else { 184 output_buffer.append( cur ); 185 } 186 187 } 188 189 cur = value.charAt( cliff ); 190 if ( ! Character.isWhitespace( cur ) ) { 191 output_buffer.append( Character.toLowerCase( cur ) ); 192 } 193 194 return output_buffer.toString(); 195 } 196 197 private String constantcase( String value ) { 198 return snakecase( value ).toUpperCase(); 199 } 200 201 private String platform( String value ) { 202 return ( "__" + value + "__" ); 203 } 204 205 private String symbol( String value ) { 206 if ( value.matches( "[a-zA-Z_]\\w*[\\?\\!\\=]?" ) ) { 207 return ( ":" + value ); 208 } else { 209 return ( "%s(" + value + ")" ); 210 } 211 } 212 213 private String lexerRule( String value ) { 214 // System.out.print( "lexerRule( \"" + value + "\") => " ); 215 if ( value.equals( "Tokens" ) ) { 216 // System.out.println( "\"token!\"" ); 217 return "token!"; 218 } else { 219 // String result = snakecase( value ) + "!"; 220 // System.out.println( "\"" + result + "\"" ); 221 return ( snakecase( value ) + "!" ); 222 } 223 } 224 225 private String constantPath( String value ) { 226 return value.replaceAll( "\\.", "::" ); 227 } 228 229 private String rubyString( String value ) { 230 StringBuilder output_buffer = new StringBuilder(); 231 int len = value.length(); 232 233 output_buffer.append( '"' ); 234 for ( int i = 0; i < len; i++ ) { 235 output_buffer.append( rubyCharValueEscape[ value.charAt( i ) ] ); 236 } 237 output_buffer.append( '"' ); 238 return output_buffer.toString(); 239 } 240 241 private String camelcase( String value ) { 242 StringBuilder output_buffer = new StringBuilder(); 243 int cliff = value.length(); 244 char cur; 245 char next; 246 boolean at_edge = true; 247 248 if ( value.isEmpty() ) return value; 249 if ( cliff == 1 ) return value.toUpperCase(); 250 251 for ( int i = 0; i < cliff; i++ ) { 252 cur = value.charAt( i ); 253 254 if ( Character.isWhitespace( cur ) ) { 255 at_edge = true; 256 continue; 257 } else if ( cur == '_' ) { 258 at_edge = true; 259 continue; 260 } else if ( Character.isDigit( cur ) ) { 261 output_buffer.append( cur ); 262 at_edge = true; 263 continue; 264 } 265 266 if ( at_edge ) { 267 output_buffer.append( Character.toUpperCase( cur ) ); 268 if ( Character.isLetter( cur ) ) at_edge = false; 269 } else { 270 output_buffer.append( cur ); 271 } 272 } 273 274 return output_buffer.toString(); 275 } 276 277 private String label( String value ) { 278 if ( rubyKeywords.contains( value ) ) { 279 return platform( value ); 280 } else if ( Character.isUpperCase( value.charAt( 0 ) ) && 281 ( !value.equals( "FILE" ) ) && 282 ( !value.equals( "LINE" ) ) ) { 283 return platform( value ); 284 } else if ( value.equals( "FILE" ) ) { 285 return "_FILE_"; 286 } else if ( value.equals( "LINE" ) ) { 287 return "_LINE_"; 288 } else { 289 return value; 290 } 291 } 292 293 private String subcamelcase( String value ) { 294 value = camelcase( value ); 295 if ( value.isEmpty() ) 296 return value; 297 Character head = Character.toLowerCase( value.charAt( 0 ) ); 298 String tail = value.substring( 1 ); 299 return head.toString().concat( tail ); 300 } 301 } 302 303 protected void genRecognizerFile( 304 Tool tool, 305 CodeGenerator generator, 306 Grammar grammar, 307 ST outputFileST 308 ) throws IOException 309 { 310 /* 311 Below is an experimental attempt at providing a few named action blocks 312 that are printed in both lexer and parser files from combined grammars. 313 ANTLR appears to first generate a parser, then generate an independent lexer, 314 and then generate code from that. It keeps the combo/parser grammar object 315 and the lexer grammar object, as well as their respective code generator and 316 target instances, completely independent. So, while a bit hack-ish, this is 317 a solution that should work without having to modify Terrence Parr's 318 core tool code. 319 320 - sharedActionBlocks is a class variable containing a hash map 321 - if this method is called with a combo grammar, and the action map 322 in the grammar contains an entry for the named scope "all", 323 add an entry to sharedActionBlocks mapping the grammar name to 324 the "all" action map. 325 - if this method is called with an `implicit lexer' 326 (one that's extracted from a combo grammar), check to see if 327 there's an entry in sharedActionBlocks for the lexer's grammar name. 328 - if there is an action map entry, place it in the lexer's action map 329 - the recognizerFile template has code to place the 330 "all" actions appropriately 331 332 problems: 333 - This solution assumes that the parser will be generated 334 before the lexer. If that changes at some point, this will 335 not work. 336 - I have not investigated how this works with delegation yet 337 338 Kyle Yetter - March 25, 2010 339 */ 340 341 if ( grammar.type == Grammar.COMBINED ) { 342 Map<String, Map<String, Object>> actions = grammar.getActions(); 343 if ( actions.containsKey( "all" ) ) { 344 sharedActionBlocks.put( grammar.name, actions.get( "all" ) ); 345 } 346 } else if ( grammar.implicitLexer ) { 347 if ( sharedActionBlocks.containsKey( grammar.name ) ) { 348 Map<String, Map<String, Object>> actions = grammar.getActions(); 349 actions.put( "all", sharedActionBlocks.get( grammar.name ) ); 350 } 351 } 352 353 STGroup group = generator.getTemplates(); 354 RubyRenderer renderer = new RubyRenderer(); 355 try { 356 group.registerRenderer( Class.forName( "java.lang.String" ), renderer ); 357 } catch ( ClassNotFoundException e ) { 358 // this shouldn't happen 359 System.err.println( "ClassNotFoundException: " + e.getMessage() ); 360 e.printStackTrace( System.err ); 361 } 362 String fileName = 363 generator.getRecognizerFileName( grammar.name, grammar.type ); 364 generator.write( outputFileST, fileName ); 365 } 366 367 public String getTargetCharLiteralFromANTLRCharLiteral( 368 CodeGenerator generator, 369 String literal 370 ) 371 { 372 int code_point = 0; 373 literal = literal.substring( 1, literal.length() - 1 ); 374 375 if ( literal.charAt( 0 ) == '\\' ) { 376 switch ( literal.charAt( 1 ) ) { 377 case '\\': 378 case '"': 379 case '\'': 380 code_point = literal.codePointAt( 1 ); 381 break; 382 case 'n': 383 code_point = 10; 384 break; 385 case 'r': 386 code_point = 13; 387 break; 388 case 't': 389 code_point = 9; 390 break; 391 case 'b': 392 code_point = 8; 393 break; 394 case 'f': 395 code_point = 12; 396 break; 397 case 'u': // Assume unnnn 398 code_point = Integer.parseInt( literal.substring( 2 ), 16 ); 399 break; 400 default: 401 System.out.println( "1: hey you didn't account for this: \"" + literal + "\"" ); 402 break; 403 } 404 } else if ( literal.length() == 1 ) { 405 code_point = literal.codePointAt( 0 ); 406 } else { 407 System.out.println( "2: hey you didn't account for this: \"" + literal + "\"" ); 408 } 409 410 return ( "0x" + Integer.toHexString( code_point ) ); 411 } 412 413 public int getMaxCharValue( CodeGenerator generator ) 414 { 415 // Versions before 1.9 do not support unicode 416 return 0xFF; 417 } 418 419 public String getTokenTypeAsTargetLabel( CodeGenerator generator, int ttype ) 420 { 421 String name = generator.grammar.getTokenDisplayName( ttype ); 422 // If name is a literal, return the token type instead 423 if ( name.charAt( 0 )=='\'' ) { 424 return generator.grammar.computeTokenNameFromLiteral( ttype, name ); 425 } 426 return name; 427 } 428 429 public boolean isValidActionScope( int grammarType, String scope ) { 430 if ( scope.equals( "all" ) ) { 431 return true; 432 } 433 if ( scope.equals( "token" ) ) { 434 return true; 435 } 436 if ( scope.equals( "module" ) ) { 437 return true; 438 } 439 if ( scope.equals( "overrides" ) ) { 440 return true; 441 } 442 443 switch ( grammarType ) { 444 case Grammar.LEXER: 445 if ( scope.equals( "lexer" ) ) { 446 return true; 447 } 448 break; 449 case Grammar.PARSER: 450 if ( scope.equals( "parser" ) ) { 451 return true; 452 } 453 break; 454 case Grammar.COMBINED: 455 if ( scope.equals( "parser" ) ) { 456 return true; 457 } 458 if ( scope.equals( "lexer" ) ) { 459 return true; 460 } 461 break; 462 case Grammar.TREE_PARSER: 463 if ( scope.equals( "treeparser" ) ) { 464 return true; 465 } 466 break; 467 } 468 return false; 469 } 470 471 public String encodeIntAsCharEscape( final int v ) { 472 final int intValue; 473 474 if ( v == 65535 ) { 475 intValue = -1; 476 } else { 477 intValue = v; 478 } 479 480 return String.valueOf( intValue ); 481 } 482} 483