1/*
2 [The "BSD license"]
3 Copyright (c) 2010 Kyle Yetter
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9 1. Redistributions of source code must retain the above copyright
10    notice, this list of conditions and the following disclaimer.
11 2. Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 3. The name of the author may not be used to endorse or promote products
15    derived from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29package org.antlr.codegen;
30
31import org.antlr.Tool;
32import org.antlr.tool.Grammar;
33import org.stringtemplate.v4.AttributeRenderer;
34import org.stringtemplate.v4.ST;
35import org.stringtemplate.v4.STGroup;
36
37import java.io.IOException;
38import java.util.*;
39
40public class RubyTarget extends Target
41{
42    /** A set of ruby keywords which are used to escape labels and method names
43     *  which will cause parse errors in the ruby source
44     */
45    public static final Set rubyKeywords =
46    new HashSet() {
47        {
48        	add( "alias" );     add( "END" );     add( "retry" );
49        	add( "and" );       add( "ensure" );  add( "return" );
50        	add( "BEGIN" );     add( "false" );   add( "self" );
51        	add( "begin" );     add( "for" );     add( "super" );
52        	add( "break" );     add( "if" );      add( "then" );
53        	add( "case" );      add( "in" );      add( "true" );
54        	add( "class" );     add( "module" );  add( "undef" );
55        	add( "def" );       add( "next" );    add( "unless" );
56        	add( "defined?" );  add( "nil" );     add( "until" );
57        	add( "do" );        add( "not" );     add( "when" );
58        	add( "else" );      add( "or" );      add( "while" );
59        	add( "elsif" );     add( "redo" );    add( "yield" );
60        	add( "end" );       add( "rescue" );
61        }
62    };
63
64    public static Map<String, Map<String, Object>> sharedActionBlocks = new HashMap<String, Map<String, Object>>();
65
66    public class RubyRenderer implements AttributeRenderer
67    {
68    	protected String[] rubyCharValueEscape = new String[256];
69
70    	public RubyRenderer() {
71    		for ( int i = 0; i < 16; i++ ) {
72    			rubyCharValueEscape[ i ] = "\\x0" + Integer.toHexString( i );
73    		}
74    		for ( int i = 16; i < 32; i++ ) {
75    			rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i );
76    		}
77    		for ( char i = 32; i < 127; i++ ) {
78    			rubyCharValueEscape[ i ] = Character.toString( i );
79    		}
80    		for ( int i = 127; i < 256; i++ ) {
81    			rubyCharValueEscape[ i ] = "\\x" + Integer.toHexString( i );
82    		}
83
84    		rubyCharValueEscape['\n'] = "\\n";
85    		rubyCharValueEscape['\r'] = "\\r";
86    		rubyCharValueEscape['\t'] = "\\t";
87    		rubyCharValueEscape['\b'] = "\\b";
88    		rubyCharValueEscape['\f'] = "\\f";
89    		rubyCharValueEscape['\\'] = "\\\\";
90    		rubyCharValueEscape['"'] = "\\\"";
91    	}
92
93        public String toString( Object o, String formatName, Locale locale ) {
94			if ( formatName==null ) {
95				return o.toString();
96			}
97
98            String idString = o.toString();
99
100            if ( idString.isEmpty() ) return idString;
101
102            if ( formatName.equals( "snakecase" ) ) {
103                return snakecase( idString );
104            } else if ( formatName.equals( "camelcase" ) ) {
105                return camelcase( idString );
106            } else if ( formatName.equals( "subcamelcase" ) ) {
107                return subcamelcase( idString );
108            } else if ( formatName.equals( "constant" ) ) {
109                return constantcase( idString );
110            } else if ( formatName.equals( "platform" ) ) {
111                return platform( idString );
112            } else if ( formatName.equals( "lexerRule" ) ) {
113                return lexerRule( idString );
114            } else if ( formatName.equals( "constantPath" ) ) {
115            	return constantPath( idString );
116            } else if ( formatName.equals( "rubyString" ) ) {
117                return rubyString( idString );
118            } else if ( formatName.equals( "label" ) ) {
119                return label( idString );
120            } else if ( formatName.equals( "symbol" ) ) {
121                return symbol( idString );
122            } else {
123                throw new IllegalArgumentException( "Unsupported format name" );
124            }
125        }
126
127        /** given an input string, which is presumed
128         * to contain a word, which may potentially be camelcased,
129         * and convert it to snake_case underscore style.
130         *
131         * algorithm --
132         *   iterate through the string with a sliding window 3 chars wide
133         *
134         * example -- aGUIWhatNot
135         *   c   c+1 c+2  action
136         *   a   G        << 'a' << '_'  // a lower-upper word edge
137         *   G   U   I    << 'g'
138         *   U   I   W    << 'w'
139         *   I   W   h    << 'i' << '_'  // the last character in an acronym run of uppers
140         *   W   h        << 'w'
141         *   ... and so on
142         */
143        private String snakecase( String value ) {
144            StringBuilder output_buffer = new StringBuilder();
145            int l = value.length();
146            int cliff = l - 1;
147            char cur;
148            char next;
149            char peek;
150
151            if ( value.isEmpty() ) return value;
152            if ( l == 1 ) return value.toLowerCase();
153
154            for ( int i = 0; i < cliff; i++ ) {
155                cur  = value.charAt( i );
156                next = value.charAt( i + 1 );
157
158                if ( Character.isLetter( cur ) ) {
159                    output_buffer.append( Character.toLowerCase( cur ) );
160
161                    if ( Character.isDigit( next ) || Character.isWhitespace( next ) ) {
162                        output_buffer.append( '_' );
163                    } else if ( Character.isLowerCase( cur ) && Character.isUpperCase( next ) ) {
164                        // at camelcase word edge
165                        output_buffer.append( '_' );
166                    } else if ( ( i < cliff - 1 ) && Character.isUpperCase( cur ) && Character.isUpperCase( next ) ) {
167                        // cur is part of an acronym
168
169                        peek = value.charAt( i + 2 );
170                        if ( Character.isLowerCase( peek ) ) {
171                            /* if next is the start of word (indicated when peek is lowercase)
172                                         then the acronym must be completed by appending an underscore */
173                            output_buffer.append( '_' );
174                        }
175                    }
176                } else if ( Character.isDigit( cur ) ) {
177                    output_buffer.append( cur );
178                    if ( Character.isLetter( next ) ) {
179                        output_buffer.append( '_' );
180                    }
181                } else if ( Character.isWhitespace( cur ) ) {
182                    // do nothing
183                } else {
184                    output_buffer.append( cur );
185                }
186
187            }
188
189            cur  = value.charAt( cliff );
190            if ( ! Character.isWhitespace( cur ) ) {
191                output_buffer.append( Character.toLowerCase( cur ) );
192            }
193
194            return output_buffer.toString();
195        }
196
197        private String constantcase( String value ) {
198            return snakecase( value ).toUpperCase();
199        }
200
201        private String platform( String value ) {
202            return ( "__" + value + "__" );
203        }
204
205        private String symbol( String value ) {
206            if ( value.matches( "[a-zA-Z_]\\w*[\\?\\!\\=]?" ) ) {
207                return ( ":" + value );
208            } else {
209                return ( "%s(" + value + ")" );
210            }
211        }
212
213        private String lexerRule( String value ) {
214					  // System.out.print( "lexerRule( \"" + value + "\") => " );
215            if ( value.equals( "Tokens" ) ) {
216							  // System.out.println( "\"token!\"" );
217                return "token!";
218            } else {
219							  // String result = snakecase( value ) + "!";
220								// System.out.println( "\"" + result + "\"" );
221                return ( snakecase( value ) + "!" );
222            }
223        }
224
225        private String constantPath( String value ) {
226            return value.replaceAll( "\\.", "::" );
227        }
228
229        private String rubyString( String value ) {
230        	StringBuilder output_buffer = new StringBuilder();
231        	int len = value.length();
232
233        	output_buffer.append( '"' );
234        	for ( int i = 0; i < len; i++ ) {
235        		output_buffer.append( rubyCharValueEscape[ value.charAt( i ) ] );
236        	}
237        	output_buffer.append( '"' );
238        	return output_buffer.toString();
239        }
240
241        private String camelcase( String value ) {
242            StringBuilder output_buffer = new StringBuilder();
243            int cliff = value.length();
244            char cur;
245            char next;
246            boolean at_edge = true;
247
248            if ( value.isEmpty() ) return value;
249            if ( cliff == 1 ) return value.toUpperCase();
250
251            for ( int i = 0; i < cliff; i++ ) {
252                cur  = value.charAt( i );
253
254                if ( Character.isWhitespace( cur ) ) {
255                    at_edge = true;
256                    continue;
257                } else if ( cur == '_' ) {
258                    at_edge = true;
259                    continue;
260                } else if ( Character.isDigit( cur ) ) {
261                    output_buffer.append( cur );
262                    at_edge = true;
263                    continue;
264                }
265
266                if ( at_edge ) {
267                    output_buffer.append( Character.toUpperCase( cur ) );
268                    if ( Character.isLetter( cur ) ) at_edge = false;
269                } else {
270                    output_buffer.append( cur );
271                }
272            }
273
274            return output_buffer.toString();
275        }
276
277        private String label( String value ) {
278            if ( rubyKeywords.contains( value ) ) {
279                return platform( value );
280            } else if ( Character.isUpperCase( value.charAt( 0 ) ) &&
281                        ( !value.equals( "FILE" ) ) &&
282                        ( !value.equals( "LINE" ) ) ) {
283                return platform( value );
284            } else if ( value.equals( "FILE" ) ) {
285                return "_FILE_";
286            } else if ( value.equals( "LINE" ) ) {
287                return "_LINE_";
288            } else {
289                return value;
290            }
291        }
292
293        private String subcamelcase( String value ) {
294            value = camelcase( value );
295            if ( value.isEmpty() )
296                return value;
297            Character head = Character.toLowerCase( value.charAt( 0 ) );
298            String tail = value.substring( 1 );
299            return head.toString().concat( tail );
300        }
301    }
302
303    protected void genRecognizerFile(
304    		Tool tool,
305    		CodeGenerator generator,
306    		Grammar grammar,
307    		ST outputFileST
308    ) throws IOException
309    {
310        /*
311            Below is an experimental attempt at providing a few named action blocks
312            that are printed in both lexer and parser files from combined grammars.
313            ANTLR appears to first generate a parser, then generate an independent lexer,
314            and then generate code from that. It keeps the combo/parser grammar object
315            and the lexer grammar object, as well as their respective code generator and
316            target instances, completely independent. So, while a bit hack-ish, this is
317            a solution that should work without having to modify Terrence Parr's
318            core tool code.
319
320            - sharedActionBlocks is a class variable containing a hash map
321            - if this method is called with a combo grammar, and the action map
322              in the grammar contains an entry for the named scope "all",
323              add an entry to sharedActionBlocks mapping the grammar name to
324              the "all" action map.
325            - if this method is called with an `implicit lexer'
326              (one that's extracted from a combo grammar), check to see if
327              there's an entry in sharedActionBlocks for the lexer's grammar name.
328            - if there is an action map entry, place it in the lexer's action map
329            - the recognizerFile template has code to place the
330              "all" actions appropriately
331
332            problems:
333              - This solution assumes that the parser will be generated
334                before the lexer. If that changes at some point, this will
335                not work.
336              - I have not investigated how this works with delegation yet
337
338            Kyle Yetter - March 25, 2010
339        */
340
341        if ( grammar.type == Grammar.COMBINED ) {
342            Map<String, Map<String, Object>> actions = grammar.getActions();
343            if ( actions.containsKey( "all" ) ) {
344                sharedActionBlocks.put( grammar.name, actions.get( "all" ) );
345            }
346        } else if ( grammar.implicitLexer ) {
347            if ( sharedActionBlocks.containsKey( grammar.name ) ) {
348                Map<String, Map<String, Object>> actions = grammar.getActions();
349                actions.put( "all", sharedActionBlocks.get( grammar.name ) );
350            }
351        }
352
353        STGroup group = generator.getTemplates();
354        RubyRenderer renderer = new RubyRenderer();
355        try {
356            group.registerRenderer( Class.forName( "java.lang.String" ), renderer );
357        } catch ( ClassNotFoundException e ) {
358            // this shouldn't happen
359            System.err.println( "ClassNotFoundException: " + e.getMessage() );
360            e.printStackTrace( System.err );
361        }
362        String fileName =
363            generator.getRecognizerFileName( grammar.name, grammar.type );
364        generator.write( outputFileST, fileName );
365    }
366
367    public String getTargetCharLiteralFromANTLRCharLiteral(
368        CodeGenerator generator,
369        String literal
370    )
371    {
372        int code_point = 0;
373        literal = literal.substring( 1, literal.length() - 1 );
374
375        if ( literal.charAt( 0 ) == '\\' ) {
376            switch ( literal.charAt( 1 ) ) {
377                case    '\\':
378                case    '"':
379                case    '\'':
380                    code_point = literal.codePointAt( 1 );
381                    break;
382                case    'n':
383                    code_point = 10;
384                    break;
385                case    'r':
386                    code_point = 13;
387                    break;
388                case    't':
389                    code_point = 9;
390                    break;
391                case    'b':
392                    code_point = 8;
393                    break;
394                case    'f':
395                    code_point = 12;
396                    break;
397                case    'u':    // Assume unnnn
398                    code_point = Integer.parseInt( literal.substring( 2 ), 16 );
399                    break;
400                default:
401                    System.out.println( "1: hey you didn't account for this: \"" + literal + "\"" );
402                    break;
403            }
404        } else if ( literal.length() == 1 ) {
405            code_point = literal.codePointAt( 0 );
406        } else {
407            System.out.println( "2: hey you didn't account for this: \"" + literal + "\"" );
408        }
409
410        return ( "0x" + Integer.toHexString( code_point ) );
411    }
412
413    public int getMaxCharValue( CodeGenerator generator )
414    {
415        // Versions before 1.9 do not support unicode
416        return 0xFF;
417    }
418
419    public String getTokenTypeAsTargetLabel( CodeGenerator generator, int ttype )
420    {
421        String name = generator.grammar.getTokenDisplayName( ttype );
422        // If name is a literal, return the token type instead
423        if ( name.charAt( 0 )=='\'' ) {
424            return generator.grammar.computeTokenNameFromLiteral( ttype, name );
425        }
426        return name;
427    }
428
429    public boolean isValidActionScope( int grammarType, String scope ) {
430        if ( scope.equals( "all" ) )       {
431            return true;
432        }
433        if ( scope.equals( "token" ) )     {
434            return true;
435        }
436        if ( scope.equals( "module" ) )    {
437            return true;
438        }
439        if ( scope.equals( "overrides" ) ) {
440            return true;
441        }
442
443        switch ( grammarType ) {
444        case Grammar.LEXER:
445            if ( scope.equals( "lexer" ) ) {
446                return true;
447            }
448            break;
449        case Grammar.PARSER:
450            if ( scope.equals( "parser" ) ) {
451                return true;
452            }
453            break;
454        case Grammar.COMBINED:
455            if ( scope.equals( "parser" ) ) {
456                return true;
457            }
458            if ( scope.equals( "lexer" ) ) {
459                return true;
460            }
461            break;
462        case Grammar.TREE_PARSER:
463            if ( scope.equals( "treeparser" ) ) {
464                return true;
465            }
466            break;
467        }
468        return false;
469    }
470
471    public String encodeIntAsCharEscape( final int v ) {
472        final int intValue;
473
474        if ( v == 65535 ) {
475            intValue = -1;
476        } else {
477            intValue = v;
478        }
479
480        return String.valueOf( intValue );
481    }
482}
483