1#!/usr/bin/env python
2#
3# Copyright (C) 2007 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17#
18# Using instructions from an architecture-specific config file, generate C
19# and assembly source files for the Dalvik interpreter.
20#
21
22import sys, string, re, time
23from string import Template
24
# Header that getOpcodeList() scans for the ordered opcode names.
interp_defs_file = "../../libdex/DexOpcodes.h" # need opcode list
# Number of opcodes getOpcodeList() must find; the emit loops iterate over
# exactly this many entries.
kNumPackedOpcodes = 256 # TODO: Derive this from DexOpcodes.h.

# Set to True by the "split-ops" config command.  While False, op-end also
# emits the alternate handlers; otherwise "alt-ops" has to request them.
splitops = False
# When True, the loadAndEmit* helpers print each fragment they emit.
verbose = False
# -1000 means "not set yet"; setHandlerSize() checks for it to reject a
# second handler-size command.
handler_size_bits = -1000
handler_size_bytes = -1000
in_op_start = 0             # 0=not started, 1=started, 2=ended
# NOTE(review): nothing in the code visible here reads or writes this one.
in_alt_op_start = 0         # 0=not started, 1=started, 2=ended
# Directory named by op-start; used for opcodes without an "op" override.
default_op_dir = None
# File named by asm-alt-stub; used for opcodes without an "alt" override.
default_alt_stub = None
# opcode name -> location, filled in by "op" commands.
opcode_locations = {}
# opcode name -> location, filled in by "alt" commands.
alt_opcode_locations = {}
# Lines of the asm-stub file; empty when no asm-stub was loaded.
asm_stub_text = []
label_prefix = ".L"         # use ".L" to hide labels from gdb
alt_label_prefix = ".L_ALT" # use ".L" to hide labels from gdb
style = None                # interpreter style
# Becomes True once an asm-alt-stub or any "alt" command has been seen.
generate_alt_table = False
43
# Raised by the config-command handlers and by appendSourceFile() when a
# directive is malformed; the main loop catches it to report a failure.
class DataParseError(SyntaxError):
    """Failure when parsing data file"""
    pass
47
#
# Build a fresh dictionary holding the substitution values that are
# available to every template: the handler size in bits and in bytes.
#
def getGlobalSubDict():
    subs = {}
    subs["handler_size_bits"] = handler_size_bits
    subs["handler_size_bytes"] = handler_size_bytes
    return subs
54
#
# Parse arch config file --
# Handle "handler-style": record which interpreter style is being generated.
# Accepts "computed-goto", "jump-table" or "all-c".
#
def setHandlerStyle(tokens):
    global style
    if len(tokens) != 2:
        raise DataParseError("handler-style requires one argument")
    requested = tokens[1]
    # The global is assigned before the name is checked, so a rejected name
    # is still left in "style" when the exception propagates.
    style = requested
    if requested not in ("computed-goto", "jump-table", "all-c"):
        raise DataParseError("handler-style (%s) invalid" % requested)
66
#
# Parse arch config file --
# Handle "handler-size": set handler_size_bytes to the value of tokens[1],
# and handler_size_bits to log2(handler_size_bytes).
#
# Raises DataParseError if the argument is missing, is not an integer, is
# not a positive power of two, or if handler-size was already set.  The
# globals are only updated once the value has been validated.
#
def setHandlerSize(tokens):
    global handler_size_bits, handler_size_bytes
    if style != "computed-goto":
        print("Warning: handler-size valid only for computed-goto interpreters")
    if len(tokens) != 2:
        raise DataParseError("handler-size requires one argument")
    if handler_size_bits != -1000:
        raise DataParseError("handler-size may only be set once")

    try:
        size = int(tokens[1])
    except ValueError:
        raise DataParseError("handler-size (%s) must be an integer"
                % tokens[1])

    # compute log2(size) by counting how often it can be halved
    bits = -1
    remaining = size
    while remaining > 0:
        remaining //= 2     # halve with truncating division
        bits += 1

    # For size <= 0 the loop never runs and bits stays -1; test that case
    # first so we never evaluate a shift by a negative count.
    if size <= 0 or size != (1 << bits):
        raise DataParseError("handler-size (%d) must be power of 2" % size)

    handler_size_bytes = size
    handler_size_bits = bits
93
#
# Parse arch config file --
# Handle "import": append the named file to the C++ output (for a ".cpp"
# name) or to the assembly output (for a ".S" name), with the global
# substitution values applied.
#
def importFile(tokens):
    if len(tokens) != 2:
        raise DataParseError("import requires one argument")
    source = tokens[1]
    # choose the destination from the file extension
    if source.endswith(".cpp"):
        target_fp = c_fp
    elif source.endswith(".S"):
        target_fp = asm_fp
    else:
        raise DataParseError("don't know how to import %s (expecting .cpp/.S)"
                % source)
    appendSourceFile(source, getGlobalSubDict(), target_fp, None)
109
#
# Parse arch config file --
# Handle "asm-stub": read the lines of the named file into asm_stub_text.
# emitAsmStub() later expands them for each opcode implemented in C.
#
# Raises DataParseError if the argument is missing or the file cannot be
# read.
#
def setAsmStub(tokens):
    global asm_stub_text
    if style == "all-c":
        # advisory only: the file is still loaded below
        print("Warning: asm-stub ignored for all-c interpreter")
    if len(tokens) != 2:
        raise DataParseError("asm-stub requires one argument")
    try:
        stub_fp = open(tokens[1])
        # The inner try guarantees the close without touching stub_fp when
        # open() itself was what failed.
        try:
            asm_stub_text = stub_fp.readlines()
        finally:
            stub_fp.close()
    except IOError:
        # sys.exc_info() keeps this valid on both Python 2 and Python 3
        err = sys.exc_info()[1]
        raise DataParseError("unable to load asm-stub: %s" % str(err))
127
#
# Parse arch config file --
# Handle "asm-alt-stub": record the location of the default alt stub and
# request generation of the alternate handler table.
#
# Raises DataParseError if the argument is missing.
#
def setAsmAltStub(tokens):
    global default_alt_stub, generate_alt_table
    if style == "all-c":
        # advisory only: the stub location is still recorded below
        print("Warning: asm-alt-stub ignored for all-c interpreter")
    if len(tokens) != 2:
        raise DataParseError("asm-alt-stub requires one argument")
    default_alt_stub = tokens[1]
    generate_alt_table = True
140
#
# Parse arch config file --
# Handle "op-start": open the opcode list and remember the directory that
# opcodes without an explicit "op" entry are loaded from.
#
def opStart(tokens):
    global in_op_start, default_op_dir
    if len(tokens) != 2:
        raise DataParseError("opStart takes a directory name argument")
    if in_op_start != 0:
        raise DataParseError("opStart can only be specified once")
    _, default_op_dir = tokens
    in_op_start = 1         # now inside the op-start/op-end section
154
#
# Parse arch config file --
# Handle "alt": set the location of a single alt opcode's source file and
# request generation of the alternate handler table.  A later entry for
# the same opcode replaces the earlier one (a note is printed).
#
# Raises DataParseError on a wrong argument count, when used outside the
# op-start/op-end section, or for an opcode name that is not in "opcodes".
#
def altEntry(tokens):
    global generate_alt_table
    if len(tokens) != 3:
        raise DataParseError("alt requires exactly two arguments")
    if in_op_start != 1:
        raise DataParseError("alt statements must be between opStart/opEnd")
    opcode, location = tokens[1], tokens[2]
    if opcode not in opcodes:
        raise DataParseError("unknown opcode %s" % opcode)
    if opcode in alt_opcode_locations:
        print("Note: alt overrides earlier %s (%s -> %s)"
                % (opcode, alt_opcode_locations[opcode], location))
    alt_opcode_locations[opcode] = location
    generate_alt_table = True
174
#
# Parse arch config file --
# Handle "op": set the location of a single opcode's source file.  A later
# entry for the same opcode replaces the earlier one (a note is printed).
#
# Raises DataParseError on a wrong argument count, when used outside the
# op-start/op-end section, or for an opcode name that is not in "opcodes".
#
def opEntry(tokens):
    if len(tokens) != 3:
        raise DataParseError("op requires exactly two arguments")
    if in_op_start != 1:
        raise DataParseError("op statements must be between opStart/opEnd")
    opcode, location = tokens[1], tokens[2]
    if opcode not in opcodes:
        raise DataParseError("unknown opcode %s" % opcode)
    if opcode in opcode_locations:
        print("Note: op overrides earlier %s (%s -> %s)"
                % (opcode, opcode_locations[opcode], location))
    opcode_locations[opcode] = location
193
#
# Emit jump table: a global label followed by one ".long" entry per opcode,
# each naming the handler label "<prefix>_<opcode>".
#
def emitJmpTable(start_label, prefix):
    asm_fp.write("\n    .global %s\n" % start_label)
    asm_fp.write("    .text\n")
    asm_fp.write("%s:\n" % start_label)
    for opnum in range(kNumPackedOpcodes):
        # the trailing comment carries the opcode number in hex
        asm_fp.write("    .long %s_%s /* 0x%02x */\n"
                     % (prefix, opcodes[opnum], opnum))
207
#
# Parse arch config file --
# Handle "op-end": close the opcode list and emit the instruction blocks.
# Unless split-ops is in effect, the alternate handlers (and, for jump-table
# interpreters, both jump tables) are emitted here as well.
#
def opEnd(tokens):
    global in_op_start
    if len(tokens) != 1:
        raise DataParseError("opEnd takes no arguments")
    if in_op_start != 1:
        raise DataParseError("opEnd must follow opStart, and only appear once")
    in_op_start = 2

    loadAndEmitOpcodes()

    # nothing more to do when the alt handlers are deferred or not wanted
    if splitops or not generate_alt_table:
        return
    loadAndEmitAltOpcodes()
    if style == "jump-table":
        emitJmpTable("dvmAsmInstructionStart", label_prefix)
        emitJmpTable("dvmAsmAltInstructionStart", alt_label_prefix)
227
#
# Parse arch config file --
# Handle "alt-ops": emit the alternate handlers and, for jump-table
# interpreters, both jump tables.  Only valid after op-end; does nothing
# unless an alternate table was requested by asm-alt-stub or "alt".
#
# Raises DataParseError when used before op-end or when given arguments.
#
def genaltop(tokens):
    if in_op_start != 2:
        raise DataParseError("alt-ops can be specified only after op-end")
    if len(tokens) != 1:
        raise DataParseError("alt-ops takes no arguments")
    if not generate_alt_table:
        return
    loadAndEmitAltOpcodes()
    if style == "jump-table":
        emitJmpTable("dvmAsmInstructionStart", label_prefix)
        emitJmpTable("dvmAsmAltInstructionStart", alt_label_prefix)
238
239
#
# Extract an ordered list of instructions from the VM sources.  We use the
# "goto table" definition macro, which has exactly kNumPackedOpcodes
# entries.
#
# Returns the names ("OP_" plus the captured identifier) in file order.
# Raises SyntaxError if the number found is not kNumPackedOpcodes; an
# IOError from opening interp_defs_file is not caught.
#
def getOpcodeList():
    # matches lines of the form "    H(OP_<name>),"
    opcode_re = re.compile(r"^\s*H\(OP_(\w+)\),.*", re.DOTALL)
    opcodes = []
    opcode_fp = open(interp_defs_file)
    try:
        for line in opcode_fp:
            match = opcode_re.match(line)
            if match:
                opcodes.append("OP_" + match.group(1))
    finally:
        opcode_fp.close()

    if len(opcodes) != kNumPackedOpcodes:
        print("ERROR: found %d opcodes in %s (expected %d)"
                % (len(opcodes), interp_defs_file, kNumPackedOpcodes))
        raise SyntaxError("bad opcode count")
    return opcodes
261
#
# Write a ".balign handler_size_bytes" directive to the assembly output.
# Only done for computed-goto interpreters; a no-op for the other styles.
#
def emitAlign():
    if style != "computed-goto":
        return
    asm_fp.write("    .balign %d\n" % handler_size_bytes)
265
#
# Load and emit opcodes for all kNumPackedOpcodes instructions, bracketed
# by the start/end labels.  For computed-goto interpreters the collected
# "sister" continuations are emitted afterwards in their own section.
#
def loadAndEmitOpcodes():
    assert len(opcodes) == kNumPackedOpcodes
    sister_list = []
    need_dummy_start = False

    # For jump-table interpreters the un-suffixed names are given to the
    # jump tables emitted after this function, so the code labels get a
    # "Code" suffix.
    suffix = ""
    if style == "jump-table":
        suffix = "Code"
    start_label = "dvmAsmInstructionStart" + suffix
    end_label = "dvmAsmInstructionEnd" + suffix

    # make the start label an alias for the OP_NOP handler label
    asm_fp.write("\n    .global %s\n" % start_label)
    asm_fp.write("    .type   %s, %%function\n" % start_label)
    asm_fp.write("%s = %s_OP_NOP\n" % (start_label, label_prefix))
    asm_fp.write("    .text\n\n")

    for opnum in range(kNumPackedOpcodes):
        # an "op" entry overrides the op-start directory
        location = opcode_locations.get(opcodes[opnum], default_op_dir)
        if location != "c":
            loadAndEmitAsm(location, opnum, sister_list)
            continue
        loadAndEmitC(location, opnum)
        if not asm_stub_text:
            need_dummy_start = True

    # A C handler without an asm stub leaves no label in the assembly
    # output.  The start label still has to resolve to OP_NOP, and slipping
    # it in behind the alignment pseudo-op is more trouble than it is worth,
    # so a dummy OP_NOP label is emitted here instead.
    if need_dummy_start:
        emitAlign()
        asm_fp.write(label_prefix + "_OP_NOP:   /* dummy */\n")

    emitAlign()
    asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
    asm_fp.write("    .global %s\n" % end_label)
    asm_fp.write("%s:\n" % end_label)

    if style != "computed-goto":
        return

    # continuation code split off from the handlers at their %break lines
    emitSectionComment("Sister implementations", asm_fp)
    asm_fp.writelines([
        "    .global dvmAsmSisterStart\n",
        "    .type   dvmAsmSisterStart, %function\n",
        "    .text\n",
        "    .balign 4\n",
        "dvmAsmSisterStart:\n",
    ])
    asm_fp.writelines(sister_list)
    asm_fp.writelines([
        "\n    .size   dvmAsmSisterStart, .-dvmAsmSisterStart\n",
        "    .global dvmAsmSisterEnd\n",
        "dvmAsmSisterEnd:\n\n",
    ])
325
#
# Load an alternate entry stub: emit the alt-prefixed header for the opcode
# at "opindex", then append "source" to the assembly output.
#
def loadAndEmitAltStub(source, opindex):
    op = opcodes[opindex]
    if verbose:
        print(" alt emit %s --> stub" % source)
    subs = getGlobalSubDict()
    subs["opcode"] = op
    subs["opnum"] = opindex

    emitAsmHeader(asm_fp, subs, alt_label_prefix)
    appendSourceFile(source, subs, asm_fp, None)
338
#
# Load and emit alternate opcodes for all kNumPackedOpcodes instructions.
# An opcode with an "alt" entry uses "<location>/ALT_<opcode>.S"; every
# other opcode uses the default stub named by asm-alt-stub.
#
# Raises DataParseError if an opcode has neither an "alt" entry nor a
# default stub to fall back on.
#
def loadAndEmitAltOpcodes():
    assert len(opcodes) == kNumPackedOpcodes
    if style == "jump-table":
        start_label = "dvmAsmAltInstructionStartCode"
        end_label = "dvmAsmAltInstructionEndCode"
    else:
        start_label = "dvmAsmAltInstructionStart"
        end_label = "dvmAsmAltInstructionEnd"

    # Make the start label an alias for the first alternate handler.  The
    # alias is built from alt_label_prefix, the same prefix used for the
    # handler labels themselves, so the two cannot drift apart.
    asm_fp.write("\n    .global %s\n" % start_label)
    asm_fp.write("    .type   %s, %%function\n" % start_label)
    asm_fp.write("    .text\n\n")
    asm_fp.write("%s = %s_OP_NOP\n" % (start_label, alt_label_prefix))

    for opnum in range(kNumPackedOpcodes):
        op = opcodes[opnum]
        if op in alt_opcode_locations:
            source = "%s/ALT_%s.S" % (alt_opcode_locations[op], op)
        elif default_alt_stub is not None:
            source = default_alt_stub
        else:
            # "alt" entries alone switch the alt table on, so we can get
            # here without asm-alt-stub ever having been given
            raise DataParseError("no alt source for %s: add an asm-alt-stub "
                    "command or an alt entry" % op)
        loadAndEmitAltStub(source, opnum)

    emitAlign()
    asm_fp.write("    .size   %s, .-%s\n" % (start_label, start_label))
    asm_fp.write("    .global %s\n" % end_label)
    asm_fp.write("%s:\n" % end_label)
369
#
# Load the C++ fragment "<location>/<opcode>.cpp" and append it to the C++
# output.  If an asm-stub was loaded, also output the assembly stub for the
# opcode.
#
def loadAndEmitC(location, opindex):
    op = opcodes[opindex]
    source = "%s/%s.cpp" % (location, op)
    if verbose:
        print(" emit %s --> C++" % source)
    subs = getGlobalSubDict()
    subs["opcode"] = op
    subs["opnum"] = opindex

    appendSourceFile(source, subs, c_fp, None)

    if asm_stub_text:
        emitAsmStub(asm_fp, subs)
385
#
# Load the assembly fragment "<location>/<opcode>.S" and append it, behind
# its header, to the assembly output.  Lines after a %break may be diverted
# into "sister_list" by appendSourceFile().
#
def loadAndEmitAsm(location, opindex, sister_list):
    op = opcodes[opindex]
    source = "%s/%s.S" % (location, op)
    subs = getGlobalSubDict()
    subs["opcode"] = op
    subs["opnum"] = opindex
    if verbose:
        print(" emit %s --> asm" % source)

    emitAsmHeader(asm_fp, subs, label_prefix)
    appendSourceFile(source, subs, asm_fp, sister_list)
399
#
# Output the separator comment, the alignment directive and the label for
# an assembly piece.  "dict" must supply "opcode" and "opnum".
#
def emitAsmHeader(outfp, dict, prefix):
    outfp.write("/* ------------------------------ */\n")
    # Aligning each handler makes it occupy at least the configured amount
    # of space; a handler that overflows its slot is not detected here.
    # NOTE: emitAlign() always writes to the global asm_fp rather than to
    # outfp; every caller in this file passes asm_fp, so they coincide.
    emitAlign()
    # The label is what gdb will show.  An underscore separates the prefix
    # from the opcode so the symbol cannot clash with the Opcode enum.
    label = "%s_%s: /* 0x%02x */\n" % (prefix, dict["opcode"], dict["opnum"])
    outfp.write(label)
412
#
# Output the instruction stub for one opcode: the standard header followed
# by every line of the loaded asm-stub file, with template substitution
# from "dict" applied.
#
def emitAsmStub(outfp, dict):
    emitAsmHeader(outfp, dict, label_prefix)
    for stub_line in asm_stub_text:
        expanded = Template(stub_line).substitute(dict)
        outfp.write(expanded)
422
#
# Append the file specified by "source" to the open "outfp", preceded by a
# "/* File: ... */" comment and followed by a blank line.  Each line is
# template-replaced using the substitution dictionary "dict"; if "dict" is
# None, ordinary lines are copied unchanged.
#
# A line that begins with one of the following is a directive and is not
# copied to the output:
#   %include "file" {dict}  Recursively append "file".  The optional
#                           Python-style dictionary supplies additional
#                           substitutions for the included file only.
#   %default {dict}         Add substitutions for keys "dict" lacks.
#   %verify ...             Ignored.
#   %break                  Recognized only when "sister_list" is provided.
#                           For computed-goto interpreters, all following
#                           lines of this file are appended to sister_list
#                           instead of being written to "outfp".  Any
#                           further %break lines are dropped.
#
# This may modify "dict" (through %default).
#
# Raises DataParseError for a malformed or self-referential directive and
# for a substitution keyword that has no value.  An IOError from opening
# "source" is not caught.
#
# NOTE: the dictionaries on %include and %default lines are passed to
# eval(), so fragment files must come from a trusted source tree.
#
def appendSourceFile(source, dict, outfp, sister_list):
    outfp.write("/* File: %s */\n" % source)
    infp = open(source, "r")
    try:
        in_sister = False
        for line in infp:
            if line.startswith("%include"):
                # Parse the "include" line
                tokens = line.strip().split(' ', 2)
                if len(tokens) < 2:
                    raise DataParseError("malformed %%include in %s" % source)

                alt_source = tokens[1].strip("\"")
                if alt_source == source:
                    raise DataParseError("self-referential %%include in %s"
                            % source)

                # the included file sees a copy, so its own %default lines
                # and the extra values do not leak back into "dict"
                new_dict = dict.copy()
                if len(tokens) == 3:
                    new_dict.update(eval(tokens[2]))
                appendSourceFile(alt_source, new_dict, outfp, sister_list)
                continue

            if line.startswith("%default"):
                # copy keywords into dictionary, keeping existing values
                tokens = line.strip().split(' ', 1)
                if len(tokens) < 2:
                    raise DataParseError("malformed %%default in %s" % source)
                defaultValues = eval(tokens[1])
                for entry in defaultValues:
                    dict.setdefault(entry, defaultValues[entry])
                continue

            if line.startswith("%verify"):
                # more to come, someday
                continue

            if line.startswith("%break") and sister_list is not None:
                # allow more than one %break, ignoring all following the
                # first
                if style == "computed-goto" and not in_sister:
                    in_sister = True
                    sister_list.append(
                            "\n/* continuation for %(opcode)s */\n" % dict)
                continue

            # perform keyword substitution if a dictionary was provided
            if dict is not None:
                try:
                    subline = Template(line).substitute(dict)
                except KeyError:
                    # sys.exc_info() keeps this valid on Python 2 and 3
                    err = sys.exc_info()[1]
                    raise DataParseError(
                            "keyword substitution failed in %s: %s"
                            % (source, str(err)))
                except Exception:
                    # show the offending line, then let the error propagate
                    print("ERROR: substitution failed: " + line)
                    raise
            else:
                subline = line

            # write output to appropriate file
            if in_sister:
                sister_list.append(subline)
            else:
                outfp.write(subline)
    finally:
        # close the input even when a directive or substitution fails
        infp.close()
    outfp.write("\n")
503
#
# Emit a C-style section header comment: the title "str" framed above and
# below by a rule of "=" characters.
#
def emitSectionComment(str, fp):
    rule = "========================================" \
           "==================================="

    banner = ["\n/*\n"]
    banner.append(" * %s\n" % rule)
    banner.append(" *  %s\n" % (str,))
    banner.append(" * %s\n" % rule)
    banner.append(" */\n")
    fp.write("".join(banner))
513
514
515#
516# ===========================================================================
517# "main" code
518#
519
#
# Check args: exactly a target architecture and an output directory.
#
if len(sys.argv) != 3:
    print("Usage: %s target-arch output-dir" % sys.argv[0])
    sys.exit(2)

target_arch, output_dir = sys.argv[1:3]

#
# Extract opcode list.
#
opcodes = getOpcodeList()
536
#
# Open config file.
#
# Only I/O failures are reported as "unable to open"; anything else (for
# example a keyboard interrupt) is allowed to propagate.
try:
    config_fp = open("config-%s" % target_arch)
except IOError:
    print("Unable to open config file 'config-%s'" % target_arch)
    sys.exit(1)

#
# Open and prepare output files.
#
try:
    c_fp = open("%s/InterpC-%s.cpp" % (output_dir, target_arch), "w")
    asm_fp = open("%s/InterpAsm-%s.S" % (output_dir, target_arch), "w")
except IOError:
    print("Unable to open output files")
    print("Make sure directory '%s' exists and existing files are writable"
            % output_dir)
    # Ideally we'd remove the files to avoid confusing "make", but if they
    # failed to open we probably won't be able to remove them either.
    sys.exit(1)

print("Generating %s, %s" % (c_fp.name, asm_fp.name))

# Both generated files start with the same do-not-edit banner.
file_header = """/*
 * This file was generated automatically by gen-mterp.py for '%s'.
 *
 * --> DO NOT EDIT <--
 */

""" % (target_arch)

c_fp.write(file_header)
asm_fp.write(file_header)
572
#
# Process the config file.
#
# Each non-blank, non-comment line is a command followed by its arguments,
# separated by any amount of whitespace.  The table maps a command to the
# function that handles it; "split-ops" is handled inline because it only
# sets a module-level flag.
#
config_handlers = {
    "handler-size": setHandlerSize,
    "import": importFile,
    "asm-stub": setAsmStub,
    "asm-alt-stub": setAsmAltStub,
    "op-start": opStart,
    "op-end": opEnd,
    "alt": altEntry,
    "op": opEntry,
    "handler-style": setHandlerStyle,
    "alt-ops": genaltop,
}

failed = False
try:
    for line in config_fp:
        tokens = line.split()       # tolerates tabs and repeated spaces
        if not tokens or tokens[0].startswith('#'):
            continue                # blank line or comment
        command = tokens[0]
        if command == "split-ops":
            splitops = True
        elif command in config_handlers:
            config_handlers[command](tokens)
        else:
            raise DataParseError("unrecognized command '%s'" % command)
        # Checked after the command has run: only handler-style sets
        # "style", so any other first command ends up here.
        if style is None:
            print("tokens[0] = %s" % command)
            raise DataParseError("handler-style must be first command")
except DataParseError:
    # sys.exc_info() keeps this valid on both Python 2 and Python 3
    err = sys.exc_info()[1]
    print("Failed: " + str(err))
    # TODO: remove output files so "make" doesn't get confused
    failed = True
    c_fp.close()
    asm_fp.close()
    c_fp = asm_fp = None

config_fp.close()

#
# Done!
#
if c_fp is not None:
    c_fp.close()
if asm_fp is not None:
    asm_fp.close()

# "failed" is a bool, so the exit status is 1 on failure and 0 on success
sys.exit(failed)
635