1#!/usr/bin/env python
2
3#
4# Copyright 2012 the V8 project authors. All rights reserved.
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10#       notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12#       copyright notice, this list of conditions and the following
13#       disclaimer in the documentation and/or other materials provided
14#       with the distribution.
15#     * Neither the name of Google Inc. nor the names of its
16#       contributors may be used to endorse or promote products derived
17#       from this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31
32#
33# Emits a C++ file to be compiled and linked into libv8 to support postmortem
34# debugging tools.  Most importantly, this tool emits constants describing V8
35# internals:
36#
37#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
38#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
39#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
40#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
41#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
42#    v8dbg_prop_NAME = OFFSET                   Object property offsets
43#    v8dbg_NAME = VALUE                         Miscellaneous values
44#
45# These constants are declared as global integers so that they'll be present in
46# the generated libv8 binary.
47#
48
49import re
50import sys
51
52#
53# Miscellaneous constants, tags, and masks used for object identification.
54#
# Each (name, value) pair below becomes {'name': name, 'value': value}; the
# value is the C++ expression emitted on the right-hand side of
# "int v8dbg_NAME = VALUE;".
consts_misc = list({'name': label, 'value': expr} for label, expr in (
    ('FirstNonstringType', 'FIRST_NONSTRING_TYPE'),

    ('IsNotStringMask', 'kIsNotStringMask'),
    ('StringTag', 'kStringTag'),
    ('NotStringTag', 'kNotStringTag'),

    ('StringEncodingMask', 'kStringEncodingMask'),
    ('TwoByteStringTag', 'kTwoByteStringTag'),
    ('OneByteStringTag', 'kOneByteStringTag'),

    ('StringRepresentationMask', 'kStringRepresentationMask'),
    ('SeqStringTag', 'kSeqStringTag'),
    ('ConsStringTag', 'kConsStringTag'),
    ('ExternalStringTag', 'kExternalStringTag'),
    ('SlicedStringTag', 'kSlicedStringTag'),

    ('FailureTag', 'kFailureTag'),
    ('FailureTagMask', 'kFailureTagMask'),
    ('HeapObjectTag', 'kHeapObjectTag'),
    ('HeapObjectTagMask', 'kHeapObjectTagMask'),
    ('SmiTag', 'kSmiTag'),
    ('SmiTagMask', 'kSmiTagMask'),
    ('SmiValueShift', 'kSmiTagSize'),
    ('SmiShiftSize', 'kSmiShiftSize'),
    ('PointerSizeLog2', 'kPointerSizeLog2'),

    # Oddball::k* values.
    ('OddballFalse', 'Oddball::kFalse'),
    ('OddballTrue', 'Oddball::kTrue'),
    ('OddballTheHole', 'Oddball::kTheHole'),
    ('OddballNull', 'Oddball::kNull'),
    ('OddballArgumentMarker', 'Oddball::kArgumentMarker'),
    ('OddballUndefined', 'Oddball::kUndefined'),
    ('OddballUninitialized', 'Oddball::kUninitialized'),
    ('OddballOther', 'Oddball::kOther'),
    ('OddballException', 'Oddball::kException'),

    # v8dbg_prop_* constants.
    ('prop_idx_first', 'DescriptorArray::kFirstIndex'),
    ('prop_type_field', 'FIELD'),
    ('prop_type_first_phantom', 'TRANSITION'),
    ('prop_type_mask', 'PropertyDetails::TypeField::kMask'),
    ('prop_index_mask', 'PropertyDetails::FieldIndexField::kMask'),
    ('prop_index_shift', 'PropertyDetails::FieldIndexField::kShift'),

    ('prop_desc_key', 'DescriptorArray::kDescriptorKey'),
    ('prop_desc_details', 'DescriptorArray::kDescriptorDetails'),
    ('prop_desc_value', 'DescriptorArray::kDescriptorValue'),
    ('prop_desc_size', 'DescriptorArray::kDescriptorSize'),

    # v8dbg_elements_* constants.
    ('elements_fast_holey_elements', 'FAST_HOLEY_ELEMENTS'),
    ('elements_fast_elements', 'FAST_ELEMENTS'),
    ('elements_dictionary_elements', 'DICTIONARY_ELEMENTS'),

    # Map bit-field masks and shifts.
    ('bit_field2_elements_kind_mask', 'Map::kElementsKindMask'),
    ('bit_field2_elements_kind_shift', 'Map::kElementsKindShift'),
    ('bit_field3_dictionary_map_shift', 'Map::DictionaryMap::kShift'),

    # v8dbg_off_fp_* constants (frame pointer offsets).
    ('off_fp_context', 'StandardFrameConstants::kContextOffset'),
    ('off_fp_constant_pool', 'StandardFrameConstants::kConstantPoolOffset'),
    ('off_fp_marker', 'StandardFrameConstants::kMarkerOffset'),
    ('off_fp_function', 'JavaScriptFrameConstants::kFunctionOffset'),
    ('off_fp_args', 'JavaScriptFrameConstants::kLastParameterOffset'),
))
140
141#
142# The following useful fields are missing accessors, so we define fake ones.
143#
# Each tuple is (class, field, type, offset constant).  The parts are joined
# with ", " so that every entry is the argument list of an ACCESSORS(...)
# macro call, which is how load_fields() feeds them to parse_field().
extras_accessors = list(', '.join(spec) for spec in (
    ('HeapObject', 'map', 'Map', 'kMapOffset'),
    ('JSObject', 'elements', 'Object', 'kElementsOffset'),
    ('FixedArray', 'data', 'uintptr_t', 'kHeaderSize'),
    ('Map', 'instance_attributes', 'int', 'kInstanceAttributesOffset'),
    ('Map', 'inobject_properties', 'int', 'kInObjectPropertiesOffset'),
    ('Map', 'instance_size', 'int', 'kInstanceSizeOffset'),
    ('Map', 'bit_field', 'char', 'kBitFieldOffset'),
    ('Map', 'bit_field2', 'char', 'kBitField2Offset'),
    ('Map', 'bit_field3', 'SMI', 'kBitField3Offset'),
    ('Map', 'prototype', 'Object', 'kPrototypeOffset'),
    ('NameDictionaryShape', 'prefix_size', 'int', 'kPrefixSize'),
    ('NameDictionaryShape', 'entry_size', 'int', 'kEntrySize'),
    ('SeededNumberDictionaryShape', 'prefix_size', 'int', 'kPrefixSize'),
    ('UnseededNumberDictionaryShape', 'prefix_size', 'int', 'kPrefixSize'),
    ('NumberDictionaryShape', 'entry_size', 'int', 'kEntrySize'),
    ('Oddball', 'kind_offset', 'int', 'kKindOffset'),
    ('HeapNumber', 'value', 'double', 'kValueOffset'),
    ('ConsString', 'first', 'String', 'kFirstOffset'),
    ('ConsString', 'second', 'String', 'kSecondOffset'),
    ('ExternalString', 'resource', 'Object', 'kResourceOffset'),
    ('SeqOneByteString', 'chars', 'char', 'kHeaderSize'),
    ('SeqTwoByteString', 'chars', 'char', 'kHeaderSize'),
    ('SharedFunctionInfo', 'code', 'Code', 'kCodeOffset'),
    ('SlicedString', 'parent', 'String', 'kParentOffset'),
    ('Code', 'instruction_start', 'uintptr_t', 'kHeaderSize'),
    ('Code', 'instruction_size', 'int', 'kInstructionSizeOffset'),
))
172
173#
174# The following is a whitelist of classes we expect to find when scanning the
175# source code. This list is not exhaustive, but it's still useful to identify
176# when this script gets out of sync with the source. See load_objects().
177#
expected_classes = [
    'ConsString',
    'FixedArray',
    'HeapNumber',
    'JSArray',
    'JSFunction',
    'JSObject',
    'JSRegExp',
    'JSValue',
    'Map',
    'Oddball',
    'Script',
    'SeqOneByteString',
    'SharedFunctionInfo',
]
183
184
185#
186# The following structures store high-level representations of the structures
187# for which we're going to emit descriptive constants.
188#
# Set of all type names (a dict whose keys are the names).
types = {}

# Maps type names to corresponding class names.
typeclasses = {}

# Known classes, including parents.
klasses = {}

# Field declarations.
fields = []
193
#
# Text written at the very start of the generated C++ file (see
# emit_config()).  The single %s is filled in with sys.argv[0], i.e. the name
# this script was invoked as.  It opens an extern "C" block that is closed by
# the footer below.
#
# Note that the template is not a raw string: the backslash that ends the
# "#define FRAME_CONST" line is followed directly by a newline, so Python
# drops both characters and the macro definition is emitted on one line.
#
header = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0];

#
# Text written at the very end of the generated C++ file; it closes the
# extern "C" block opened by the header.
#
footer = '''
}
'''
220
221#
222# Loads class hierarchy and type information from "objects.h".
223#
def load_objects():
        """Load class hierarchy and instance type information.

        Reads the file named by sys.argv[2] ("objects.h") line by line and
        fills in three module-level tables:

            klasses      every 'class X[: public Y] {' declaration found,
                         as klasses[X] = { 'parent': Y or None }
            types        every enumerator name found between the lines
                         'enum InstanceType {' and '};'
            typeclasses  type name -> class name, for each type whose
                         inferred class name is present in klasses

        If any entry of expected_classes ends up with no type mapped to
        it, an error line is printed for each such class and the script
        exits with status 1.
        """
        in_insttype = False
        typestr = ''

        #
        # The classes we're sure should be present.  Entries are removed as
        # types are matched to them; whatever is left at the end is missing.
        #
        missing = set(expected_classes)

        class_decl = re.compile(
            r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')

        #
        # Iterate objects.h line-by-line to collect type and class
        # information.  For types, we accumulate a string representing the
        # entire InstanceType enum definition and parse it later because
        # it's easier to do so without the embedded newlines.
        #
        with open(sys.argv[2], 'r') as objfile:
                for line in objfile:
                        if line.startswith('enum InstanceType {'):
                                in_insttype = True
                                continue

                        if in_insttype and line.startswith('};'):
                                in_insttype = False
                                continue

                        # Trim the line, then drop any trailing // comment.
                        line = re.sub(r'//.*', '', line.strip())

                        if in_insttype:
                                typestr += line
                                continue

                        match = class_decl.match(line)
                        if match:
                                klasses[match.group(1)] = {
                                    'parent': match.group(3)
                                }

        #
        # Process the instance type declaration.  Each comma-separated
        # entry is reduced to the enumerator name by dropping any
        # "= value" part.  Empty entries (for example after a trailing
        # comma) are skipped.
        #
        for entry in typestr.split(','):
                name = re.sub(r'\s*=.*', '', entry).lstrip()
                if name:
                        types[name] = True

        #
        # Infer class names for each type based on a systematic
        # transformation.  For example, "JS_FUNCTION_TYPE" becomes
        # "JSFunction".  We find the class for each type rather than the
        # other way around because there are fewer cases where one type
        # maps to more than one class than the other way around.
        #
        for typename in types:
                # Symbols and Strings are implemented using the same classes.
                usetype = typename.replace('SYMBOL_', 'STRING_')

                # REGEXP behaves like REG_EXP, as in
                # JS_REGEXP_TYPE => JSRegExp.
                usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

                #
                # Remove the "_TYPE" suffix and then convert to camel case,
                # except that a "JS" prefix remains uppercase (as in
                # "JS_FUNCTION_TYPE" => "JSFunction").
                #
                if not usetype.endswith('_TYPE'):
                        continue

                parts = usetype[:-len('_TYPE')].split('_')

                if parts[0] == 'JS':
                        cctype = 'JS'
                        parts = parts[1:]
                else:
                        cctype = ''

                # capitalize() also copes with an empty component, which
                # indexing the first character would not.
                cctype += ''.join(part.capitalize() for part in parts)

                #
                # Mapping string types is more complicated.  Both types and
                # class names for Strings specify a representation (e.g.,
                # Seq, Cons, External, or Sliced) and an encoding
                # (TwoByte/OneByte).  In the simplest case, both of these
                # are explicit in both names, as in:
                #
                #       EXTERNAL_ONE_BYTE_STRING_TYPE => ExternalOneByteString
                #
                # However, either the representation or encoding can be
                # omitted from the type name, in which case "Seq" and
                # "TwoByte" are assumed, as in:
                #
                #       STRING_TYPE => SeqTwoByteString
                #
                # Additionally, sometimes the type name has more information
                # than the class, as in:
                #
                #       CONS_ONE_BYTE_STRING_TYPE => ConsString
                #
                # To figure this out dynamically, we first check for a
                # representation and encoding and add them if they're not
                # present.  If that doesn't yield a known class name, then
                # we strip out the encoding.
                #
                if cctype.endswith('String'):
                        if ('Cons' not in cctype and
                            'External' not in cctype and
                            'Sliced' not in cctype):
                                if 'OneByte' in cctype:
                                        cctype = re.sub(r'OneByteString$',
                                            'SeqOneByteString', cctype)
                                else:
                                        cctype = re.sub(r'String$',
                                            'SeqString', cctype)

                        if 'OneByte' not in cctype:
                                cctype = re.sub(r'String$', 'TwoByteString',
                                    cctype)

                        if cctype not in klasses:
                                cctype = cctype.replace('OneByte', '')
                                cctype = cctype.replace('TwoByte', '')

                #
                # Despite all that, some types have no corresponding class.
                #
                if cctype in klasses:
                        typeclasses[typename] = cctype
                        missing.discard(cctype)

        if missing:
                for klass in sorted(missing):
                        print('error: expected class "%s" not found' % klass)

                sys.exit(1)
372
373
374#
375# For a given macro call, pick apart the arguments and return an object
376# describing the corresponding output constant.  See load_fields().
377#
def parse_field(call):
        """Describe the output constant for one accessor macro invocation.

        call is the complete text of the invocation, from the macro name
        through the closing parenthesis, for example
        "ACCESSORS(Map, prototype, Object, kPrototypeOffset)".  It may span
        several lines.

        ACCESSORS and ACCESSORS_GCSAFE are read as
        (class, field, type, offset).  SMI_ACCESSORS and ACCESSORS_TO_SMI
        are read as (class, field, offset) and reported with type "SMI".

        Returns a dict with two keys:
            'name'   "class_CLASS__FIELD__TYPE"
            'value'  "CLASS::OFFSET"

        Raises ValueError if the macro name is none of the four above.
        """
        #
        # Replace newlines with spaces and trim the ends, so that the final
        # character is the closing parenthesis and no argument carries
        # stray whitespace from a multi-line invocation.
        #
        call = call.replace('\n', ' ').strip()

        idx = call.find('(')
        kind = call[0:idx]
        rest = call[idx + 1:len(call) - 1].strip()
        args = re.split(r'\s*,\s*', rest)

        if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
                klass, field, dtype, offset = args[0:4]
        elif kind in ('SMI_ACCESSORS', 'ACCESSORS_TO_SMI'):
                klass, field, offset = args[0:3]
                dtype = 'SMI'
        else:
                # An explicit raise, unlike an assert, survives "python -O".
                raise ValueError('unexpected accessor macro: %r' % kind)

        return ({
            'name': 'class_%s__%s__%s' % (klass, field, dtype),
            'value': '%s::%s' % (klass, offset)
        })
411
412#
413# Load field offset information from objects-inl.h.
414#
def load_fields():
        """Load field offset information.

        Reads the file named by sys.argv[3] ("objects-inl.h"), extracts
        every invocation of ACCESSORS, ACCESSORS_GCSAFE, SMI_ACCESSORS and
        ACCESSORS_TO_SMI that starts at the beginning of a line, and
        appends the result of parse_field() for each one to the
        module-level "fields" list.  The hand-written entries in
        extras_accessors are appended afterwards, each wrapped as an
        ACCESSORS(...) call.
        """
        #
        # Each class's fields and the corresponding offsets are described
        # in the source by calls to macros like "ACCESSORS" (and friends).
        # All we do here is extract these macro invocations, taking into
        # account that they may span multiple lines and may contain nested
        # parentheses.  We also call parse_field() to pick apart the
        # invocation.
        #
        prefixes = ('ACCESSORS(', 'ACCESSORS_GCSAFE(',
                    'SMI_ACCESSORS(', 'ACCESSORS_TO_SMI(')
        current = ''
        opens = 0

        with open(sys.argv[3], 'r') as inlfile:
                for line in inlfile:
                        if opens > 0:
                                # Continuation line
                                start = 0
                        elif line.startswith(prefixes):
                                # A new invocation begins here; hand off
                                # the previously collected one first.
                                if current:
                                        fields.append(parse_field(current))
                                        current = ''

                                start = line.index('(')
                        else:
                                continue

                        (opens, end) = _scan_parens(line, start, opens)
                        current += line[0:end]

        if current:
                fields.append(parse_field(current))

        fields.extend(parse_field('ACCESSORS(%s)' % body)
            for body in extras_accessors)

#
# Walk text from index "start", adding one to "depth" for every '(' and
# subtracting one for every ')'.  Stops right after the character that
# brings the depth to zero.  Returns (depth, end), where text[0:end] is the
# part of the text that belongs to the invocation being collected; end is
# len(text) when the depth never reached zero.
#
def _scan_parens(text, start, depth):
        for pos in range(start, len(text)):
                if text[pos] == '(':
                        depth += 1
                elif text[pos] == ')':
                        depth -= 1

                if depth == 0:
                        return (depth, pos + 1)

        return (depth, len(text))
471
472#
473# Emit a block of constants.
474#
def emit_set(out, consts):
        """Write one block of constants as C global integer definitions.

        out is any object with a write() method.  consts is an iterable of
        dicts with 'name' and 'value' keys; the value may be a string or a
        number.  Each entry is written as "int v8dbg_NAME = VALUE;" on its
        own line, with all whitespace removed from NAME and VALUE, and the
        block is followed by one empty line.
        """
        # Fix up overzealous parses.  This could be done inside the
        # parsers but as there are several, it's easiest to do it here.
        ws = re.compile(r'\s+')
        for const in consts:
                name = ws.sub('', const['name'])
                value = ws.sub('', str(const['value']))  # Can be a number.
                out.write('int v8dbg_%s = %s;\n' % (name, value))
        out.write('\n')
484
485#
486# Emit the whole output file.
487#
def emit_config():
        """Write the complete generated C++ file to the path in sys.argv[1].

        The output consists of, in order: the header, the miscellaneous
        constants (consts_misc), one "type_CLASS__TYPE" constant per entry
        of typeclasses, one "parent_CLASS__PARENT" constant (value 0) per
        class in klasses that has a parent, the field constants collected
        in "fields", and the footer.  Type and class entries are written
        in sorted key order.
        """
        # open() and sorted() behave the same on Python 2 and Python 3,
        # unlike the file() builtin and sorting the result of dict.keys()
        # in place.
        with open(sys.argv[1], 'w') as out:
                out.write(header)

                out.write('/* miscellaneous constants */\n')
                emit_set(out, consts_misc)

                out.write('/* class type information */\n')
                consts = []
                for typename in sorted(typeclasses):
                        klass = typeclasses[typename]
                        consts.append({
                            'name': 'type_%s__%s' % (klass, typename),
                            'value': typename
                        })

                emit_set(out, consts)

                out.write('/* class hierarchy information */\n')
                consts = []
                for klassname in sorted(klasses):
                        pklass = klasses[klassname]['parent']
                        if pklass is None:
                                continue

                        # Only the name carries information here; the
                        # value is always 0.
                        consts.append({
                            'name': 'parent_%s__%s' % (klassname, pklass),
                            'value': 0
                        })

                emit_set(out, consts)

                out.write('/* field information */\n')
                emit_set(out, fields)

                out.write(footer)
529
# The script needs three arguments: the output file followed by the two
# input headers.  Anything less prints the usage line and exits with
# status 2.
if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

# Read both inputs first, then write the output.
load_objects()
load_fields()
emit_config()
537