1#!/usr/bin/env python
2
3#
4# Copyright 2012 the V8 project authors. All rights reserved.
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10#       notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12#       copyright notice, this list of conditions and the following
13#       disclaimer in the documentation and/or other materials provided
14#       with the distribution.
15#     * Neither the name of Google Inc. nor the names of its
16#       contributors may be used to endorse or promote products derived
17#       from this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31
32#
33# Emits a C++ file to be compiled and linked into libv8 to support postmortem
34# debugging tools.  Most importantly, this tool emits constants describing V8
35# internals:
36#
37#    v8dbg_type_CLASS__TYPE = VALUE             Describes class type values
38#    v8dbg_class_CLASS__FIELD__TYPE = OFFSET    Describes class fields
39#    v8dbg_parent_CLASS__PARENT                 Describes class hierarchy
40#    v8dbg_frametype_NAME = VALUE               Describes stack frame values
41#    v8dbg_off_fp_NAME = OFFSET                 Frame pointer offsets
42#    v8dbg_prop_NAME = OFFSET                   Object property offsets
43#    v8dbg_NAME = VALUE                         Miscellaneous values
44#
45# These constants are declared as global integers so that they'll be present in
46# the generated libv8 binary.
47#
48
49import re
50import sys
51
52#
53# Miscellaneous constants, tags, and masks used for object identification.
54#
def _misc_const(name, value):
    """Build one record with the 'name' and 'value' keys that emit_set() reads."""
    return {'name': name, 'value': value}


# Each record becomes "int v8dbg_<name> = <value>;" in the generated file,
# where <value> is a C++ expression evaluated by the compiler.
consts_misc = [
    # String vs. non-string discrimination.
    _misc_const('FirstNonstringType', 'FIRST_NONSTRING_TYPE'),
    _misc_const('IsNotStringMask', 'kIsNotStringMask'),
    _misc_const('StringTag', 'kStringTag'),
    _misc_const('NotStringTag', 'kNotStringTag'),

    # String encoding.
    _misc_const('StringEncodingMask', 'kStringEncodingMask'),
    _misc_const('TwoByteStringTag', 'kTwoByteStringTag'),
    _misc_const('AsciiStringTag', 'kAsciiStringTag'),

    # String representation.
    _misc_const('StringRepresentationMask', 'kStringRepresentationMask'),
    _misc_const('SeqStringTag', 'kSeqStringTag'),
    _misc_const('ConsStringTag', 'kConsStringTag'),
    _misc_const('ExternalStringTag', 'kExternalStringTag'),

    # Pointer tagging.
    _misc_const('FailureTag', 'kFailureTag'),
    _misc_const('FailureTagMask', 'kFailureTagMask'),
    _misc_const('HeapObjectTag', 'kHeapObjectTag'),
    _misc_const('HeapObjectTagMask', 'kHeapObjectTagMask'),
    _misc_const('SmiTag', 'kSmiTag'),
    _misc_const('SmiTagMask', 'kSmiTagMask'),
    _misc_const('SmiValueShift', 'kSmiTagSize'),
    _misc_const('PointerSizeLog2', 'kPointerSizeLog2'),

    # Property descriptors.
    _misc_const('prop_idx_content', 'DescriptorArray::kContentArrayIndex'),
    _misc_const('prop_idx_first', 'DescriptorArray::kFirstIndex'),
    _misc_const('prop_type_field', 'FIELD'),
    _misc_const('prop_type_first_phantom', 'MAP_TRANSITION'),
    _misc_const('prop_type_mask', 'PropertyDetails::TypeField::kMask'),

    # Frame-pointer-relative offsets.
    _misc_const('off_fp_context', 'StandardFrameConstants::kContextOffset'),
    _misc_const('off_fp_marker', 'StandardFrameConstants::kMarkerOffset'),
    _misc_const('off_fp_function',
                'JavaScriptFrameConstants::kFunctionOffset'),
    _misc_const('off_fp_args',
                'JavaScriptFrameConstants::kLastParameterOffset'),
]
101
102#
103# The following useful fields are missing accessors, so we define fake ones.
104#
# Each string is the argument list of a synthetic ACCESSORS(...) invocation:
# "Class, field, type, offset-constant".  load_fields() wraps every entry in
# "ACCESSORS(%s)" and hands it to parse_field().
extras_accessors = [
    # HeapObject / JSObject / FixedArray
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',

    # Map
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',

    # Numbers and strings
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqAsciiString, chars, char, kHeaderSize',

    # Code
    'SharedFunctionInfo, code, Code, kCodeOffset',
    'Code, instruction_start, uintptr_t, kHeaderSize',
    'Code, instruction_size, int, kInstructionSizeOffset',
]
122
123#
124# The following is a whitelist of classes we expect to find when scanning the
125# source code. This list is not exhaustive, but it's still useful to identify
126# when this script gets out of sync with the source. See load_objects().
127#
# load_objects() reports an error and exits if any of these class names is
# not matched to at least one instance type.
expected_classes = [
    'ConsString',
    'FixedArray',
    'HeapNumber',
    'JSArray',
    'JSFunction',
    'JSObject',
    'JSRegExp',
    'JSValue',
    'Map',
    'Oddball',
    'Script',
    'SeqAsciiString',
    'SharedFunctionInfo',
]
133
134
135#
136# The following structures store high-level representations of the structures
137# for which we're going to emit descriptive constants.
138#
# Set of all type names, stored as a dict mapping name -> True.
types = dict()

# Maps type names to corresponding class names.
typeclasses = dict()

# Known classes, including parents: class name -> {'parent': name or None}.
klasses = dict()

# Field declarations, as {'name': ..., 'value': ...} records.
fields = list()
143
# Prologue of the generated C++ file.  The single %s is filled in with the
# name this script was invoked as.
#
# Note: the template is not a raw string, so the backslash-newline after the
# FRAME_CONST parameter list is a Python line continuation.  The emitted
# #define therefore appears on a single line with no backslash.
_HEADER_TEMPLATE = '''
/*
 * This file is generated by %s.  Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass)       \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

'''

header = _HEADER_TEMPLATE % sys.argv[0]

# Epilogue of the generated file: closes the extern "C" block opened above.
footer = '\n}\n'
170
171#
172# Loads class hierarchy and type information from "objects.h".
173#
def _class_name_for_type(typename, known_classes):
    """Derive the candidate C++ class name for one InstanceType enumerator.

    For example, "JS_FUNCTION_TYPE" becomes "JSFunction".  Returns None when
    typename (after the SYMBOL/REGEXP substitutions) does not end in "_TYPE".
    The returned name is only a candidate: the caller must still check that
    it is in known_classes, because some types have no corresponding class.

    known_classes is any container supporting "in"; it is consulted only to
    decide whether a string class name needs its encoding stripped.
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    #
    # Remove the "_TYPE" suffix and then convert to camel case, except that
    # a "JS" prefix remains uppercase (as in "JS_FUNCTION_TYPE" =>
    # "JSFunction").  str.capitalize() copes with empty segments, which
    # indexing the first character would not.
    #
    parts = usetype[:-len('_TYPE')].split('_')
    prefix = ''
    if parts[0] == 'JS':
        prefix = 'JS'
        parts = parts[1:]

    cctype = prefix + ''.join(part.capitalize() for part in parts)

    #
    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii).  In the simplest case, both
    # of these are explicit in both names, as in:
    #
    #       EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # However, either the representation or encoding can be omitted from the
    # type name, in which case "Seq" and "TwoByte" are assumed, as in:
    #
    #       STRING_TYPE => SeqTwoByteString
    #
    # Additionally, sometimes the type name has more information than the
    # class, as in:
    #
    #       CONS_ASCII_STRING_TYPE => ConsString
    #
    # To figure this out dynamically, we first check for a representation
    # and encoding and add them if they're not present.  If that doesn't
    # yield a valid class name, then we strip out the encoding.
    #
    if cctype.endswith('String'):
        has_representation = ('Cons' in cctype or 'External' in cctype or
                              'Sliced' in cctype)
        if not has_representation:
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in known_classes:
            cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    Reads the file named by sys.argv[2] and fills three module-level tables:

      klasses      class name -> {'parent': parent class name or None}, for
                   every line matching "class NAME[: public PARENT] {"
      types        enumerator name -> True, for every entry of the
                   "enum InstanceType {" ... "};" block
      typeclasses  enumerator name -> class name, for every enumerator whose
                   inferred class name appears in klasses

    If any name in expected_classes was not matched to at least one type, an
    error line per missing class is printed and the process exits with
    status 1.
    """
    objfilename = sys.argv[2]
    in_insttype = False
    enum_chunks = []

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate the text of the entire InstanceType enum
    # definition and parse it later because it's easier to do so without
    # the embedded newlines.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Trim surrounding whitespace, then drop any "//" comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                enum_chunks.append(line)
                continue

            match = re.match(
                r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{', line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration.  Drop any "= value" initializer
    # and skip empty entries (e.g. the one after a trailing comma).
    #
    for entry in ''.join(enum_chunks).split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.
    #
    missing = set(expected_classes)
    for typename in types:
        cctype = _class_name_for_type(typename, klasses)

        # Despite all that, some types have no corresponding class.
        if cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        # Sorted so the report is the same from run to run.
        for klass in sorted(missing):
            print('error: expected class "%s" not found' % klass)

        sys.exit(1)
322
323
324#
325# For a given macro call, pick apart the arguments and return an object
326# describing the corresponding output constant.  See load_fields().
327#
def parse_field(call):
    """Turn one accessor-macro invocation into an output-constant record.

    call is the full text of the invocation, e.g.
    "ACCESSORS(JSObject, elements, Object, kElementsOffset)".  It may span
    several lines; it must end with the closing parenthesis.

    Supported forms:
      ACCESSORS(klass, field, dtype, offset)
      ACCESSORS_GCSAFE(klass, field, dtype, offset)
      SMI_ACCESSORS(klass, field, offset)      (dtype is reported as "SMI")

    Returns a dict with two keys:
      'name'   "class_<klass>__<field>__<dtype>"
      'value'  "<klass>::<offset>"

    Raises AssertionError for any other macro name, and IndexError if the
    invocation has fewer arguments than its form requires.
    """
    # Replace newlines with spaces.  (Strings are immutable, so this has to
    # build a new string rather than assign character by character.)
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[:idx]

    # Everything between the opening parenthesis and the final character
    # (the closing parenthesis).  Strip it so that padding after "(" or
    # before ")" does not end up inside the first or last argument.
    rest = call[idx + 1:-1].strip()
    args = re.split(r'\s*,\s*', rest)

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    else:
        assert kind == 'SMI_ACCESSORS', 'unexpected macro: %r' % kind
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset),
    }
361
362#
363# Load field offset information from objects-inl.h.
364#
def _macro_invocations(lines, prefixes):
    """Yield the text of each macro invocation found in lines.

    An invocation starts on a line that begins with one of prefixes
    immediately followed by "(", and runs through the parenthesis that
    balances that opening one, which may be on a later line.  Text after the
    balancing parenthesis is dropped.  Line breaks inside a multi-line
    invocation are kept.

    If the input ends before the parentheses balance, the text gathered so
    far is still yielded.
    """
    openers = tuple(prefix + '(' for prefix in prefixes)
    current = ''
    depth = 0

    for line in lines:
        if depth > 0:
            # Continuation of an invocation opened on an earlier line.
            start = 0
        elif line.startswith(openers):
            start = line.index('(')
        else:
            continue

        # Track nesting from `start`; stop right after the parenthesis that
        # brings the depth back to zero, or take the whole line otherwise.
        end = len(line)
        for pos in range(start, len(line)):
            if line[pos] == '(':
                depth += 1
            elif line[pos] == ')':
                depth -= 1
                if depth == 0:
                    end = pos + 1
                    break

        current += line[:end]
        if depth == 0:
            yield current
            current = ''

    if current:
        yield current


def load_fields():
    """Load field offset information from objects-inl.h.

    Each class's fields and the corresponding offsets are described in the
    source by calls to macros like "ACCESSORS" (and friends).  This reads the
    file named by sys.argv[3], extracts those invocations (which may span
    multiple lines and contain nested parentheses), and appends the record
    parse_field() returns for each one to the module-level `fields` list.

    Afterwards, one synthetic ACCESSORS(...) record is appended for each
    entry of extras_accessors.
    """
    inlfilename = sys.argv[3]
    prefixes = ['ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS']

    with open(inlfilename, 'r') as inlfile:
        for invocation in _macro_invocations(inlfile, prefixes):
            fields.append(parse_field(invocation))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
420
421#
422# Emit a block of constants.
423#
def emit_set(out, consts):
    """Write one block of constant definitions to out.

    out is any object with a write() method.  consts is an iterable of
    mappings with 'name' and 'value' keys; each one is written as
    "int v8dbg_<name> = <value>;" on its own line.  A blank line follows the
    block, even when consts is empty.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')
429
430#
431# Emit the whole output file.
432#
def emit_config():
    """Emit the whole output file to the path given in sys.argv[1].

    The file consists of, in order: the header, the miscellaneous constants
    (consts_misc), one "type_<class>__<type>" constant per entry of
    typeclasses, one "parent_<class>__<parent>" constant (value 0) per class
    in klasses that has a parent, the field constants (fields), and the
    footer.  Type and class entries are written in sorted name order.
    """
    type_consts = [
        {
            'name': 'type_%s__%s' % (typeclasses[typename], typename),
            'value': typename,
        }
        for typename in sorted(typeclasses)
    ]

    # Classes without a parent contribute nothing to the hierarchy section.
    parent_consts = [
        {
            'name': 'parent_%s__%s' % (klassname,
                                       klasses[klassname]['parent']),
            'value': 0,
        }
        for klassname in sorted(klasses)
        if klasses[klassname]['parent'] is not None
    ]

    # open() plus "with" works on both Python 2 and 3 (the file() builtin
    # exists only on Python 2) and guarantees the output is flushed and
    # closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        emit_set(out, type_consts)

        out.write('/* class hierarchy information */\n')
        emit_set(out, parent_consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
474
# Script driver: requires the output path plus the two input headers, then
# runs the three phases in order.
if not len(sys.argv) >= 4:
    print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
    sys.exit(2)

for phase in (load_objects, load_fields, emit_config):
    phase()
482