1#!/usr/bin/env python
2#
3# Copyright 2012 the V8 project authors. All rights reserved.
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9#       notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11#       copyright notice, this list of conditions and the following
12#       disclaimer in the documentation and/or other materials provided
13#       with the distribution.
14#     * Neither the name of Google Inc. nor the names of its
15#       contributors may be used to endorse or promote products derived
16#       from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30# This is a utility for converting JavaScript source code into C-style
31# char arrays. It is used for embedded JavaScript code in the V8
32# library.
33
34import os, re, sys, string
35import optparse
36import jsmin
37import bz2
38import textwrap
39
40
class Error(Exception):
  """Error raised by this script for invalid sources, macros or options."""

  def __init__(self, msg):
    super(Error, self).__init__(msg)
44
45
def ToCArray(byte_sequence):
  """Render a byte sequence as the body of a C array initializer.

  Args:
    byte_sequence: A string of characters, or a bytes/bytearray object.
        Iterating the latter may yield integers rather than one-character
        strings; both forms are accepted.

  Returns:
    The decimal byte values joined by ", " and wrapped at 80 columns.
  """
  values = []
  for item in byte_sequence:
    # Items that are already integers must not be passed through ord().
    values.append(str(item if isinstance(item, int) else ord(item)))
  return textwrap.fill(", ".join(values), 80)
52
53
def RemoveCommentsAndTrailingWhitespace(lines):
  """Strip JavaScript comments and trailing whitespace from a source text.

  The matching is purely textual: '//' or '/*' inside a string literal is
  treated as the start of a comment as well.

  Args:
    lines: The complete source text as one string.

  Returns:
    The text without comments and without whitespace before line breaks.
  """
  # End-of-line comments: drop everything up to the newline, keep the newline.
  without_line_comments = re.compile(r'//.*\n').sub('\n', lines)
  # Block comments, which may span several lines.
  without_comments = re.compile(r'/\*.*?\*/', re.DOTALL).sub(
      '', without_line_comments)
  # Whitespace (including blank lines) in front of a line break collapses
  # into a single newline.
  return re.compile(r'\s+\n+').sub('\n', without_comments)
59
60
def ReadFile(filename):
  """Return the entire contents of the text file `filename` as one string."""
  with open(filename, "rt") as source:
    return source.read()
68
69
# Calls to eval(...) and with (...) statements, matched on word boundaries.
EVAL_PATTERN = re.compile(r'\beval\s*\(')
WITH_PATTERN = re.compile(r'\bwith\s*\(')

def Validate(lines):
  """Reject constructs that are not allowed in the natives files.

  Because of the simplified context setup, neither eval nor with may be
  used in the natives files.

  Args:
    lines: The source text to check.

  Returns:
    `lines`, unchanged, so that the function can be used as a filter.

  Raises:
    Error: If the text contains an eval call or a with statement.
  """
  forbidden = (
      (EVAL_PATTERN, "Eval disallowed in natives."),
      (WITH_PATTERN, "With statements disallowed in natives."),
  )
  for pattern, message in forbidden:
    if pattern.search(lines) is not None:
      raise Error(message)
  return lines
83
84
def ExpandConstants(lines, constants):
  """Substitute every constant in the source text.

  Args:
    lines: The source text.
    constants: Iterable of (compiled name pattern, value) pairs. They are
        applied in order, so a later constant also sees the text inserted
        by earlier ones.

  Returns:
    The text with each pattern match replaced by str(value).
  """
  expanded = lines
  for name_pattern, replacement in constants:
    expanded = name_pattern.sub(str(replacement), expanded)
  return expanded
89
90
def ExpandMacroDefinition(lines, pos, name_pattern, macro, expander):
  """Expand every call of one macro found at or after `pos`.

  Args:
    lines: The source text.
    pos: Index in `lines` at which the search for calls starts.
    name_pattern: Compiled pattern matching the macro name followed by its
        opening parenthesis; a match must end right after that '('.
    macro: Object with an `args` list (parameter names) and an
        `expand(mapping)` method returning the replacement text.
    expander: Function applied to each stripped argument text before it is
        bound to its parameter (used to expand macros inside arguments).

  Returns:
    The text with each call replaced by its expansion.

  Raises:
    Error: If a call is not closed before the end of the text, or passes
        more arguments than the macro declares.
  """
  pattern_match = name_pattern.search(lines, pos)
  while pattern_match is not None:
    start = pattern_match.start()
    end = pattern_match.end()
    assert lines[end - 1] == '('

    # Scan over the arguments, splitting on the commas that belong to this
    # call. `height` is the bracket nesting depth; it drops to 0 at the
    # parenthesis closing the call.
    height = 1
    last_match = end
    arguments = []
    while end < len(lines) and height > 0:
      char = lines[end]
      # We don't count commas at higher nesting levels.
      if char == ',' and height == 1:
        arguments.append(lines[last_match:end])
        last_match = end + 1
      elif char in ('(', '{', '['):
        height += 1
      elif char in (')', '}', ']'):
        height -= 1
      end += 1
    if height > 0:
      # Ran off the end of the text; splicing here would corrupt the source.
      raise Error("Unterminated macro call: %s"
                  % lines[start:pattern_match.end()])
    # Remember to add the last argument; `end` is one past the closing ')'.
    arguments.append(lines[last_match:end - 1])
    if len(arguments) > len(macro.args):
      raise Error("Too many arguments in macro call: %s" % lines[start:end])

    # Bind arguments to parameter names, expanding recursively in the
    # arguments. Parameters without an argument stay unbound.
    mapping = {}
    for name, text in zip(macro.args, arguments):
      mapping[name] = expander(text.strip())

    # Replace the occurrence of the macro with the expansion and continue
    # searching behind the inserted text.
    result = macro.expand(mapping)
    lines = lines[:start] + result + lines[end:]
    pattern_match = name_pattern.search(lines, start + len(result))
  return lines
124
def ExpandMacros(lines, macros):
  """Expand all macro calls in the source text.

  Args:
    lines: The source text.
    macros: List of (compiled call pattern, macro object) pairs in
        declaration order.

  Returns:
    The text with every macro call expanded.
  """
  def expand_nested(text):
    # Arguments of a call may themselves contain macro calls.
    return ExpandMacros(text, macros)

  # We allow macros to depend on the previously declared macros, but
  # we don't allow self-dependencies or recursion. Hence the macros are
  # expanded from the last declared one to the first.
  for name_pattern, macro in macros[::-1]:
    lines = ExpandMacroDefinition(lines, 0, name_pattern, macro, expand_nested)
  return lines
133
class TextMacro:
  """Macro whose expansion is its body with the parameters substituted."""

  def __init__(self, args, body):
    # args: list of parameter names; body: the replacement text.
    self.args = args
    self.body = body

  def expand(self, mapping):
    """Return the body with every parameter replaced by its argument.

    All replacements are applied in a single pass over the body. Replacing
    one parameter after the other would let the text inserted for one
    parameter be rewritten again when it happens to contain the name of
    another parameter, with a result that depends on dictionary order.

    Args:
      mapping: Dictionary from parameter name to argument text.

    Returns:
      The expanded body.
    """
    # An empty name (what splitting an empty parameter list yields) cannot
    # occur in the body in a meaningful way and is skipped. Longer names go
    # first so that a name is preferred over any other name that is a
    # prefix of it.
    names = sorted((name for name in mapping if name), key=len, reverse=True)
    if not names:
      return self.body
    any_name = re.compile("|".join(re.escape(name) for name in names))
    return any_name.sub(lambda match: mapping[match.group(0)], self.body)
143
class PythonMacro:
  """Macro whose expansion is computed by a Python function."""

  def __init__(self, args, fun):
    # args: list of parameter names; fun: callable taking one positional
    # argument per parameter.
    self.args = args
    self.fun = fun

  def expand(self, mapping):
    """Call the function with the mapped arguments, in parameter order.

    Args:
      mapping: Dictionary from parameter name to argument text; it must
          contain every parameter of the macro.

    Returns:
      The function's result converted to a string.
    """
    positional = [mapping[name] for name in self.args]
    return str(self.fun(*positional))
153
# The three kinds of definition lines accepted in a macro file:
#   const NAME = value;
#   macro NAME(args) = body;
#   python macro NAME(args) = python expression;
CONST_PATTERN = re.compile(r'^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$')
MACRO_PATTERN = re.compile(r'^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')
PYTHON_MACRO_PATTERN = re.compile(r'^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$')


def ReadMacros(lines):
  """Parse the contents of a macro file.

  Everything from a '#' to the end of a line is a comment; blank lines are
  ignored. Every other line must be a const, macro or python macro
  definition.

  Args:
    lines: The text of the macro file.

  Returns:
    A pair (constants, macros). `constants` is a list of (compiled pattern
    matching the constant name as a whole word, value text) pairs and
    `macros` a list of (compiled pattern matching "NAME(", TextMacro or
    PythonMacro) pairs, both in definition order.

  Raises:
    Error: If a line is not a valid definition.
  """
  constants = []
  macros = []
  for line in lines.split('\n'):
    comment_start = line.find('#')
    if comment_start != -1:
      line = line[:comment_start]
    line = line.strip()
    if not line:
      continue

    const_match = CONST_PATTERN.match(line)
    if const_match:
      name = const_match.group(1)
      value = const_match.group(2).strip()
      constants.append((re.compile("\\b%s\\b" % name), value))
      continue

    macro_match = MACRO_PATTERN.match(line)
    if macro_match:
      name = macro_match.group(1)
      args = [arg.strip() for arg in macro_match.group(2).split(',')]
      body = macro_match.group(3).strip()
      macros.append((re.compile("\\b%s\\(" % name), TextMacro(args, body)))
      continue

    python_match = PYTHON_MACRO_PATTERN.match(line)
    if python_match:
      name = python_match.group(1)
      args = [arg.strip() for arg in python_match.group(2).split(',')]
      body = python_match.group(3).strip()
      # NOTE: this evaluates Python code taken from the macro file, so the
      # macro file must be as trusted as this script itself.
      fun = eval("lambda " + ",".join(args) + ': ' + body)
      macros.append((re.compile("\\b%s\\(" % name), PythonMacro(args, fun)))
      continue

    raise Error("Illegal line: " + line)
  return (constants, macros)
190
# Header ("macro NAME(args)" up to the line break) and footer ("endmacro")
# of a macro defined inside a source file.
INLINE_MACRO_PATTERN = re.compile(r'macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*\n')
INLINE_MACRO_END_PATTERN = re.compile(r'endmacro\s*\n')

def ExpandInlineMacros(lines):
  """Expand the macros that are defined inside the source text itself.

  Each "macro NAME(args) ... endmacro" definition is removed from the text
  and the calls of NAME that follow it are replaced by the macro body.
  Arguments of those calls are substituted as written.

  Args:
    lines: The source text.

  Returns:
    The text without inline macro definitions and with their calls expanded.

  Raises:
    Error: If a macro header has no matching endmacro.
  """
  def keep_argument(text):
    return text

  search_from = 0
  header = INLINE_MACRO_PATTERN.search(lines, search_from)
  while header is not None:
    name = header.group(1)
    params = [param.strip() for param in header.group(2).split(',')]
    footer = INLINE_MACRO_END_PATTERN.search(lines, header.end())
    if footer is None:
      raise Error("Macro %s unclosed" % name)
    macro = TextMacro(params, lines[header.end():footer.start()])
    call_pattern = re.compile("\\b%s\\(" % name)

    # Cut the definition out of the text, then expand the calls starting
    # from the position where the definition used to be.
    lines = lines[:header.start()] + lines[footer.end():]
    search_from = header.start()
    lines = ExpandMacroDefinition(lines, search_from, call_pattern, macro,
                                  keep_argument)
    header = INLINE_MACRO_PATTERN.search(lines, search_from)
  return lines
219
220
# "const NAME = value" terminated by a semicolon or a line break.
INLINE_CONSTANT_PATTERN = re.compile(r'const\s+([a-zA-Z0-9_]+)\s*=\s*([^;\n]+)[;\n]')

def ExpandInlineConstants(lines):
  """Expand the constants that are defined inside the source text itself.

  Each "const NAME = value;" definition is removed from the text and every
  whole-word occurrence of NAME behind it is replaced by the value text.

  Args:
    lines: The source text.

  Returns:
    The text without constant definitions and with their uses replaced.
  """
  cursor = 0
  definition = INLINE_CONSTANT_PATTERN.search(lines, cursor)
  while definition is not None:
    name, replacement = definition.group(1, 2)
    uses = re.compile("\\b%s\\b" % name)

    # Drop the definition and substitute in the text that follows it only.
    before = lines[:definition.start()]
    after = lines[definition.end():]
    lines = before + uses.sub(replacement, after)

    # Continue from where the definition was, so that definitions which
    # directly follow it (and were just rewritten) are found as well.
    cursor = definition.start()
    definition = INLINE_CONSTANT_PATTERN.search(lines, cursor)
  return lines
240
241
# Template of the generated C++ file. JS2C fills it in with the dictionary
# returned by BuildMetadata, which supplies these keys:
#   sources_declaration          declaration of the `sources` byte array
#   raw_sources_declaration      declaration of the `raw_sources` pointer
#   type                         parameter of the NativesCollection template
#   builtin_count                value returned by GetBuiltinsCount()
#   debugger_count               value returned by GetDebuggerCount()
#   get_index_cases              one GET_INDEX_CASE line per module
#   get_raw_script_source_cases  one GET_RAW_SCRIPT_SOURCE_CASE line per module
#   get_script_name_cases        one GET_SCRIPT_NAME_CASE line per module
#   raw_total_length             length of the concatenated module sources
#   total_length                 length of the emitted `sources` array
# A backslash at the end of a template line joins it with the next line in
# this string literal, so no extra newline follows the substituted text.
HEADER_TEMPLATE = """\
// Copyright 2011 Google Inc. All Rights Reserved.

// This file was generated from .js source files by GYP.  If you
// want to make changes to this file you should either change the
// javascript source files or the GYP script.

#include "src/v8.h"
#include "src/natives.h"
#include "src/utils.h"

namespace v8 {
namespace internal {

%(sources_declaration)s\

%(raw_sources_declaration)s\

  template <>
  int NativesCollection<%(type)s>::GetBuiltinsCount() {
    return %(builtin_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetDebuggerCount() {
    return %(debugger_count)i;
  }

  template <>
  int NativesCollection<%(type)s>::GetIndex(const char* name) {
%(get_index_cases)s\
    return -1;
  }

  template <>
  int NativesCollection<%(type)s>::GetRawScriptsSize() {
    return %(raw_total_length)i;
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetRawScriptSource(int index) {
%(get_raw_script_source_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const char> NativesCollection<%(type)s>::GetScriptName(int index) {
%(get_script_name_cases)s\
    return Vector<const char>("", 0);
  }

  template <>
  Vector<const byte> NativesCollection<%(type)s>::GetScriptsSource() {
    return Vector<const byte>(sources, %(total_length)i);
  }

  template <>
  void NativesCollection<%(type)s>::SetRawScriptsSource(Vector<const char> raw_source) {
    DCHECK(%(raw_total_length)i == raw_source.length());
    raw_sources = raw_source.start();
  }

}  // internal
}  // v8
"""
307
# Declaration of the byte array holding the (possibly compressed) sources.
# %s is replaced by the comma-separated byte values from ToCArray.
SOURCES_DECLARATION = """\
  static const byte sources[] = { %s };
"""


# Declaration of `raw_sources` used by BuildMetadata when the emitted bytes
# differ from the concatenated module sources. The pointer starts out as
# NULL; the generated SetRawScriptsSource() assigns it.
RAW_SOURCES_COMPRESSION_DECLARATION = """\
  static const char* raw_sources = NULL;
"""


# Declaration of `raw_sources` used by BuildMetadata when the emitted bytes
# are the concatenated module sources themselves: it points at `sources`.
RAW_SOURCES_DECLARATION = """\
  static const char* raw_sources = reinterpret_cast<const char*>(sources);
"""


# One line of the generated GetIndex(): maps the module name %(id)s to its
# index %(i)i.
GET_INDEX_CASE = """\
    if (strcmp(name, "%(id)s") == 0) return %(i)i;
"""


# One line of the generated GetRawScriptSource(): module %(i)i occupies
# %(raw_length)i characters of `raw_sources` starting at %(offset)i.
GET_RAW_SCRIPT_SOURCE_CASE = """\
    if (index == %(i)i) return Vector<const char>(raw_sources + %(offset)i, %(raw_length)i);
"""


# One line of the generated GetScriptName(): module %(i)i has the script
# name %(name)s, which is %(length)i characters long.
GET_SCRIPT_NAME_CASE = """\
    if (index == %(i)i) return Vector<const char>("%(name)s", %(length)i);
"""
336
337
def BuildFilterChain(macro_filename):
  """Build the chain of filter functions to be applied to the sources.

  The chain reads a file, expands the constants and macros of the macro
  file (if one is given), removes comments and trailing whitespace, expands
  inline macros and inline constants, validates the result and minifies it.

  Args:
    macro_filename: Name of the macro file, if any.

  Returns:
    A function (string -> string) that reads a source file and processes it.
  """
  filter_chain = [ReadFile]

  if macro_filename:
    (consts, macros) = ReadMacros(ReadFile(macro_filename))
    filter_chain.append(lambda l: ExpandConstants(l, consts))
    filter_chain.append(lambda l: ExpandMacros(l, macros))

  filter_chain.extend([
    RemoveCommentsAndTrailingWhitespace,
    ExpandInlineMacros,
    ExpandInlineConstants,
    Validate,
    jsmin.JavaScriptMinifier().JSMinify
  ])

  # Apply the filters in order with a plain loop. This does not rely on the
  # reduce() builtin, which newer Python versions no longer provide.
  def run_filters(filename):
    result = filename
    for apply_filter in filter_chain:
      result = apply_filter(result)
    return result

  return run_filters
366
367
class Sources:
  """Container for the prepared sources, stored as three parallel lists."""

  def __init__(self):
    # Entries at the same index belong to the same source file:
    #   names: name of the module.
    #   modules: processed source text of the module.
    #   is_debugger_id: whether the module is a debugger source.
    self.names, self.modules, self.is_debugger_id = [], [], []
373
374
def IsDebuggerFile(filename):
  """Return whether `filename` names a debugger source (*-debugger.js)."""
  debugger_suffix = "-debugger.js"
  return filename.endswith(debugger_suffix)
377
def IsMacroFile(filename):
  """Return whether `filename` names a macro file (it ends in macros.py)."""
  macro_suffix = "macros.py"
  return filename.endswith(macro_suffix)
380
381
def PrepareSources(source_files):
  """Read, prepare and assemble the list of source files.

  Args:
    source_files: List of Javascript-ish source files. A file named
        macros.py will be treated as a list of macros. The list itself is
        not modified.

  Returns:
    An instance of Sources, with the debugger sources first.

  Raises:
    Error: If more than one macro file is given, or if processing a source
        file fails (the message then names the file).
  """
  macro_files = [name for name in source_files if IsMacroFile(name)]
  if len(macro_files) > 1:
    raise Error("At most one macro file is allowed, got: %s"
                % ", ".join(macro_files))
  macro_file = macro_files[0] if macro_files else None

  filters = BuildFilterChain(macro_file)

  # Sort 'debugger' sources first. False orders before True and the sort is
  # stable, so the files otherwise keep their given order.
  js_files = sorted((name for name in source_files if not IsMacroFile(name)),
                    key=lambda name: not IsDebuggerFile(name))

  result = Sources()
  for source in js_files:
    try:
      lines = filters(source)
    except Error as e:
      raise Error("In file %s:\n%s" % (source, str(e)))

    result.modules.append(lines)

    is_debugger = IsDebuggerFile(source)
    result.is_debugger_id.append(is_debugger)

    # The module name is the file name without its three-character
    # extension (".js") and, for debugger files, without the nine-character
    # "-debugger" suffix.
    name = os.path.basename(source)[:-3]
    result.names.append(name[:-9] if is_debugger else name)
  return result
420
421
def BuildMetadata(sources, source_bytes, native_type):
  """Build the meta data required to generate a libraries file.

  Args:
    sources: A Sources instance with the prepared sources.
    source_bytes: A list of source bytes.
        (The concatenation of all sources; might be compressed.)
    native_type: The parameter for the NativesCollection template.

  Returns:
    A dictionary for use with HEADER_TEMPLATE.
  """
  total_length = len(source_bytes)
  raw_sources = "".join(sources.modules)

  # The sources are expected to be ASCII-only. any() is used because the
  # truth value of a bare filter() result does not tell whether it is empty
  # on every Python version.
  assert not any(ord(char) >= 128 for char in raw_sources)

  # Loop over modules and build up indices into the source blob:
  get_index_cases = []
  get_script_name_cases = []
  get_raw_script_source_cases = []
  offset = 0
  for i, module in enumerate(sources.modules):
    native_name = "native %s.js" % sources.names[i]
    d = {
        "i": i,
        "id": sources.names[i],
        "name": native_name,
        "length": len(native_name),
        "offset": offset,
        "raw_length": len(module),
    }
    get_index_cases.append(GET_INDEX_CASE % d)
    get_script_name_cases.append(GET_SCRIPT_NAME_CASE % d)
    get_raw_script_source_cases.append(GET_RAW_SCRIPT_SOURCE_CASE % d)
    offset += len(module)
  assert offset == len(raw_sources)

  # If we have the raw sources we can declare them accordingly; otherwise
  # (e.g. compressed sources) the raw sources pointer is declared as NULL.
  have_raw_sources = source_bytes == raw_sources
  raw_sources_declaration = (RAW_SOURCES_DECLARATION
      if have_raw_sources else RAW_SOURCES_COMPRESSION_DECLARATION)

  metadata = {
    "builtin_count": len(sources.modules),
    "debugger_count": sum(sources.is_debugger_id),
    "sources_declaration": SOURCES_DECLARATION % ToCArray(source_bytes),
    "raw_sources_declaration": raw_sources_declaration,
    "raw_total_length": len(raw_sources),
    "total_length": total_length,
    "get_index_cases": "".join(get_index_cases),
    "get_raw_script_source_cases": "".join(get_raw_script_source_cases),
    "get_script_name_cases": "".join(get_script_name_cases),
    "type": native_type,
  }
  return metadata
479
480
def CompressMaybe(sources, compression_type):
  """Take the prepared sources and generate a sequence of bytes.

  Args:
    sources: A Sources instance with the prepared sources.
    compression_type: string, describing the desired compression;
        either "off" or "bz2".

  Returns:
    A sequence of bytes: the concatenated module sources for "off", their
    bz2-compressed form for "bz2".

  Raises:
    Error: If the compression type is not known.
  """
  sources_bytes = "".join(sources.modules)
  if compression_type == "off":
    return sources_bytes
  elif compression_type == "bz2":
    # bz2 works on bytes; a text string has to be encoded first.
    if not isinstance(sources_bytes, bytes):
      sources_bytes = sources_bytes.encode("utf-8")
    return bz2.compress(sources_bytes)
  else:
    raise Error("Unknown compression type %s." % compression_type)
498
499
def PutInt(blob_file, value):
  """Write a non-negative integer to the blob in one to three bytes.

  The value is shifted left by two bits and the number of bytes used is
  stored in the two lowest bits. The bytes are written least significant
  byte first.

  Args:
    blob_file: Binary file object to write to.
    value: The integer to write; must satisfy 0 <= value < 2**20.
  """
  assert 0 <= value < (1 << 20)
  if value < (1 << 6):
    size = 1
  elif value < (1 << 14):
    size = 2
  else:
    size = 3
  value_with_length = (value << 2) | size

  byte_sequence = bytearray()
  for _ in range(size):
    byte_sequence.append(value_with_length & 255)
    value_with_length >>= 8
  blob_file.write(byte_sequence)
510
511
def PutStr(blob_file, value):
  """Write a string to the blob: its length (see PutInt), then its bytes.

  Args:
    blob_file: Binary file object to write to.
    value: The string to write. A text string is encoded first, so that
        the length written is the number of bytes that follow.
  """
  if not isinstance(value, bytes):
    value = value.encode("utf-8")
  PutInt(blob_file, len(value))
  blob_file.write(value)
515
516
def WriteStartupBlob(sources, startup_blob):
  """Write a startup blob, as expected by V8 Initialize ...
    TODO(vogelheim): Add proper method name.

  The blob holds the number of debugger sources followed by a
  (name, source) pair for each of them, then the number of remaining
  sources followed by their (name, source) pairs. The first
  sum(sources.is_debugger_id) entries of `sources` are taken to be the
  debugger sources.

  Args:
    sources: A Sources instance with the prepared sources.
    startup_blob: Name of file to write the blob to.
  """
  debug_sources = sum(sources.is_debugger_id)
  # The with block closes the file even if writing fails half way.
  with open(startup_blob, "wb") as output:
    PutInt(output, debug_sources)
    for i in range(debug_sources):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])

    PutInt(output, len(sources.names) - debug_sources)
    for i in range(debug_sources, len(sources.names)):
      PutStr(output, sources.names[i])
      PutStr(output, sources.modules[i])
539
540
def JS2C(source, target, native_type, compression_type, raw_file, startup_blob):
  """Convert the JavaScript sources into a C++ source file.

  Args:
    source: List of source files, optionally including one macro file.
    target: Name of the C++ file to generate.
    native_type: The parameter for the NativesCollection template.
    compression_type: string, describing the desired compression.
    raw_file: Name of a file to write the emitted source bytes to, or a
        false value to skip it.
    startup_blob: Name of a file to write the startup blob to, or a false
        value to skip it.
  """
  sources = PrepareSources(source)
  sources_bytes = CompressMaybe(sources, compression_type)
  metadata = BuildMetadata(sources, sources_bytes, native_type)

  # Optionally emit raw file. It is written in binary mode: the contents may
  # be compressed data, and the lengths recorded in the metadata only match
  # if no newline translation takes place.
  if raw_file:
    raw_bytes = sources_bytes
    if not isinstance(raw_bytes, bytes):
      raw_bytes = raw_bytes.encode("utf-8")
    with open(raw_file, "wb") as output:
      output.write(raw_bytes)

  if startup_blob:
    WriteStartupBlob(sources, startup_blob)

  # Emit resulting source file.
  with open(target, "w") as output:
    output.write(HEADER_TEMPLATE % metadata)
559
560
def main():
  """Parse the command line and run the conversion.

  Exits with a usage error if fewer than the three required positional
  arguments (out.cc, type, compression) are given.
  """
  parser = optparse.OptionParser()
  parser.add_option("--raw", action="store",
                    help="file to write the processed sources array to.")
  parser.add_option("--startup_blob", action="store",
                    help="file to write the startup blob to.")
  parser.set_usage("""js2c out.cc type compression sources.js ...
      out.cc: C code to be generated.
      type: type parameter for NativesCollection template.
      compression: type of compression used. [off|bz2]
      sources.js: JS internal sources or macros.py.""")
  (options, args) = parser.parse_args()
  if len(args) < 3:
    # Report the problem together with the usage text instead of failing
    # with an IndexError below.
    parser.error("expected at least out.cc, type and compression arguments")

  JS2C(args[3:], args[0], args[1], args[2], options.raw, options.startup_blob)


if __name__ == "__main__":
  main()
579