1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Takes the same arguments as Windows link.exe, and a definition of libraries
6to split into subcomponents. Does multiple passes of link.exe invocation to
7determine exports between parts and generates .def and import libraries to
8cause symbols to be available to other parts."""
9
10import _winreg
11import ctypes
12import os
13import re
14import shutil
15import subprocess
16import sys
17import tempfile
18
19
# Directory containing this script. main() resolves the partition description
# file relative to this location.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


# This can be set to ignore data exports: when non-zero, symbols are no longer
# tagged DATA in the generated .def files and the end-of-run data export
# summary is suppressed. The resulting DLLs will probably not run, but at
# least they can be generated. The per-symbol log of data exports will still
# be output.
IGNORE_DATA = 0
27
28
def Log(message):
  """Prints |message| to stdout, tagged with the 'split_link:' prefix."""
  print('split_link: %s' % (message,))
31
32
def GetFlagsAndInputs(argv):
  """Parses the command line intended for link.exe and returns the flags and
  input files.

  Args:
    argv: arguments intended for link.exe. An argument of the form @file is
        replaced by the lines of that response file.

  Returns:
    A (flags, inputs, intermediate_manifest) tuple. |inputs| holds the
    arguments that do not start with '/' and end in .obj, .lib or .res;
    |flags| holds every other argument except /OUT:, /PDB: and
    /MANIFESTFILE:, which are dropped because this script supplies its own.
    |intermediate_manifest| is the value of /MANIFESTFILE:, or '' if absent.
  """
  rsp_expanded = []
  for arg in argv:
    # startswith() rather than arg[0] so that an empty argument is tolerated.
    if arg.startswith('@'):
      with open(arg[1:]) as rsp:
        rsp_expanded.extend(rsp.read().splitlines())
    else:
      rsp_expanded.append(arg)

  command_line = ' '.join(rsp_expanded)
  args = []
  # CommandLineToArgvW returns the path of the current executable when handed
  # an empty string, so only call it when there is something to parse.
  if command_line.strip():
    # Use CommandLineToArgvW so we match link.exe parsing.
    CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW
    # The default restype is a C int, which truncates the returned pointer on
    # 64-bit Python.
    CommandLineToArgvW.restype = ctypes.POINTER(ctypes.c_wchar_p)
    LocalFree = ctypes.windll.kernel32.LocalFree
    LocalFree.argtypes = [ctypes.c_void_p]

    size = ctypes.c_int()
    # The first token is parsed with the (different) program-name rules, so
    # prepend a placeholder program name and discard it below.
    ptr = CommandLineToArgvW(
        ctypes.create_unicode_buffer('split_link ' + command_line),
        ctypes.byref(size))
    if not ptr:
      raise ctypes.WinError()
    try:
      args = [ptr[i] for i in range(1, size.value)]
    finally:
      # Only reached once |ptr| is known to be valid.
      LocalFree(ptr)

  inputs = []
  flags = []
  intermediate_manifest = ''
  for arg in args:
    lower_arg = arg.lower()
    # We'll be replacing these ourselves.
    if lower_arg.startswith(('/out:', '/pdb:')):
      continue
    if lower_arg.startswith('/manifestfile:'):
      intermediate_manifest = arg[arg.index(':')+1:]
      continue
    if (not lower_arg.startswith('/') and
        lower_arg.endswith(('.obj', '.lib', '.res'))):
      inputs.append(arg)
    else:
      flags.append(arg)

  return flags, inputs, intermediate_manifest
76
77
def GetRegistryValue(subkey):
  """Reads the default value of HKCU\\Software\\Chromium\\<subkey> and returns
  it if it names an existing path. Exits the script if the key cannot be read
  or the path does not exist."""
  key_path = 'Software\\Chromium\\' + subkey
  try:
    value = _winreg.QueryValue(_winreg.HKEY_CURRENT_USER, key_path)
  except WindowsError:
    value = None

  if value is not None and os.path.exists(value):
    return value
  raise SystemExit("Couldn't read from registry")
88
89
def GetOriginalLinkerPath():
  """Returns the path stored under the 'split_link_installed' registry
  subkey, which callers in this file run as the linker."""
  linker_path = GetRegistryValue('split_link_installed')
  return linker_path
92
93
def GetMtPath():
  """Returns the path stored under the 'split_link_mt_path' registry subkey,
  which main() runs as the manifest tool."""
  mt_path = GetRegistryValue('split_link_mt_path')
  return mt_path
96
97
def PartFor(input_file, description_parts, description_all):
  """Returns the index of the part that |input_file| belongs to, or -1 if it
  belongs in every part.

  The file name is lower-cased and then searched with each regex. The
  |description_all| patterns are tried first; after that the first part in
  |description_parts| with a matching pattern wins. Raises ValueError if
  nothing matches."""
  lowered = input_file.lower()

  # Check if it should go in all parts.
  for pattern in description_all:
    if re.search(pattern, lowered):
      return -1

  # Or pick which particular one it belongs in.
  for part_index, patterns in enumerate(description_parts):
    for pattern in patterns:
      if re.search(pattern, lowered):
        return part_index

  raise ValueError("couldn't find location for %s" % lowered)
109
110
def ParseOutExternals(output):
  """Given the stdout of link.exe, parses the error messages to retrieve all
  symbols that are unresolved.

  Args:
    output: the complete stdout text of a failed link.exe run.

  Returns:
    A sorted list of the distinct unresolved symbol names.

  Raises:
    SystemExit: (carrying |output|) if the linker did not report an LNK1120
        unresolved-externals summary, i.e. the link failed for a reason this
        script cannot fix up.
    AssertionError: if the number of symbols parsed differs from the count
        in the LNK1120 summary.
  """
  result = set()
  # Styles of messages for unresolved externals, and a boolean to indicate
  # whether the error message emits the symbols with or without a leading
  # underscore.
  unresolved_regexes = [
    (re.compile(r' : error LNK2019: unresolved external symbol ".*" \((.*)\)'
                r' referenced in function'),
     False),
    (re.compile(r' : error LNK2001: unresolved external symbol ".*" \((.*)\)$'),
     False),
    (re.compile(r' : error LNK2019: unresolved external symbol (.*)'
                r' referenced in function '),
     True),
    (re.compile(r' : error LNK2001: unresolved external symbol (.*)$'),
     True),
  ]
  for line in output.splitlines():
    line = line.strip()
    for regex, strip_leading_underscore in unresolved_regexes:
      mo = regex.search(line)
      if mo:
        symbol = mo.group(1)
        # The first character is dropped unconditionally for these styles.
        result.add(symbol[1:] if strip_leading_underscore else symbol)
        break

  mo = re.search(r'fatal error LNK1120: (\d+) unresolved externals', output)
  # Without the summary line there is nothing to compare against, whether or
  # not any symbols were parsed; surface the linker output rather than
  # failing later with an AttributeError on None.
  if mo is None:
    raise SystemExit(output)

  # Make sure we have the same number that the linker thinks we have.
  expected = int(mo.group(1))
  if len(result) != expected:
    print(output)
    print('Expecting %d, got %d' % (expected, len(result)))
    # Raised explicitly so that the check survives python -O.
    raise AssertionError(
        'Expecting %d, got %d' % (expected, len(result)))
  return sorted(result)
150
151
def AsCommandLineArgs(items):
  """Returns |items| as response-file text: each item wrapped in double
  quotes, one item per line."""
  quoted = []
  for item in items:
    quoted.append('"' + item + '"')
  return '\n'.join(quoted)
155
156
def OutputNameForIndex(index):
  """Returns the final DLL name for zero-based part |index|: 'chrome.dll' for
  part 0, 'chromeN.dll' for any other part N."""
  return 'chrome.dll' if index == 0 else 'chrome%d.dll' % index
163
164
def ManifestNameForIndex(index):
  """Returns the intermediate manifest file name for zero-based part
  |index|: the DLL name with '.intermediate.manifest' appended."""
  dll_name = OutputNameForIndex(index)
  return dll_name + '.intermediate.manifest'
167
168
def PdbNameForIndex(index):
  """Returns the PDB file name for zero-based part |index|: the DLL name with
  '.pdb' appended."""
  dll_name = OutputNameForIndex(index)
  return dll_name + '.pdb'
171
172
def RunLinker(flags, index, inputs, phase, intermediate_manifest):
  """Writes a response file for part |index| and runs the linker on it.

  The response file is named part<index>_<phase>.rsp and contains the quoted
  |inputs| and |flags| followed by the /ENTRY, /OUT, /MANIFESTFILE and /PDB
  options for this part.

  Returns a (stdout, returncode, output_name) tuple."""
  rsp_name = 'part%d_%s.rsp' % (index, phase)
  with open(rsp_name, 'w') as rsp:
    rsp.write(AsCommandLineArgs(inputs) + '\n')
    rsp.write(AsCommandLineArgs(flags) + '\n')
    output_name = OutputNameForIndex(index)
    manifest_name = ManifestNameForIndex(index)
    rsp.write('/ENTRY:ChromeEmptyEntry@12' + '\n')
    rsp.write('/OUT:' + output_name + '\n')
    rsp.write('/MANIFESTFILE:' + manifest_name + '\n')
    rsp.write('/PDB:' + PdbNameForIndex(index) + '\n')

  command = [GetOriginalLinkerPath(), '@' + rsp_name]
  process = subprocess.Popen(command, stdout=subprocess.PIPE)
  stdout = process.communicate()[0]

  wants_manifest_copy = (
      index == 0 and process.returncode == 0 and intermediate_manifest)
  if wants_manifest_copy:
    # Hack for ninja build. After the linker runs, it does some manifest
    # things and expects there to be a file in this location. We just put it
    # there so it's happy. This is a no-op.
    if os.path.isdir(os.path.dirname(intermediate_manifest)):
      shutil.copyfile(manifest_name, intermediate_manifest)

  return stdout, process.returncode, output_name
196
197
def GetLibObjList(lib):
  """Returns the lines printed by running the linker with /lib /list on
  |lib|, i.e. the names of the members of that .lib."""
  command = [GetOriginalLinkerPath(), '/lib', '/nologo', '/list', lib]
  process = subprocess.Popen(command, stdout=subprocess.PIPE)
  listing = process.communicate()[0]
  return listing.splitlines()
205
206
def ExtractObjFromLib(lib, obj):
  """Extracts the member |obj| of the .lib file |lib| into a newly created
  temp file and returns the path of that temp file. The temp file is not
  deleted by this function."""
  link_exe = GetOriginalLinkerPath()
  # Only the unique name is needed; the linker writes the contents.
  with tempfile.NamedTemporaryFile(
      prefix='split_link_', suffix='.obj', delete=False) as placeholder:
    extracted_path = placeholder.name
  command = [
    link_exe, '/lib', '/nologo', '/extract:' + obj, lib,
    '/out:' + extracted_path]
  subprocess.check_call(command)
  return extracted_path
217
218
def Unmangle(export):
  """Returns the human-presentable name of the mangled symbol |export|.
  Raises the Windows error if the name cannot be undecorated."""
  # Use dbghelp.dll to demangle the name.
  # TODO(scottmg): Perhaps a simple cache? Seems pretty fast though.
  undecorate = ctypes.windll.dbghelp.UnDecorateSymbolName
  capacity = 2048
  undecorated = ctypes.create_string_buffer(capacity)
  chars_written = undecorate(export, ctypes.byref(undecorated), capacity, 0)
  if not chars_written:
    raise ctypes.WinError()
  return undecorated.value
230
231
def IsDataDefinition(export):
  """Determines if a given name is data rather than a function. Always returns
  False for C-style (as opposed to C++-style) names, which includes the empty
  string.

  A name counts as C++-style when it starts with '?'. Such a name is treated
  as data when its unmangled form contains no '('."""
  # startswith() rather than export[0] so that an empty name is not an error.
  if not export.startswith('?'):
    return False

  # If it contains a '(' we assume it's a function.
  return '(' not in Unmangle(export)
240
241
def GenerateDefFiles(unresolved_by_part):
  """Writes one part<N>.def file per part and returns the list of file names.

  The .def file for part N exports every symbol that is unresolved in any
  *other* part. Symbols that look like data are suffixed with ' DATA' unless
  IGNORE_DATA is set."""
  Log('generating .def files')
  deffiles = []
  for index in range(len(unresolved_by_part)):
    deffile = 'part%d.def' % index
    with open(deffile, 'w') as f:
      f.write('LIBRARY %s' % OutputNameForIndex(index) + '\n')
      f.write('EXPORTS' + '\n')
      for other_index, externals in enumerate(unresolved_by_part):
        if other_index == index:
          continue
        export_lines = []
        for export in externals:
          if IsDataDefinition(export) and not IGNORE_DATA:
            suffix = ' DATA'
          else:
            suffix = ''
          export_lines.append('  ' + export + suffix)
        f.write('\n'.join(export_lines) + '\n')
    deffiles.append(deffile)
  return deffiles
262
263
def BuildImportLibs(flags, inputs_by_part, deffiles):
  """Runs the linker once per part with /IMPLIB and /DEF added to |flags| and
  returns the list of import library names (part<N>.lib). The linker's exit
  status is not examined."""
  Log('building import libs')
  import_libs = []
  for index, pair in enumerate(zip(inputs_by_part, deffiles)):
    inputs, deffile = pair
    libfile = 'part%d.lib' % index
    implib_flags = flags + ['/IMPLIB:%s' % libfile, '/DEF:%s' % deffile]
    RunLinker(implib_flags, index, inputs, 'implib', None)
    import_libs.append(libfile)
  return import_libs
275
276
def AttemptLink(flags, inputs_by_part, unresolved_by_part, deffiles,
                import_libs, intermediate_manifest):
  """Tries to run the linker for all parts using the current round of
  generated import libs and .def files. If the link fails, updates the
  unresolved externals list per part.

  Args:
    flags: linker flags shared by every part; not modified.
    inputs_by_part: list, per part, of that part's linker inputs.
    unresolved_by_part: list, per part, of the symbols known so far to be
        unresolved in that part.
    deffiles: list, per part, of the .def file to link with (or None).
    import_libs: list, per part, of that part's import library (or None).
    intermediate_manifest: forwarded to RunLinker.

  Returns:
    An (all_succeeded, dlls, combined_externals) tuple: whether every part
    linked, the output names of the parts that did link, and per part the
    sorted union of the previous and newly reported unresolved symbols.
  """
  dlls = []
  all_succeeded = True
  new_externals = []
  Log('unresolveds now: %r' % [len(part) for part in unresolved_by_part])
  for i, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    Log('running link, part %d' % i)
    # Link against every other part's import library, once one exists.
    others_implibs = import_libs[:]
    others_implibs.pop(i)
    inputs_with_implib = inputs + [lib for lib in others_implibs if lib]
    # Build this part's flags from the shared |flags| each time, so that one
    # part's /DEF (and /LTCG) does not pile up on the parts linked after it.
    part_flags = list(flags)
    if deffile:
      part_flags += ['/DEF:%s' % deffile, '/LTCG']
    stdout, rc, output = RunLinker(
        part_flags, i, inputs_with_implib, 'final', intermediate_manifest)
    if rc != 0:
      all_succeeded = False
      new_externals.append(ParseOutExternals(stdout))
    else:
      new_externals.append([])
      dlls.append(output)
  combined_externals = [sorted(set(prev) | set(new))
                        for prev, new in zip(unresolved_by_part, new_externals)]
  return all_succeeded, dlls, combined_externals
304
305
def ExtractSubObjsTargetedAtAll(
    inputs,
    num_parts,
    description_parts,
    description_all,
    description_all_from_libs):
  """For (lib, obj) tuples in the all_from_libs section, extracts the obj out
  of the lib so that it can be linked into the part the lib is not in.

  Args:
    inputs: all linker input files.
    num_parts: number of parts; must be 2 whenever an obj matches.
    description_parts: per-part regex lists, as passed to PartFor.
    description_all: regexes for inputs that go in all parts.
    description_all_from_libs: (lib_regex, obj_regex) tuples. The obj regex
        is matched case-insensitively against the member names of every
        input matching the lib regex.

  Returns:
    A list with one list per part, holding the paths of the extracted .obj
    files to add to that part (which is whichever part the .lib isn't in).

  Raises:
    SystemExit: if a matching lib is already in all parts, or if a lib
        matched but none of its objs did.
  """
  by_parts = [[] for _ in range(num_parts)]
  for lib_spec, obj_spec in description_all_from_libs:
    for input_file in inputs:
      if not re.search(lib_spec, input_file):
        continue
      match_count = 0
      for obj in GetLibObjList(input_file):
        if not re.search(obj_spec, obj, re.I):
          continue
        extracted_obj = ExtractObjFromLib(input_file, obj)
        i = PartFor(input_file, description_parts, description_all)
        if i == -1:
          raise SystemExit(
              '%s is already in all parts, but matched '
              '%s in all_from_libs' % (input_file, obj))
        # See note in main().
        assert num_parts == 2, "Can't handle > 2 dlls currently"
        # Hand back the extracted file, not the member name inside the
        # archive: only the former is guaranteed to exist on disk.
        by_parts[1 - i].append(extracted_obj)
        match_count += 1
      if match_count == 0:
        raise SystemExit(
            '%s, %s matched a lib, but no objs' % (lib_spec, obj_spec))
  return by_parts
338
339
def main():
  """Splits the link described by the command line into two DLLs.

  Reads the partition description, assigns every linker input to a part,
  then repeatedly links all parts (at most 5 passes), regenerating .def files
  and import libraries from the unresolved externals after each failed pass.
  On success the manifests are embedded into the DLLs.

  Returns 0 on success, or 1 if the parts still fail to link after the last
  pass."""
  flags, inputs, intermediate_manifest = GetFlagsAndInputs(sys.argv[1:])

  partition_file = os.path.normpath(
      os.path.join(BASE_DIR, '../../../build/split_link_partition.py'))
  with open(partition_file) as partition:
    # NOTE(review): the partition file is evaluated as a Python expression,
    # so it must only ever come from a trusted location.
    description = eval(partition.read())
  description_parts = description['parts']

  # We currently assume that if a symbol isn't in dll 0, then it's in dll 1
  # when generating def files. Otherwise, we'd need to do more complex things
  # to figure out where each symbol actually is to assign it to the correct
  # .def file.
  num_parts = len(description_parts)
  assert num_parts == 2, "Can't handle > 2 dlls currently"

  # Parts are matched in reverse order; the results are flipped back below.
  description_parts.reverse()
  objs_from_libs = ExtractSubObjsTargetedAtAll(
      inputs,
      num_parts,
      description_parts,
      description['all'],
      description['all_from_libs'])
  objs_from_libs.reverse()

  inputs_by_part = [[] for _ in range(num_parts)]
  for input_file in inputs:
    part_index = PartFor(input_file, description_parts, description['all'])
    if part_index == -1:
      destinations = inputs_by_part
    else:
      destinations = [inputs_by_part[part_index]]
    for destination in destinations:
      destination.append(input_file)
  inputs_by_part.reverse()

  # Put the subobjs on to the main list.
  for part_index, sub_objs in enumerate(objs_from_libs):
    Log('%d sub .objs added to part %d' % (len(sub_objs), part_index))
    inputs_by_part[part_index].extend(sub_objs)

  unresolved_by_part = [[] for _ in range(num_parts)]
  import_libs = [None] * num_parts
  deffiles = [None] * num_parts

  data_exports = 0
  for pass_number in range(5):
    Log('--- starting pass %d' % pass_number)
    ok, dlls, unresolved_by_part = AttemptLink(
        flags, inputs_by_part, unresolved_by_part, deffiles, import_libs,
        intermediate_manifest)
    if ok:
      break

    # Report (and count) the data exports among what is still unresolved.
    data_exports = 0
    for part_index, externals in enumerate(unresolved_by_part):
      for export in externals:
        if IsDataDefinition(export):
          print('part %d contains data export: %s (aka %s)' % (
              part_index, Unmangle(export), export))
          data_exports += 1

    deffiles = GenerateDefFiles(unresolved_by_part)
    import_libs = BuildImportLibs(flags, inputs_by_part, deffiles)
  else:
    # Every pass failed.
    if data_exports and not IGNORE_DATA:
      print('%d data exports found, see report above.' % data_exports)
      print('These cannot be exported, and must be either duplicated to the '
            'target DLL (if constant), or wrapped in a function.')
    return 1

  mt_exe = GetMtPath()
  for dll_index, dll in enumerate(dlls):
    Log('embedding manifest in %s' % dll)
    subprocess.check_call([
      mt_exe, '-nologo', '-manifest',
      ManifestNameForIndex(dll_index),
      description['manifest'],
      '-outputresource:%s;2' % dll,
    ])

  Log('built %r' % dlls)

  return 0
417
418
# Run main() only when executed as a script, and use its return value as the
# process exit code.
if __name__ == '__main__':
  sys.exit(main())
421