1#!/usr/bin/env python
2
3# Copyright (c) 2011 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7# Usage: strip_save_dsym <whatever-arguments-you-would-pass-to-strip>
8#
9# strip_save_dsym is a wrapper around the standard strip utility.  Given an
10# input Mach-O file, strip_save_dsym will save a copy of the file in a "fake"
11# .dSYM bundle for debugging, and then call strip to strip the Mach-O file.
12# Note that the .dSYM file is a "fake" in that it's not a self-contained
13# .dSYM bundle, it just contains a copy of the original (unstripped) Mach-O
14# file, and therefore contains references to object files on the filesystem.
15# The generated .dSYM bundle is therefore unsuitable for debugging in the
16# absence of these .o files.
17#
18# If a .dSYM already exists and has a newer timestamp than the Mach-O file,
19# this utility does nothing.  That allows strip_save_dsym to be run on a file
20# that has already been stripped without trashing the .dSYM.
21#
22# Rationale: the "right" way to generate dSYM bundles, dsymutil, is incredibly
23# slow.  On the other hand, doing a file copy (which is really all that
24# dsymutil does) is comparatively fast.  Since we usually just want to strip
25# a release-mode executable but still be able to debug it, and we don't care
26# so much about generating a hermetic dSYM bundle, we'll prefer the file copy.
27# If a real dSYM is ever needed, it's still possible to create one by running
28# dsymutil and pointing it at the original Mach-O file inside the "fake"
29# bundle, provided that the object files are available.
30
31import errno
32import os
33import re
34import shutil
35import subprocess
36import sys
37import time
38
# Returns a list of architectures contained in a Mach-O file.  The file can be
# a universal (fat) file, in which case there will be one list element for
# each contained architecture, or it can be a thin single-architecture Mach-O
# file, in which case the list will contain a single element identifying the
# architecture.  On error (output that is not recognized as Mach-O, or a
# nonzero exit status from file), writes a message to stderr and returns an
# empty list.  Determines the architecture list by calling file.
def macho_archs(macho):
  macho_types = ["executable",
                 "dynamically linked shared library",
                 "bundle"]
  macho_types_re = "Mach-O (?:64-bit )?(?:" + "|".join(macho_types) + ")"

  # universal_newlines=True makes the pipe deliver text rather than bytes on
  # Python 3, so the str patterns below work on either major version.
  file_cmd = subprocess.Popen(["/usr/bin/file", "-b", "--", macho],
                              stdout=subprocess.PIPE,
                              universal_newlines=True)

  # communicate() reads all of the output, closes the pipe, and reaps the
  # child, so the exit status is checked on every path below, including the
  # thin-file one.
  lines = file_cmd.communicate()[0].split("\n")

  archs = []

  # "".split("\n") is [""], so there is always a first line to look at.
  type_line = lines[0]
  thin_match = re.match(r"^%s (.*)$" % macho_types_re, type_line)
  if thin_match:
    archs.append(thin_match.group(1))
  else:
    # \d+ rather than .* so that int() below cannot raise on odd output.
    fat_match = re.match(
        r"^Mach-O universal binary with (\d+) architectures$", type_line)
    if fat_match:
      arch_re = re.compile(
          r"^.* \(for architecture (.*)\):\t%s .*$" % macho_types_re)
      # One line per contained architecture follows the summary line.
      for arch_line in lines[1:1 + int(fat_match.group(1))]:
        arch_match = arch_re.match(arch_line)
        if arch_match:
          archs.append(arch_match.group(1))

  if file_cmd.returncode != 0:
    archs = []

  if not archs:
    sys.stderr.write("No architectures in %s\n" % macho)

  return archs
80
# Scans otool_lines, an iterable of text lines as printed by "otool -l", for
# the first LC_UUID load command.  Returns the UUID string exactly as
# assembled from the output (case is not normalized), or None if no LC_UUID
# command was read successfully.  The iterable is always consumed to the end,
# so a pipe passed in here is fully drained.
def _first_lc_uuid(otool_lines):
  # SEEK_COMMAND: nothing UUID-related has been seen yet.  CHECK_CMD: a load
  # command has begun, but it may not be an LC_UUID load command.
  # CHECK_SIZE, READ_UUID, and READ_OLD_TAIL are intermediate states while
  # reading an LC_UUID command.  DONE is the terminal state for a successful
  # LC_UUID read.  FAILED is the error state.
  (SEEK_COMMAND, CHECK_CMD, CHECK_SIZE, READ_UUID, READ_OLD_TAIL,
   DONE, FAILED) = range(7)

  load_command_re = re.compile("^Load command .*$")
  cmd_uuid_re = re.compile("^     cmd LC_UUID$")
  cmdsize_re = re.compile("^ cmdsize 24$")
  # The UUID display format changed in the version of otool shipping with
  # the Xcode 3.2.2 prerelease.  The new format is traditional:
  #    uuid 4D7135B2-9C56-C5F5-5F49-A994258E0955
  # and with Xcode 3.2.6, the line is indented one more space:
  #     uuid 4D7135B2-9C56-C5F5-5F49-A994258E0955
  # The old format, from cctools-750 and older's otool, breaks the UUID up
  # into a sequence of bytes:
  #    uuid 0x4d 0x71 0x35 0xb2 0x9c 0x56 0xc5 0xf5
  #         0x5f 0x49 0xa9 0x94 0x25 0x8e 0x09 0x55
  new_uuid_re = re.compile("^ {3,4}uuid (.{8}-.{4}-.{4}-.{4}-.{12})$")
  old_uuid_head_re = re.compile("^   uuid 0x(..) 0x(..) 0x(..) 0x(..) "
                                "0x(..) 0x(..) 0x(..) 0x(..)$")
  old_uuid_tail_re = re.compile("^        0x(..) 0x(..) 0x(..) 0x(..) "
                                "0x(..) 0x(..) 0x(..) 0x(..)$")

  state = SEEK_COMMAND
  uuid = ""
  for otool_line in otool_lines:
    if state == SEEK_COMMAND:
      if load_command_re.match(otool_line):
        state = CHECK_CMD
    elif state == CHECK_CMD:
      if cmd_uuid_re.match(otool_line):
        state = CHECK_SIZE
      else:
        # Some other load command; go back to looking for the next one.
        state = SEEK_COMMAND
    elif state == CHECK_SIZE:
      if cmdsize_re.match(otool_line):
        state = READ_UUID
      else:
        state = FAILED
    elif state == READ_UUID:
      new_match = new_uuid_re.match(otool_line)
      head_match = old_uuid_head_re.match(otool_line)
      if new_match:
        uuid = new_match.group(1)
        # There is no second line to read in the new format.
        state = DONE
      elif head_match:
        # First eight bytes: XXXXXXXX-XXXX-XXXX- (the rest is on the next
        # line).
        uuid = "%s%s%s%s-%s%s-%s%s-" % head_match.groups()
        state = READ_OLD_TAIL
      else:
        state = FAILED
    elif state == READ_OLD_TAIL:
      tail_match = old_uuid_tail_re.match(otool_line)
      if tail_match:
        # Last eight bytes: XXXX-XXXXXXXXXXXX.
        uuid += "%s%s-%s%s%s%s%s%s" % tail_match.groups()
        state = DONE
      else:
        state = FAILED
    # DONE and FAILED are terminal: remaining lines are read and ignored.

  if state == DONE:
    return uuid
  return None

# Returns a dictionary mapping architectures contained in the file as returned
# by macho_archs to the (uppercased) UUID from the LC_UUID load command for
# that architecture.  Architectures with no LC_UUID load command, or for which
# otool exits with a nonzero status, are omitted from the dictionary.
# Determines the UUID value by calling otool.
def macho_uuids(macho):
  uuids = {}

  archs = macho_archs(macho)
  if not archs:
    # macho_archs has already reported the problem.
    return uuids

  for arch in archs:
    if arch == "":
      continue

    # universal_newlines=True makes the pipe deliver text rather than bytes
    # on Python 3, so the str patterns in _first_lc_uuid work on either
    # major version.
    otool_cmd = subprocess.Popen(["/usr/bin/otool", "-arch", arch, "-l", "-",
                                  macho],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
    uuid = _first_lc_uuid(otool_cmd.stdout)
    otool_cmd.stdout.close()

    # wait() is evaluated first, so the child is always reaped.
    if otool_cmd.wait() == 0 and uuid is not None:
      uuids[arch] = uuid.upper()

  if not uuids:
    sys.stderr.write("No UUIDs in %s\n" % macho)

  return uuids
172
# Works out where the .dSYM for the Mach-O file at macho should live, taking
# the WRAPPER_NAME and BUILT_PRODUCTS_DIR environment variables into account
# when they are present.  Returns the path with ".dSYM" appended.
def dsym_path(macho):
  # When building a bundle, the .dSYM goes next to the bundle rather than
  # next to the Mach-O file inside it.  xcodebuild sets WRAPPER_NAME to the
  # name of the bundle, so its presence is what selects that layout.
  wrapper_name = os.environ.get("WRAPPER_NAME")
  if wrapper_name is None:
    return macho + ".dSYM"

  products_dir = os.environ.get("BUILT_PRODUCTS_DIR")
  if products_dir is None:
    return wrapper_name + ".dSYM"

  return os.path.join(products_dir, wrapper_name) + ".dSYM"
192
# Creates a fake .dSYM bundle at dsym for macho, a Mach-O image.  The
# architectures and UUIDs recorded in the bundle's Info.plist are read from
# macho with macho_uuids.  Returns False, without creating anything, if no
# UUIDs could be determined; returns True once the bundle has been written.
# OSError/IOError from creating directories or copying and writing files
# propagate to the caller.
def make_fake_dsym(macho, dsym):
  # Function-scope import: nothing else in this file needs XML escaping.
  from xml.sax.saxutils import escape

  uuids = macho_uuids(macho)
  if not uuids:
    return False

  dwarf_dir = os.path.join(dsym, "Contents", "Resources", "DWARF")
  dwarf_file = os.path.join(dwarf_dir, os.path.basename(macho))
  try:
    os.makedirs(dwarf_dir)
  except OSError as e:
    # An existing bundle directory is fine; it is simply refreshed.
    if e.errno != errno.EEXIST:
      raise
  shutil.copyfile(macho, dwarf_file)

  # info_template is the same as what dsymutil would have written, with the
  # addition of the fake_dsym key.
  info_template = \
'''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
	<dict>
		<key>CFBundleDevelopmentRegion</key>
		<string>English</string>
		<key>CFBundleIdentifier</key>
		<string>com.apple.xcode.dsym.%(root_name)s</string>
		<key>CFBundleInfoDictionaryVersion</key>
		<string>6.0</string>
		<key>CFBundlePackageType</key>
		<string>dSYM</string>
		<key>CFBundleSignature</key>
		<string>????</string>
		<key>CFBundleShortVersionString</key>
		<string>1.0</string>
		<key>CFBundleVersion</key>
		<string>1</string>
		<key>dSYM_UUID</key>
		<dict>
%(uuid_dict)s		</dict>
		<key>fake_dsym</key>
		<true/>
	</dict>
</plist>
'''

  # whatever.dSYM without .dSYM.  Only strip the suffix when it is really
  # there, rather than blindly chopping five characters.
  root_name = os.path.basename(dsym)
  if root_name.endswith(".dSYM"):
    root_name = root_name[:-len(".dSYM")]

  # Values are XML-escaped so that a name containing &, < or > still yields
  # a well-formed plist.
  uuid_dict = "".join(
      "\t\t\t<key>%s</key>\n\t\t\t<string>%s</string>\n" %
          (escape(arch), escape(uuids[arch]))
      for arch in sorted(uuids))
  info_dict = {
    "root_name": escape(root_name),
    "uuid_dict": uuid_dict,
  }
  info_contents = info_template % info_dict
  info_file = os.path.join(dsym, "Contents", "Info.plist")
  # The with block closes the file even if the write fails.
  with open(info_file, "w") as info_fd:
    info_fd.write(info_contents)

  return True
255
# For a Mach-O file, determines where the .dSYM bundle should be located.  If
# the bundle does not exist or has a modification time older than the Mach-O
# file, calls make_fake_dsym to create a fake .dSYM bundle there, then strips
# the Mach-O file and sets the modification time on the .dSYM bundle and Mach-O
# file to be identical.  If the bundle is already up to date, does nothing.
#
# Returns True on success (including the nothing-to-do case).  Returns False
# if the fake .dSYM could not be created or if strip exited with a nonzero
# status; in the latter case the .dSYM bundle is removed again.  OSError from
# stat, utime, or launching strip propagates to the caller.
#
# Note that strip is run with the process's own sys.argv[1:], not with
# arguments derived from macho.
def strip_and_make_fake_dsym(macho):
  dsym = dsym_path(macho)
  macho_stat = os.stat(macho)
  try:
    dsym_stat = os.stat(dsym)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
    # No bundle yet; one needs to be made.
    dsym_stat = None

  if dsym_stat is not None and dsym_stat.st_mtime >= macho_stat.st_mtime:
    # The .dSYM is at least as new as the Mach-O file, which means the file
    # has already been stripped.  Leave both alone.
    return True

  # Make a .dSYM bundle
  if not make_fake_dsym(macho, dsym):
    return False

  # Strip the Mach-O file.  The bundle is only kept if strip succeeds; the
  # finally clause also cleans it up if launching strip raises.
  stripped = False
  try:
    strip_cmdline = ['xcrun', 'strip'] + sys.argv[1:]
    stripped = subprocess.Popen(strip_cmdline).wait() == 0
  finally:
    if not stripped:
      shutil.rmtree(dsym)

  if not stripped:
    # The bundle was just removed, so there is nothing left to touch.  Report
    # failure instead of letting os.utime raise on the missing path.
    return False

  # Update modification time on the Mach-O file and .dSYM bundle
  now = time.time()
  os.utime(macho, (now, now))
  os.utime(dsym, (now, now))

  return True
293
# Command-line entry point.  argv is a full argument vector (program name
# first) and defaults to sys.argv.  Picks the single file to strip out of the
# strip-style arguments and hands it to strip_and_make_fake_dsym.  Returns 0
# on success and 1 on failure, including when no file or more than one file
# is named.
def main(argv=None):
  if argv is None:
    argv = sys.argv

  # This only supports operating on one file at a time.  Look at the arguments
  # to strip to figure out what the source to be stripped is.  Arguments are
  # processed in the same way that strip does, although to reduce complexity,
  # this doesn't do all of the same checking as strip.  For example, strip
  # has no -Z switch and would treat -Z on the command line as an error.  For
  # the purposes this is needed for, that's fine.
  macho = None
  process_switches = True
  ignore_argument = False
  for arg in argv[1:]:
    if ignore_argument:
      # This is the value belonging to the preceding switch.
      ignore_argument = False
      continue
    if process_switches:
      if arg == "-":
        # A lone "-" ends switch processing; everything after it is a file,
        # even if it begins with a dash.
        process_switches = False
      # strip has these switches accept an argument:
      if arg in ("-s", "-R", "-d", "-o", "-arch"):
        ignore_argument = True
      # startswith rather than arg[0] so that an empty-string argument is
      # treated as a (bogus) file name instead of raising IndexError.
      if arg.startswith("-"):
        continue
    if macho is not None:
      sys.stderr.write("Too many things to strip\n")
      return 1
    macho = arg

  if macho is None:
    sys.stderr.write("Nothing to strip\n")
    return 1

  if not strip_and_make_fake_dsym(macho):
    return 1

  return 0
333
# When run as a script (rather than imported), pass the command line to
# main() and use its return value as the process exit status.
if __name__ == "__main__":
  sys.exit(main(sys.argv))
336