1/* Copyright 2014, Google Inc.
2All rights reserved.
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are
6met:
7
8 * Redistributions of source code must retain the above copyright
9notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above
11copyright notice, this list of conditions and the following disclaimer
12in the documentation and/or other materials provided with the
13distribution.
14 * Neither the name of Google Inc. nor the names of its
15contributors may be used to endorse or promote products derived from
16this software without specific prior written permission.
17
18THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/*
32Tool upload_system_symbols generates and uploads Breakpad symbol files for OS X system libraries.
33
34This tool shells out to the dump_syms and symupload Breakpad tools. In its default mode, this
35will find all dynamic libraries on the system, run dump_syms to create the Breakpad symbol files,
36and then upload them to Google's crash infrastructure.
37
38The tool can also be used to only dump libraries or upload from a directory. See -help for more
39information.
40
By default, both i386 and x86_64 architectures will be dumped and uploaded; pass -arch to
restrict the run to a single architecture.
42*/
43package main
44
45import (
46	"debug/macho"
47	"flag"
48	"fmt"
49	"io"
50	"io/ioutil"
51	"log"
52	"os"
53	"os/exec"
54	"path"
55	"regexp"
56	"strings"
57	"sync"
58	"time"
59)
60
// Command-line flags. Each variable points at its parsed value once
// flag.Parse has run in main.
var (
	// systemRoot (-system-root) is the root of the system to scan. It is
	// required unless -upload-from is given.
	systemRoot = flag.String("system-root", "", "Path to the root of the Mac OS X system whose symbols will be dumped.")

	// dumpArchitecture (-arch) restricts dumping to one CPU architecture. The
	// empty default means every architecture is dumped.
	dumpArchitecture = flag.String("arch", "", "The CPU architecture for which symbols should be dumped. If not specified, dumps all architectures.")

	// dumpOnlyPath (-dump-to) selects dump-only mode: symbols are written to
	// this directory and nothing is uploaded.
	dumpOnlyPath = flag.String("dump-to", "", "Dump the symbols to the specified directory, but do not upload them.")

	// uploadOnlyPath (-upload-from) selects upload-only mode: the files that
	// are already in this directory are uploaded and nothing is dumped.
	uploadOnlyPath = flag.String("upload-from", "", "Upload a directory of symbol files that has been dumped independently.")

	// breakpadTools (-breakpad-tools) is the directory in which the dump_syms
	// and symupload executables are looked up.
	breakpadTools = flag.String("breakpad-tools", "out/Release/", "Path to the Breakpad tools directory, containing dump_syms and symupload.")
)
68
// pathsToScan lists the subpaths of the systemRoot that are searched for
// shared libraries.
var pathsToScan = []string{
	"/Library/QuickTime",
	"/System/Library/Components",
	"/System/Library/Frameworks",
	"/System/Library/PrivateFrameworks",
	"/usr/lib",
}

// uploadServers lists the servers to which each symbol file is uploaded.
var uploadServers = []string{
	"https://clients2.google.com/cr/symbol",
	"https://clients2.google.com/cr/staging_symbol",
}

// blacklistRegexps match the paths that must be excluded from dumping. A path
// is skipped as soon as any one of the expressions matches it.
var blacklistRegexps = []*regexp.Regexp{
	regexp.MustCompile(`/System/Library/Frameworks/Python\.framework/`),
	regexp.MustCompile(`/System/Library/Frameworks/Ruby\.framework/`),
	regexp.MustCompile(`_profile\.dylib$`),
	regexp.MustCompile(`_debug\.dylib$`),
	regexp.MustCompile(`\.a$`),
	regexp.MustCompile(`\.dat$`),
}
95
96func main() {
97	flag.Parse()
98	log.SetFlags(0)
99
100	var uq *UploadQueue
101
102	if *uploadOnlyPath != "" {
103		// -upload-from specified, so handle that case early.
104		uq = StartUploadQueue()
105		uploadFromDirectory(*uploadOnlyPath, uq)
106		uq.Wait()
107		return
108	}
109
110	if *systemRoot == "" {
111		log.Fatal("Need a -system-root to dump symbols for")
112	}
113
114	if *dumpOnlyPath != "" {
115		// -dump-to specified, so make sure that the path is a directory.
116		if fi, err := os.Stat(*dumpOnlyPath); err != nil {
117			log.Fatal("-dump-to location: %v", err)
118		} else if !fi.IsDir() {
119			log.Fatal("-dump-to location is not a directory")
120		}
121	}
122
123	dumpPath := *dumpOnlyPath
124	if *dumpOnlyPath == "" {
125		// If -dump-to was not specified, then run the upload pipeline and create
126		// a temporary dump output directory.
127		uq = StartUploadQueue()
128
129		if p, err := ioutil.TempDir("", "upload_system_symbols"); err != nil {
130			log.Fatal("Failed to create temporary directory: %v", err)
131		} else {
132			dumpPath = p
133			defer os.RemoveAll(p)
134		}
135	}
136
137	dq := StartDumpQueue(*systemRoot, dumpPath, uq)
138	dq.Wait()
139	if uq != nil {
140		uq.Wait()
141	}
142}
143
// WorkerPool tracks a set of worker goroutines so that a caller can block
// until every one of them has returned.
type WorkerPool struct {
	wg sync.WaitGroup
}

// StartWorkerPool spawns numWorkers goroutines, each of which calls workerFunc
// once. A goroutine ends as soon as its workerFunc call returns.
func StartWorkerPool(numWorkers int, workerFunc func()) *WorkerPool {
	pool := &WorkerPool{}

	// runWorker is the body of every goroutine in the pool: run the caller's
	// function, then report completion to the wait group.
	runWorker := func() {
		workerFunc()
		pool.wg.Done()
	}

	for remaining := numWorkers; remaining > 0; remaining-- {
		pool.wg.Add(1)
		go runWorker()
	}
	return pool
}

// Wait blocks until all the workers in the pool have returned from workerFunc.
func (p *WorkerPool) Wait() {
	p.wg.Wait()
}
166
167type UploadQueue struct {
168	*WorkerPool
169	queue chan string
170}
171
172// StartUploadQueue creates a new worker pool and queue, to which paths to
173// Breakpad symbol files may be sent for uploading.
174func StartUploadQueue() *UploadQueue {
175	uq := &UploadQueue{
176		queue: make(chan string, 10),
177	}
178	uq.WorkerPool = StartWorkerPool(5, uq.worker)
179	return uq
180}
181
182// Upload enqueues the contents of filepath to be uploaded.
183func (uq *UploadQueue) Upload(filepath string) {
184	uq.queue <- filepath
185}
186
187// Done tells the queue that no more files need to be uploaded. This must be
188// called before WorkerPool.Wait.
189func (uq *UploadQueue) Done() {
190	close(uq.queue)
191}
192
193func (uq *UploadQueue) worker() {
194	symUpload := path.Join(*breakpadTools, "symupload")
195
196	for symfile := range uq.queue {
197		for _, server := range uploadServers {
198			for i := 0; i < 3; i++ { // Give each upload 3 attempts to succeed.
199				cmd := exec.Command(symUpload, symfile, server)
200				if output, err := cmd.Output(); err == nil {
201					// Success. No retry needed.
202					fmt.Printf("Uploaded %s to %s\n", symfile, server)
203					break
204				} else {
205					log.Printf("Error running symupload(%s, %s), attempt %d: %v: %s\n", symfile, server, i, err, output)
206					time.Sleep(1 * time.Second)
207				}
208			}
209		}
210	}
211}
212
213type DumpQueue struct {
214	*WorkerPool
215	dumpPath string
216	queue    chan dumpRequest
217	uq       *UploadQueue
218}
219
220type dumpRequest struct {
221	path string
222	arch string
223}
224
225// StartDumpQueue creates a new worker pool to find all the Mach-O libraries in
226// root and dump their symbols to dumpPath. If an UploadQueue is passed, the
227// path to the symbol file will be enqueued there, too.
228func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue {
229	dq := &DumpQueue{
230		dumpPath: dumpPath,
231		queue:    make(chan dumpRequest),
232		uq:       uq,
233	}
234	dq.WorkerPool = StartWorkerPool(12, dq.worker)
235
236	findLibsInRoot(root, dq)
237
238	return dq
239}
240
241// DumpSymbols enqueues the filepath to have its symbols dumped in the specified
242// architecture.
243func (dq *DumpQueue) DumpSymbols(filepath string, arch string) {
244	dq.queue <- dumpRequest{
245		path: filepath,
246		arch: arch,
247	}
248}
249
250func (dq *DumpQueue) Wait() {
251	dq.WorkerPool.Wait()
252	if dq.uq != nil {
253		dq.uq.Done()
254	}
255}
256
257func (dq *DumpQueue) done() {
258	close(dq.queue)
259}
260
261func (dq *DumpQueue) worker() {
262	dumpSyms := path.Join(*breakpadTools, "dump_syms")
263
264	for req := range dq.queue {
265		filebase := path.Join(dq.dumpPath, strings.Replace(req.path, "/", "_", -1))
266		symfile := fmt.Sprintf("%s_%s.sym", filebase, req.arch)
267		f, err := os.Create(symfile)
268		if err != nil {
269			log.Fatal("Error creating symbol file:", err)
270		}
271
272		cmd := exec.Command(dumpSyms, "-a", req.arch, req.path)
273		cmd.Stdout = f
274		err = cmd.Run()
275		f.Close()
276
277		if err != nil {
278			os.Remove(symfile)
279			log.Printf("Error running dump_syms(%s, %s): %v\n", req.arch, req.path, err)
280		} else if dq.uq != nil {
281			dq.uq.Upload(symfile)
282		}
283	}
284}
285
286// uploadFromDirectory handles the upload-only case and merely uploads all files in
287// a directory.
288func uploadFromDirectory(directory string, uq *UploadQueue) {
289	d, err := os.Open(directory)
290	if err != nil {
291		log.Fatal("Could not open directory to upload: %v", err)
292	}
293	defer d.Close()
294
295	entries, err := d.Readdirnames(0)
296	if err != nil {
297		log.Fatal("Could not read directory: %v", err)
298	}
299
300	for _, entry := range entries {
301		uq.Upload(path.Join(directory, entry))
302	}
303
304	uq.Done()
305}
306
307// findQueue is an implementation detail of the DumpQueue that finds all the
308// Mach-O files and their architectures.
309type findQueue struct {
310	*WorkerPool
311	queue chan string
312	dq    *DumpQueue
313}
314
315// findLibsInRoot looks in all the pathsToScan in the root and manages the
316// interaction between findQueue and DumpQueue.
317func findLibsInRoot(root string, dq *DumpQueue) {
318	fq := &findQueue{
319		queue: make(chan string, 10),
320		dq:    dq,
321	}
322	fq.WorkerPool = StartWorkerPool(12, fq.worker)
323
324	for _, p := range pathsToScan {
325		fq.findLibsInPath(path.Join(root, p))
326	}
327
328	close(fq.queue)
329	fq.Wait()
330	dq.done()
331}
332
333// findLibsInPath recursively walks the directory tree, sending file paths to
334// test for being Mach-O to the findQueue.
335func (fq *findQueue) findLibsInPath(loc string) {
336	d, err := os.Open(loc)
337	if err != nil {
338		log.Fatal("Could not open %s: %v", loc, err)
339	}
340	defer d.Close()
341
342	for {
343		fis, err := d.Readdir(100)
344		if err != nil && err != io.EOF {
345			log.Fatal("Error reading directory %s: %v", loc, err)
346		}
347
348		for _, fi := range fis {
349			fp := path.Join(loc, fi.Name())
350			if fi.IsDir() {
351				fq.findLibsInPath(fp)
352				continue
353			} else if fi.Mode()&os.ModeSymlink != 0 {
354				continue
355			}
356
357			// Test the blacklist in the worker to not slow down this main loop.
358
359			fq.queue <- fp
360		}
361
362		if err == io.EOF {
363			break
364		}
365	}
366}
367
368func (fq *findQueue) worker() {
369	for fp := range fq.queue {
370		blacklisted := false
371		for _, re := range blacklistRegexps {
372			blacklisted = blacklisted || re.MatchString(fp)
373		}
374		if blacklisted {
375			continue
376		}
377
378		f, err := os.Open(fp)
379		if err != nil {
380			log.Printf("%s: %v", fp, err)
381			continue
382		}
383
384		fatFile, err := macho.NewFatFile(f)
385		if err == nil {
386			// The file is fat, so dump its architectures.
387			for _, fatArch := range fatFile.Arches {
388				fq.dumpMachOFile(fp, fatArch.File)
389			}
390			fatFile.Close()
391		} else if err == macho.ErrNotFat {
392			// The file isn't fat but may still be MachO.
393			thinFile, err := macho.NewFile(f)
394			if err != nil {
395				log.Printf("%s: %v", fp, err)
396				continue
397			}
398			fq.dumpMachOFile(fp, thinFile)
399			thinFile.Close()
400		} else {
401			f.Close()
402		}
403	}
404}
405
406func (fq *findQueue) dumpMachOFile(fp string, image *macho.File) {
407	if image.Type != MachODylib && image.Type != MachOBundle {
408		return
409	}
410
411	arch := getArchStringFromHeader(image.FileHeader)
412	if arch == "" {
413		// Don't know about this architecture type.
414		return
415	}
416
417	if (*dumpArchitecture != "" && *dumpArchitecture == arch) || *dumpArchitecture == "" {
418		fq.dq.DumpSymbols(fp, arch)
419	}
420}
421