1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO
6//  - Make capturing system call arguments optional and the number configurable.
7//  - Lots of places depend on the ABI so that we can modify EAX or EDX, this
8//    is safe, but these could be moved to be saved and restored anyway.
9//  - Understand the loader better, and make some more meaningful hooks with
10//    proper data collection and durations.  Right now it's just noise.
11//  - Get the returned pointer from AllocateHeap.
12
13#include <windows.h>
14
15#include <stdio.h>
16
17#include <map>
18#include <string>
19
20#include "assembler.h"
21#include "logging.h"
22#include "rdtsc.h"
23#include "sym_resolver.h"
24#include "syscall_map.h"
25
26#include "sidestep/mini_disassembler.h"
27
28namespace {
29
30std::string JSONString(const std::string& str) {
31  static const char hextable[] = "0123456789abcdef";
32  std::string out;
33  out.push_back('"');
34  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
35    unsigned char c = static_cast<unsigned char>(*it);
36    switch (c) {
37      case '\\':
38      case '"':
39      case '\'':
40        out.push_back('\\'); out.push_back(c);
41        break;
42      default:
43        if (c < 20 || c >= 127) {
44          out.push_back('\\'); out.push_back('x');
45          out.push_back(hextable[c >> 4]); out.push_back(hextable[c & 0xf]);
46        } else {
47          // Unescaped.
48          out.push_back(c);
49        }
50        break;
51    }
52  }
53  out.push_back('"');
54  return out;
55}
56
57}  // namespace
58
59class Playground {
60 public:
61  static const int kPlaygroundSize = 64 * 1024 * 1024;
62
63  // Encapsulate the configuration options to the playground.
64  class Options {
65   public:
66    Options()
67        : stack_unwind_depth_(0),
68          log_heap_(false),
69          log_lock_(false),
70          vista_(false) { }
71
72
73    // The maximum amount of frames we should unwind from the call stack.
74    int stack_unwind_depth() { return stack_unwind_depth_; }
75    void set_stack_unwind_depth(int depth) { stack_unwind_depth_ = depth; }
76
77    // Whether we should log heap operations (alloc / free).
78    bool log_heap() { return log_heap_; }
79    void set_log_heap(bool x) { log_heap_ = x; }
80
81    // Whether we should log lock (critical section) operations.
82    bool log_lock() { return log_lock_; }
83    void set_log_lock(bool x) { log_lock_ = x; }
84
85    // Whether we are running on Vista.
86    bool vista() { return vista_; }
87    void set_vista(bool x) { vista_ = x; }
88
89   private:
90    int stack_unwind_depth_;
91    bool log_heap_;
92    bool log_lock_;
93    bool vista_;
94  };
95
96  Playground(HANDLE proc, const Options& options)
97      : proc_(proc),
98        remote_addr_(NULL),
99        resolver_("ntdll.dll"),
100        options_(options) {
101    // We copy the entire playground into the remote process, and we have
102    // fields that we expect to be zero.  TODO this could be a lot better.
103    memset(buf_, 0, sizeof(buf_));
104  }
105
106  void AllocateInRemote() {
107    // Try to get something out of the way and easy to debug.
108    static void* kPlaygroundAddr = reinterpret_cast<void*>(0x66660000);
109    // Allocate our playground memory in the target process.  This is a big
110    // slab of read/write/execute memory that we use for our code
111    // instrumentation, and the memory for writing out our logging events.
112    remote_addr_ = reinterpret_cast<char*>(
113        VirtualAllocEx(proc_,
114                       kPlaygroundAddr,
115                       kPlaygroundSize,
116                       MEM_COMMIT | MEM_RESERVE,
117                       PAGE_EXECUTE_READWRITE));
118    if (remote_addr_ == NULL || remote_addr_ != kPlaygroundAddr) {
119      NOTREACHED("Falied to allocate playground: 0x%08x", remote_addr_);
120    }
121  }
122
123  void CopyToRemote() {
124    WriteProcessMemory(proc_,
125                       remote_addr_,
126                       buf_,
127                       sizeof(buf_),
128                       NULL);
129  }
130
131  void CopyFromRemote() {
132    SIZE_T size = 0;
133    ReadProcessMemory(proc_,
134                      remote_addr_,
135                      buf_,
136                      sizeof(buf_),
137                      &size);
138  }
139
140  enum EventRecordType {
141    EVENT_TYPE_LDR              = 0,
142    EVENT_TYPE_THREADBEGIN      = 1,
143    EVENT_TYPE_THREADNAME       = 2,
144    EVENT_TYPE_EXCEPTION        = 3,
145    EVENT_TYPE_PROCESSEXIT      = 4,
146    EVENT_TYPE_CREATETHREAD     = 5,
147    EVENT_TYPE_THREADEXIT       = 6,
148    EVENT_TYPE_ALLOCHEAP        = 7,
149    EVENT_TYPE_FREEHEAP         = 8,
150    EVENT_TYPE_SYSCALL          = 9,
151    EVENT_TYPE_ENTER_CS         = 10,
152    EVENT_TYPE_TRYENTER_CS      = 11,
153    EVENT_TYPE_LEAVE_CS         = 12,
154    EVENT_TYPE_APC              = 13
155  };
156
157  static const int kThreadNameBufSize = 64;
158  static const int kLdrBufSize = 512;  // Looks like internal buffer is 512.
159
160  static const int kCodeBlockSize               = 256;
161
162  static const int kOffLdrCode                  = 0 * kCodeBlockSize;
163  static const int kOffCreateThreadCode         = 1 * kCodeBlockSize;
164  static const int kOffThreadCode               = 2 * kCodeBlockSize;
165  static const int kOffExpCode                  = 3 * kCodeBlockSize;
166  static const int kOffExitCode                 = 4 * kCodeBlockSize;
167  static const int kOffThreadExitCode           = 5 * kCodeBlockSize;
168  static const int kOffAllocHeapCode            = 6 * kCodeBlockSize;
169  static const int kOffFreeHeapCode             = 7 * kCodeBlockSize;
170  static const int kOffSyscallCode              = 8 * kCodeBlockSize;
171  static const int kOffEnterCritSecCode         = 9 * kCodeBlockSize;
172  static const int kOffTryEnterCritSecCode      = 10 * kCodeBlockSize;
173  static const int kOffLeaveCritSecCode         = 11 * kCodeBlockSize;
174  static const int kOffApcDispCode              = 12 * kCodeBlockSize;
175
176  static const int kOffLogAreaPtr               = 4096;
177  static const int kOffLogAreaData              = 4096 + 4;
178
179  static const int kRecordHeaderSize = 8 + 4 + 4 + 4;
180
181  // Given the address to the start of a function, patch the function to jump
182  // to a given offset into the playground.  This function will try to take
183  // advantage of hotpatch code, if the function is prefixed with 5 0x90 bytes.
184  // Returns a std::string of any assembly instructions that must be relocated,
185  // as they were overwritten during patching.
186  std::string PatchPreamble(int func_addr, int playground_off) {
187    sidestep::MiniDisassembler disas;
188    int stub_addr = reinterpret_cast<int>(remote_addr_ + playground_off);
189
190    std::string instrs;
191
192    char buf[15];
193    if (ReadProcessMemory(proc_,
194                          reinterpret_cast<void*>(func_addr - 5),
195                          buf,
196                          sizeof(buf),
197                          NULL) == 0) {
198      NOTREACHED("ReadProcessMemory(0x%08x) failed: %d",
199                 func_addr - 5, GetLastError());
200    }
201
202    // TODO(deanm): It seems in more recent updates the compiler is generating
203    // complicated sequences for padding / alignment.  For example:
204    // 00000000  8DA42400000000    lea esp,[esp+0x0]
205    // 00000007  8D4900            lea ecx,[ecx+0x0]
206    // is used for a 16 byte alignment.  We need a better way of handling this.
207    if (memcmp(buf, "\x90\x90\x90\x90\x90", 5) == 0 ||
208        memcmp(buf, "\x00\x8D\x64\x24\x00", 5) == 0 ||
209        memcmp(buf, "\x00\x00\x8D\x49\x00", 5) == 0) {
210      unsigned int instr_bytes = 0;
211
212      // We might have a hotpatch no-op of mov edi, edi "\x8b\xff".  It is a
213      // bit of a waste to relocate it, but it makes everything simpler.
214
215      while (instr_bytes < 2) {
216        if (disas.Disassemble(
217            reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),
218            &instr_bytes) != sidestep::IT_GENERIC) {
219          NOTREACHED("Could not disassemble or relocate instruction.");
220        }
221        // We only read 10 bytes worth of instructions.
222        CHECK(instr_bytes < 10);
223      }
224
225      instrs.assign(buf + 5, instr_bytes);
226
227      // We have a hotpatch prefix of 5 nop bytes.  We can use this for our
228      // long jump, and then overwrite the first 2 bytes to jump back to there.
229      CodeBuffer patch(buf);
230      int off = stub_addr - func_addr;
231      patch.jmp_rel(off);
232      patch.jmp_rel_short(-2 - 5);
233    } else {
234      // We need a full 5 bytes for the jump.
235      unsigned int instr_bytes = 0;
236      while (instr_bytes < 5) {
237        if (disas.Disassemble(
238            reinterpret_cast<unsigned char*>(buf + 5 + instr_bytes),
239            &instr_bytes) != sidestep::IT_GENERIC) {
240          NOTREACHED("Could not disassemble or relocate instruction.");
241        }
242        // We only read 10 bytes worth of instructions.
243        CHECK(instr_bytes < 10);
244      }
245
246      instrs.assign(buf + 5, instr_bytes);
247
248      // Overwrite the first 5 bytes with a relative jump to our stub.
249      CodeBuffer patch(buf + 5);
250      int off = stub_addr - (func_addr + 5);
251      patch.jmp_rel(off);
252    }
253
254    // Write back the bytes, we are really probably writing more back than we
255    // need to, but it shouldn't really matter.
256    if (WriteProcessMemory(proc_,
257                           reinterpret_cast<void*>(func_addr - 5),
258                           buf,
259                           sizeof(buf),
260                           NULL) == 0) {
261      NOTREACHED("WriteProcessMemory(0x%08x) failed: %d",
262                 func_addr - 5, GetLastError());
263    }
264
265    return instrs;
266  }
267
268  std::string PatchPreamble(const char* func_name, int playground_off) {
269    return PatchPreamble(
270        reinterpret_cast<int>(resolver_.Resolve(func_name)), playground_off);
271  }
272
273  // Restore any instructions that needed to be moved to make space for our
274  // patch and jump back to the original code.
275  void ResumeOriginalFunction(const char* func_name,
276                              const std::string& moved_instructions,
277                              int stub_offset,
278                              CodeBuffer* cb) {
279    cb->emit_bytes(moved_instructions);
280    int off = resolver_.Resolve(func_name) +
281              moved_instructions.size() -
282              (remote_addr_ + stub_offset + cb->size() + 5);
283    cb->jmp_rel(off);
284  }
285
286  // Makes a call to NtQueryPerformanceCounter, writing the timestamp to the
287  // buffer pointed to by EDI.  EDI it not incremented.  EAX is not preserved.
288  void AssembleQueryPerformanceCounter(CodeBuffer* cb) {
289    // Make a call to NtQueryPerformanceCounter and write the result into
290    // the log area.  The buffer we write to should be aligned, but we should
291    // garantee that anyway for the logging area for performance.
292    cb->push_imm(0);       // PerformanceFrequency
293    cb->push(EDI);         // PerformanceCounter
294    cb->mov_imm(EAX, reinterpret_cast<int>(
295        resolver_.Resolve("ntdll!NtQueryPerformanceCounter")));
296    cb->call(EAX);
297  }
298
299  // This is the common log setup routine.  It will allocate a new log entry,
300  // and write out the common log header to the event entry.  The header is:
301  // is [ 64bit QPC ] [ 32bit cpu id ] [ 32bit thread id ] [ 32bit rec id ]
302  // EDI will be left pointing to the log entry, with |space| bytes left for
303  // type specific data.  All other registers should not be clobbered.
304  void AssembleHeaderCode(CodeBuffer* cb, EventRecordType rt, int space) {
305    cb->push(EAX);
306    cb->push(EDX);
307    cb->push(ECX);
308    cb->push(ESI);
309
310    int unwind_depth = options_.stack_unwind_depth();
311
312    // Load EDI with the number of bytes we want for our log entry, this will
313    // be used in the atomic increment to allocate the log entry.
314    cb->mov_imm(EDI, kRecordHeaderSize + (unwind_depth * 4) + space);
315    // Do the increment and have EDI point to our log entry buffer space.
316    cb->mov_imm(EDX, reinterpret_cast<int>(remote_addr_ + kOffLogAreaPtr));
317    cb->inc_atomic(EDX, EDI);
318    // EDI is the buffer offset, make it a pointer to the record entry.
319    cb->add_imm(EDI, reinterpret_cast<int>(remote_addr_ + kOffLogAreaData));
320
321    AssembleQueryPerformanceCounter(cb);
322    cb->add_imm(EDI, 8);
323
324    cb->which_cpu();
325    cb->stosd();
326
327    cb->which_thread();
328    cb->stosd();
329
330    // Stack unwinding, follow EBP to the maximum number of frames, and make
331    // sure that it stays on the stack (between ESP and TEB.StackBase).
332    if (unwind_depth > 0) {
333      cb->mov_imm(ECX, unwind_depth);
334      cb->fs(); cb->mov(EDX, Operand(0x04));  // get TEB.StackBase
335
336      // Start at EBP.
337      cb->mov(EAX, EBP);
338
339      Label unwind_loop, bail;
340      cb->bind(&unwind_loop);
341
342      // Bail if (EAX < ESP) (below the stack)
343      cb->cmp(EAX, ESP);
344      cb->jcc(below, &bail);
345      // Bail if (EAX >= EDX) (above the stack)
346      cb->cmp(EAX, EDX);
347      cb->jcc(above_equal, &bail);
348
349      // We have a valid stack pointer, it should point to something like:
350      //   [ saved frame pointer ] [ return address ] [ arguments ... ]
351      cb->mov(ESI, EAX);
352      cb->lodsd();  // Get the new stack pointer to follow in EAX
353      cb->movsd();  // Copy the return address to the log area.
354
355      cb->loop(&unwind_loop);
356
357      cb->bind(&bail);
358      // If we did managed to unwind to the max, fill the rest with 0 (really
359      // we just want to inc EDI to the end, and this is an easy way).
360      cb->mov_imm(EAX, 0);  // TODO use an xor
361      cb->rep(); cb->stosd();
362    }
363
364    // Store the type for this record entry.
365    cb->mov_imm(EAX, rt);
366    cb->stosd();
367
368    cb->pop(ESI);
369    cb->pop(ECX);
370    cb->pop(EDX);
371    cb->pop(EAX);
372  }
373
374  void PatchLoader() {
375    static const EventRecordType kRecordType =  EVENT_TYPE_LDR;
376    static const char* kFuncName = "ntdll!DebugPrint";
377    static const int kStubOffset = kOffLdrCode;
378
379    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
380
381    // Set ShowSnaps to one to get the print routines to be called.
382    char enabled = 1;
383    WriteProcessMemory(
384        proc_, resolver_.Resolve("ntdll!ShowSnaps"), &enabled, 1, NULL);
385
386    CodeBuffer cb(buf_ + kStubOffset);
387
388    cb.pop(EDX);  // return address
389    cb.pop(EAX);  // First param in eax
390    cb.push(ESI);
391    cb.push(EDI);
392    cb.push(EDX);
393
394    cb.mov(ESI, EAX);  // ESI points at the string structure.
395
396    // We used to do variable length based on the length supplied in the str
397    // structure, but it's easier (and sloppier) to just copy a fixed amount.
398    AssembleHeaderCode(&cb, kRecordType, kLdrBufSize);
399
400    cb.lodsd();        // Load the character count
401    cb.lodsd();        // Load the char*
402    cb.mov(ESI, EAX);
403    cb.mov_imm(ECX, kLdrBufSize / 4);  // load the char count as the rep count
404    cb.rep(); cb.movsb();  // Copy the string to the logging buffer
405
406    // Return
407    cb.pop(EDX);
408    cb.pop(EDI);
409    cb.pop(ESI);
410    cb.pop(ECX);  // don't care
411    cb.pop(ECX);  // don't care
412    cb.jmp(EDX);
413  }
414
415  void PatchCreateThread() {
416    static const EventRecordType kRecordType =  EVENT_TYPE_CREATETHREAD;
417    static const char* kFuncName =
418      options_.vista() ? "ntdll!NtCreateThreadEx" : "ntdll!NtCreateThread";
419    static const int kStubOffset = kOffCreateThreadCode;
420
421    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
422
423    CodeBuffer cb(buf_ + kStubOffset);
424
425    cb.push(EDI);
426    cb.push(ESI);
427
428    AssembleHeaderCode(&cb, kRecordType, 8);
429
430    cb.mov(EAX, Operand(ESP, 0x18 + 8));
431
432    // Super ugly hack.  To coorrelate between creating a thread and the new
433    // thread running, we stash something to identify the creating event when
434    // we log the created event.  We just use a pointer to the event log data
435    // since this will be unique and can tie the two events together.  We pass
436    // it by writing into the context structure, so it will be passed in ESI.
437    cb.add_imm(EAX, 0xa0);
438    cb.push(EDI);
439    cb.mov(EDI, EAX);
440    cb.pop(EAX);
441    cb.push(EAX);
442    cb.stosd();
443
444    // Get and save CONTEXT.Eip
445    cb.mov(ESI, EDI);
446    cb.add_imm(ESI, 20);
447    cb.pop(EDI);
448    cb.mov(EAX, EDI);
449    cb.stosd();  // Record the event identifier to tie together the events.
450    cb.movsd();  // write Eip to the log event
451
452    cb.pop(ESI);
453    cb.pop(EDI);
454
455    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
456  }
457
458  void PatchThreadBegin() {
459    static const EventRecordType kRecordType =  EVENT_TYPE_THREADBEGIN;
460    static const char* kFuncName = "ntdll!CsrNewThread";
461    static const int kStubOffset = kOffThreadCode;
462
463    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
464
465    CodeBuffer cb(buf_ + kStubOffset);
466
467    cb.push(EDI);
468
469    AssembleHeaderCode(&cb, kRecordType, 8);
470
471    cb.mov(EAX, ESI);  // We stashed the creator's eventid in the context ESI.
472    cb.stosd();
473
474    // TODO(deanm): The pointer is going to point into the CRT or something,
475    // should we dig deeper to get more information about the real entry?
476    cb.mov(EAX, Operand(EBP, 0x8));
477    cb.stosd();
478    cb.pop(EDI);
479
480    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
481  }
482
483  void PatchThreadBeginVista() {
484    static const EventRecordType kRecordType =  EVENT_TYPE_THREADBEGIN;
485    static const char* kFuncName = "ntdll!_RtlUserThreadStart";
486    static const int kStubOffset = kOffThreadCode;
487
488    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
489
490    CodeBuffer cb(buf_ + kStubOffset);
491
492    cb.push(EDI);
493
494    AssembleHeaderCode(&cb, kRecordType, 8);
495
496    cb.mov(EAX, ESI);  // We stashed the creator's eventid in the context ESI.
497    cb.stosd();
498
499    // TODO(deanm): The pointer is going to point into the CRT or something,
500    // should we dig deeper to get more information about the real entry?
501    //cb.mov(EAX, Operand(EBP, 0x8));
502    cb.mov_imm(EAX, 0);
503    cb.stosd();
504    cb.pop(EDI);
505
506    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
507  }
508
509  // Intercept exception dispatching so we can catch when threads set a thread
510  // name (which is an exception with a special code).  TODO it could be
511  // useful to log all exceptions.
512  void PatchSetThreadName() {
513    static const EventRecordType kRecordType =  EVENT_TYPE_THREADNAME;
514    static const char* kFuncName = "ntdll!RtlDispatchException";
515    static const int kStubOffset = kOffExpCode;
516
517    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
518
519    CodeBuffer cb(buf_ + kStubOffset);
520
521    cb.pop(EDX);  // return address
522    cb.pop(EAX);  // ExceptionRecord
523    cb.push(EAX);
524    cb.push(EDX);
525
526    cb.push(ESI);
527
528    cb.mov(ESI, EAX);
529    cb.lodsd();
530
531    Label bail;
532    // exception code
533    cb.cmp_imm(EAX, 0x406D1388);
534    cb.jcc(not_equal, &bail);
535
536    cb.push(EDI);
537
538    AssembleHeaderCode(&cb, kRecordType, kThreadNameBufSize);
539
540    // Fetch the second parameter.
541    for (int i = 0; i < 6; ++i) {
542      cb.lodsd();
543    }
544
545    // TODO This is sloppy and we could run into unmapped memory...
546    cb.mov(ESI, EAX);
547    cb.mov_imm(ECX, kThreadNameBufSize / 4);
548    cb.rep(); cb.movsd();
549
550    cb.pop(EDI);
551
552    cb.bind(&bail);
553    cb.pop(ESI);
554
555    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
556  }
557
558
559  void PatchThreadExit() {
560    static const EventRecordType kRecordType =  EVENT_TYPE_THREADEXIT;
561    static const char* kFuncName = "ntdll!LdrShutdownThread";
562    static const int kStubOffset = kOffThreadExitCode;
563
564    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
565    CodeBuffer cb(buf_ + kStubOffset);
566
567    cb.push(EDI);
568    AssembleHeaderCode(&cb, kRecordType, 0);
569    cb.pop(EDI);
570
571    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
572  }
573
574  void PatchAllocateHeap() {
575    static const EventRecordType kRecordType =  EVENT_TYPE_ALLOCHEAP;
576    static const char* kFuncName = "ntdll!RtlAllocateHeap";
577    static const int kStubOffset = kOffAllocHeapCode;
578
579    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
580    CodeBuffer cb(buf_ + kStubOffset);
581
582    cb.push(EDI);
583    cb.push(ESI);
584
585    AssembleHeaderCode(&cb, kRecordType, 12);
586
587    cb.mov(ESI, ESP);
588    cb.add_imm(ESI, 12);  // Skip over our saved and the return address
589    cb.movsd(); cb.movsd(); cb.movsd();  // Copy the 3 parameters
590
591    cb.pop(ESI);
592    cb.pop(EDI);
593
594    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
595  }
596
597  void PatchFreeHeap() {
598    static const EventRecordType kRecordType =  EVENT_TYPE_FREEHEAP;
599    static const char* kFuncName = "ntdll!RtlFreeHeap";
600    static const int kStubOffset = kOffFreeHeapCode;
601
602    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
603    CodeBuffer cb(buf_ + kStubOffset);
604
605    cb.push(EDI);
606    cb.push(ESI);
607
608    AssembleHeaderCode(&cb, kRecordType, 12);
609
610    cb.mov(ESI, ESP);
611    cb.add_imm(ESI, 12);  // Skip over our saved and the return address
612    cb.movsd(); cb.movsd(); cb.movsd();  // Copy the 3 parameters
613
614    cb.pop(ESI);
615    cb.pop(EDI);
616
617    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
618  }
619
620  // Don't even bother going back to the original code, just implement our
621  // own KiFastSystemCall.  The original looks like:
622  //   .text:7C90EB8B                 mov     edx, esp
623  //   .text:7C90EB8D                 sysenter
624  //   .text:7C90EB8F                 nop
625  //   .text:7C90EB90                 nop
626  //   .text:7C90EB91                 nop
627  //   .text:7C90EB92                 nop
628  //   .text:7C90EB93                 nop
629  void PatchSyscall() {
630    static const EventRecordType kRecordType =  EVENT_TYPE_SYSCALL;
631    static const char* kFuncName = "ntdll!KiFastSystemCall";
632    static const int kStubOffset = kOffSyscallCode;
633
634    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
635
636    {
637      CodeBuffer cb(buf_ + kStubOffset);
638
639      Label skip;
640
641      // Skip 0xa5 which is QueryPerformanceCounter, to make sure we don't log
642      // our own logging's QPC.  Disabled for now, using ret addr check...
643      // cb.cmp_imm(EAX, 0xa5);
644      // cb.jcc(equal, &skip);
645
646      // Check if the return address is from 0x6666 (our code region).
647      // 66817C24066666    cmp word [esp+0x6],0x6666
648      cb.emit(0x66); cb.emit(0x81); cb.emit(0x7C);
649      cb.emit(0x24); cb.emit(0x06); cb.emit(0x66); cb.emit(0x66);
650      cb.jcc(equal, &skip);
651
652      // This is all a bit shit.  Originally I thought I could store some state
653      // on the stack above ESP, however, it seems that when APCs, etc are
654      // queued, they will use the stack above ESP.  Well, not above ESP, above
655      // what was passed in as EDX into the systemcall, not matter if ESP was
656      // different than this :(.  So we need to store our state in the event
657      // log record, and then we stick a pointer to that over a ret addr...
658
659      // Our stack starts like:
660      //  [ ret addr ] [ ret addr 2 ] [ arguments ]
661      // We will update it to look like
662      //  [ ret stub addr ] [ event entry ptr ] [ arguments ]
663
664      cb.push(EDI);  // save EDI since we're using it
665      AssembleHeaderCode(&cb, kRecordType, 16 + 16 + 8);
666      cb.mov(EDX, EAX);  // Save EAX...
667      cb.stosd();  // eax is the syscall number
668      cb.pop(EAX);
669      cb.stosd();  // store the saved EDI
670      cb.pop(EAX);
671      cb.stosd();  // store the real return address
672      cb.pop(EAX);
673      cb.stosd();  // store the real (secondary) return address;
674
675      cb.push(ESI);
676      cb.mov(ESI, ESP);
677      cb.lodsd();
678      cb.movsd();  // argument 1
679      cb.movsd();  // argument 2
680      cb.movsd();  // argument 3
681      cb.pop(ESI);
682
683      cb.push(EDI);  // store our event ptr over the secondary ret addr.
684      cb.push_imm(reinterpret_cast<int>(remote_addr_ + kOffSyscallCode + 200));
685      cb.mov(EAX, EDX);  // restore EAX
686
687      cb.bind(&skip);
688      cb.mov(EDX, ESP);
689      cb.sysenter();
690
691      if (cb.size() > 200) {
692        NOTREACHED("code too big: %d", cb.size());
693      }
694    }
695
696    {
697      CodeBuffer cb(buf_ + kStubOffset + 200);
698
699      // TODO share the QPC code, this is a copy and paste...
700
701      cb.pop(EDI);  // get the log area
702
703      cb.stosd();   // Log the system call return value.
704
705      // QPC will clobber EAX, and it's very important to save it since it
706      // is the return value from the system call.  TODO validate if there is
707      // anything else we need to save...
708      cb.push(EAX);
709      AssembleQueryPerformanceCounter(&cb);
710      cb.pop(EAX);
711
712      // We need to:
713      //  - Restore the original "seconary" return address
714      //  - Restore the original value of the EDI register
715      //  - Jump control flow to the original return address
716      // All 3 of these values are stored in the log record...
717      // [ syscall num ] [ saved edi ] [ real rets ] [ args ] [ retval ] [ ts ]
718      //                   currently edi points here     ----^
719
720      cb.push(Operand(EDI, -4 - 16));   // push the real 2nd ret
721      cb.push(Operand(EDI, -8 - 16));   // push the real ret
722      cb.push(Operand(EDI, -12 - 16));  // push the saved EDI
723
724      cb.pop(EDI);  // restore EDI that was saved in the record
725      cb.ret();     // jmp back to the real ret ...
726
727      if (cb.size() > 56) {
728        NOTREACHED("ug");
729      }
730    }
731  }
732
733  // Patch lock (criticial section) holding.
734  void PatchEnterCriticalSection() {
735    static const EventRecordType kRecordType =  EVENT_TYPE_ENTER_CS;
736    static const char* kFuncName = "ntdll!RtlEnterCriticalSection";
737    static const int kStubOffset = kOffEnterCritSecCode;
738
739    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
740
741    // We just want to capture the return address and original argument, so
742    // we know when EnterCriticalSection returned, we don't want to know when
743    // it entered because it could sit waiting.  We want to know when the lock
744    // actually started being held.  The compiler will sometimes generated code
745    // that overwrites arguments, so we'll keep a copy of the argument just in
746    // case code like this is ever generated in the future.  TODO is it enough
747    // to just assume a LPCRITICAL_SECTION uniquely identifies the lock, or
748    // can the same lock have multiple different copies, I would assume not.
749    {
750      CodeBuffer cb(buf_ + kStubOffset);
751
752      // Set up an additional frame so that we capture the return.
753      // TODO use memory instructions instead of using registers.
754      cb.pop(EAX);  // return address
755      cb.pop(EDX);  // first argument (critical section pointer)
756
757      cb.push(EDX);
758      cb.push(EAX);
759      cb.push(EDX);
760      cb.push_imm(
761          reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));
762
763      ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
764      CHECK(cb.size() < 40);
765    }
766
767    {
768      CodeBuffer cb(buf_ + kStubOffset + 40);
769
770      cb.push(ESI);
771      cb.mov(ESI, ESP);
772      cb.push(EAX);
773      cb.push(EDI);
774
775      AssembleHeaderCode(&cb, kRecordType, 4);
776
777      cb.lodsd();  // Skip over our saved ESI
778      cb.lodsd();  // Skip over the return address
779      cb.movsd();  // Write the CRITICAL_SECTION* to the event record.
780
781      cb.pop(EDI);
782      cb.pop(EAX);
783      cb.pop(ESI);
784
785      cb.ret(0x04);
786    }
787  }
788
789  void PatchTryEnterCriticalSection() {
790    static const EventRecordType kRecordType =  EVENT_TYPE_TRYENTER_CS;
791    static const char* kFuncName = "ntdll!RtlTryEnterCriticalSection";
792    static const int kStubOffset = kOffTryEnterCritSecCode;
793
794    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
795
796    {
797      CodeBuffer cb(buf_ + kStubOffset);
798
799      // Set up an additional frame so that we capture the return.
800      // TODO use memory instructions instead of using registers.
801      cb.pop(EAX);  // return address
802      cb.pop(EDX);  // first argument (critical section pointer)
803
804      cb.push(EDX);
805      cb.push(EAX);
806      cb.push(EDX);
807      cb.push_imm(reinterpret_cast<int>(remote_addr_ + kStubOffset + 40));
808
809      ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
810      CHECK(cb.size() < 40);
811    }
812
813    {
814      CodeBuffer cb(buf_ + kStubOffset + 40);
815
816      cb.push(ESI);
817      cb.mov(ESI, ESP);
818      cb.push(EDI);
819
820      cb.push(EAX);
821
822      AssembleHeaderCode(&cb, kRecordType, 8);
823
824      cb.lodsd();  // Skip over our saved ESI
825      cb.lodsd();  // Skip over the return address
826      cb.movsd();  // Write the CRITICAL_SECTION* to the event record.
827
828      cb.pop(EAX);
829      cb.stosd();  // Write the return value to the event record.
830
831      cb.pop(EDI);
832      cb.pop(ESI);
833
834      cb.ret(0x04);
835    }
836  }
837
838  void PatchLeaveCriticalSection() {
839    static const EventRecordType kRecordType =  EVENT_TYPE_LEAVE_CS;
840    static const char* kFuncName = "ntdll!RtlLeaveCriticalSection";
841    static const int kStubOffset = kOffLeaveCritSecCode;
842
843    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
844    CodeBuffer cb(buf_ + kStubOffset);
845
846    // TODO use memory instructions instead of using registers.
847    cb.pop(EDX);  // return address
848    cb.pop(EAX);  // first argument (critical section pointer)
849    cb.push(EAX);
850    cb.push(EDX);
851
852    cb.push(EDI);
853    AssembleHeaderCode(&cb, kRecordType, 4);
854    cb.stosd();  // Write the CRITICAL_SECTION* to the event record.
855    cb.pop(EDI);
856
857    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
858  }
859
860  // Patch APC dispatching.  This is a bit hacky, since the return to kernel
861  // mode is done with NtContinue, we have to shim in a stub return address to
862  // catch when the callback is finished.  It is probably a bit fragile.
863  void PatchApcDispatcher() {
864    static const EventRecordType kRecordType =  EVENT_TYPE_APC;
865    static const char* kFuncName = "ntdll!KiUserApcDispatcher";
866    static const int kStubOffset = kOffApcDispCode;
867
868    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
869
870    {
871      CodeBuffer cb(buf_ + kStubOffset);
872
873      // We don't really need to preserve these since we're the first thing
874      // executing from the kernel dispatch, but yeah, it is good practice.
875      cb.push(EDI);
876      cb.push(EAX);
877
878      AssembleHeaderCode(&cb, kRecordType, 4 + 4 + 8);
879
880      cb.mov_imm(EAX, reinterpret_cast<int>(remote_addr_ + kStubOffset + 140));
881      cb.xchg(EAX, Operand(ESP, 8));  // Swap the callback address with ours.
882      cb.stosd();  // Store the original callback function address.
883
884      // TODO for now we're lazy and depend that ESI will be preserved, and we
885      // use it to store the pointer into our log record.  EDI isn't preserved.
886      cb.mov(ESI, EDI);
887
888      cb.pop(EAX);
889      cb.pop(EDI);
890
891      ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
892
893      CHECK(cb.size() < 140);
894    }
895    {
896      CodeBuffer cb(buf_ + kStubOffset + 140);
897
898      // This is our shim, we need to call the original callback function, then
899      // we can catch the return and log when it was completed.
900      cb.pop(EAX);  // The real return address, safe to use EAX w/ the ABI?
901      cb.push(EDI);
902
903      cb.mov(EDI, ESI);
904      cb.stosd();  // Store the real return address, we'll need it.
905
906      cb.add_imm(ESI, -4);
907      cb.lodsd();   // Load the real callback address.
908
909      cb.mov(ESI, EDI);
910      cb.pop(EDI);
911
912      cb.call(EAX);  // Call the original callback address.
913
914      cb.push(EAX);
915      cb.push(EDI);
916
917      cb.mov(EDI, ESI);
918      AssembleQueryPerformanceCounter(&cb);
919
920      cb.pop(EDI);
921      cb.pop(EAX);
922
923      cb.push(Operand(ESI, -4));  // Push the real return address.
924      cb.ret();  // Return back to the APC Dispatcher.
925
926      CHECK(cb.size() < 50);
927    }
928  }
929
930  // We need to hook into process shutdown for two reasons.  Most importantly,
931  // we need to copy the playground back from the process before the address
932  // space goes away.  We could avoid this with shared memory, however, there
933  // is a reason two.  In order to capture symbols for all of the libraries
934  // loaded into arbitrary applications, on shutdown we do an instrusive load
935  // of symbols into the traced process.
936  //
937  // ntdll!LdrShutdownProcess
938  //  - NtSetEvent(event, 0);
939  //  - NtWaitForSingleObject(event, FALSE, NULL);
940  //  - jmp back
941  void PatchExit(HANDLE exiting, HANDLE exited) {
942    static const EventRecordType kRecordType =  EVENT_TYPE_PROCESSEXIT;
943    static const char* kFuncName = "ntdll!LdrShutdownProcess";
944    static const int kStubOffset = kOffExitCode;
945
946    HANDLE rexiting, rexited;
947    if (!DuplicateHandle(::GetCurrentProcess(),
948                         exiting,
949                         proc_,
950                         &rexiting,
951                         0,
952                         FALSE,
953                         DUPLICATE_SAME_ACCESS)) {
954      NOTREACHED("");
955    }
956    if (!DuplicateHandle(::GetCurrentProcess(),
957                         exited,
958                         proc_,
959                         &rexited,
960                         0,
961                         FALSE,
962                         DUPLICATE_SAME_ACCESS)) {
963      NOTREACHED("");
964    }
965
966    std::string moved_instructions = PatchPreamble(kFuncName, kStubOffset);
967    CodeBuffer cb(buf_ + kStubOffset);
968
969    cb.push(EDI);
970    AssembleHeaderCode(&cb, kRecordType, 0);
971    cb.pop(EDI);
972
973    // NtSetEvent(exiting, 0);
974    cb.push_imm(0);
975    cb.push_imm(reinterpret_cast<int>(rexiting));
976    cb.mov_imm(EAX, reinterpret_cast<int>(
977        resolver_.Resolve("ntdll!NtSetEvent")));
978    cb.call(EAX);
979
980    // NtWaitForSingleObject(exited, FALSE, INFINITE);
981    cb.push_imm(0);
982    cb.push_imm(0);
983    cb.push_imm(reinterpret_cast<int>(rexited));
984    cb.mov_imm(EAX, reinterpret_cast<int>(
985        resolver_.Resolve("ntdll!NtWaitForSingleObject")));
986    cb.call(EAX);
987
988    ResumeOriginalFunction(kFuncName, moved_instructions, kStubOffset, &cb);
989  }
990
991
992  void Patch() {
993    if (options_.vista()) {
994      // TODO(deanm): Make PatchCreateThread work on Vista.
995      PatchThreadBeginVista();
996    } else {
997      PatchCreateThread();
998      PatchThreadBegin();
999    }
1000
1001    PatchThreadExit();
1002    PatchSetThreadName();
1003    PatchSyscall();
1004
1005    PatchApcDispatcher();
1006
1007    // The loader logging needs to be improved a bit to really be useful.
1008    //PatchLoader();
1009
1010    // These are interesting, but will collect a ton of data:
1011    if (options_.log_heap()) {
1012      PatchAllocateHeap();
1013      PatchFreeHeap();
1014    }
1015    if (options_.log_lock()) {
1016      PatchEnterCriticalSection();
1017      PatchTryEnterCriticalSection();
1018      PatchLeaveCriticalSection();
1019    }
1020  }
1021
1022  // Dump the event records from the playground to stdout in a JSON format.
1023  // TODO: Drop RDTSCNormalizer, it was from old code that tried to use the
1024  // rdtsc counters from the CPU, and this required a bunch of normalization
1025  // to account for non-syncronized timestamps across different cores, etc.
1026  void DumpJSON(RDTSCNormalizer* rdn, SymResolver* res) {
1027    int pos = kOffLogAreaPtr;
1028    int i = IntAt(pos);
1029    pos += 4;
1030
1031    std::map<int, const char*> syscalls = CreateSyscallMap();
1032
1033    printf("parseEvents([\n");
1034    for (int end = pos + i; pos < end; ) {
1035      printf("{\n");
1036      __int64 ts = Int64At(pos);
1037      pos += 8;
1038      void* cpuid = reinterpret_cast<void*>(IntAt(pos));
1039      pos += 4;
1040      printf("'ms': %f,\n", rdn->MsFromStart(cpuid, ts));
1041
1042      printf("'cpu': 0x%x,\n'thread': 0x%x,\n", cpuid, IntAt(pos));
1043      pos += 4;
1044
1045      if (options_.stack_unwind_depth() > 0) {
1046        printf("'stacktrace': [\n");
1047        for (int i = 0; i < options_.stack_unwind_depth(); ++i) {
1048          int retaddr = IntAt(pos + (i * 4));
1049          if (!retaddr)
1050            break;
1051          printf("  [ 0x%x, %s ],\n",
1052                 retaddr,
1053                 res ? JSONString(res->Unresolve(retaddr)).c_str() : "\"\"");
1054        }
1055        printf("],\n");
1056        pos += (options_.stack_unwind_depth() * 4);
1057      }
1058
1059
1060      EventRecordType rt = static_cast<EventRecordType>(IntAt(pos));
1061      pos += 4;
1062
1063      switch (rt) {
1064        case EVENT_TYPE_LDR:
1065        {
1066          printf("'eventtype': 'EVENT_TYPE_LDR',\n");
1067          std::string str(&buf_[pos], kLdrBufSize);
1068          str = str.substr(0, str.find('\0'));
1069          printf("'ldrinfo': %s,\n", JSONString(str).c_str());
1070          pos += kLdrBufSize;
1071          break;
1072        }
1073        case EVENT_TYPE_CREATETHREAD:
1074        {
1075          printf("'eventtype': 'EVENT_TYPE_CREATETHREAD',\n"
1076                 "'eventid': 0x%x,\n"
1077                 "'startaddr': 0x%x,\n",
1078                 IntAt(pos), IntAt(pos+4));
1079          pos += 8;
1080          break;
1081        }
1082        case EVENT_TYPE_THREADBEGIN:
1083        {
1084          printf("'eventtype': 'EVENT_TYPE_THREADBEGIN',\n"
1085                 "'parenteventid': 0x%x,\n"
1086                 "'startaddr': 0x%x,\n",
1087                 IntAt(pos), IntAt(pos+4));
1088          pos += 8;
1089          break;
1090        }
1091        case EVENT_TYPE_THREADNAME:
1092        {
1093          std::string str(&buf_[pos], kThreadNameBufSize);
1094          str = str.substr(0, str.find('\0'));
1095          printf("'eventtype': 'EVENT_TYPE_THREADNAME',\n"
1096                 "'threadname': %s,\n",
1097                 JSONString(str).c_str());
1098          pos += kThreadNameBufSize;
1099          break;
1100        }
1101        case EVENT_TYPE_PROCESSEXIT:
1102        {
1103          printf("'eventtype': 'EVENT_TYPE_PROCESSEXIT',\n");
1104          break;
1105        }
1106        case EVENT_TYPE_THREADEXIT:
1107        {
1108          printf("'eventtype': 'EVENT_TYPE_THREADEXIT',\n");
1109          break;
1110        }
1111        case EVENT_TYPE_ALLOCHEAP:
1112        {
1113          printf("'eventtype': 'EVENT_TYPE_ALLOCHEAP',\n"
1114                 "'heaphandle': 0x%x,\n"
1115                 "'heapflags': 0x%x,\n"
1116                 "'heapsize': %d,\n",
1117                 IntAt(pos), IntAt(pos+4), IntAt(pos+8));
1118          pos += 12;
1119          break;
1120        }
1121        case EVENT_TYPE_FREEHEAP:
1122        {
1123          printf("'eventtype': 'EVENT_TYPE_FREEHEAP',\n"
1124                 "'heaphandle': 0x%x,\n"
1125                 "'heapflags': 0x%x,\n"
1126                 "'heapptr': %d,\n",
1127                 IntAt(pos), IntAt(pos+4), IntAt(pos+8));
1128          pos += 12;
1129          break;
1130        }
1131        case EVENT_TYPE_SYSCALL:
1132        {
1133          int syscall = IntAt(pos);
1134          printf("'eventtype': 'EVENT_TYPE_SYSCALL',\n"
1135                 "'syscall': 0x%x,\n", syscall);
1136          pos += 16;
1137
1138          printf("'syscallargs': [\n");
1139          for (int i = 0; i < 3; ++i) {
1140            printf("  0x%x,\n", IntAt(pos));
1141            pos += 4;
1142          }
1143          printf("],\n");
1144
1145          printf("'retval': 0x%x,\n"
1146                 "'done': %f,\n",
1147                 IntAt(pos), rdn->MsFromStart(0, Int64At(pos+4)));
1148          pos += 12;
1149
1150          if (syscalls.count(syscall) == 1) {
1151            std::string sname = syscalls[syscall];
1152            printf("'syscallname': %s,\n",
1153                   JSONString(sname).c_str());
1154            // Mark system calls that we should consider "waiting" system
1155            // calls, where we are not actually active.
1156            if (sname.find("WaitFor") != std::string::npos ||
1157                sname.find("RemoveIoCompletion") != std::string::npos) {
1158              printf("'waiting': 1,\n");
1159            }
1160          }
1161          break;
1162        }
1163        case EVENT_TYPE_ENTER_CS:
1164        {
1165          printf("'eventtype': 'EVENT_TYPE_ENTER_CS',\n"
1166                 "'critical_section': 0x%x,\n", IntAt(pos));
1167          pos += 4;
1168          break;
1169        }
1170        case EVENT_TYPE_TRYENTER_CS:
1171        {
1172          printf("'eventtype': 'EVENT_TYPE_TRYENTER_CS',\n"
1173                 "'critical_section': 0x%x,\n"
1174                 "'retval': 0x%x,\n",
1175                 IntAt(pos), IntAt(pos+4));
1176          pos += 8;
1177          break;
1178        }
1179        case EVENT_TYPE_LEAVE_CS:
1180        {
1181          printf("'eventtype': 'EVENT_TYPE_LEAVE_CS',\n"
1182                 "'critical_section': 0x%x,\n", IntAt(pos));
1183          pos += 4;
1184          break;
1185        }
1186        case EVENT_TYPE_APC:
1187        {
1188          int func_addr = IntAt(pos);
1189          printf("'eventtype': 'EVENT_TYPE_APC',\n"
1190                 "'func_addr': 0x%x,\n"
1191                 "'func_addr_name': %s,\n"
1192                 "'ret_addr': 0x%x,\n"
1193                 "'done': %f,\n",
1194                 func_addr,
1195                 res ? JSONString(res->Unresolve(func_addr)).c_str() : "\"\"",
1196                 IntAt(pos+4), rdn->MsFromStart(0, Int64At(pos+8)));
1197          pos += 16;
1198          break;
1199        }
1200        default:
1201          NOTREACHED("Unknown event type: %d", rt);
1202          break;
1203      }
1204      printf("},\n");
1205    }
1206    printf("]);");
1207  }
1208
1209  int IntAt(int pos) { return *reinterpret_cast<int*>(&buf_[pos]); }
1210  __int64 Int64At(int pos) { return *reinterpret_cast<__int64*>(&buf_[pos]); }
1211
1212
1213 private:
1214  // Handle the process we install into or read back from.
1215  HANDLE proc_;
1216  // The address where we will keep our playground in the remote process.
1217  char* remote_addr_;
1218  // Lookup addresses from symbol names for ntdll.dll.
1219  SymResolver resolver_;
1220  Options options_;
1221  // A local copy of the playground data, we copy it into the remote process.
1222  char buf_[kPlaygroundSize];
1223};
1224
1225
1226int main(int argc, char** argv) {
1227  std::string command_line;
1228  bool use_symbols = false;
1229  bool attaching = false;
1230  bool launched = false;
1231  bool manual_quit = false;
1232
1233  Playground::Options options;
1234
1235  PROCESS_INFORMATION info = {0};
1236
1237  argc--; argv++;
1238
1239  while (argc > 0) {
1240    if (std::string("--symbols") == argv[0]) {
1241      use_symbols = true;
1242    } else if (std::string("--vista") == argv[0]) {
1243      options.set_vista(true);
1244    } else if (std::string("--log-heap") == argv[0]) {
1245      options.set_log_heap(true);
1246    } else if (std::string("--log-lock") == argv[0]) {
1247      options.set_log_lock(true);
1248    } else if (std::string("--manual-quit") == argv[0]) {
1249      manual_quit = true;
1250    } else if (argc >= 2 && std::string("--unwind") == argv[0]) {
1251      options.set_stack_unwind_depth(atoi(argv[1]));
1252      argc--; argv++;
1253    } else if (argc >= 2 && !launched && std::string("--attach") == argv[0]) {
1254      attaching = true;
1255      info.hProcess = OpenProcess(PROCESS_ALL_ACCESS, FALSE, atoi(argv[1]));
1256      launched = true;
1257      argc--; argv++;
1258    } else if (!launched) {
1259      STARTUPINFOA start_info = {0};
1260      start_info.cb = sizeof(start_info);
1261
1262      if (!CreateProcessA(NULL,
1263                          argv[0],
1264                          NULL,
1265                          NULL,
1266                          FALSE,
1267                          CREATE_SUSPENDED,
1268                          NULL,
1269                          NULL,
1270                          &start_info,
1271                          &info)) {
1272        NOTREACHED("Failed to launch \"%s\": %d\n", argv[0], GetLastError());
1273        return 1;
1274      }
1275      launched = true;
1276    } else {
1277      NOTREACHED("error parsing command line.");
1278    }
1279    argc--; argv++;
1280  }
1281
1282  if (!launched) {
1283    printf("usage: traceline.exe \"app.exe my arguments\"\n"
1284           "  --attach 123: buggy support for attaching to a process\n"
1285           "  --unwind 16: unwind the stack to the specified max depth\n"
1286           "  --symbols: use symbols for stacktraces\n"
1287           "  --log-heap: log heap operations (alloc / free).\n"
1288           "  --log-lock: log lock (critical section) operations.\n");
1289    return 1;
1290  }
1291
1292
1293  HANDLE exiting = CreateEvent(NULL, FALSE, FALSE, NULL);
1294  HANDLE exited = CreateEvent(NULL, FALSE, FALSE, NULL);
1295
1296  // The playground object is big (32MB), dynamically alloc.
1297  Playground* pg = new Playground(info.hProcess, options);
1298
1299  pg->AllocateInRemote();
1300  pg->Patch();
1301  pg->PatchExit(exiting, exited);
1302  pg->CopyToRemote();
1303
1304  RDTSCNormalizer rdn;
1305  rdn.Start();
1306
1307  if (!attaching)
1308    ResumeThread(info.hThread);
1309
1310  // Wait until we have been notified that it's exiting.
1311  if (manual_quit) {
1312    fprintf(stderr, "Press enter when you want stop tracing and collect.\n");
1313    fflush(stderr);
1314    getchar();
1315  } else {
1316    HANDLE whs[] = {exiting, info.hProcess};
1317    if (WaitForMultipleObjects(2, whs, FALSE, INFINITE) != WAIT_OBJECT_0) {
1318      NOTREACHED("Failed to correctly capture process shutdown.");
1319    }
1320  }
1321
1322  pg->CopyFromRemote();
1323
1324  if (use_symbols) {
1325    // Break in and get the symbols...
1326    SymResolver res(NULL, info.hProcess);
1327    pg->DumpJSON(&rdn, &res);
1328  } else {
1329    pg->DumpJSON(&rdn, NULL);
1330  }
1331
1332  // Notify that it can exit now, we are done.
1333  SetEvent(exited);
1334
1335  CloseHandle(info.hProcess);
1336  CloseHandle(info.hThread);
1337
1338  delete pg;
1339}
1340