1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package dexfuzz.program;
18
19import dexfuzz.Log;
20import dexfuzz.rawdex.CodeItem;
21import dexfuzz.rawdex.EncodedCatchHandler;
22import dexfuzz.rawdex.EncodedTypeAddrPair;
23import dexfuzz.rawdex.Instruction;
24import dexfuzz.rawdex.Opcode;
25import dexfuzz.rawdex.TryItem;
26import dexfuzz.rawdex.formats.ContainsTarget;
27import dexfuzz.rawdex.formats.RawInsnHelper;
28
29import java.util.ArrayList;
30import java.util.Collections;
31import java.util.Comparator;
32import java.util.HashMap;
33import java.util.LinkedList;
34import java.util.List;
35import java.util.Map;
36
37/**
38 * Translates from a CodeItem (the raw list of Instructions) to MutatableCode
39 * (graph of Instructions, using MInsns and subclasses) and vice-versa.
40 */
41public class CodeTranslator {
42
43  /**
44   * Given a raw DEX file's CodeItem, produce a MutatableCode object, that CodeMutators
45   * are designed to operate on.
46   * @param codeItemIdx Used to make sure the correct CodeItem is updated later after mutation.
47   * @return A new MutatableCode object, which contains all relevant information
48   *         obtained from the CodeItem.
49   */
50  public MutatableCode codeItemToMutatableCode(Program program, CodeItem codeItem,
51      int codeItemIdx, int mutatableCodeIdx) {
52    Log.debug("Translating CodeItem " + codeItemIdx
53        + " (" + codeItem.meta.methodName + ") to MutatableCode");
54
55    MutatableCode mutatableCode = new MutatableCode(program);
56
57    codeItem.registerMutatableCode(mutatableCode);
58
59    mutatableCode.name = codeItem.meta.methodName;
60    mutatableCode.shorty = codeItem.meta.shorty;
61    mutatableCode.isStatic = codeItem.meta.isStatic;
62
63    mutatableCode.codeItemIdx = codeItemIdx;
64
65    mutatableCode.mutatableCodeIdx = mutatableCodeIdx;
66
67    mutatableCode.registersSize = codeItem.registersSize;
68    mutatableCode.insSize = codeItem.insSize;
69    mutatableCode.outsSize = codeItem.outsSize;
70    mutatableCode.triesSize = codeItem.triesSize;
71
72    // Temporary map from bytecode offset -> instruction.
73    Map<Integer,MInsn> insnLocationMap = new HashMap<Integer,MInsn>();
74
75    List<Instruction> inputInsns = codeItem.insns;
76
77    // Create the MInsns.
78    int loc = 0;
79    for (Instruction insn : inputInsns) {
80      MInsn mInsn = null;
81
82      if (isInstructionSwitch(insn)) {
83        mInsn = new MSwitchInsn();
84      } else if (isInstructionBranch(insn)) {
85        mInsn = new MBranchInsn();
86      } else if (isInstructionFillArrayData(insn)) {
87        mInsn = new MInsnWithData();
88      } else {
89        mInsn = new MInsn();
90      }
91
92      mInsn.insn = insn;
93
94      // Populate the temporary map.
95      insnLocationMap.put(loc, mInsn);
96
97      // Populate the proper list of mutatable instructions.
98      mutatableCode.addInstructionToEnd(mInsn);
99
100      // Calculate the offsets for each instruction.
101      mInsn.location = loc;
102      mInsn.locationUpdated = false;
103
104      loc += mInsn.insn.getSize();
105    }
106
107    // Now make branch/switch instructions point at the right target instructions.
108    for (MInsn mInsn : mutatableCode.getInstructions()) {
109      if (mInsn instanceof MSwitchInsn) {
110        readSwitchInstruction((MSwitchInsn) mInsn, insnLocationMap);
111      } else if (mInsn instanceof MInsnWithData) {
112        ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format;
113        int targetLoc = mInsn.location + (int) containsTarget.getTarget(mInsn.insn);
114        ((MInsnWithData)mInsn).dataTarget = insnLocationMap.get(targetLoc);
115        if (((MInsnWithData)mInsn).dataTarget == null) {
116          Log.errorAndQuit("Bad offset calculation in data-target insn");
117        }
118      } else if (mInsn instanceof MBranchInsn) {
119        ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format;
120        int targetLoc = mInsn.location + (int) containsTarget.getTarget(mInsn.insn);
121        ((MBranchInsn)mInsn).target = insnLocationMap.get(targetLoc);
122        if (((MBranchInsn)mInsn).target == null) {
123          Log.errorAndQuit("Bad offset calculation in branch insn");
124        }
125      }
126    }
127
128    // Now create try blocks.
129    if (mutatableCode.triesSize > 0) {
130      readTryBlocks(codeItem, mutatableCode, insnLocationMap);
131    }
132
133    return mutatableCode;
134  }
135
136  /**
137   * Given a MutatableCode item that may have been mutated, update the original CodeItem
138   * correctly, to allow valid DEX to be written back to the output file.
139   */
140  public void mutatableCodeToCodeItem(CodeItem codeItem, MutatableCode mutatableCode) {
141    Log.debug("Translating MutatableCode " + mutatableCode.name
142        + " to CodeItem " + mutatableCode.codeItemIdx);
143
144    // We must first align any data instructions at the end of the code
145    // before we recalculate any offsets.
146    // This also updates their sizes...
147    alignDataInstructions(mutatableCode);
148
149    // Validate that the tracked locations for instructions are valid.
150    // Also mark locations as no longer being updated.
151    int loc = 0;
152    for (MInsn mInsn : mutatableCode.getInstructions()) {
153      if (mInsn.insn.justRaw) {
154        // All just_raw instructions need alignment!
155        if ((loc % 2) != 0) {
156          loc++;
157        }
158      }
159      if (mInsn.location != loc) {
160        Log.errorAndQuit(String.format("%s does not have expected location 0x%x",
161            mInsn, loc));
162      }
163      mInsn.locationUpdated = false;
164      loc += mInsn.insn.getSize();
165    }
166
167    // This new list will be attached to the CodeItem at the end...
168    List<Instruction> outputInsns = new LinkedList<Instruction>();
169
170    // Go through our new list of MInsns, adding them to the new
171    // list of instructions that will be attached to the CodeItem.
172    // Also recalculate offsets for branches.
173    for (MInsn mInsn : mutatableCode.getInstructions()) {
174      if (mInsn instanceof MSwitchInsn) {
175        updateSwitchInstruction((MSwitchInsn)mInsn, mutatableCode);
176      } else if (mInsn instanceof MInsnWithData) {
177        MInsn target = ((MInsnWithData) mInsn).dataTarget;
178        int dataOffset = target.location - mInsn.location;
179        ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format;
180        containsTarget.setTarget(mInsn.insn, dataOffset);
181      } else if (mInsn instanceof MBranchInsn) {
182        MInsn target = ((MBranchInsn) mInsn).target;
183        int branchOffset = target.location - mInsn.location;
184        ContainsTarget containsTarget = (ContainsTarget) mInsn.insn.info.format;
185        containsTarget.setTarget(mInsn.insn, branchOffset);
186      }
187      outputInsns.add(mInsn.insn);
188    }
189
190    // Calculate the new insns_size.
191    int newInsnsSize = 0;
192    for (Instruction insn : outputInsns) {
193      newInsnsSize += insn.getSize();
194    }
195
196    if (mutatableCode.triesSize > 0) {
197      updateTryBlocks(codeItem, mutatableCode);
198    }
199
200    codeItem.insnsSize = newInsnsSize;
201    codeItem.insns = outputInsns;
202    codeItem.registersSize = mutatableCode.registersSize;
203    codeItem.insSize = mutatableCode.insSize;
204    codeItem.outsSize = mutatableCode.outsSize;
205    codeItem.triesSize = mutatableCode.triesSize;
206  }
207
208  /**
209   * The TryItem specifies an offset into the EncodedCatchHandlerList for a given CodeItem,
210   * but we only have an array of the EncodedCatchHandlers that the List contains.
211   * This function produces a map that offers a way to find out the index into our array,
212   * from the try handler's offset.
213   */
214  private Map<Short,Integer> createTryHandlerOffsetToIndexMap(CodeItem codeItem) {
215    // Create a sorted set of offsets.
216    List<Short> uniqueOffsets = new ArrayList<Short>();
217    for (TryItem tryItem : codeItem.tries) {
218      int index = 0;
219      while (true) {
220        if ((index == uniqueOffsets.size())
221            || (uniqueOffsets.get(index) > tryItem.handlerOff)) {
222          // First condition means we're at the end of the set (or we're inserting
223          //   into an empty set)
224          // Second condition means that the offset belongs here
225          // ...so insert it here, pushing the element currently in this position to the
226          //    right, if it exists
227          uniqueOffsets.add(index, tryItem.handlerOff);
228          break;
229        } else if (uniqueOffsets.get(index) == tryItem.handlerOff) {
230          // We've already seen it, and we're making a set, not a list.
231          break;
232        } else {
233          // Keep searching.
234          index++;
235        }
236      }
237    }
238    // Now we have an (implicit) index -> offset mapping!
239
240    // Now create the reverse mapping.
241    Map<Short,Integer> offsetIndexMap = new HashMap<Short,Integer>();
242    for (int i = 0; i < uniqueOffsets.size(); i++) {
243      offsetIndexMap.put(uniqueOffsets.get(i), i);
244    }
245
246    return offsetIndexMap;
247  }
248
249  private void readTryBlocks(CodeItem codeItem, MutatableCode mutatableCode,
250      Map<Integer,MInsn> insnLocationMap) {
251    mutatableCode.mutatableTries = new LinkedList<MTryBlock>();
252
253    Map<Short,Integer> offsetIndexMap = createTryHandlerOffsetToIndexMap(codeItem);
254
255    // Read each TryItem into a MutatableTryBlock.
256    for (TryItem tryItem : codeItem.tries) {
257      MTryBlock mTryBlock = new MTryBlock();
258
259      // Get the MInsns that form the start and end of the try block.
260      int startLocation = tryItem.startAddr;
261      mTryBlock.startInsn = insnLocationMap.get(startLocation);
262      int endLocation = tryItem.startAddr + tryItem.insnCount;
263      mTryBlock.endInsn = insnLocationMap.get(endLocation);
264
265      // Sanity checks.
266      if (mTryBlock.startInsn == null) {
267        Log.errorAndQuit(String.format(
268            "Couldn't find a mutatable insn at start offset 0x%x",
269            startLocation));
270      }
271      if (mTryBlock.endInsn == null) {
272        Log.errorAndQuit(String.format(
273            "Couldn't find a mutatable insn at end offset 0x%x",
274            endLocation));
275      }
276
277      // Get the EncodedCatchHandler.
278      int handlerIdx = offsetIndexMap.get(tryItem.handlerOff);
279      EncodedCatchHandler encodedCatchHandler = codeItem.handlers.list[handlerIdx];
280
281      // Do we have a catch all? If so, associate the MInsn that's there.
282      if (encodedCatchHandler.size <= 0) {
283        mTryBlock.catchAllHandler =
284            insnLocationMap.get(encodedCatchHandler.catchAllAddr);
285        // Sanity check.
286        if (mTryBlock.catchAllHandler == null) {
287          Log.errorAndQuit(
288              String.format("Couldn't find a mutatable insn at catch-all offset 0x%x",
289                  encodedCatchHandler.catchAllAddr));
290        }
291      }
292      // Do we have explicitly-typed handlers? This will remain empty if not.
293      mTryBlock.handlers = new LinkedList<MInsn>();
294
295      // Associate all the explicitly-typed handlers.
296      for (int i = 0; i < Math.abs(encodedCatchHandler.size); i++) {
297        EncodedTypeAddrPair handler = encodedCatchHandler.handlers[i];
298        MInsn handlerInsn = insnLocationMap.get(handler.addr);
299        // Sanity check.
300        if (handlerInsn == null) {
301          Log.errorAndQuit(String.format(
302              "Couldn't find a mutatable instruction at handler offset 0x%x",
303              handler.addr));
304        }
305        mTryBlock.handlers.add(handlerInsn);
306      }
307
308      // Now finally add the new MutatableTryBlock into this MutatableCode's list!
309      mutatableCode.mutatableTries.add(mTryBlock);
310    }
311  }
312
313  private void updateTryBlocks(CodeItem codeItem, MutatableCode mutatableCode) {
314
315    // TODO: Support ability to add extra try blocks/handlers?
316
317    for (MTryBlock mTryBlock : mutatableCode.mutatableTries) {
318      if (mTryBlock.startInsn.location > mTryBlock.endInsn.location) {
319        // Mutation has put this try block's end insn before its start insn. Fix this.
320        MInsn tempInsn = mTryBlock.startInsn;
321        mTryBlock.startInsn = mTryBlock.endInsn;
322        mTryBlock.endInsn = tempInsn;
323      }
324    }
325
326    // First, manipulate the try blocks if they overlap.
327    for (int i = 0; i < mutatableCode.mutatableTries.size() - 1; i++) {
328      MTryBlock first = mutatableCode.mutatableTries.get(i);
329      MTryBlock second = mutatableCode.mutatableTries.get(i + 1);
330
331      // Do they overlap?
332      if (first.endInsn.location > second.startInsn.location) {
333
334        Log.debug("Found overlap in TryBlocks, moving 2nd TryBlock...");
335        Log.debug("1st TryBlock goes from " + first.startInsn + " to "  + first.endInsn);
336        Log.debug("2nd TryBlock goes from " + second.startInsn + " to "  + second.endInsn);
337
338        // Find the first instruction that comes after that does not overlap
339        // with the first try block.
340        MInsn newInsn = second.startInsn;
341        int ptr = mutatableCode.getInstructionIndex(newInsn);
342        while (first.endInsn.location > newInsn.location) {
343          ptr++;
344          newInsn = mutatableCode.getInstructionAt(ptr);
345        }
346        second.startInsn = newInsn;
347
348        Log.debug("Now 2nd TryBlock goes from " + second.startInsn + " to "  + second.endInsn);
349      }
350    }
351
352    Map<Short,Integer> offsetIndexMap = createTryHandlerOffsetToIndexMap(codeItem);
353
354    int tryItemIdx = 0;
355    for (MTryBlock mTryBlock : mutatableCode.mutatableTries) {
356      TryItem tryItem = codeItem.tries[tryItemIdx];
357
358      tryItem.startAddr = mTryBlock.startInsn.location;
359      tryItem.insnCount =
360          (short) (mTryBlock.endInsn.location - mTryBlock.startInsn.location);
361
362      // Get the EncodedCatchHandler.
363      EncodedCatchHandler encodedCatchHandler =
364          codeItem.handlers.list[offsetIndexMap.get(tryItem.handlerOff)];
365
366      if (encodedCatchHandler.size <= 0) {
367        encodedCatchHandler.catchAllAddr = mTryBlock.catchAllHandler.location;
368      }
369      for (int i = 0; i < Math.abs(encodedCatchHandler.size); i++) {
370        MInsn handlerInsn = mTryBlock.handlers.get(i);
371        EncodedTypeAddrPair handler = encodedCatchHandler.handlers[i];
372        handler.addr = handlerInsn.location;
373      }
374      tryItemIdx++;
375    }
376  }
377
378  /**
379   * Given a switch instruction, find the associated data's raw[] form, and update
380   * the targets of the switch instruction to point to the correct instructions.
381   */
382  private void readSwitchInstruction(MSwitchInsn switchInsn,
383      Map<Integer,MInsn> insnLocationMap) {
384    // Find the data.
385    ContainsTarget containsTarget = (ContainsTarget) switchInsn.insn.info.format;
386    int dataLocation = switchInsn.location + (int) containsTarget.getTarget(switchInsn.insn);
387    switchInsn.dataTarget = insnLocationMap.get(dataLocation);
388    if (switchInsn.dataTarget == null) {
389      Log.errorAndQuit("Bad offset calculation for data target in switch insn");
390    }
391
392    // Now read the data.
393    Instruction dataInsn = switchInsn.dataTarget.insn;
394
395    int rawPtr = 2;
396
397    int targetsSize = (int) RawInsnHelper.getUnsignedShortFromTwoBytes(dataInsn.rawBytes, rawPtr);
398    rawPtr += 2;
399
400    int[] keys = new int[targetsSize];
401    int[] targets = new int[targetsSize];
402
403    if (dataInsn.rawType == 1) {
404      switchInsn.packed = true;
405      // Dealing with a packed-switch.
406      // Read the first key.
407      keys[0] = (int) RawInsnHelper.getUnsignedIntFromFourBytes(dataInsn.rawBytes, rawPtr);
408      rawPtr += 4;
409      // Calculate the rest of the keys.
410      for (int i = 1; i < targetsSize; i++) {
411        keys[i] = keys[i - 1] + 1;
412      }
413    } else if (dataInsn.rawType == 2) {
414      // Dealing with a sparse-switch.
415      // Read all of the keys.
416      for (int i = 0; i < targetsSize; i++) {
417        keys[i] = (int) RawInsnHelper.getUnsignedIntFromFourBytes(dataInsn.rawBytes,
418            rawPtr);
419        rawPtr += 4;
420      }
421    }
422
423    // Now read the targets.
424    for (int i = 0; i < targetsSize; i++) {
425      targets[i] = (int) RawInsnHelper.getUnsignedIntFromFourBytes(dataInsn.rawBytes,
426          rawPtr);
427      rawPtr += 4;
428    }
429
430    // Store the keys.
431    switchInsn.keys = keys;
432
433    // Convert our targets[] offsets into pointers to MInsns.
434    for (int target : targets) {
435      int targetLocation = switchInsn.location + target;
436      MInsn targetInsn = insnLocationMap.get(targetLocation);
437      switchInsn.targets.add(targetInsn);
438      if (targetInsn == null) {
439        Log.errorAndQuit("Bad offset calculation for target in switch insn");
440      }
441    }
442  }
443
444  /**
445   * Given a mutatable switch instruction, which may have had some of its branch
446   * targets moved, update all the target offsets in the raw[] form of the instruction.
447   */
448  private void updateSwitchInstruction(MSwitchInsn switchInsn, MutatableCode mutatableCode) {
449    // Update the offset to the data instruction
450    MInsn dataTarget = switchInsn.dataTarget;
451    int dataOffset = dataTarget.location - switchInsn.location;
452    ContainsTarget containsTarget = (ContainsTarget) switchInsn.insn.info.format;
453    containsTarget.setTarget(switchInsn.insn, dataOffset);
454
455    int targetsSize = switchInsn.targets.size();
456
457    int[] keys = switchInsn.keys;
458    int[] targets = new int[targetsSize];
459
460    // Calculate the new offsets.
461    int targetIdx = 0;
462    for (MInsn target : switchInsn.targets) {
463      targets[targetIdx] = target.location - switchInsn.location;
464      targetIdx++;
465    }
466
467    // Now write the data back to the raw bytes.
468    Instruction dataInsn = switchInsn.dataTarget.insn;
469
470    int rawPtr = 2;
471
472    // Write out the size.
473    RawInsnHelper.writeUnsignedShortToTwoBytes(dataInsn.rawBytes, rawPtr, targetsSize);
474    rawPtr += 2;
475
476    // Write out the keys.
477    if (switchInsn.packed) {
478      // Only write out one key - the first.
479      RawInsnHelper.writeUnsignedIntToFourBytes(dataInsn.rawBytes, rawPtr, keys[0]);
480      rawPtr += 4;
481    } else {
482      // Write out all the keys.
483      for (int i = 0; i < targetsSize; i++) {
484        RawInsnHelper.writeUnsignedIntToFourBytes(dataInsn.rawBytes, rawPtr, keys[i]);
485        rawPtr += 4;
486      }
487    }
488
489    // Write out all the targets.
490    for (int i = 0; i < targetsSize; i++) {
491      RawInsnHelper.writeUnsignedIntToFourBytes(dataInsn.rawBytes, rawPtr, targets[i]);
492      rawPtr += 4;
493    }
494  }
495
496  /**
497   * After mutation, data instructions may no longer be 4-byte aligned.
498   * If this is the case, insert nops to align them all.
499   * This makes a number of assumptions about data currently:
500   * - data is always at the end of method insns
501   * - all data instructions are stored contiguously
502   */
503  private void alignDataInstructions(MutatableCode mutatableCode) {
504    // Find all the switch data instructions.
505    List<MInsn> dataInsns = new ArrayList<MInsn>();
506
507    // Update raw sizes of the data instructions as well.
508    for (MInsn mInsn : mutatableCode.getInstructions()) {
509      if (mInsn instanceof MSwitchInsn) {
510        // Update the raw size of the instruction.
511        MSwitchInsn switchInsn = (MSwitchInsn) mInsn;
512        int targetsSize = switchInsn.targets.size();
513        Instruction dataInsn = switchInsn.dataTarget.insn;
514        if (switchInsn.packed) {
515          dataInsn.rawSize = (targetsSize * 2) + 4;
516        } else {
517          dataInsn.rawSize = (targetsSize * 4) + 2;
518        }
519        dataInsns.add(switchInsn.dataTarget);
520      } else if (mInsn instanceof MInsnWithData) {
521        MInsnWithData insnWithData =
522            (MInsnWithData) mInsn;
523        dataInsns.add(insnWithData.dataTarget);
524      }
525    }
526
527    // Only need to align switch data instructions if there are any!
528    if (!dataInsns.isEmpty()) {
529
530      Log.debug("Found data instructions, checking alignment...");
531
532      // Sort data_insns by location.
533      Collections.sort(dataInsns, new Comparator<MInsn>() {
534        @Override
535        public int compare(MInsn first, MInsn second) {
536          if (first.location < second.location) {
537            return -1;
538          } else if (first.location > second.location) {
539            return 1;
540          }
541          return 0;
542        }
543      });
544
545      boolean performedAlignment = false;
546
547      // Go through all the data insns, and insert an alignment nop if they're unaligned.
548      for (MInsn dataInsn : dataInsns) {
549        if (dataInsn.location % 2 != 0) {
550          Log.debug("Aligning data instruction with a nop.");
551          int alignmentNopIdx = mutatableCode.getInstructionIndex(dataInsn);
552          MInsn nop = new MInsn();
553          nop.insn = new Instruction();
554          nop.insn.info = Instruction.getOpcodeInfo(Opcode.NOP);
555          mutatableCode.insertInstructionAt(nop, alignmentNopIdx);
556          performedAlignment = true;
557        }
558      }
559
560      if (!performedAlignment) {
561        Log.debug("Alignment okay.");
562      }
563    }
564  }
565
566  /**
567   * Determine if a particular instruction is a branch instruction, based on opcode.
568   */
569  private boolean isInstructionBranch(Instruction insn) {
570    Opcode opcode = insn.info.opcode;
571    if (Opcode.isBetween(opcode, Opcode.IF_EQ, Opcode.IF_LEZ)
572        || Opcode.isBetween(opcode, Opcode.GOTO, Opcode.GOTO_32)) {
573      return true;
574    }
575    return false;
576  }
577
578  /**
579   * Determine if a particular instruction is a switch instruction, based on opcode.
580   */
581  private boolean isInstructionSwitch(Instruction insn) {
582    Opcode opcode = insn.info.opcode;
583    if (Opcode.isBetween(opcode, Opcode.PACKED_SWITCH, Opcode.SPARSE_SWITCH)) {
584      return true;
585    }
586    return false;
587  }
588
589  private boolean isInstructionFillArrayData(Instruction insn) {
590    return (insn.info.opcode == Opcode.FILL_ARRAY_DATA);
591  }
592}
593