/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <sys/mman.h>
#include "Dalvik.h"
#include "libdex/DexOpcodes.h"
#include "compiler/Compiler.h"
#include "compiler/CompilerIR.h"
#include "interp/Jit.h"
#include "libdex/DexFile.h"
#include "Lower.h"
#include "NcgAot.h"
#include "compiler/codegen/CompilerCodegen.h"

/* Init values when a predicted chain is initially assembled */
/* E7FE is branch to self */
#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe

/* Target-specific save/restore */
extern "C" void dvmJitCalleeSave(double *saveArea);
extern "C" void dvmJitCalleeRestore(double *saveArea);

/*
 * Determine the initial instruction set to be used for this trace.
 * Later components may decide to change this.
 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    return DALVIK_JIT_IA32;
}

JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}

/* We don't use an interpret template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
      return NULL;
}

/* Track the number of times that the code cache is patched */
#if defined(WITH_JIT_TUNING)
#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
#else
#define UPDATE_CODE_CACHE_PATCHES()
#endif

bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
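    /*
     * Note: jitTableSize is presumably required to stay a power of two,
     * since jitTableMask is derived as (size - 1) and used as a mask when
     * hashing into the table.
     */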
    if (gDvmJit.threshold == 0) {
        gDvmJit.threshold = 255;
    }
    gDvmJit.codeCacheSize = 512*1024;
    gDvmJit.optLevel = kJitOptLevelO1;

    //Disable Method-JIT
    gDvmJit.disableOpt |= (1 << kMethodJit);

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    return true;
}

void dvmCompilerPatchInlineCache(void)
{
    int i;
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But race conditions can easily be introduced in the future
     * without paying attention, so we still acquire the lock here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}

/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /* "0xFF 0xFF" is an invalid opcode for x86. */
    memset(start, 0xFF, size);
}

/* for JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
}

void dvmJitCalleeRestore(double *saveArea) {
}

void dvmJitToInterpSingleStep() {
}

JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}

void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
}

void dvmCompilerArchDump(void)
{
}

char *getTraceBase(const JitEntry *p)
{
    return NULL;
}

void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
}

void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
}

void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}

void dvmJitScanAllClassPointers(void (*callback)(void *))
{
}

/* Handy function to retrieve the profile count */
static inline int getProfileCount(const JitEntry *entry)
{
    if (entry->dPC == 0 || entry->codeAddress == 0)
        return 0;
    u4 *pExecutionCount = (u4 *) getTraceBase(entry);

    return pExecutionCount ? *pExecutionCount : 0;
}
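/*
 * Note: getTraceBase() above is a stub that returns NULL on IA32, so
 * getProfileCount() always yields 0 here and the average reported by
 * dvmCompilerSortAndPrintTraceProfiles() below is therefore 0 as well.
 */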

/* qsort callback function */
static int sortTraceProfileCount(const void *entry1, const void *entry2)
{
    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
    const JitEntry *jitEntry2 = (const JitEntry *)entry2;

    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
}

/* Sort the trace profile counts and dump them */
void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
{
    JitEntry *sortedEntries;
    int numTraces = 0;
    unsigned long counts = 0;
    unsigned int i;

    /* Make sure that the table is not changing */
    dvmLockMutex(&gDvmJit.tableLock);

    /* Sort the entries by descending order */
    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
    if (sortedEntries == NULL)
        goto done;
    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
           sizeof(JitEntry) * gDvmJit.jitTableSize);
    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
          sortTraceProfileCount);

    /* Count the traces and accumulate their execution counts */
    for (i=0; i < gDvmJit.jitTableSize; i++) {
        if (sortedEntries[i].dPC != 0) {
            numTraces++;
            counts += getProfileCount(&sortedEntries[i]);
        }
    }
    if (numTraces == 0)
        numTraces = 1;
    ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));

    free(sortedEntries);
done:
    dvmUnlockMutex(&gDvmJit.tableLock);
    return;
}

void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}
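/*
 * The displacement of an x86 jmp is relative to the address of the *next*
 * instruction, which is why the helpers here subtract the jump instruction
 * size from (target - instAddr). For example, for a 5-byte "jmp rel32"
 * located at instAddr, the encoded immediate is target - (instAddr + 5).
 */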

// Works whether or not instructions for the target basic block have been generated
LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
    unconditional_jump_int(relativeNCG, size);
    return NULL;
}

LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
    stream = instAddr;
    bool unknown;
    OpndSize size;
    int relativeNCG = targetId;
    relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
    conditional_jump_int(cc, relativeNCG, size);
    return NULL;
}

/*
 * Attempt to enqueue a work order to patch an inline cache for a predicted
 * chaining cell used by virtual/interface calls.
 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (i.e. the
     * fast path) and queueing the request (i.e. the queued path) have to be
     * done in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for an uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE)  {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purposes only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    } else {
        /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}
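/*
 * Summary of the cases above: an uninitialized cell is patched in place right
 * away (clazz is written last, with release semantics, so a concurrent reader
 * never sees a half-initialized cell); a class seen for the first time only
 * goes into the stagedClazz filter; a class change with the same method
 * target is patched lock-free; everything else is queued and applied at the
 * next safe point by dvmCompilerPatchInlineCache(), or dropped if the queue
 * is full.
 */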

/*
 * This method is called from the invoke templates for virtual and interface
 * methods to speculatively set up a chain to the callee. The templates are
 * written in assembly and have set up method, cell, and clazz in r0, r2, and
 * r3 respectively, so there is an unused argument in the list. Upon return one
 * of the following three results may happen:
 *   1) The chain is not set up because the callee is native. Reset the rechain
 *      count to a big number so that it will take a long time before the next
 *      rechain attempt happens.
 *   2) The chain is not set up because the callee has not been compiled yet.
 *      Reset the rechain count to a small number and retry in the near future.
 *   3) Ask all other threads to stop before patching this chaining cell.
 *      This is required because another thread may have passed the class check
 *      but hasn't reached the chaining cell yet to follow the chain. If we
 *      patch the content before halting the other thread, there is a small
 *      window in which it could follow the new but wrong chain and invoke a
 *      different method.
 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    {
    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

    /*
     * No compilation has been done yet for the callee. Reset the counter to a
     * small value and come back to check soon.
     */
    if ((tgtAddr == 0) ||
        ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                  cell, method->clazz->descriptor, method->name));
        goto done;
    }

    PredictedChainingCell newCell;

    if (cell->clazz == NULL) {
        newRechainCount = self->icRechainCount;
    }

    int relOffset = (int) tgtAddr - (int)cell;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
    relOffset -= jumpSize;
    COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
    //can't use stream here since it is used by the compilation thread
    dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch

    newCell.clazz = clazz;
    newCell.method = method;

    /*
     * Add the work order to the queue so the chaining cell will be patched
     * the next time a safe point is entered.
     *
     * If the enqueuing fails, reset the rechain count to a normal value so
     * that it won't get indefinitely delayed.
     */
    inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}
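/*
 * The jmp for the new cell is assembled into the local newCell via
 * dump_imm_with_codeaddr() because the global "stream" pointer belongs to the
 * compilation thread. The code cache itself is only touched by
 * inlineCachePatchEnqueue(), either immediately or later at a safe point in
 * dvmCompilerPatchInlineCache().
 */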

/*
 * Unchain a trace given the starting address of the translation
 * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
 * For ARM, it returns the address following the last cell unchained.
 * For IA, it returns NULL since cacheflush is not required for IA.
 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
          (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining:
               nop  //padding.
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           Space occupied by the chaining cell in bytes: the nop is padding so
                that the target of "jmp 0" is 4-byte aligned.
           Space for a predicted chaining cell: 5 words = 20 bytes
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
                    elemSize = 4+5+5+2;
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * Another mutator thread may be racing to use this
                     * particular predicted cell and could already have passed
                     * the clazz comparison. So we cannot safely wipe the
                     * method and branch, but it is safe to clear the clazz,
                     * which serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    return NULL;
}

/* Unchain all translations in the cache. */
void dvmJitUnchainAll()
{
    ALOGV("Jit Runtime: unchaining all");
    if (gDvmJit.pJitEntryTable != NULL) {
        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
        dvmLockMutex(&gDvmJit.tableLock);

        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
            if (gDvmJit.pJitEntryTable[i].dPC &&
                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
                gDvmJit.pJitEntryTable[i].codeAddress) {
                      dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
            }
        }

        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

        dvmUnlockMutex(&gDvmJit.tableLock);
        gDvmJit.translationChains = 0;
    }
    gDvmJit.hasNewChain = false;
}

#define P_GPR_1 PhysicalReg_EBX
/* Add an additional jump instruction, keeping the jump target 4-byte aligned. */
static void insertJumpHelp()
{
    int rem = (uint)stream % 4;
    int nop_size = 3 - rem;
    dump_nop(nop_size);
    unconditional_jump_int(0, OpndSize_32);
    return;
}
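/*
 * insertJumpHelp() pads with (3 - stream % 4) nops so that the one-byte JMP
 * opcode lands at an address equal to 3 mod 4; the 32-bit displacement that
 * follows is then 4-byte aligned, presumably so that later patching by
 * dvmJitChain()/dvmJitUnchain() updates the displacement without tearing.
 */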

/* Chaining cell for code that may need warmup. */
/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
                 blx r0
                 data 0xb23a //bytecode address: 0x5115b23a
                 data 0x5115
   IA32 assembly:
                  jmp  0 //5 bytes
                  movl address, %ebx
                  movl dvmJitToInterpNormal, %eax
                  call %eax
                  <-- return address
*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/*
 * Chaining cell for instructions that immediately follow already translated
 * code.
 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}

/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction; it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
#undef P_GPR_1

/* Chaining cell for polymorphic (predicted) method invocations. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if(dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
#if defined(WITH_JIT_TUNING)
    /* Predicted chaining is not enabled. Fall back to interpreter and
     * indicate that predicted chaining was not done.
     */
    move_imm_to_reg(OpndSize_32, kInlineCacheMiss, PhysicalReg_EDX, true);
#endif
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* make sure the section for the predicted chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
          *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    stream += 20; //5 words * 4 bytes
#endif
}
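/*
 * The five words emitted above form the initial state of the predicted cell:
 * [0] branch (the "branch to self" init marker), [1] second branch word,
 * [2] class key, [3] method, [4] rechain counter. The class word is the one
 * compared at the call site and patched by the inline-cache code above.
 */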

/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}

//use the O0 code generator for hoisted checks outside of the loop
/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
#define P_GPR_1 PhysicalReg_EBX
#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers.  However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign loop end condition in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if(delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}

/*
 * vA = arrayReg;
 * vB = idxReg;
 * vC = endConditionReg;
 * arg[0] = maxC
 * arg[1] = minC
 * arg[2] = loopBranchConditionCode
 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign index in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if(maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);

}
#undef P_GPR_1
#undef P_GPR_2

/*
 * vA = idxReg;
 * vB = minC;
 */
#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
    export_pc();
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
#undef P_GPR_1

#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if(gDvm.executionMode == kExecutionModeNcgO0) {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        conditional_jump_global_API(Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    //immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
#endif

/* Extended MIR instructions like PHI */
void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
{
    ExecutionMode origMode = gDvm.executionMode;
    gDvm.executionMode = kExecutionModeNcgO0;
    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
        case kMirOpPhi: {
            break;
        }
        case kMirOpNullNRangeUpCheck: {
            genHoistedChecksForCountUpLoop(cUnit, mir);
            break;
        }
        case kMirOpNullNRangeDownCheck: {
            genHoistedChecksForCountDownLoop(cUnit, mir);
            break;
        }
        case kMirOpLowerBound: {
            genHoistedLowerBoundCheck(cUnit, mir);
            break;
        }
        case kMirOpPunt: {
            break;
        }
#ifdef WITH_JIT_INLINING
        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
            genValidationForPredictedInline(cUnit, mir);
            break;
        }
#endif
        default:
            break;
    }
    gDvm.executionMode = origMode;
}

static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}

/* Check whether we can merge a block ending with an unconditional goto with its taken block */
bool mergeBlock(BasicBlock *bb) {
    if(bb->blockType == kDalvikByteCode &&
       bb->firstMIRInsn != NULL &&
       (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
       bb->fallThrough == NULL) {// &&
       //cUnit->hasLoop) {
        //ALOGI("merge blocks ending with goto at index %d", i);
        MIR* prevInsn = bb->lastMIRInsn->prev;
        if(bb->taken == NULL) return false;
        MIR* mergeInsn = bb->taken->firstMIRInsn;
        if(mergeInsn == NULL) return false;
        if(prevInsn == NULL) {//the block has a single instruction
            bb->firstMIRInsn = mergeInsn;
        } else {
            prevInsn->next = mergeInsn; //remove goto from the chain
        }
        mergeInsn->prev = prevInsn;
        bb->lastMIRInsn = bb->taken->lastMIRInsn;
        bb->taken->firstMIRInsn = NULL; //block being merged in
        bb->fallThrough = bb->taken->fallThrough;
        bb->taken = bb->taken->taken;
        return true;
    }
    return false;
}
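/*
 * mergeBlock() folds only one goto at a time, which is why the caller in
 * dvmCompilerMIR2LIR() below keeps calling it on the same block until it
 * returns false.
 */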

static int genTraceProfileEntry(CompilationUnit *cUnit)
{
    cUnit->headerSize = 6;
    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
        return 12;
    } else {
        return 4;
    }

}

#define PRINT_BUFFER_LEN 1024
/* Print the code block in the code cache in the range [startAddr, endAddr)
 * in readable format.
 */
void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
{
    char strbuf[PRINT_BUFFER_LEN];
    unsigned char *addr;
    unsigned char *next_addr;
    int n;

    if (gDvmJit.printBinary) {
        // print binary in bytes
        n = 0;
        for (addr = startAddr; addr < endAddr; addr++) {
            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
            if (n > PRINT_BUFFER_LEN - 10) {
                ALOGD("## %s", strbuf);
                n = 0;
            }
        }
        if (n > 0)
            ALOGD("## %s", strbuf);
    }

    // print disassembled instructions
    addr = startAddr;
    while (addr < endAddr) {
        next_addr = reinterpret_cast<unsigned char*>
            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
                                       strbuf, PRINT_BUFFER_LEN));
        if (addr != next_addr) {
            ALOGD("**  %p: %s", addr, strbuf);
        } else {                // check whether this is nop padding
            if (addr[0] == 0x90) {
                ALOGD("**  %p: NOP (1 byte)", addr);
                next_addr += 1;
            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
                ALOGD("**  %p: NOP (2 bytes)", addr);
                next_addr += 2;
            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
                ALOGD("**  %p: NOP (3 bytes)", addr);
                next_addr += 3;
            } else {
                ALOGD("** unable to decode binary at %p", addr);
                break;
            }
        }
        addr = next_addr;
    }
}

/* 4 is the number of additional bytes needed for chaining information for a trace:
 * 2 bytes for the chaining cell count offset and 2 bytes for the chaining cell offset */
#define EXTRA_BYTES_FOR_CHAINING 4
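/*
 * Resulting layout of a trace in the code cache (written by
 * dvmCompilerMIR2LIR() below and read back by dvmJitUnchain() above):
 * the two u2 values at codeAddr - 4 and codeAddr - 2 hold the offsets, from
 * codeAddr, of the ChainCellCounts block and of the first chaining cell
 * respectively; codeAddr itself is 16-byte aligned and is followed by the
 * trace body, the chaining cells, padding to a 4-byte boundary, and finally
 * the ChainCellCounts.
 */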

/* Entry function to invoke the backend of the JIT compiler */
void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
{
    dump_x86_inst = cUnit->printMe;
    /* Used to hold the labels of each block */
    LowOpBlockLabel *labelList =
        (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
    LowOp *headLIR = NULL;
    GrowableList chainingListByType[kChainingCellLast];
    unsigned int i, padding;

    /*
     * Initialize the chaining lists, one per chaining cell type.
     */
    for (i = 0; i < kChainingCellLast; i++) {
        dvmInitGrowableList(&chainingListByType[i], 2);
    }

    /* Clear the visited flag for each block */
    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
                                          kAllNodes, false /* isIterative */);

    GrowableListIterator iterator;
    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);

    /* Traces start with a profiling entry point.  Generate it here */
    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);

    //BasicBlock **blockList = cUnit->blockList;
    GrowableList *blockList = &cUnit->blockList;
    BasicBlock *bb;

    info->codeAddress = NULL;
    stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;

    // TODO: compile into a temporary buffer and then copy into the code cache.
    // That would let us leave the code cache unprotected for a shorter time.
    size_t unprotected_code_cache_bytes =
            gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
    UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    streamStart = stream; /* trace start before alignment */
    stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
    stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16 bytes */
    streamMethodStart = stream; /* code start */
    for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
        labelList[i].lop.generic.offset = -1;
    }
    cUnit->exceptionBlockId = -1;
    for (i = 0; i < blockList->numUsed; i++) {
        bb = (BasicBlock *) blockList->elemList[i];
        if(bb->blockType == kExceptionHandling)
            cUnit->exceptionBlockId = i;
    }
    startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //merge blocks ending with "goto" with the fall through block
        if (cUnit->jitMode != kJitLoop)
            for (i = 0; i < blockList->numUsed; i++) {
                bb = (BasicBlock *) blockList->elemList[i];
                bool merged = mergeBlock(bb);
                while(merged) merged = mergeBlock(bb);
            }
        for (i = 0; i < blockList->numUsed; i++) {
            bb = (BasicBlock *) blockList->elemList[i];
            if(bb->blockType == kDalvikByteCode &&
               bb->firstMIRInsn != NULL) {
                preprocessingBB(bb);
            }
        }
        preprocessingTrace();
    }

    /* Handle the content in each basic block */
    for (i = 0; ; i++) {
        MIR *mir;
        bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
        if (bb == NULL) break;
        if (bb->visited == true) continue;

        labelList[i].immOpnd.value = bb->startOffset;

        if (bb->blockType >= kChainingCellLast) {
            /*
             * Append the label pseudo LIR first. Chaining cells will be handled
             * separately afterwards.
             */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
        }

        if (bb->blockType == kEntryBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
            if (bb->firstMIRInsn == NULL) {
                continue;
            } else {
              setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
                                  //&labelList[blockList[i]->fallThrough->id]);
            }
        } else if (bb->blockType == kExitBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            goto gen_fallthrough;
        } else if (bb->blockType == kDalvikByteCode) {
            if (bb->hidden == true) continue;
            labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
            /* Reset the register state */
#if 0
            resetRegisterScoreboard(cUnit);
#endif
        } else {
            switch (bb->blockType) {
                case kChainingCellNormal:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellNormal], i);
                    break;
                case kChainingCellInvokeSingleton:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
                    labelList[i].immOpnd.value =
                        (int) bb->containingMethod;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokeSingleton], i);
                    break;
                case kChainingCellInvokePredicted:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
                    /*
                     * Move the cached method pointer from operand 1 to 0.
                     * Operand 0 was clobbered earlier in this routine to store
                     * the block starting offset, which is not applicable to
                     * a predicted chaining cell.
                     */
                    //TODO
                    //labelList[i].operands[0] = labelList[i].operands[1];

                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokePredicted], i);
                    break;
                case kChainingCellHot:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_HOT;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellHot], i);
                    break;
                case kPCReconstruction:
                    /* Make sure exception handling block is next */
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
                    //assert (i == cUnit->numBlocks - 2);
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    handlePCReconstruction(cUnit,
                                           &labelList[cUnit->puntBlock->id]);
                    break;
                case kExceptionHandling:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    //if (cUnit->pcReconstructionList.numUsed) {
                        scratchRegs[0] = PhysicalReg_EAX;
                        jumpToInterpPunt();
                        //call_dvmJitToInterpPunt();
                    //}
                    break;
                case kChainingCellBackwardBranch:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellBackwardBranch],
                        i);
                    break;
                default:
                    break;
            }
            continue;
        }
        {
        //LowOp *headLIR = NULL;
        const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
        const u2 *startCodePtr = dexCode->insns;
        const u2 *codePtr;
        labelList[i].lop.generic.offset = (stream - streamMethodStart);
        ALOGV("get ready to handle JIT bb %d type %d hidden %d",
              bb->id, bb->blockType, bb->hidden);
        for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
            bb = nextBB;
            bb->visited = true;
            cUnit->nextCodegenBlock = NULL;

        if(gDvm.executionMode == kExecutionModeNcgO1 &&
           bb->blockType != kEntryBlock &&
           bb->firstMIRInsn != NULL) {
            startOfBasicBlock(bb);
            int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
            endOfBasicBlock(bb);
            if(cg_ret < 0) {
                endOfTrace(true/*freeOnly*/);
                cUnit->baseAddr = NULL;
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        } else {
        for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
            startOfBasicBlock(bb); //why here for O0
            Opcode dalvikOpCode = mir->dalvikInsn.opcode;
            if((int)dalvikOpCode >= (int)kMirOpFirst) {
                handleExtendedMIR(cUnit, mir);
                continue;
            }
            InstructionFormat dalvikFormat =
                dexGetFormatFromOpcode(dalvikOpCode);
            ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
                  mir->offset, dalvikOpCode, dalvikFormat);
            LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
            /* Remember the first LIR for this block */
            if (headLIR == NULL) {
                headLIR = (LowOp*)boundaryLIR;
            }
            bool notHandled = true;
            /*
             * Debugging: screen the opcode first to see if it is in the
             * do[-not]-compile list
             */
            bool singleStepMe =
                gDvmJit.includeSelectedOp !=
                ((gDvmJit.opList[dalvikOpCode >> 3] &
                  (1 << (dalvikOpCode & 0x7))) !=
                 0);
            if (singleStepMe || cUnit->allSingleStep) {
            } else {
                codePtr = startCodePtr + mir->offset;
                //lower each byte code, update LIR
                notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
                if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                   CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                    ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
                    gDvmJit.codeCacheFull = true;
                    cUnit->baseAddr = NULL;
                    endOfTrace(true/*freeOnly*/);
                    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                    return;
                }
            }
            if (notHandled) {
                ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                     mir->offset,
                     dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                     dalvikFormat);
                dvmAbort();
                break;
            }
        } // end for
        } // end else //JIT + O0 code generator
        }
        } // end for
        /* Eliminate redundant loads/stores and delay stores into later slots */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
        if (headLIR) headLIR = NULL;
gen_fallthrough:
        /*
         * Check if the block is terminated due to trace length constraint -
         * insert an unconditional branch to the chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }

    }

    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* Align this chaining cell first */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);


            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                     chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad blocktype %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /* We hit the code cache size limit inside endOfTrace(false).
         * Bail out for this trace!
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* dump section for chaining cell counts, make sure it is 4-byte aligned */
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i=0; i< kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* write the chaining cell count offset & chaining cell offset */
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;

    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
}

/*
 * Perform translation chain operation.
 */
void* dvmJitChain(void* tgtAddr, u4* branchAddr)
{
#ifdef JIT_CHAIN
    int relOffset = (int) tgtAddr - (int)branchAddr;

    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
        (gDvmJit.codeCacheFull == false)) {

        gDvmJit.translationChains++;

        //OpndSize immSize = estOpndSizeFromImm(relOffset);
        //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
        /* The jump operand size is hard-coded to 32 bits. This instruction will replace the
         * "jmp 0" in the original code sequence.
         */
        OpndSize immSize = OpndSize_32;
        relOffset -= 5;
        //can't use stream here since it is used by the compilation thread
        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));

        gDvmJit.hasNewChain = true;

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
                  (int) branchAddr, tgtAddr, relOffset));
    }
#endif
    return tgtAddr;
}
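/*
 * The "relOffset -= 5" above accounts for the fixed 5-byte encoding of
 * "jmp rel32" (1 opcode byte + 4 displacement bytes): the displacement is
 * measured from the end of the instruction, so the value written is
 * tgtAddr - (branchAddr + 5). This patches the placeholder "jmp 0" emitted by
 * insertJumpHelp() in each chaining cell.
 */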

/*
 * Accept the work and start compiling.  Returns true if compilation
 * is attempted.
 */
bool dvmCompilerDoWork(CompilerWorkOrder *work)
{
    JitTraceDescription *desc;
    bool isCompile;
    bool success = true;

    if (gDvmJit.codeCacheFull) {
        return false;
    }

    switch (work->kind) {
        case kWorkOrderTrace:
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            break;
        case kWorkOrderTraceDebug: {
            bool oldPrintMe = gDvmJit.printMe;
            gDvmJit.printMe = true;
            isCompile = true;
            /* Start compilation with maximally allowed trace length */
            desc = (JitTraceDescription *)work->info;
            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
                                        work->bailPtr, 0 /* no hints */);
            gDvmJit.printMe = oldPrintMe;
            break;
        }
        case kWorkOrderProfileMode:
            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
            isCompile = false;
            break;
        default:
            isCompile = false;
            ALOGE("Jit: unknown work order type");
            assert(0);  // Bail if debug build, discard otherwise
    }
    if (!success)
        work->result.codeAddress = NULL;
    return isCompile;
}

void dvmCompilerCacheFlush(long start, long end, long flags) {
  /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
}

//#endif
