// CodegenInterface.cpp revision 9c789541c5a37dc8c5d12d98b8db74def61e26db
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#include <sys/mman.h>
17#include "Dalvik.h"
18#include "libdex/DexOpcodes.h"
19#include "compiler/Compiler.h"
20#include "compiler/CompilerIR.h"
21#include "interp/Jit.h"
22#include "libdex/DexFile.h"
23#include "Lower.h"
24#include "NcgAot.h"
25#include "compiler/codegen/CompilerCodegen.h"
26
27/* Init values when a predicted chain is initially assembled */
28/* E7FE is branch to self */
29#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
30
31/* Target-specific save/restore */
32extern "C" void dvmJitCalleeSave(double *saveArea);
33extern "C" void dvmJitCalleeRestore(double *saveArea);
34
35/*
36 * Determine the initial instruction set to be used for this trace.
37 * Later components may decide to change this.
38 */
//JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit)
JitInstructionSetType dvmCompilerInstructionSet(void)
{
    /* Always IA32 on this target; the cUnit parameter of the generic
     * signature (commented out above) is unused here. */
    return DALVIK_JIT_IA32;
}
44
/* Report which instruction set the interpret template belongs to. */
JitInstructionSetType dvmCompilerGetInterpretTemplateSet()
{
    return DALVIK_JIT_IA32;
}
49
50/* we don't use template for IA32 */
void *dvmCompilerGetInterpretTemplate()
{
    /* IA32 has no interpret template (see comment above); callers such as
     * dvmJitToPatchPredictedChain() compare trace addresses against this
     * NULL sentinel. */
    return NULL;
}
55
56/* Track the number of times that the code cache is patched */
57#if defined(WITH_JIT_TUNING)
58#define UPDATE_CODE_CACHE_PATCHES()    (gDvmJit.codeCachePatches++)
59#else
60#define UPDATE_CODE_CACHE_PATCHES()
61#endif
62
/*
 * One-time target-specific JIT configuration: table geometry, compilation
 * threshold, code cache size and optimization level.  Always succeeds.
 */
bool dvmCompilerArchInit() {
    /* Target-specific configuration */
    /* Table size must remain a power of two so the mask below is valid */
    gDvmJit.jitTableSize = 1 << 12;
    gDvmJit.jitTableMask = gDvmJit.jitTableSize - 1;
    gDvmJit.threshold = 255;
    gDvmJit.codeCacheSize = 512*1024;
    gDvmJit.optLevel = kJitOptLevelO1;

#if defined(WITH_SELF_VERIFICATION)
    /* Force into blocking mode */
    gDvmJit.blockingMode = true;
    gDvm.nativeDebuggerActive = true;
#endif

    // Make sure all threads have current values
    dvmJitUpdateThreadStateAll();

    /* Initialization cannot fail on this target */
    return true;
}
82
/*
 * Drain the queued inline-cache patch work orders (filled by
 * inlineCachePatchEnqueue below).  Expected to run at a safe point while
 * all mutator threads are stopped; each order re-resolves its class and
 * overwrites the predicted chaining cell in the code cache.
 */
void dvmCompilerPatchInlineCache(void)
{
    int i;
    /* Range of patched cells; on ARM this drives the cache flush — on IA32
     * it is computed but otherwise unused (no flush needed). */
    PredictedChainingCell *minAddr, *maxAddr;

    /* Nothing to be done */
    if (gDvmJit.compilerICPatchIndex == 0) return;

    /*
     * Since all threads are already stopped we don't really need to acquire
     * the lock. But race condition can be easily introduced in the future w/o
     * paying attention so we still acquire the lock here.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    //ALOGD("Number of IC patch work orders: %d", gDvmJit.compilerICPatchIndex);

    /* Initialize the min/max address range */
    minAddr = (PredictedChainingCell *)
        ((char *) gDvmJit.codeCache + gDvmJit.codeCacheSize);
    maxAddr = (PredictedChainingCell *) gDvmJit.codeCache;

    for (i = 0; i < gDvmJit.compilerICPatchIndex; i++) {
        ICPatchWorkOrder *workOrder = &gDvmJit.compilerICPatchQueue[i];
        PredictedChainingCell *cellAddr = workOrder->cellAddr;
        PredictedChainingCell *cellContent = &workOrder->cellContent;
        /* Re-resolve by descriptor/loader; the class may have moved since
         * the order was queued. */
        ClassObject *clazz = dvmFindClassNoInit(workOrder->classDescriptor,
                                                workOrder->classLoader);

        /* serialNumber was stashed at enqueue time purely for this check */
        assert(clazz->serialNumber == workOrder->serialNumber);

        /* Use the newly resolved clazz pointer */
        cellContent->clazz = clazz;

        if (cellAddr->clazz == NULL) {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p to %s (%s) initialized",
                      cellAddr,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        } else {
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: predicted chain %p from %s to %s (%s) "
                      "patched",
                      cellAddr,
                      cellAddr->clazz->descriptor,
                      cellContent->clazz->descriptor,
                      cellContent->method->name));
        }

        /* Patch the chaining cell */
        *cellAddr = *cellContent;
        minAddr = (cellAddr < minAddr) ? cellAddr : minAddr;
        maxAddr = (cellAddr > maxAddr) ? cellAddr : maxAddr;
    }

    PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);

    gDvmJit.compilerICPatchIndex = 0;
    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
}
146
147/* Target-specific cache clearing */
void dvmCompilerCacheClear(char *start, size_t size)
{
    /*
     * Fill the region with 0xFF bytes: "0xFF 0xFF" is an invalid opcode
     * encoding on x86, so any stray jump into cleared cache faults
     * immediately rather than executing stale code.
     */
    memset(start, 0xFF, size);
}
153
154/* for JIT debugging, to be implemented */
void dvmJitCalleeSave(double *saveArea) {
    /* No-op on IA32; declared extern "C" above for the common interface */
}
157
void dvmJitCalleeRestore(double *saveArea) {
    /* No-op on IA32; pairs with dvmJitCalleeSave above */
}
160
void dvmJitToInterpSingleStep() {
    /* Single-step handoff to the interpreter — unimplemented on IA32 */
}
163
/* Copy the trace descriptor for pc.  Not implemented on IA32: always
 * returns NULL, so callers must tolerate a missing descriptor. */
JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
                                            const JitEntry *knownEntry) {
    return NULL;
}
168
void dvmCompilerCodegenDump(CompilationUnit *cUnit) //in ArchUtility.c
{
    /* Debug dump of generated code — no-op on IA32 */
}
172
void dvmCompilerArchDump(void)
{
    /* Architecture statistics dump — no-op on IA32 */
}
176
/*
 * Return the base of the trace for entry p.  Stubbed on IA32: always NULL,
 * which makes getProfileCount() below report 0 for every entry.
 */
char *getTraceBase(const JitEntry *p)
{
    return NULL;
}
181
void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo* info)
{
    /* LIR assembly pass — no-op in this x86 backend */
}
185
void dvmJitInstallClassObjectPointers(CompilationUnit *cUnit, char *codeAddress)
{
    /* Class-pointer fixup after GC — no-op on IA32 */
}
189
void dvmCompilerMethodMIR2LIR(CompilationUnit *cUnit)
{
    // Method-based JIT not supported for x86.
}
194
void dvmJitScanAllClassPointers(void (*callback)(void *))
{
    /* GC scan of class pointers embedded in code — no-op on IA32 */
}
198
199/* Handy function to retrieve the profile count */
200static inline int getProfileCount(const JitEntry *entry)
201{
202    if (entry->dPC == 0 || entry->codeAddress == 0)
203        return 0;
204    u4 *pExecutionCount = (u4 *) getTraceBase(entry);
205
206    return pExecutionCount ? *pExecutionCount : 0;
207}
208
209/* qsort callback function */
210static int sortTraceProfileCount(const void *entry1, const void *entry2)
211{
212    const JitEntry *jitEntry1 = (const JitEntry *)entry1;
213    const JitEntry *jitEntry2 = (const JitEntry *)entry2;
214
215    JitTraceCounter_t count1 = getProfileCount(jitEntry1);
216    JitTraceCounter_t count2 = getProfileCount(jitEntry2);
217    return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
218}
219
220/* Sort the trace profile counts and dump them */
221void dvmCompilerSortAndPrintTraceProfiles() //in Assemble.c
222{
223    JitEntry *sortedEntries;
224    int numTraces = 0;
225    unsigned long counts = 0;
226    unsigned int i;
227
228    /* Make sure that the table is not changing */
229    dvmLockMutex(&gDvmJit.tableLock);
230
231    /* Sort the entries by descending order */
232    sortedEntries = (JitEntry *)malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
233    if (sortedEntries == NULL)
234        goto done;
235    memcpy(sortedEntries, gDvmJit.pJitEntryTable,
236           sizeof(JitEntry) * gDvmJit.jitTableSize);
237    qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
238          sortTraceProfileCount);
239
240    /* Dump the sorted entries */
241    for (i=0; i < gDvmJit.jitTableSize; i++) {
242        if (sortedEntries[i].dPC != 0) {
243            numTraces++;
244        }
245    }
246    if (numTraces == 0)
247        numTraces = 1;
248    ALOGI("JIT: Average execution count -> %d",(int)(counts / numTraces));
249
250    free(sortedEntries);
251done:
252    dvmUnlockMutex(&gDvmJit.tableLock);
253    return;
254}
255
/*
 * Emit an unconditional jump at instAddr.  relOffset is measured from the
 * start of the instruction, so the encoded instruction's own length is
 * subtracted before the immediate is dumped.
 */
void jumpWithRelOffset(char* instAddr, int relOffset) {
    stream = instAddr;  /* redirect the global emission pointer */
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
    dump_imm(Mnemonic_JMP, immSize, relOffset);
}
262
263// works whether instructions for target basic block are generated or not
264LowOp* jumpToBasicBlock(char* instAddr, int targetId) {
265    stream = instAddr;
266    bool unknown;
267    OpndSize size;
268    int relativeNCG = targetId;
269    relativeNCG = getRelativeNCG(targetId, JmpCall_uncond, &unknown, &size);
270    unconditional_jump_int(relativeNCG, size);
271    return NULL;
272}
273
274LowOp* condJumpToBasicBlock(char* instAddr, ConditionCode cc, int targetId) {
275    stream = instAddr;
276    bool unknown;
277    OpndSize size;
278    int relativeNCG = targetId;
279    relativeNCG = getRelativeNCG(targetId, JmpCall_cond, &unknown, &size);
280    conditional_jump_int(cc, relativeNCG, size);
281    return NULL;
282}
283
284/*
285 * Attempt to enqueue a work order to patch an inline cache for a predicted
286 * chaining cell for virtual/interface calls.
287 */
static bool inlineCachePatchEnqueue(PredictedChainingCell *cellAddr,
                                    PredictedChainingCell *newContent)
{
    /* NOTE(review): result is never set to false on any path, including the
     * queue-full drop case — callers cannot observe a failed enqueue. */
    bool result = true;

    /*
     * Make sure only one thread gets here since updating the cell (ie fast
     * path and queueing the request (ie the queued path) have to be done
     * in an atomic fashion.
     */
    dvmLockMutex(&gDvmJit.compilerICPatchLock);

    /* Fast path for uninitialized chaining cell */
    if (cellAddr->clazz == NULL &&
        cellAddr->branch == PREDICTED_CHAIN_BX_PAIR_INIT) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->method = newContent->method;
        cellAddr->branch = newContent->branch;
        cellAddr->branch2 = newContent->branch2;

        /*
         * The update order matters - make sure clazz is updated last since it
         * will bring the uninitialized chaining cell to life.
         */
        android_atomic_release_store((int32_t)newContent->clazz,
            (volatile int32_t *)(void*) &cellAddr->clazz);
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if 0
        MEM_BARRIER();
        cellAddr->clazz = newContent->clazz;
        //cacheflush((intptr_t) cellAddr, (intptr_t) (cellAddr+1), 0);
#endif
#if defined(IA_JIT_TUNING)
        /* NOTE(review): every other counter in this function is guarded by
         * WITH_JIT_TUNING; confirm IA_JIT_TUNING is intentional here. */
        gDvmJit.icPatchInit++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: FAST predicted chain %p to method %s%s %p",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name, newContent->method));
    /* Check if this is a frequently missed clazz */
    } else if (cellAddr->stagedClazz != newContent->clazz) {
        /* Not proven to be frequent yet - build up the filter cache */
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->stagedClazz = newContent->clazz;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchRejected++;
#endif
    /*
     * Different classes but same method implementation - it is safe to just
     * patch the class value without the need to stop the world.
     */
    } else if (cellAddr->method == newContent->method) {
        UNPROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

        cellAddr->clazz = newContent->clazz;
        /* No need to flush the cache here since the branch is not patched */
        UPDATE_CODE_CACHE_PATCHES();

        PROTECT_CODE_CACHE(cellAddr, sizeof(*cellAddr));

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchLockFree++;
#endif
    /*
     * Cannot patch the chaining cell inline - queue it until the next safe
     * point.
     */
    } else if (gDvmJit.compilerICPatchIndex < COMPILER_IC_PATCH_QUEUE_SIZE)  {
        int index = gDvmJit.compilerICPatchIndex++;
        const ClassObject *clazz = newContent->clazz;

        gDvmJit.compilerICPatchQueue[index].cellAddr = cellAddr;
        gDvmJit.compilerICPatchQueue[index].cellContent = *newContent;
        /* Descriptor + loader let dvmCompilerPatchInlineCache re-resolve */
        gDvmJit.compilerICPatchQueue[index].classDescriptor = clazz->descriptor;
        gDvmJit.compilerICPatchQueue[index].classLoader = clazz->classLoader;
        /* For verification purpose only */
        gDvmJit.compilerICPatchQueue[index].serialNumber = clazz->serialNumber;

#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchQueued++;
#endif
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: QUEUE predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    } else {
    /* Queue is full - just drop this patch request */
#if defined(WITH_JIT_TUNING)
        gDvmJit.icPatchDropped++;
#endif

        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: DROP predicted chain %p to method %s%s",
                  cellAddr, newContent->clazz->descriptor, newContent->method->name));
    }

    dvmUnlockMutex(&gDvmJit.compilerICPatchLock);
    return result;
}
395
396/*
397 * This method is called from the invoke templates for virtual and interface
398 * methods to speculatively setup a chain to the callee. The templates are
399 * written in assembly and have setup method, cell, and clazz at r0, r2, and
400 * r3 respectively, so there is a unused argument in the list. Upon return one
401 * of the following three results may happen:
402 *   1) Chain is not setup because the callee is native. Reset the rechain
403 *      count to a big number so that it will take a long time before the next
404 *      rechain attempt to happen.
405 *   2) Chain is not setup because the callee has not been created yet. Reset
406 *      the rechain count to a small number and retry in the near future.
407 *   3) Ask all other threads to stop before patching this chaining cell.
408 *      This is required because another thread may have passed the class check
409 *      but hasn't reached the chaining cell yet to follow the chain. If we
410 *      patch the content before halting the other thread, there could be a
411 *      small window for race conditions to happen that it may follow the new
412 *      but wrong chain to invoke a different method.
413 */
const Method *dvmJitToPatchPredictedChain(const Method *method,
                                          Thread *self,
                                          PredictedChainingCell *cell,
                                          const ClassObject *clazz)
{
    int newRechainCount = PREDICTED_CHAIN_COUNTER_RECHAIN;
    /* Don't come back here for a long time if the method is native */
    if (dvmIsNativeMethod(method)) {
        UNPROTECT_CODE_CACHE(cell, sizeof(*cell));

        /*
         * Put a non-zero/bogus value in the clazz field so that it won't
         * trigger immediate patching and will continue to fail to match with
         * a real clazz pointer.
         */
        cell->clazz = (ClassObject *) PREDICTED_CHAIN_FAKE_CLAZZ;

        UPDATE_CODE_CACHE_PATCHES();
        PROTECT_CODE_CACHE(cell, sizeof(*cell));
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to native method %s ignored",
                  cell, method->name));
        goto done;
    }
    /* Extra scope so the goto above does not jump over these initializers */
    {
    int tgtAddr = (int) dvmJitGetTraceAddr(method->insns);

    /*
     * Compilation not made yet for the callee. Reset the counter to a small
     * value and come back to check soon.
     */
    if ((tgtAddr == 0) ||
        ((void*)tgtAddr == dvmCompilerGetInterpretTemplate())) {
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: predicted chain %p to method %s%s delayed",
                  cell, method->clazz->descriptor, method->name));
        goto done;
    }

    /* Staging copy, published to the real cell via inlineCachePatchEnqueue */
    PredictedChainingCell newCell;

    if (cell->clazz == NULL) {
        newRechainCount = self->icRechainCount;
    }

    /* Jump displacement is relative to the end of the jump instruction */
    int relOffset = (int) tgtAddr - (int)cell;
    OpndSize immSize = estOpndSizeFromImm(relOffset);
    int jumpSize = getJmpCallInstSize(immSize, JmpCall_uncond);
    relOffset -= jumpSize;
    COMPILER_TRACE_CHAINING(
            ALOGI("inlineCachePatchEnqueue chain %p to method %s%s inst size %d",
                  cell, method->clazz->descriptor, method->name, jumpSize));
    //can't use stream here since it is used by the compilation thread
    dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*) (&newCell)); //update newCell.branch

    newCell.clazz = clazz;
    newCell.method = method;

    /*
     * Enter the work order to the queue and the chaining cell will be patched
     * the next time a safe point is entered.
     *
     * If the enqueuing fails reset the rechain count to a normal value so that
     * it won't get indefinitely delayed.
     */
    inlineCachePatchEnqueue(cell, &newCell);
    }
done:
    self->icRechainCount = newRechainCount;
    return method;
}
485
486/*
487 * Unchain a trace given the starting address of the translation
488 * in the code cache.  Refer to the diagram in dvmCompilerAssembleLIR.
489 * For ARM, it returns the address following the last cell unchained.
490 * For IA, it returns NULL since cacheflush is not required for IA.
491 */
u4* dvmJitUnchain(void* codeAddr)
{
    /* codeAddr is 4-byte aligned, so is chain cell count offset */
    u2* pChainCellCountOffset = (u2*)((char*)codeAddr - 4);
    u2 chainCellCountOffset = *pChainCellCountOffset;
    /* chain cell counts information is 4-byte aligned */
    ChainCellCounts *pChainCellCounts =
          (ChainCellCounts*)((char*)codeAddr + chainCellCountOffset);
    /* Offset of the first chain cell is stored 2 bytes before codeAddr */
    u2* pChainCellOffset = (u2*)((char*)codeAddr - 2);
    u2 chainCellOffset = *pChainCellOffset;
    u1* pChainCells;
    int i,j;
    PredictedChainingCell *predChainCell;
    int padding;

    /* Locate the beginning of the chain cell region */
    pChainCells = (u1 *)((char*)codeAddr + chainCellOffset);

    /* The cells are sorted in order - walk through them and reset */
    for (i = 0; i < kChainingCellGap; i++) {
        /* for hot, normal, singleton chaining:
               nop  //padding.
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after chaining:
               nop
               jmp imm
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           after unchaining:
               nop
               jmp 0
               mov imm32, reg1
               mov imm32, reg2
               call reg2
           Space occupied by the chaining cell in bytes: nop is for padding,
                jump 0, the target 0 is 4 bytes aligned.
           Space for predicted chaining: 5 words = 20 bytes
        */
        int elemSize = 0;
        if (i == kChainingCellInvokePredicted) {
            /* 5 words — must match the layout written by
             * handleInvokePredictedChainingCell() */
            elemSize = 20;
        }
        COMPILER_TRACE_CHAINING(
            ALOGI("Jit Runtime: unchaining type %d count %d", i, pChainCellCounts->u.count[i]));

        for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
            switch(i) {
                case kChainingCellNormal:
                case kChainingCellHot:
                case kChainingCellInvokeSingleton:
                case kChainingCellBackwardBranch:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of normal, hot, or singleton"));
                    /* Round up to the 4-byte-aligned jump-target word */
                    pChainCells = (u1*) (((uint)pChainCells + 4)&(~0x03));
                    /* aligned jmp + two 5-byte movs + 2-byte call (see above) */
                    elemSize = 4+5+5+2;
                    /* Re-zero the jump target: back to the unchained state */
                    memset(pChainCells, 0, 4);
                    break;
                case kChainingCellInvokePredicted:
                    COMPILER_TRACE_CHAINING(
                        ALOGI("Jit Runtime: unchaining of predicted"));
                    /* 4-byte aligned */
                    padding = (4 - ((u4)pChainCells & 3)) & 3;
                    pChainCells += padding;
                    predChainCell = (PredictedChainingCell *) pChainCells;
                    /*
                     * There could be a race on another mutator thread to use
                     * this particular predicted cell and the check has passed
                     * the clazz comparison. So we cannot safely wipe the
                     * method and branch but it is safe to clear the clazz,
                     * which serves as the key.
                     */
                    predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
                    break;
                default:
                    ALOGE("Unexpected chaining type: %d", i);
                    dvmAbort();  // dvmAbort OK here - can't safely recover
            }
            COMPILER_TRACE_CHAINING(
                ALOGI("Jit Runtime: unchaining 0x%x", (int)pChainCells));
            pChainCells += elemSize;  /* Advance by a fixed number of bytes */
        }
    }
    /* No cacheflush needed on IA32, so no end address to report */
    return NULL;
}
580
581/* Unchain all translation in the cache. */
582void dvmJitUnchainAll()
583{
584    ALOGV("Jit Runtime: unchaining all");
585    if (gDvmJit.pJitEntryTable != NULL) {
586        COMPILER_TRACE_CHAINING(ALOGI("Jit Runtime: unchaining all"));
587        dvmLockMutex(&gDvmJit.tableLock);
588
589        UNPROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
590
591        for (size_t i = 0; i < gDvmJit.jitTableSize; i++) {
592            if (gDvmJit.pJitEntryTable[i].dPC &&
593                !gDvmJit.pJitEntryTable[i].u.info.isMethodEntry &&
594                gDvmJit.pJitEntryTable[i].codeAddress) {
595                      dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
596            }
597        }
598
599        PROTECT_CODE_CACHE(gDvmJit.codeCache, gDvmJit.codeCacheByteUsed);
600
601        dvmUnlockMutex(&gDvmJit.tableLock);
602        gDvmJit.translationChains = 0;
603    }
604    gDvmJit.hasNewChain = false;
605}
606
607#define P_GPR_1 PhysicalReg_EBX
608/* Add an additional jump instruction, keep jump target 4 bytes aligned.*/
609static void insertJumpHelp()
610{
611    int rem = (uint)stream % 4;
612    int nop_size = 3 - rem;
613    dump_nop(nop_size);
614    unconditional_jump_int(0, OpndSize_32);
615    return;
616}
617
618/* Chaining cell for code that may need warmup. */
619/* ARM assembly: ldr r0, [r6, #76] (why a single instruction to access member of glue structure?)
620                 blx r0
621                 data 0xb23a //bytecode address: 0x5115b23a
622                 data 0x5115
623   IA32 assembly:
624                  jmp  0 //5 bytes
625                  movl address, %ebx
626                  movl dvmJitToInterpNormal, %eax
627                  call %eax
628                  <-- return address
629*/
static void handleNormalChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleNormalChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER NormalChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    /* Pass the bytecode address of the chaining target in P_GPR_1 (EBX) */
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}
647
648/*
649 * Chaining cell for instructions that immediately following already translated
650 * code.
651 */
static void handleHotChainingCell(CompilationUnit *cUnit,
                                  unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleHotChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER HotChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    /* Pass the bytecode address of the chaining target in P_GPR_1 (EBX) */
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}
669
670/* Chaining cell for branches that branch back into the same basic block */
static void handleBackwardBranchChainingCell(CompilationUnit *cUnit,
                                     unsigned int offset, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleBackwardBranchChainingCell for method %s block %d BC offset %x NCG offset %x",
          cUnit->method->name, blockId, offset, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER BackwardBranchChainingCell at offsetPC %x offsetNCG %x @%p",
              offset, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    /* Pass the bytecode address of the chaining target in P_GPR_1 (EBX) */
    move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpNormal();
    //move_imm_to_reg(OpndSize_32, (int) (cUnit->method->insns + offset), P_GPR_1, true); /* used when unchaining */
}
688
689/* Chaining cell for monomorphic method invocations. */
static void handleInvokeSingletonChainingCell(CompilationUnit *cUnit,
                                              const Method *callee, int blockId, LowOpBlockLabel* labelList)
{
    ALOGV("in handleInvokeSingletonChainingCell for method %s block %d callee %s NCG offset %x",
          cUnit->method->name, blockId, callee->name, stream - streamMethodStart);
    if(dump_x86_inst)
        ALOGI("LOWER InvokeSingletonChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
    /* Add one additional "jump 0" instruction, it may be modified during jit chaining. This helps
     * resolve the multithreading issue.
     */
    insertJumpHelp();
    /* Pass the callee's bytecode address in P_GPR_1 (EBX) */
    move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true);
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelect();
    //move_imm_to_reg(OpndSize_32, (int) (callee->insns), P_GPR_1, true); /* used when unchaining */
}
707#undef P_GPR_1
708
709/* Chaining cell for monomorphic method invocations. */
static void handleInvokePredictedChainingCell(CompilationUnit *cUnit, int blockId)
{
    if(dump_x86_inst)
        ALOGI("LOWER InvokePredictedChainingCell at block %d offsetNCG %x @%p",
              blockId, stream - streamMethodStart, stream);
#ifndef PREDICTED_CHAINING
    //assume rPC for callee->insns in %ebx
    scratchRegs[0] = PhysicalReg_EAX;
    call_dvmJitToInterpTraceSelectNoChain();
#else
    /* make sure section for predicited chaining cell is 4-byte aligned */
    //int padding = (4 - ((u4)stream & 3)) & 3;
    //stream += padding;
    int* streamData = (int*)stream;
    /* Should not be executed in the initial state */
    streamData[0] = PREDICTED_CHAIN_BX_PAIR_INIT;
    streamData[1] = 0;
    /* To be filled: class */
    streamData[2] = PREDICTED_CHAIN_CLAZZ_INIT;
    /* To be filled: method */
    streamData[3] = PREDICTED_CHAIN_METHOD_INIT;
    /*
     * Rechain count. The initial value of 0 here will trigger chaining upon
     * the first invocation of this callsite.
     */
    streamData[4] = PREDICTED_CHAIN_COUNTER_INIT;
#if 0
    ALOGI("--- DATA @ %p: %x %x %x %x", stream, *((int*)stream), *((int*)(stream+4)),
          *((int*)(stream+8)), *((int*)(stream+12)));
#endif
    /* 5 words — must match the 20-byte elemSize assumed by dvmJitUnchain() */
    stream += 20; //5 *4
#endif
}
743
744/* Load the Dalvik PC into r0 and jump to the specified target */
static void handlePCReconstruction(CompilationUnit *cUnit,
                                   LowOpBlockLabel *targetLabel)
{
    /* Entire body is compiled out (#if 0) — PC reconstruction is a no-op
     * in this x86 code path; presumably handled elsewhere — TODO confirm. */
#if 0
    LowOp **pcrLabel =
        (LowOp **) cUnit->pcReconstructionList.elemList;
    int numElems = cUnit->pcReconstructionList.numUsed;
    int i;
    for (i = 0; i < numElems; i++) {
        dvmCompilerAppendLIR(cUnit, (LIR *) pcrLabel[i]);
        /* r0 = dalvik PC */
        loadConstant(cUnit, r0, pcrLabel[i]->operands[0]);
        genUnconditionalBranch(cUnit, targetLabel);
    }
#endif
}
761
762//use O0 code generator for hoisted checks outside of the loop
763/*
764 * vA = arrayReg;
765 * vB = idxReg;
766 * vC = endConditionReg;
767 * arg[0] = maxC
768 * arg[1] = minC
769 * arg[2] = loopBranchConditionCode
770 */
771#define P_GPR_1 PhysicalReg_EBX
772#define P_GPR_2 PhysicalReg_ECX
static void genHoistedChecksForCountUpLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * NOTE: these synthesized blocks don't have ssa names assigned
     * for Dalvik registers.  However, because they dominate the following
     * blocks we can simply use the Dalvik name w/ subscript 0 as the
     * ssa name.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign index in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, P_GPR_2, true);
    export_pc();
    /* Hoisted null check: array == NULL -> exception block */
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);
    int delta = maxC;
    /*
     * If the loop end condition is ">=" instead of ">", then the largest value
     * of the index is "endCondition - 1".
     */
    if (dInsn->arg[2] == OP_IF_GE) {
        delta--;
    }

    if (delta < 0) { //+delta
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -delta, P_GPR_2, true);
    } else if(delta > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, delta, P_GPR_2, true);
    }
    /* Hoisted bound check: (index+delta) vs array.length; NC (no carry,
     * i.e. unsigned >=) -> exception block */
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);
}
809
810/*
811 * vA = arrayReg;
812 * vB = idxReg;
813 * vC = endConditionReg;
814 * arg[0] = maxC
815 * arg[1] = minC
816 * arg[2] = loopBranchConditionCode
817 */
static void genHoistedChecksForCountDownLoop(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * Emits the hoisted null check and upper-bound check for a count-down
     * loop.  For a count-down loop the initial index (vB) is the largest
     * index used, so it is checked (adjusted by maxC) against the array
     * length once before the loop body.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    /* maxC: the largest constant offset added to the index inside the loop */
    const int maxC = dInsn->arg[0];

    /* assign array in virtual register to P_GPR_1 */
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true);
    /* assign index in virtual register to P_GPR_2 */
    get_virtual_reg(mir->dalvikInsn.vB, OpndSize_32, P_GPR_2, true);
    export_pc();
    /* null check on the array reference */
    compare_imm_reg(OpndSize_32, 0, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_E, cUnit->exceptionBlockId);

    /* adjust the initial index by maxC to get the largest index used */
    if (maxC < 0) {
        //if P_GPR_2 is mapped to a VR, we can't do this
        alu_binary_imm_reg(OpndSize_32, sub_opc, -maxC, P_GPR_2, true);
    } else if(maxC > 0) {
        alu_binary_imm_reg(OpndSize_32, add_opc, maxC, P_GPR_2, true);
    }
    /* bound check: largest index vs. array length; Condition_NC (no carry,
     * i.e. unsigned length <= index) means out of bounds */
    compare_mem_reg(OpndSize_32, offArrayObject_length, P_GPR_1, true, P_GPR_2, true);
    condJumpToBasicBlock(stream, Condition_NC, cUnit->exceptionBlockId);

}
841#undef P_GPR_1
842#undef P_GPR_2
843
844/*
845 * vA = idxReg;
846 * vB = minC;
847 */
848#define P_GPR_1 PhysicalReg_ECX
static void genHoistedLowerBoundCheck(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * Emits the hoisted lower-bound check: verifies once, before the loop,
     * that the smallest index used (index + minC) is not negative.
     * vA holds the index VR, vB holds the constant minC.
     */
    DecodedInstruction *dInsn = &mir->dalvikInsn;
    const int minC = dInsn->vB;
    get_virtual_reg(mir->dalvikInsn.vA, OpndSize_32, P_GPR_1, true); //index
    export_pc();
    /* compare index against -minC; carry (unsigned index < -minC) means the
     * smallest index used would be negative -> throw */
    compare_imm_reg(OpndSize_32, -minC, P_GPR_1, true);
    condJumpToBasicBlock(stream, Condition_C, cUnit->exceptionBlockId);
}
858#undef P_GPR_1
859
860#ifdef WITH_JIT_INLINING
static void genValidationForPredictedInline(CompilationUnit *cUnit, MIR *mir)
{
    /*
     * Emits the guard for a predicted (devirtualized) inlined callee:
     * null-checks the receiver (vC) and compares its runtime class against
     * the class recorded at trace-selection time.  A following conditional
     * branch (patched later) skips to the landing pad on a class mismatch.
     */
    CallsiteInfo *callsiteInfo = mir->meta.callsiteInfo;
    if(gDvm.executionMode == kExecutionModeNcgO0) {
        /* O0 path: use fixed physical registers */
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, PhysicalReg_EBX, true);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, PhysicalReg_ECX, true);
        compare_imm_reg(OpndSize_32, 0, PhysicalReg_EBX, true);
        export_pc(); //use %edx
        /* NOTE(review): first argument is empty here — looks like a dropped
         * parameter; confirm the expected signature of
         * conditional_jump_global_API (only compiled under WITH_JIT_INLINING) */
        conditional_jump_global_API(, Condition_E, "common_errNullObject", false);
        move_mem_to_reg(OpndSize_32, offObject_clazz, PhysicalReg_EBX, true, PhysicalReg_EAX, true);
        compare_reg_reg(PhysicalReg_ECX, true, PhysicalReg_EAX, true);
    } else {
        /* O1 path: use virtual temporaries and the shared nullCheck helper */
        get_virtual_reg(mir->dalvikInsn.vC, OpndSize_32, 5, false);
        move_imm_to_reg(OpndSize_32, (int) callsiteInfo->clazz, 4, false);
        nullCheck(5, false, 1, mir->dalvikInsn.vC);
        move_mem_to_reg(OpndSize_32, offObject_clazz, 5, false, 6, false);
        compare_reg_reg(4, false, 6, false);
    }

    //immediate will be updated later in genLandingPadForMispredictedCallee
    streamMisPred = stream;
    callsiteInfo->misPredBranchOver = (LIR*)conditional_jump_int(Condition_NE, 0, OpndSize_8);
}
884#endif
885
886/* Extended MIR instructions like PHI */
887void handleExtendedMIR(CompilationUnit *cUnit, MIR *mir)
888{
889    ExecutionMode origMode = gDvm.executionMode;
890    gDvm.executionMode = kExecutionModeNcgO0;
891    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
892        case kMirOpPhi: {
893            break;
894        }
895        case kMirOpNullNRangeUpCheck: {
896            genHoistedChecksForCountUpLoop(cUnit, mir);
897            break;
898        }
899        case kMirOpNullNRangeDownCheck: {
900            genHoistedChecksForCountDownLoop(cUnit, mir);
901            break;
902        }
903        case kMirOpLowerBound: {
904            genHoistedLowerBoundCheck(cUnit, mir);
905            break;
906        }
907        case kMirOpPunt: {
908            break;
909        }
910#ifdef WITH_JIT_INLINING
911        case kMirOpCheckInlinePrediction: { //handled in ncg_o1_data.c
912            genValidationForPredictedInline(cUnit, mir);
913            break;
914        }
915#endif
916        default:
917            break;
918    }
919    gDvm.executionMode = origMode;
920}
921
/*
 * Placeholder for loop-entry setup.  All of the original (ARM-derived) logic
 * is commented out or compiled out below, so this function currently emits
 * nothing; the parameters are unused.  Kept as a hook for the branch-to-body
 * and branch-to-PCR setup that other targets perform here.
 */
static void setupLoopEntryBlock(CompilationUnit *cUnit, BasicBlock *entry,
                                int bodyId)
{
    /*
     * Next, create two branches - one branch over to the loop body and the
     * other branch to the PCR cell to punt.
     */
    //LowOp* branchToBody = jumpToBasicBlock(stream, bodyId);
    //setupResourceMasks(branchToBody);
    //cUnit->loopAnalysis->branchToBody = ((LIR*)branchToBody);

#if 0
    LowOp *branchToPCR = dvmCompilerNew(sizeof(ArmLIR), true);
    branchToPCR->opCode = kThumbBUncond;
    branchToPCR->generic.target = (LIR *) pcrLabel;
    setupResourceMasks(branchToPCR);
    cUnit->loopAnalysis->branchToPCR = (LIR *) branchToPCR;
#endif
}
941
942/* check whether we can merge the block at index i with its target block */
943bool mergeBlock(BasicBlock *bb) {
944    if(bb->blockType == kDalvikByteCode &&
945       bb->firstMIRInsn != NULL &&
946       (bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_16 ||
947        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO ||
948        bb->lastMIRInsn->dalvikInsn.opcode == OP_GOTO_32) &&
949       bb->fallThrough == NULL) {// &&
950       //cUnit->hasLoop) {
951        //ALOGI("merge blocks ending with goto at index %d", i);
952        MIR* prevInsn = bb->lastMIRInsn->prev;
953        if(bb->taken == NULL) return false;
954        MIR* mergeInsn = bb->taken->firstMIRInsn;
955        if(mergeInsn == NULL) return false;
956        if(prevInsn == NULL) {//the block has a single instruction
957            bb->firstMIRInsn = mergeInsn;
958        } else {
959            prevInsn->next = mergeInsn; //remove goto from the chain
960        }
961        mergeInsn->prev = prevInsn;
962        bb->lastMIRInsn = bb->taken->lastMIRInsn;
963        bb->taken->firstMIRInsn = NULL; //block being merged in
964        bb->fallThrough = bb->taken->fallThrough;
965        bb->taken = bb->taken->taken;
966        return true;
967    }
968    return false;
969}
970
971static int genTraceProfileEntry(CompilationUnit *cUnit)
972{
973    cUnit->headerSize = 6;
974    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
975        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
976        return 12;
977    } else {
978        return 4;
979    }
980
981}
982
983#define PRINT_BUFFER_LEN 1024
984/* Print the code block in code cache in the range of [startAddr, endAddr)
985 * in readable format.
986 */
987void printEmittedCodeBlock(unsigned char *startAddr, unsigned char *endAddr)
988{
989    char strbuf[PRINT_BUFFER_LEN];
990    unsigned char *addr;
991    unsigned char *next_addr;
992    int n;
993
994    if (gDvmJit.printBinary) {
995        // print binary in bytes
996        n = 0;
997        for (addr = startAddr; addr < endAddr; addr++) {
998            n += snprintf(&strbuf[n], PRINT_BUFFER_LEN-n, "0x%x, ", *addr);
999            if (n > PRINT_BUFFER_LEN - 10) {
1000                ALOGD("## %s", strbuf);
1001                n = 0;
1002            }
1003        }
1004        if (n > 0)
1005            ALOGD("## %s", strbuf);
1006    }
1007
1008    // print disassembled instructions
1009    addr = startAddr;
1010    while (addr < endAddr) {
1011        next_addr = reinterpret_cast<unsigned char*>
1012            (decoder_disassemble_instr(reinterpret_cast<char*>(addr),
1013                                       strbuf, PRINT_BUFFER_LEN));
1014        if (addr != next_addr) {
1015            ALOGD("**  %p: %s", addr, strbuf);
1016        } else {                // check whether this is nop padding
1017            if (addr[0] == 0x90) {
1018                ALOGD("**  %p: NOP (1 byte)", addr);
1019                next_addr += 1;
1020            } else if (addr[0] == 0x66 && addr[1] == 0x90) {
1021                ALOGD("**  %p: NOP (2 bytes)", addr);
1022                next_addr += 2;
1023            } else if (addr[0] == 0x0f && addr[1] == 0x1f && addr[2] == 0x00) {
1024                ALOGD("**  %p: NOP (3 bytes)", addr);
1025                next_addr += 3;
1026            } else {
1027                ALOGD("** unable to decode binary at %p", addr);
1028                break;
1029            }
1030        }
1031        addr = next_addr;
1032    }
1033}
1034
1035/* 4 is the number of additional bytes needed for chaining information for trace:
1036 * 2 bytes for chaining cell count offset and 2 bytes for chaining cell offset */
1037#define EXTRA_BYTES_FOR_CHAINING 4
1038
1039/* Entry function to invoke the backend of the JIT compiler */
void dvmCompilerMIR2LIR(CompilationUnit *cUnit, JitTranslationInfo *info)
{
    /*
     * Backend entry point: lowers the trace's MIR into x86 code directly in
     * the code cache.  Layout emitted (in order):
     *   [2B chain-count offset][2B chaining-cell offset][pad to 16B]
     *   [profile entry][basic-block code][chaining cells][pad][chain counts]
     * On any failure path, cUnit->baseAddr is set to NULL, the cache is
     * re-protected, and the function returns early.
     */
    dump_x86_inst = cUnit->printMe;
    /* Used to hold the labels of each block */
    LowOpBlockLabel *labelList =
        (LowOpBlockLabel *)dvmCompilerNew(sizeof(LowOpBlockLabel) * cUnit->numBlocks, true); //Utility.c
    LowOp *headLIR = NULL;
    GrowableList chainingListByType[kChainingCellLast];
    unsigned int i, padding;

    /*
     * Initialize various types chaining lists.
     */
    for (i = 0; i < kChainingCellLast; i++) {
        dvmInitGrowableList(&chainingListByType[i], 2);
    }

    /* Clear the visited flag for each block */
    dvmCompilerDataFlowAnalysisDispatcher(cUnit, dvmCompilerClearVisitedFlag,
                                          kAllNodes, false /* isIterative */);

    GrowableListIterator iterator;
    dvmGrowableListIteratorInit(&cUnit->blockList, &iterator);

    /* Traces start with a profiling entry point.  Generate it here */
    cUnit->profileCodeSize = genTraceProfileEntry(cUnit);

    //BasicBlock **blockList = cUnit->blockList;
    GrowableList *blockList = &cUnit->blockList;
    BasicBlock *bb;

    info->codeAddress = NULL;
    stream = (char*)gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;

    // TODO: compile into a temporary buffer and then copy into the code cache.
    // That would let us leave the code cache unprotected for a shorter time.
    size_t unprotected_code_cache_bytes =
            gDvmJit.codeCacheSize - gDvmJit.codeCacheByteUsed - CODE_CACHE_PADDING;
    UNPROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    streamStart = stream; /* trace start before alignment */
    stream += EXTRA_BYTES_FOR_CHAINING; /* This is needed for chaining. Add the bytes before the alignment */
    stream = (char*)(((unsigned int)stream + 0xF) & ~0xF); /* Align trace to 16-bytes */
    streamMethodStart = stream; /* code start */
    /* -1 marks a label whose code offset has not been emitted yet */
    for (i = 0; i < ((unsigned int) cUnit->numBlocks); i++) {
        labelList[i].lop.generic.offset = -1;
    }
    /* Locate the exception-handling block so hoisted checks can branch to it */
    cUnit->exceptionBlockId = -1;
    for (i = 0; i < blockList->numUsed; i++) {
        bb = (BasicBlock *) blockList->elemList[i];
        if(bb->blockType == kExceptionHandling)
            cUnit->exceptionBlockId = i;
    }
    startOfTrace(cUnit->method, labelList, cUnit->exceptionBlockId, cUnit);
    /* O1-only pre-passes: goto-merging and per-block/trace preprocessing */
    if(gDvm.executionMode == kExecutionModeNcgO1) {
        //merge blocks ending with "goto" with the fall through block
        if (cUnit->jitMode != kJitLoop)
            for (i = 0; i < blockList->numUsed; i++) {
                bb = (BasicBlock *) blockList->elemList[i];
                bool merged = mergeBlock(bb);
                while(merged) merged = mergeBlock(bb);
            }
        for (i = 0; i < blockList->numUsed; i++) {
            bb = (BasicBlock *) blockList->elemList[i];
            if(bb->blockType == kDalvikByteCode &&
               bb->firstMIRInsn != NULL) {
                preprocessingBB(bb);
            }
        }
        preprocessingTrace();
    }

    /* Handle the content in each basic block */
    /* NOTE(review): i is assumed to track the iterator position so that
     * labelList[i] corresponds to the block just fetched — confirm the
     * iterator visits blocks in index order */
    for (i = 0; ; i++) {
        MIR *mir;
        bb = (BasicBlock *) dvmGrowableListIteratorNext(&iterator);
        if (bb == NULL) break;
        if (bb->visited == true) continue;

        labelList[i].immOpnd.value = bb->startOffset;

        if (bb->blockType >= kChainingCellLast) {
            /*
             * Append the label pseudo LIR first. Chaining cells will be handled
             * separately afterwards.
             */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[i]);
        }

        if (bb->blockType == kEntryBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_ENTRY_BLOCK;
            if (bb->firstMIRInsn == NULL) {
                continue;
            } else {
              setupLoopEntryBlock(cUnit, bb, bb->fallThrough->id);
                                  //&labelList[blockList[i]->fallThrough->id]);
            }
        } else if (bb->blockType == kExitBlock) {
            labelList[i].lop.opCode2 = ATOM_PSEUDO_EXIT_BLOCK;
            labelList[i].lop.generic.offset = (stream - streamMethodStart);
            goto gen_fallthrough;
        } else if (bb->blockType == kDalvikByteCode) {
            if (bb->hidden == true) continue;
            labelList[i].lop.opCode2 = ATOM_PSEUDO_NORMAL_BLOCK_LABEL;
            /* Reset the register state */
#if 0
            resetRegisterScoreboard(cUnit);
#endif
        } else {
            /* Chaining cells and other pseudo blocks: record them now,
             * generate their code after all bytecode blocks are done */
            switch (bb->blockType) {
                case kChainingCellNormal:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_NORMAL;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellNormal], i);
                    break;
                case kChainingCellInvokeSingleton:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_SINGLETON;
                    labelList[i].immOpnd.value =
                        (int) bb->containingMethod;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokeSingleton], i);
                    break;
                case kChainingCellInvokePredicted:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_INVOKE_PREDICTED;
                   /*
                     * Move the cached method pointer from operand 1 to 0.
                     * Operand 0 was clobbered earlier in this routine to store
                     * the block starting offset, which is not applicable to
                     * predicted chaining cell.
                     */
                    //TODO
                    //labelList[i].operands[0] = labelList[i].operands[1];

                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellInvokePredicted], i);
                    break;
                case kChainingCellHot:
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_CHAINING_CELL_HOT;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellHot], i);
                    break;
                case kPCReconstruction:
                    /* Make sure exception handling block is next */
                    labelList[i].lop.opCode2 =
                        ATOM_PSEUDO_PC_RECONSTRUCTION_BLOCK_LABEL;
                    //assert (i == cUnit->numBlocks - 2);
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    handlePCReconstruction(cUnit,
                                           &labelList[cUnit->puntBlock->id]);
                    break;
                case kExceptionHandling:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_EH_BLOCK_LABEL;
                    labelList[i].lop.generic.offset = (stream - streamMethodStart);
                    //if (cUnit->pcReconstructionList.numUsed) {
                        scratchRegs[0] = PhysicalReg_EAX;
                        jumpToInterpPunt();
                        //call_dvmJitToInterpPunt();
                    //}
                    break;
                case kChainingCellBackwardBranch:
                    labelList[i].lop.opCode2 = ATOM_PSEUDO_CHAINING_CELL_BACKWARD_BRANCH;
                    /* handle the codegen later */
                    dvmInsertGrowableList(
                        &chainingListByType[kChainingCellBackwardBranch],
                        i);
                    break;
                default:
                    break;
            }
            continue;
        }
        {
        //LowOp *headLIR = NULL;
        const DexCode *dexCode = dvmGetMethodCode(cUnit->method);
        const u2 *startCodePtr = dexCode->insns;
        const u2 *codePtr;
        labelList[i].lop.generic.offset = (stream - streamMethodStart);
        ALOGV("get ready to handle JIT bb %d type %d hidden %d",
              bb->id, bb->blockType, bb->hidden);
        /* Follow the codegen-chosen chain of blocks (nextCodegenBlock) so a
         * block can pull its successor in-line */
        for (BasicBlock *nextBB = bb; nextBB != NULL; nextBB = cUnit->nextCodegenBlock) {
            bb = nextBB;
            bb->visited = true;
            cUnit->nextCodegenBlock = NULL;

        if(gDvm.executionMode == kExecutionModeNcgO1 &&
           bb->blockType != kEntryBlock &&
           bb->firstMIRInsn != NULL) {
            /* O1 path: lower the whole basic block at once */
            startOfBasicBlock(bb);
            int cg_ret = codeGenBasicBlockJit(cUnit->method, bb);
            endOfBasicBlock(bb);
            if(cg_ret < 0) {
                endOfTrace(true/*freeOnly*/);
                cUnit->baseAddr = NULL;
                ALOGI("codeGenBasicBlockJit returns negative number");
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        } else {
        /* O0 path (and entry blocks): lower one bytecode at a time */
        for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
            startOfBasicBlock(bb); //why here for O0
            Opcode dalvikOpCode = mir->dalvikInsn.opcode;
            if((int)dalvikOpCode >= (int)kMirOpFirst) {
                handleExtendedMIR(cUnit, mir);
                continue;
            }
            InstructionFormat dalvikFormat =
                dexGetFormatFromOpcode(dalvikOpCode);
            ALOGV("ready to handle bytecode at offset %x: opcode %d format %d",
                  mir->offset, dalvikOpCode, dalvikFormat);
            LowOpImm *boundaryLIR = dump_special(ATOM_PSEUDO_DALVIK_BYTECODE_BOUNDARY, mir->offset);
            /* Remember the first LIR for this block */
            if (headLIR == NULL) {
                headLIR = (LowOp*)boundaryLIR;
            }
            bool notHandled = true;
            /*
             * Debugging: screen the opcode first to see if it is in the
             * do[-not]-compile list
             */
            bool singleStepMe =
                gDvmJit.includeSelectedOp !=
                ((gDvmJit.opList[dalvikOpCode >> 3] &
                  (1 << (dalvikOpCode & 0x7))) !=
                 0);
            if (singleStepMe || cUnit->allSingleStep) {
            } else {
                codePtr = startCodePtr + mir->offset;
                //lower each byte code, update LIR
                notHandled = lowerByteCodeJit(cUnit->method, cUnit->method->insns+mir->offset, mir);
                /* bail out if this bytecode pushed us past the cache limit */
                if(gDvmJit.codeCacheByteUsed + (stream - streamStart) +
                   CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                    ALOGI("JIT code cache full after lowerByteCodeJit (trace uses %uB)", (stream - streamStart));
                    gDvmJit.codeCacheFull = true;
                    cUnit->baseAddr = NULL;
                    endOfTrace(true/*freeOnly*/);
                    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                    return;
                }
            }
            if (notHandled) {
                ALOGE("%#06x: Opcode 0x%x (%s) / Fmt %d not handled",
                     mir->offset,
                     dalvikOpCode, dexGetOpcodeName(dalvikOpCode),
                     dalvikFormat);
                dvmAbort();
                break;
            }
        } // end for
        } // end else //JIT + O0 code generator
        }
        } // end for
        /* Eliminate redundant loads/stores and delay stores into later slots */
#if 0
        dvmCompilerApplyLocalOptimizations(cUnit, (LIR *) headLIR,
                                           cUnit->lastLIRInsn);
#endif
        if (headLIR) headLIR = NULL;
gen_fallthrough:
        /*
         * Check if the block is terminated due to trace length constraint -
         * insert an unconditional branch to the chaining cell.
         */
        if (bb->needFallThroughBranch) {
            jumpToBasicBlock(stream, bb->fallThrough->id);
        }

    }

    char* streamChainingStart = (char*)stream;
    /* Handle the chaining cells in predefined order */
    for (i = 0; i < kChainingCellGap; i++) {
        size_t j;
        int *blockIdList = (int *) chainingListByType[i].elemList;

        cUnit->numChainingCells[i] = chainingListByType[i].numUsed;

        /* No chaining cells of this type */
        if (cUnit->numChainingCells[i] == 0)
            continue;

        /* Record the first LIR for a new type of chaining cell */
        cUnit->firstChainingLIR[i] = (LIR *) &labelList[blockIdList[0]];
        for (j = 0; j < chainingListByType[i].numUsed; j++) {
            int blockId = blockIdList[j];
            BasicBlock *chainingBlock =
                (BasicBlock *) dvmGrowableListGetElement(&cUnit->blockList,
                                                         blockId);

            labelList[blockId].lop.generic.offset = (stream - streamMethodStart);

            /* Align this chaining cell first */
#if 0
            newLIR0(cUnit, ATOM_PSEUDO_ALIGN4);
#endif
            /* Insert the pseudo chaining instruction */
            dvmCompilerAppendLIR(cUnit, (LIR *) &labelList[blockId]);


            switch (chainingBlock->blockType) {
                case kChainingCellNormal:
                    handleNormalChainingCell(cUnit,
                     chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellInvokeSingleton:
                    handleInvokeSingletonChainingCell(cUnit,
                        chainingBlock->containingMethod, blockId, labelList);
                    break;
                case kChainingCellInvokePredicted:
                    handleInvokePredictedChainingCell(cUnit, blockId);
                    break;
                case kChainingCellHot:
                    handleHotChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                case kChainingCellBackwardBranch:
                    handleBackwardBranchChainingCell(cUnit,
                        chainingBlock->startOffset, blockId, labelList);
                    break;
                default:
                    ALOGE("Bad blocktype %d", chainingBlock->blockType);
                    dvmAbort();
                    break;
            }

            if (gDvmJit.codeCacheByteUsed + (stream - streamStart) + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
                ALOGI("JIT code cache full after ChainingCell (trace uses %uB)", (stream - streamStart));
                gDvmJit.codeCacheFull = true;
                cUnit->baseAddr = NULL;
                endOfTrace(true); /* need to free structures */
                PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
                return;
            }
        }
    }
#if 0
    dvmCompilerApplyGlobalOptimizations(cUnit);
#endif
    endOfTrace(false);

    if (gDvmJit.codeCacheFull) {
        /* We hit code cache size limit inside endofTrace(false).
         * Bail out for this trace!
         */
        ALOGI("JIT code cache full after endOfTrace (trace uses %uB)", (stream - streamStart));
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* dump section for chaining cell counts, make sure it is 4-byte aligned */
    padding = (4 - ((u4)stream & 3)) & 3;
    stream += padding;
    ChainCellCounts chainCellCounts;
    /* Install the chaining cell counts */
    for (i=0; i< kChainingCellGap; i++) {
        chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
    }
    char* streamCountStart = (char*)stream;
    memcpy((char*)stream, &chainCellCounts, sizeof(chainCellCounts));
    stream += sizeof(chainCellCounts);

    cUnit->baseAddr = streamMethodStart;
    cUnit->totalSize = (stream - streamStart);
    if(gDvmJit.codeCacheByteUsed + cUnit->totalSize + CODE_CACHE_PADDING > gDvmJit.codeCacheSize) {
        ALOGI("JIT code cache full after ChainingCellCounts (trace uses %uB)", (stream - streamStart));
        gDvmJit.codeCacheFull = true;
        cUnit->baseAddr = NULL;
        PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);
        return;
    }

    /* write chaining cell count offset & chaining cell offset */
    u2* pOffset = (u2*)(streamMethodStart - EXTRA_BYTES_FOR_CHAINING); /* space was already allocated for this purpose */
    *pOffset = streamCountStart - streamMethodStart; /* from codeAddr */
    pOffset[1] = streamChainingStart - streamMethodStart;

    PROTECT_CODE_CACHE(stream, unprotected_code_cache_bytes);

    gDvmJit.codeCacheByteUsed += (stream - streamStart);
    if (cUnit->printMe) {
        unsigned char* codeBaseAddr = (unsigned char *) cUnit->baseAddr;
        unsigned char* codeBaseAddrNext = ((unsigned char *) gDvmJit.codeCache) + gDvmJit.codeCacheByteUsed;
        ALOGD("-------- Built trace for %s%s, JIT code [%p, %p) cache start %p",
              cUnit->method->clazz->descriptor, cUnit->method->name,
              codeBaseAddr, codeBaseAddrNext, gDvmJit.codeCache);
        ALOGD("** %s%s@0x%x:", cUnit->method->clazz->descriptor,
              cUnit->method->name, cUnit->traceDesc->trace[0].info.frag.startOffset);
        printEmittedCodeBlock(codeBaseAddr, codeBaseAddrNext);
    }
    ALOGV("JIT CODE after trace %p to %p size %x START %p", cUnit->baseAddr,
          (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed,
          cUnit->totalSize, gDvmJit.codeCache);

    gDvmJit.numCompilations++;

    info->codeAddress = (char*)cUnit->baseAddr;// + cUnit->headerSize;
}
1444
1445/*
1446 * Perform translation chain operation.
1447 */
1448void* dvmJitChain(void* tgtAddr, u4* branchAddr)
1449{
1450#ifdef JIT_CHAIN
1451    int relOffset = (int) tgtAddr - (int)branchAddr;
1452
1453    if ((gDvmJit.pProfTable != NULL) && (gDvm.sumThreadSuspendCount == 0) &&
1454        (gDvmJit.codeCacheFull == false)) {
1455
1456        gDvmJit.translationChains++;
1457
1458        //OpndSize immSize = estOpndSizeFromImm(relOffset);
1459        //relOffset -= getJmpCallInstSize(immSize, JmpCall_uncond);
1460        /* Hard coded the jump opnd size to 32 bits, This instruction will replace the "jump 0" in
1461         * the original code sequence.
1462         */
1463        OpndSize immSize = OpndSize_32;
1464        relOffset -= 5;
1465        //can't use stream here since it is used by the compilation thread
1466        UNPROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1467        dump_imm_with_codeaddr(Mnemonic_JMP, immSize, relOffset, (char*)branchAddr); //dump to branchAddr
1468        PROTECT_CODE_CACHE(branchAddr, sizeof(*branchAddr));
1469
1470        gDvmJit.hasNewChain = true;
1471
1472        COMPILER_TRACE_CHAINING(
1473            ALOGI("Jit Runtime: chaining 0x%x to %p with relOffset %x",
1474                  (int) branchAddr, tgtAddr, relOffset));
1475    }
1476#endif
1477    return tgtAddr;
1478}
1479
1480/*
1481 * Accept the work and start compiling.  Returns true if compilation
1482 * is attempted.
1483 */
1484bool dvmCompilerDoWork(CompilerWorkOrder *work)
1485{
1486    JitTraceDescription *desc;
1487    bool isCompile;
1488    bool success = true;
1489
1490    if (gDvmJit.codeCacheFull) {
1491        return false;
1492    }
1493
1494    switch (work->kind) {
1495        case kWorkOrderTrace:
1496            isCompile = true;
1497            /* Start compilation with maximally allowed trace length */
1498            desc = (JitTraceDescription *)work->info;
1499            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1500                                        work->bailPtr, 0 /* no hints */);
1501            break;
1502        case kWorkOrderTraceDebug: {
1503            bool oldPrintMe = gDvmJit.printMe;
1504            gDvmJit.printMe = true;
1505            isCompile = true;
1506            /* Start compilation with maximally allowed trace length */
1507            desc = (JitTraceDescription *)work->info;
1508            success = dvmCompileTrace(desc, JIT_MAX_TRACE_LEN, &work->result,
1509                                        work->bailPtr, 0 /* no hints */);
1510            gDvmJit.printMe = oldPrintMe;
1511            break;
1512        }
1513        case kWorkOrderProfileMode:
1514            dvmJitChangeProfileMode((TraceProfilingModes)(int)work->info);
1515            isCompile = false;
1516            break;
1517        default:
1518            isCompile = false;
1519            ALOGE("Jit: unknown work order type");
1520            assert(0);  // Bail if debug build, discard otherwise
1521    }
1522    if (!success)
1523        work->result.codeAddress = NULL;
1524    return isCompile;
1525}
1526
/*
 * Flush the instruction cache over [start, end).  Intentionally a no-op on
 * IA32: cacheflush is needed for ARM, but IA32 has a coherent icache.
 * The parameters are consumed explicitly to avoid unused-parameter warnings.
 */
void dvmCompilerCacheFlush(long start, long end, long flags) {
  /* cacheflush is needed for ARM, but not for IA32 (coherent icache) */
  (void)start;
  (void)end;
  (void)flags;
}
1530
1531//#endif
1532