1/* libs/pixelflinger/codeflinger/ARMAssembler.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#define LOG_TAG "ARMAssembler"
19
20#include <stdio.h>
21#include <stdlib.h>
22#include <cutils/log.h>
23#include <cutils/properties.h>
24
25#if defined(WITH_LIB_HARDWARE)
26#include <hardware_legacy/qemu_tracing.h>
27#endif
28
29#include <private/pixelflinger/ggl_context.h>
30
31#include "codeflinger/ARMAssembler.h"
32#include "codeflinger/CodeCache.h"
33#include "codeflinger/disassem.h"
34
35// ----------------------------------------------------------------------------
36
37namespace android {
38
39// ----------------------------------------------------------------------------
40#if 0
41#pragma mark -
42#pragma mark ARMAssembler...
43#endif
44
45ARMAssembler::ARMAssembler(const sp<Assembly>& assembly)
46    :   ARMAssemblerInterface(),
47        mAssembly(assembly)
48{
49    mBase = mPC = (uint32_t *)assembly->base();
50    mDuration = ggl_system_time();
51#if defined(WITH_LIB_HARDWARE)
52    mQemuTracing = true;
53#endif
54}
55
56ARMAssembler::~ARMAssembler()
57{
58}
59
60uint32_t* ARMAssembler::pc() const
61{
62    return mPC;
63}
64
65uint32_t* ARMAssembler::base() const
66{
67    return mBase;
68}
69
70void ARMAssembler::reset()
71{
72    mBase = mPC = (uint32_t *)mAssembly->base();
73    mBranchTargets.clear();
74    mLabels.clear();
75    mLabelsInverseMapping.clear();
76    mComments.clear();
77}
78
79int ARMAssembler::getCodegenArch()
80{
81    return CODEGEN_ARCH_ARM;
82}
83
84// ----------------------------------------------------------------------------
85
86void ARMAssembler::disassemble(const char* name)
87{
88    if (name) {
89        printf("%s:\n", name);
90    }
91    size_t count = pc()-base();
92    uint32_t* i = base();
93    while (count--) {
94        ssize_t label = mLabelsInverseMapping.indexOfKey(i);
95        if (label >= 0) {
96            printf("%s:\n", mLabelsInverseMapping.valueAt(label));
97        }
98        ssize_t comment = mComments.indexOfKey(i);
99        if (comment >= 0) {
100            printf("; %s\n", mComments.valueAt(comment));
101        }
102        printf("%08x:    %08x    ", int(i), int(i[0]));
103        ::disassemble((u_int)i);
104        i++;
105    }
106}
107
108void ARMAssembler::comment(const char* string)
109{
110    mComments.add(mPC, string);
111}
112
113void ARMAssembler::label(const char* theLabel)
114{
115    mLabels.add(theLabel, mPC);
116    mLabelsInverseMapping.add(mPC, theLabel);
117}
118
119void ARMAssembler::B(int cc, const char* label)
120{
121    mBranchTargets.add(branch_target_t(label, mPC));
122    *mPC++ = (cc<<28) | (0xA<<24) | 0;
123}
124
125void ARMAssembler::BL(int cc, const char* label)
126{
127    mBranchTargets.add(branch_target_t(label, mPC));
128    *mPC++ = (cc<<28) | (0xB<<24) | 0;
129}
130
131#if 0
132#pragma mark -
133#pragma mark Prolog/Epilog & Generate...
134#endif
135
136
137void ARMAssembler::prolog()
138{
139    // write dummy prolog code
140    mPrologPC = mPC;
141    STM(AL, FD, SP, 1, LSAVED);
142}
143
144void ARMAssembler::epilog(uint32_t touched)
145{
146    touched &= LSAVED;
147    if (touched) {
148        // write prolog code
149        uint32_t* pc = mPC;
150        mPC = mPrologPC;
151        STM(AL, FD, SP, 1, touched | LLR);
152        mPC = pc;
153        // write epilog code
154        LDM(AL, FD, SP, 1, touched | LLR);
155        BX(AL, LR);
156    } else {   // heh, no registers to save!
157        // write prolog code
158        uint32_t* pc = mPC;
159        mPC = mPrologPC;
160        MOV(AL, 0, R0, R0); // NOP
161        mPC = pc;
162        // write epilog code
163        BX(AL, LR);
164    }
165}
166
167int ARMAssembler::generate(const char* name)
168{
169    // fixup all the branches
170    size_t count = mBranchTargets.size();
171    while (count--) {
172        const branch_target_t& bt = mBranchTargets[count];
173        uint32_t* target_pc = mLabels.valueFor(bt.label);
174        LOG_ALWAYS_FATAL_IF(!target_pc,
175                "error resolving branch targets, target_pc is null");
176        int32_t offset = int32_t(target_pc - (bt.pc+2));
177        *bt.pc |= offset & 0xFFFFFF;
178    }
179
180    mAssembly->resize( int(pc()-base())*4 );
181
182    // the instruction cache is flushed by CodeCache
183    const int64_t duration = ggl_system_time() - mDuration;
184    const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n";
185    ALOGI(format, name, int(pc()-base()), base(), pc(), duration);
186
187#if defined(WITH_LIB_HARDWARE)
188    if (__builtin_expect(mQemuTracing, 0)) {
189        int err = qemu_add_mapping(int(base()), name);
190        mQemuTracing = (err >= 0);
191    }
192#endif
193
194    char value[PROPERTY_VALUE_MAX];
195    property_get("debug.pf.disasm", value, "0");
196    if (atoi(value) != 0) {
197        printf(format, name, int(pc()-base()), base(), pc(), duration);
198        disassemble(name);
199    }
200
201    return NO_ERROR;
202}
203
204uint32_t* ARMAssembler::pcForLabel(const char* label)
205{
206    return mLabels.valueFor(label);
207}
208
209// ----------------------------------------------------------------------------
210
211#if 0
212#pragma mark -
213#pragma mark Data Processing...
214#endif
215
216void ARMAssembler::dataProcessing(int opcode, int cc,
217        int s, int Rd, int Rn, uint32_t Op2)
218{
219    *mPC++ = (cc<<28) | (opcode<<21) | (s<<20) | (Rn<<16) | (Rd<<12) | Op2;
220}
221
222#if 0
223#pragma mark -
224#pragma mark Multiply...
225#endif
226
227// multiply...
228void ARMAssembler::MLA(int cc, int s,
229        int Rd, int Rm, int Rs, int Rn) {
230    if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; }
231    LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn);
232    *mPC++ =    (cc<<28) | (1<<21) | (s<<20) |
233                (Rd<<16) | (Rn<<12) | (Rs<<8) | 0x90 | Rm;
234}
235void ARMAssembler::MUL(int cc, int s,
236        int Rd, int Rm, int Rs) {
237    if (Rd == Rm) { int t = Rm; Rm=Rs; Rs=t; }
238    LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs);
239    *mPC++ = (cc<<28) | (s<<20) | (Rd<<16) | (Rs<<8) | 0x90 | Rm;
240}
241void ARMAssembler::UMULL(int cc, int s,
242        int RdLo, int RdHi, int Rm, int Rs) {
243    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
244                        "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
245    *mPC++ =    (cc<<28) | (1<<23) | (s<<20) |
246                (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
247}
248void ARMAssembler::UMUAL(int cc, int s,
249        int RdLo, int RdHi, int Rm, int Rs) {
250    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
251                        "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
252    *mPC++ =    (cc<<28) | (1<<23) | (1<<21) | (s<<20) |
253                (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
254}
255void ARMAssembler::SMULL(int cc, int s,
256        int RdLo, int RdHi, int Rm, int Rs) {
257    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
258                        "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
259    *mPC++ =    (cc<<28) | (1<<23) | (1<<22) | (s<<20) |
260                (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
261}
262void ARMAssembler::SMUAL(int cc, int s,
263        int RdLo, int RdHi, int Rm, int Rs) {
264    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
265                        "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
266    *mPC++ =    (cc<<28) | (1<<23) | (1<<22) | (1<<21) | (s<<20) |
267                (RdHi<<16) | (RdLo<<12) | (Rs<<8) | 0x90 | Rm;
268}
269
270#if 0
271#pragma mark -
272#pragma mark Branches...
273#endif
274
275// branches...
276void ARMAssembler::B(int cc, uint32_t* pc)
277{
278    int32_t offset = int32_t(pc - (mPC+2));
279    *mPC++ = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF);
280}
281
282void ARMAssembler::BL(int cc, uint32_t* pc)
283{
284    int32_t offset = int32_t(pc - (mPC+2));
285    *mPC++ = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF);
286}
287
288void ARMAssembler::BX(int cc, int Rn)
289{
290    *mPC++ = (cc<<28) | 0x12FFF10 | Rn;
291}
292
293#if 0
294#pragma mark -
295#pragma mark Data Transfer...
296#endif
297
// data transfer...
299void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) {
300    *mPC++ = (cc<<28) | (1<<26) | (1<<20) | (Rn<<16) | (Rd<<12) | offset;
301}
302void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) {
303    *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (1<<20) | (Rn<<16) | (Rd<<12) | offset;
304}
305void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) {
306    *mPC++ = (cc<<28) | (1<<26) | (Rn<<16) | (Rd<<12) | offset;
307}
308void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) {
309    *mPC++ = (cc<<28) | (1<<26) | (1<<22) | (Rn<<16) | (Rd<<12) | offset;
310}
311
312void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) {
313    *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xB0 | offset;
314}
315void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) {
316    *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xD0 | offset;
317}
318void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) {
319    *mPC++ = (cc<<28) | (1<<20) | (Rn<<16) | (Rd<<12) | 0xF0 | offset;
320}
321void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) {
322    *mPC++ = (cc<<28) | (Rn<<16) | (Rd<<12) | 0xB0 | offset;
323}
324
325#if 0
326#pragma mark -
327#pragma mark Block Data Transfer...
328#endif
329
330// block data transfer...
331void ARMAssembler::LDM(int cc, int dir,
332        int Rn, int W, uint32_t reg_list)
333{   //                    ED FD EA FA      IB IA DB DA
334    const uint8_t P[8] = { 1, 0, 1, 0,      1, 0, 1, 0 };
335    const uint8_t U[8] = { 1, 1, 0, 0,      1, 1, 0, 0 };
336    *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
337            (uint32_t(U[dir])<<23) | (1<<20) | (W<<21) | (Rn<<16) | reg_list;
338}
339
340void ARMAssembler::STM(int cc, int dir,
341        int Rn, int W, uint32_t reg_list)
342{   //                    ED FD EA FA      IB IA DB DA
343    const uint8_t P[8] = { 0, 1, 0, 1,      1, 0, 1, 0 };
344    const uint8_t U[8] = { 0, 0, 1, 1,      1, 1, 0, 0 };
345    *mPC++ = (cc<<28) | (4<<25) | (uint32_t(P[dir])<<24) |
346            (uint32_t(U[dir])<<23) | (0<<20) | (W<<21) | (Rn<<16) | reg_list;
347}
348
349#if 0
350#pragma mark -
351#pragma mark Special...
352#endif
353
354// special...
355void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) {
356    *mPC++ = (cc<<28) | (2<<23) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
357}
358void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) {
359    *mPC++ = (cc<<28) | (2<<23) | (1<<22) | (Rn<<16) | (Rd << 12) | 0x90 | Rm;
360}
361void ARMAssembler::SWI(int cc, uint32_t comment) {
362    *mPC++ = (cc<<28) | (0xF<<24) | comment;
363}
364
365#if 0
366#pragma mark -
367#pragma mark DSP instructions...
368#endif
369
370// DSP instructions...
371void ARMAssembler::PLD(int Rn, uint32_t offset) {
372    LOG_ALWAYS_FATAL_IF(!((offset&(1<<24)) && !(offset&(1<<21))),
373                        "PLD only P=1, W=0");
374    *mPC++ = 0xF550F000 | (Rn<<16) | offset;
375}
376
377void ARMAssembler::CLZ(int cc, int Rd, int Rm)
378{
379    *mPC++ = (cc<<28) | 0x16F0F10| (Rd<<12) | Rm;
380}
381
382void ARMAssembler::QADD(int cc,  int Rd, int Rm, int Rn)
383{
384    *mPC++ = (cc<<28) | 0x1000050 | (Rn<<16) | (Rd<<12) | Rm;
385}
386
387void ARMAssembler::QDADD(int cc,  int Rd, int Rm, int Rn)
388{
389    *mPC++ = (cc<<28) | 0x1400050 | (Rn<<16) | (Rd<<12) | Rm;
390}
391
392void ARMAssembler::QSUB(int cc,  int Rd, int Rm, int Rn)
393{
394    *mPC++ = (cc<<28) | 0x1200050 | (Rn<<16) | (Rd<<12) | Rm;
395}
396
397void ARMAssembler::QDSUB(int cc,  int Rd, int Rm, int Rn)
398{
399    *mPC++ = (cc<<28) | 0x1600050 | (Rn<<16) | (Rd<<12) | Rm;
400}
401
402void ARMAssembler::SMUL(int cc, int xy,
403                int Rd, int Rm, int Rs)
404{
405    *mPC++ = (cc<<28) | 0x1600080 | (Rd<<16) | (Rs<<8) | (xy<<4) | Rm;
406}
407
408void ARMAssembler::SMULW(int cc, int y,
409                int Rd, int Rm, int Rs)
410{
411    *mPC++ = (cc<<28) | 0x12000A0 | (Rd<<16) | (Rs<<8) | (y<<4) | Rm;
412}
413
414void ARMAssembler::SMLA(int cc, int xy,
415                int Rd, int Rm, int Rs, int Rn)
416{
417    *mPC++ = (cc<<28) | 0x1000080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (xy<<4) | Rm;
418}
419
420void ARMAssembler::SMLAL(int cc, int xy,
421                int RdHi, int RdLo, int Rs, int Rm)
422{
423    *mPC++ = (cc<<28) | 0x1400080 | (RdHi<<16) | (RdLo<<12) | (Rs<<8) | (xy<<4) | Rm;
424}
425
426void ARMAssembler::SMLAW(int cc, int y,
427                int Rd, int Rm, int Rs, int Rn)
428{
429    *mPC++ = (cc<<28) | 0x1200080 | (Rd<<16) | (Rn<<12) | (Rs<<8) | (y<<4) | Rm;
430}
431
432#if 0
433#pragma mark -
434#pragma mark Byte/half word extract and extend (ARMv6+ only)...
435#endif
436
437void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate)
438{
439    *mPC++ = (cc<<28) | 0x6CF0070 | (Rd<<12) | ((rotate >> 3) << 10) | Rm;
440}
441#if 0
442#pragma mark -
443#pragma mark Bit manipulation (ARMv7+ only)...
444#endif
445
446// Bit manipulation (ARMv7+ only)...
447void ARMAssembler::UBFX(int cc, int Rd, int Rn, int lsb, int width)
448{
449    *mPC++ = (cc<<28) | 0x7E00000 | ((width-1)<<16) | (Rd<<12) | (lsb<<7) | 0x50 | Rn;
450}
451
452#if 0
453#pragma mark -
454#pragma mark Addressing modes...
455#endif
456
457int ARMAssembler::buildImmediate(
458        uint32_t immediate, uint32_t& rot, uint32_t& imm)
459{
460    rot = 0;
461    imm = immediate;
462    if (imm > 0x7F) { // skip the easy cases
463        while (!(imm&3)  || (imm&0xFC000000)) {
464            uint32_t newval;
465            newval = imm >> 2;
466            newval |= (imm&3) << 30;
467            imm = newval;
468            rot += 2;
469            if (rot == 32) {
470                rot = 0;
471                break;
472            }
473        }
474    }
475    rot = (16 - (rot>>1)) & 0xF;
476
477    if (imm>=0x100)
478        return -EINVAL;
479
480    if (((imm>>(rot<<1)) | (imm<<(32-(rot<<1)))) != immediate)
481        return -1;
482
483    return 0;
484}
485
486// shifters...
487
488bool ARMAssembler::isValidImmediate(uint32_t immediate)
489{
490    uint32_t rot, imm;
491    return buildImmediate(immediate, rot, imm) == 0;
492}
493
494uint32_t ARMAssembler::imm(uint32_t immediate)
495{
496    uint32_t rot, imm;
497    int err = buildImmediate(immediate, rot, imm);
498
499    LOG_ALWAYS_FATAL_IF(err==-EINVAL,
500                        "immediate %08x cannot be encoded",
501                        immediate);
502
503    LOG_ALWAYS_FATAL_IF(err,
504                        "immediate (%08x) encoding bogus!",
505                        immediate);
506
507    return (1<<25) | (rot<<8) | imm;
508}
509
510uint32_t ARMAssembler::reg_imm(int Rm, int type, uint32_t shift)
511{
512    return ((shift&0x1F)<<7) | ((type&0x3)<<5) | (Rm&0xF);
513}
514
515uint32_t ARMAssembler::reg_rrx(int Rm)
516{
517    return (ROR<<5) | (Rm&0xF);
518}
519
520uint32_t ARMAssembler::reg_reg(int Rm, int type, int Rs)
521{
522    return ((Rs&0xF)<<8) | ((type&0x3)<<5) | (1<<4) | (Rm&0xF);
523}
524
525// addressing modes...
// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicates U=0)
527uint32_t ARMAssembler::immed12_pre(int32_t immed12, int W)
528{
529    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
530                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
531                        immed12);
532    return (1<<24) | (((uint32_t(immed12)>>31)^1)<<23) |
533            ((W&1)<<21) | (abs(immed12)&0x7FF);
534}
535
536uint32_t ARMAssembler::immed12_post(int32_t immed12)
537{
538    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
539                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
540                        immed12);
541
542    return (((uint32_t(immed12)>>31)^1)<<23) | (abs(immed12)&0x7FF);
543}
544
545uint32_t ARMAssembler::reg_scale_pre(int Rm, int type,
546        uint32_t shift, int W)
547{
548    return  (1<<25) | (1<<24) |
549            (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) |
550            reg_imm(abs(Rm), type, shift);
551}
552
553uint32_t ARMAssembler::reg_scale_post(int Rm, int type, uint32_t shift)
554{
555    return (1<<25) | (((uint32_t(Rm)>>31)^1)<<23) | reg_imm(abs(Rm), type, shift);
556}
557
// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicates U=0)
559uint32_t ARMAssembler::immed8_pre(int32_t immed8, int W)
560{
561    uint32_t offset = abs(immed8);
562
563    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
564                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
565                        immed8);
566
567    return  (1<<24) | (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
568            ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
569}
570
571uint32_t ARMAssembler::immed8_post(int32_t immed8)
572{
573    uint32_t offset = abs(immed8);
574
575    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
576                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
577                        immed8);
578
579    return (1<<22) | (((uint32_t(immed8)>>31)^1)<<23) |
580            (((offset&0xF0)<<4) | (offset&0xF));
581}
582
583uint32_t ARMAssembler::reg_pre(int Rm, int W)
584{
585    return (1<<24) | (((uint32_t(Rm)>>31)^1)<<23) | ((W&1)<<21) | (abs(Rm)&0xF);
586}
587
588uint32_t ARMAssembler::reg_post(int Rm)
589{
590    return (((uint32_t(Rm)>>31)^1)<<23) | (abs(Rm)&0xF);
591}
592
593}; // namespace android
594
595