1/*
2 * Copyright 2011 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23#include "nv50/codegen/nv50_ir.h"
24#include "nv50/codegen/nv50_ir_target.h"
25
26namespace nv50_ir {
27
28const uint8_t Target::operationSrcNr[OP_LAST + 1] =
29{
30   0, 0,                   // NOP, PHI
31   0, 0, 0, 0,             // UNION, SPLIT, MERGE, CONSTRAINT
32   1, 1, 2,                // MOV, LOAD, STORE
33   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
34   1, 1, 1,                // ABS, NEG, NOT
35   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
36   2, 2, 1,                // MAX, MIN, SAT
37   1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
38   3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
39   1, 1, 1, 1, 1, 1,       // RCP, RSQ, LG2, SIN, COS, EX2
40   1, 1, 1, 1, 1, 2,       // EXP, LOG, PRESIN, PREEX2, SQRT, POW
41   0, 0, 0, 0, 0,          // BRA, CALL, RET, CONT, BREAK,
42   0, 0, 0,                // PRERET,CONT,BREAK
43   0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
44   1, 1, 2, 1, 2,          // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP
45   1, 1,                   // EMIT, RESTART
46   1, 1, 1,                // TEX, TXB, TXL,
47   1, 1, 1, 1, 1,          // TXF, TXQ, TXD, TXG, TEXCSAA
48   1, 2,                   // SULD, SUST
49   1, 1,                   // DFDX, DFDY
50   1, 2, 2, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
51   2, 3, 2, 0,             // POPCNT, INSBF, EXTBF, TEXBAR
52   0
53};
54
55const OpClass Target::operationClass[OP_LAST + 1] =
56{
57   // NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT
58   OPCLASS_OTHER,
59   OPCLASS_PSEUDO,
60   OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO,
61   // MOV; LOAD; STORE
62   OPCLASS_MOVE,
63   OPCLASS_LOAD,
64   OPCLASS_STORE,
65   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
66   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
67   OPCLASS_ARITH, OPCLASS_ARITH,
68   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
69   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
70   OPCLASS_CONVERT, OPCLASS_CONVERT,
71   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
72   OPCLASS_SHIFT, OPCLASS_SHIFT,
73   // MAX, MIN
74   OPCLASS_COMPARE, OPCLASS_COMPARE,
75   // SAT, CEIL, FLOOR, TRUNC; CVT
76   OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT,
77   OPCLASS_CONVERT,
78   // SET(AND,OR,XOR); SELP, SLCT
79   OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE,
80   OPCLASS_COMPARE, OPCLASS_COMPARE,
81   // RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW
82   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
83   OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU,
84   OPCLASS_SFU, OPCLASS_SFU,
85   // BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN
86   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
87   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
88   OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW,
89   // DISCARD, EXIT
90   OPCLASS_FLOW, OPCLASS_FLOW,
91   // MEMBAR
92   OPCLASS_OTHER,
93   // VFETCH, PFETCH, EXPORT
94   OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE,
95   // LINTERP, PINTERP
96   OPCLASS_SFU, OPCLASS_SFU,
97   // EMIT, RESTART
98   OPCLASS_OTHER, OPCLASS_OTHER,
99   // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TEXCSAA
100   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
101   OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
102   // SULD, SUST
103   OPCLASS_SURFACE, OPCLASS_SURFACE,
104   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
105   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
106   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
107   // POPCNT, INSBF, EXTBF
108   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
109   // TEXBAR
110   OPCLASS_OTHER,
111   OPCLASS_PSEUDO // LAST
112};
113
114
115extern Target *getTargetNVC0(unsigned int chipset);
116extern Target *getTargetNV50(unsigned int chipset);
117
118Target *Target::create(unsigned int chipset)
119{
120   switch (chipset & 0xf0) {
121   case 0xc0:
122   case 0xd0:
123   case 0xe0:
124      return getTargetNVC0(chipset);
125   case 0x50:
126   case 0x80:
127   case 0x90:
128   case 0xa0:
129      return getTargetNV50(chipset);
130   default:
131      ERROR("unsupported target: NV%x\n", chipset);
132      return 0;
133   }
134}
135
136void Target::destroy(Target *targ)
137{
138   delete targ;
139}
140
141CodeEmitter::CodeEmitter(const Target *target) : targ(target)
142{
143}
144
145void
146CodeEmitter::setCodeLocation(void *ptr, uint32_t size)
147{
148   code = reinterpret_cast<uint32_t *>(ptr);
149   codeSize = 0;
150   codeSizeLimit = size;
151}
152
153void
154CodeEmitter::printBinary() const
155{
156   uint32_t *bin = code - codeSize / 4;
157   INFO("program binary (%u bytes)", codeSize);
158   for (unsigned int pos = 0; pos < codeSize / 4; ++pos) {
159      if ((pos % 8) == 0)
160         INFO("\n");
161      INFO("%08x ", bin[pos]);
162   }
163   INFO("\n");
164}
165
/**
 * Number of 56-byte units needed to hold @size bytes, rounded up
 * (0 -> 0, 1..56 -> 1, 57..112 -> 2, ...).
 */
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t bundleBytes = 56;

   return (size + (bundleBytes - 1)) / bundleBytes;
}
170
171void
172CodeEmitter::prepareEmission(Program *prog)
173{
174   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
175        !fi.end(); fi.next()) {
176      Function *func = reinterpret_cast<Function *>(fi.get());
177      func->binPos = prog->binSize;
178      prepareEmission(func);
179
180      // adjust sizes & positions for schedulding info:
181      if (prog->getTarget()->hasSWSched) {
182         BasicBlock *bb = NULL;
183         for (int i = 0; i < func->bbCount; ++i) {
184            bb = func->bbArray[i];
185            const uint32_t oldPos = bb->binPos;
186            const uint32_t oldEnd = bb->binPos + bb->binSize;
187            uint32_t adjPos = oldPos + sizeToBundlesNVE4(oldPos) * 8;
188            uint32_t adjEnd = oldEnd + sizeToBundlesNVE4(oldEnd) * 8;
189            bb->binPos = adjPos;
190            bb->binSize = adjEnd - adjPos;
191         }
192         if (bb)
193            func->binSize = bb->binPos + bb->binSize;
194      }
195
196      prog->binSize += func->binSize;
197   }
198}
199
200void
201CodeEmitter::prepareEmission(Function *func)
202{
203   func->bbCount = 0;
204   func->bbArray = new BasicBlock * [func->cfg.getSize()];
205
206   BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos;
207
208   for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next())
209      prepareEmission(BasicBlock::get(*it));
210}
211
/**
 * Place @bb behind the blocks already laid out for its function and pick an
 * encoding size (4 or 8 bytes) for each of its instructions.
 *
 * Updates bb->binPos, bb->binSize and the encSize of the instructions in
 * @bb, appends @bb to func->bbArray and adds bb->binSize to func->binSize.
 * An OP_BRA to @bb found at the end of an already placed block is removed,
 * and the sizes and positions recorded so far shrink by its 8 bytes.
 * Adjacent instructions may be swapped (where isCommutationLegal() allows
 * it) in order to pair up short encodings.
 */
void
CodeEmitter::prepareEmission(BasicBlock *bb)
{
   Instruction *i, *next;
   Function *func = bb->getFunction();
   int j;
   unsigned int nShort;

   // skip the most recently placed blocks that occupy no space
   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);

   for (; j >= 0; --j) {
      BasicBlock *in = func->bbArray[j];
      Instruction *exit = in->getExit();

      // drop a trailing branch to bb (presumably redundant because bb is
      // laid out directly behind 'in')
      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
         in->binSize -= 8;
         func->binSize -= 8;

         // every block placed after 'in' moves up by the removed 8 bytes;
         // this leaves j == func->bbCount, so if 'in' is now empty the outer
         // loop resumes at the last placed block
         for (++j; j < func->bbCount; ++j)
            func->bbArray[j]->binPos -= 8;

         in->remove(exit);
      }
      bb->binPos = in->binPos + in->binSize;
      if (in->binSize) // no more no-op branches to bb
         break;
   }
   func->bbArray[func->bbCount++] = bb;

   // a block without an exit instruction gets no encoding sizes assigned
   if (!bb->getExit())
      return;

   // determine encoding size, try to group short instructions
   // (nShort counts the current run of instructions kept shorter than 8)
   nShort = 0;
   for (i = bb->getEntry(); i; i = next) {
      next = i->next;

      i->encSize = getMinEncodingSize(i);
      if (next && i->encSize < 8)
         ++nShort;
      else
      if ((nShort & 1) && next && getMinEncodingSize(next) == 4) {
         // odd run of short instructions, followed by i (not short, or last)
         // and a 4 byte candidate: try to move the candidate next to the run
         if (i->isCommutationLegal(i->next)) {
            // swap i and next; the former 'next' is accounted for below as
            // a 4 byte instruction and i is visited again afterwards
            bb->permuteAdjacent(i, next);
            next->encSize = 4;
            next = i;
            i = i->prev;
            ++nShort;
         } else
         if (i->isCommutationLegal(i->prev) && next->next) {
            // move i in front of its predecessor instead; 'next' is sized
            // here (4 bytes) and skipped by the loop
            bb->permuteAdjacent(i->prev, i);
            next->encSize = 4;
            next = next->next;
            bb->binSize += 4;
            ++nShort;
         } else {
            // no pairing possible: i and its predecessor both take 8 bytes
            // (the extra 4 bytes cover the predecessor's promotion)
            i->encSize = 8;
            i->prev->encSize = 8;
            bb->binSize += 4;
            nShort = 0;
         }
      } else {
         // i takes 8 bytes; an unpaired short predecessor is promoted too
         i->encSize = 8;
         if (nShort & 1) {
            i->prev->encSize = 8;
            bb->binSize += 4;
         }
         nShort = 0;
      }
      bb->binSize += i->encSize;
   }

   // the block's last instruction has to end up with the 8 byte encoding
   if (bb->getExit()->encSize == 4) {
      assert(nShort);
      bb->getExit()->encSize = 8;
      bb->binSize += 4;

      // with an even run, the instruction before the exit is promoted as well
      if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) {
         bb->binSize += 8;
         bb->getExit()->prev->encSize = 8;
      }
   }
   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8));

   func->binSize += bb->binSize;
}
298
299void
300Program::emitSymbolTable(struct nv50_ir_prog_info *info)
301{
302   unsigned int n = 0, nMax = allFuncs.getSize();
303
304   info->bin.syms =
305      (struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms));
306
307   for (ArrayList::Iterator fi = allFuncs.iterator();
308        !fi.end();
309        fi.next(), ++n) {
310      Function *f = (Function *)fi.get();
311      assert(n < nMax);
312
313      info->bin.syms[n].label = f->getLabel();
314      info->bin.syms[n].offset = f->binPos;
315   }
316
317   info->bin.numSyms = n;
318}
319
320bool
321Program::emitBinary(struct nv50_ir_prog_info *info)
322{
323   CodeEmitter *emit = target->getCodeEmitter(progType);
324
325   emit->prepareEmission(this);
326
327   if (dbgFlags & NV50_IR_DEBUG_BASIC)
328      this->print();
329
330   if (!binSize) {
331      code = NULL;
332      return false;
333   }
334   code = reinterpret_cast<uint32_t *>(MALLOC(binSize));
335   if (!code)
336      return false;
337   emit->setCodeLocation(code, binSize);
338
339   for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
340      Function *fn = reinterpret_cast<Function *>(fi.get());
341
342      assert(emit->getCodeSize() == fn->binPos);
343
344      for (int b = 0; b < fn->bbCount; ++b)
345         for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
346            emit->emitInstruction(i);
347   }
348   info->bin.relocData = emit->getRelocInfo();
349
350   emitSymbolTable(info);
351
352   // the nvc0 driver will print the binary iself together with the header
353   if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0)
354      emit->printBinary();
355
356   delete emit;
357   return true;
358}
359
360#define RELOC_ALLOC_INCREMENT 8
361
362bool
363CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m,
364                      int s)
365{
366   unsigned int n = relocInfo ? relocInfo->count : 0;
367
368   if (!(n % RELOC_ALLOC_INCREMENT)) {
369      size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry);
370      relocInfo = reinterpret_cast<RelocInfo *>(
371         REALLOC(relocInfo, n ? size : 0,
372                 size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry)));
373      if (!relocInfo)
374         return false;
375      if (n == 0)
376         memset(relocInfo, 0, sizeof(RelocInfo));
377   }
378   ++relocInfo->count;
379
380   relocInfo->entry[n].data = data;
381   relocInfo->entry[n].mask = m;
382   relocInfo->entry[n].offset = codeSize + w * 4;
383   relocInfo->entry[n].bitPos = s;
384   relocInfo->entry[n].type = ty;
385
386   return true;
387}
388
389void
390RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const
391{
392   uint32_t value = 0;
393
394   switch (type) {
395   case TYPE_CODE: value = info->codePos; break;
396   case TYPE_BUILTIN: value = info->libPos; break;
397   case TYPE_DATA: value = info->dataPos; break;
398   default:
399      assert(0);
400      break;
401   }
402   value += data;
403   value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos);
404
405   binary[offset / 4] &= ~mask;
406   binary[offset / 4] |= value & mask;
407}
408
409} // namespace nv50_ir
410
411
412#include "nv50/codegen/nv50_ir_driver.h"
413
414extern "C" {
415
416void
417nv50_ir_relocate_code(void *relocData, uint32_t *code,
418                      uint32_t codePos,
419                      uint32_t libPos,
420                      uint32_t dataPos)
421{
422   nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData);
423
424   info->codePos = codePos;
425   info->libPos = libPos;
426   info->dataPos = dataPos;
427
428   for (unsigned int i = 0; i < info->count; ++i)
429      info->entry[i].apply(code, info);
430}
431
432void
433nv50_ir_get_target_library(uint32_t chipset,
434                           const uint32_t **code, uint32_t *size)
435{
436   nv50_ir::Target *targ = nv50_ir::Target::create(chipset);
437   targ->getBuiltinCode(code, size);
438   nv50_ir::Target::destroy(targ);
439}
440
441}
442