/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include <stdlib.h> /* for qsort() */
#include "toy_compiler.h"
#include "toy_legalize.h"

/**
 * Live interval of a VRF register.
 */
struct linear_scan_live_interval {
   int vrf;
   int startpoint;
   int endpoint;

   /*
    * should this interval be assigned a register consecutive to the
    * previous interval's?
    */
   bool consecutive;

   int reg;

   struct list_head list;
};

/**
 * Linear scan.
 */
struct linear_scan {
   /* live intervals, to be sorted by startpoint */
   struct linear_scan_live_interval *intervals;
   int max_vrf, num_vrfs;

   /* number of physical registers available */
   int num_regs;

   /* intervals that are currently live, sorted by endpoint */
   struct list_head active_list;
   /* a stack of free physical registers */
   int *free_regs;
   int num_free_regs;

   /* VRF number -> physical register number */
   int *vrf_mapping;
};

/**
 * Return a chunk of registers to the free register pool.
 */
static void
linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
{
   int i;

   /*
    * push the higher registers first so that the lowest register ends up
    * at the tail, where linear_scan_allocate_regs() pops from
    */
   for (i = 0; i < count; i++)
      ls->free_regs[ls->num_free_regs++] = reg + count - 1 - i;
}

static int
linear_scan_compare_regs(const void *elem1, const void *elem2)
{
   const int *reg1 = elem1;
   const int *reg2 = elem2;

   /* in reverse order */
   return (*reg2 - *reg1);
}

/**
 * Allocate a chunk of registers from the free register pool.
 */
static int
linear_scan_allocate_regs(struct linear_scan *ls, int count)
{
   bool sorted = false;
   int reg;

   /* simple cases */
   if (count > ls->num_free_regs)
      return -1;
   else if (count == 1)
      return ls->free_regs[--ls->num_free_regs];

   /* TODO a free register pool */
   /* TODO reserve some regs for spilling */
   while (true) {
      bool found = false;
      int start;

      /*
       * find a chunk of registers that have consecutive register
       * numbers
       */
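      /*
       * e.g. (hypothetical): with free_regs = { 9, 8, 5, 4, 3 } sorted in
       * descending order and count = 3, the scan below stops at start = 4,
       * where free_regs[4..2] = { 3, 4, 5 } form a consecutive chunk
       */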
      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
         int i;

         for (i = 1; i < count; i++) {
            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
               break;
         }

         if (i >= count) {
            found = true;
            break;
         }
      }

      if (found) {
         reg = ls->free_regs[start];

         if (start != ls->num_free_regs - 1) {
            start++;
            memmove(&ls->free_regs[start - count],
                    &ls->free_regs[start],
                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
         }
         ls->num_free_regs -= count;
         break;
      }
      else if (!sorted) {
         /* sort and retry */
         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
               linear_scan_compare_regs);
         sorted = true;
      }
      else {
         /* failed */
         reg = -1;
         break;
      }
   }

   return reg;
}

/**
 * Add an interval to the active list.
 */
static void
linear_scan_add_active(struct linear_scan *ls,
                       struct linear_scan_live_interval *interval)
{
   struct linear_scan_live_interval *pos;

   /* keep the active list sorted by endpoints */
   LIST_FOR_EACH_ENTRY(pos, &ls->active_list, list) {
      if (pos->endpoint >= interval->endpoint)
         break;
   }

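   /* list_addtail() on pos->list inserts the interval right before pos */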
   list_addtail(&interval->list, &pos->list);
}

/**
 * Remove an interval from the active list.
 */
static void
linear_scan_remove_active(struct linear_scan *ls,
                          struct linear_scan_live_interval *interval)
{
   list_del(&interval->list);
}

/**
 * Remove intervals that are no longer active from the active list.
 */
static void
linear_scan_expire_active(struct linear_scan *ls, int pc)
{
   struct linear_scan_live_interval *interval, *next;

   LIST_FOR_EACH_ENTRY_SAFE(interval, next, &ls->active_list, list) {
      /*
       * since we sort intervals on the active list by their endpoints, we
       * know that this and the rest of the intervals are still active.
       */
      if (interval->endpoint >= pc)
         break;

      linear_scan_remove_active(ls, interval);

      /* recycle the reg */
      linear_scan_free_regs(ls, interval->reg, 1);
   }
}

/**
 * Spill an interval.
 */
static void
linear_scan_spill(struct linear_scan *ls,
                  struct linear_scan_live_interval *interval,
                  bool is_active)
{
   assert(!"no spilling support");
}

/**
 * Spill a range of intervals.
 */
static void
linear_scan_spill_range(struct linear_scan *ls, int first, int count)
{
   int i;

   for (i = 0; i < count; i++) {
      struct linear_scan_live_interval *interval = &ls->intervals[first + i];

      linear_scan_spill(ls, interval, false);
   }
}

/**
 * Perform linear scan to allocate registers for the intervals.
 */
static bool
linear_scan_run(struct linear_scan *ls)
{
   int i;

   i = 0;
   while (i < ls->num_vrfs) {
      struct linear_scan_live_interval *first = &ls->intervals[i];
      int reg, count;

      /*
       * GEN6_OPCODE_SEND may write to multiple consecutive registers and
       * we need to support that
       */
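      /*
       * e.g. (hypothetical): a SEND whose response length is 4 defines
       * VRFs n..n+3, which must be assigned consecutive physical regs
       */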
      for (count = 1; i + count < ls->num_vrfs; count++) {
         const struct linear_scan_live_interval *interval =
            &ls->intervals[i + count];

         if (interval->startpoint != first->startpoint ||
             !interval->consecutive)
            break;
      }

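      /*
       * count now covers the first interval plus any following intervals
       * that start at the same point and are flagged consecutive
       */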
      reg = linear_scan_allocate_regs(ls, count);

      /* expire intervals that are no longer active and try again */
      if (reg < 0) {
         linear_scan_expire_active(ls, first->startpoint);
         reg = linear_scan_allocate_regs(ls, count);
      }

      /* have to spill some intervals */
      if (reg < 0) {
         struct linear_scan_live_interval *last_active =
            container_of(ls->active_list.prev,
                  (struct linear_scan_live_interval *) NULL, list);

         /* heuristically spill the interval that ends last */
         if (count > 1 || last_active->endpoint < first->endpoint) {
            linear_scan_spill_range(ls, i, count);
            i += count;
            continue;
         }

         /* make some room for the new interval */
         linear_scan_spill(ls, last_active, true);
         reg = linear_scan_allocate_regs(ls, count);
         if (reg < 0) {
            assert(!"failed to spill any register");
            return false;
         }
      }

      while (count--) {
         struct linear_scan_live_interval *interval = &ls->intervals[i++];

         interval->reg = reg++;
         linear_scan_add_active(ls, interval);

         ls->vrf_mapping[interval->vrf] = interval->reg;

         /*
          * this must hold because of how the intervals were initialized
          * and sorted
          */
         assert(interval->vrf - first->vrf == interval->reg - first->reg);
      }
   }

   return true;
}

/**
 * Add a new interval.
 */
static void
linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
{
   /* VRF numbers start at 1, so a zero vrf field marks an unused slot */
   if (ls->intervals[vrf].vrf)
      return;

   ls->intervals[vrf].vrf = vrf;
   ls->intervals[vrf].startpoint = pc;

   ls->num_vrfs++;
   if (vrf > ls->max_vrf)
      ls->max_vrf = vrf;
}

/**
 * Perform (oversimplified?) live variable analysis.
 */
static void
linear_scan_init_live_intervals(struct linear_scan *ls,
                                struct toy_compiler *tc)
{
   const struct toy_inst *inst;
   int pc, do_pc, while_pc;

   pc = 0;
   do_pc = -1;
   while_pc = -1;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
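      /*
       * while scanning the outermost loop (pc <= while_pc), widen every
       * interval to the whole loop body, [do_pc, while_pc]
       */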
      const int startpoint = (pc <= while_pc) ? do_pc : pc;
      const int endpoint = (pc <= while_pc) ? while_pc : pc;
      int vrf, i;

      /*
       * assume all registers used in this outermost loop are live
       * throughout the whole loop
       */
      if (inst->marker) {
         if (pc > while_pc) {
            struct toy_inst *inst2;
            int loop_level = 1;

            assert(inst->opcode == TOY_OPCODE_DO);
            do_pc = pc;
            while_pc = pc + 1;

            /* find the matching GEN6_OPCODE_WHILE */
            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
                  &tc->instructions, list) {
               if (inst2->marker) {
                  assert(inst2->opcode == TOY_OPCODE_DO);
                  loop_level++;
                  continue;
               }

               if (inst2->opcode == GEN6_OPCODE_WHILE) {
                  loop_level--;
                  if (!loop_level)
                     break;
               }
               while_pc++;
            }
         }

         continue;
      }

      if (inst->dst.file == TOY_FILE_VRF) {
         int num_dst;

         /* TODO this is a hack */
         if (inst->opcode == GEN6_OPCODE_SEND ||
             inst->opcode == GEN6_OPCODE_SENDC) {
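            /*
             * the response length, in registers, is in bits 24:20 of the
             * SEND message descriptor
             */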
            const uint32_t mdesc = inst->src[1].val32;
            int response_length = (mdesc >> 20) & 0x1f;

            num_dst = response_length;
            if (num_dst > 1 && inst->exec_size == GEN6_EXECSIZE_16)
               num_dst /= 2;
         }
         else {
            num_dst = 1;
         }

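         /*
          * operand val32 encodes reg * TOY_REG_WIDTH + subreg; dividing by
          * TOY_REG_WIDTH recovers the VRF number
          */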
         vrf = inst->dst.val32 / TOY_REG_WIDTH;

         for (i = 0; i < num_dst; i++) {
            /* first use */
            if (!ls->intervals[vrf].vrf)
               linear_scan_add_live_interval(ls, vrf, startpoint);

            ls->intervals[vrf].endpoint = endpoint;
            ls->intervals[vrf].consecutive = (i > 0);

            vrf++;
         }
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         vrf = inst->src[i].val32 / TOY_REG_WIDTH;

         /* first use */
         if (!ls->intervals[vrf].vrf)
            linear_scan_add_live_interval(ls, vrf, startpoint);

         ls->intervals[vrf].endpoint = endpoint;
      }

      pc++;
   }
}

/**
 * Clean up after performing linear scan.
 */
static void
linear_scan_cleanup(struct linear_scan *ls)
{
   FREE(ls->vrf_mapping);
   FREE(ls->intervals);
   FREE(ls->free_regs);
}

static int
linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
{
   const struct linear_scan_live_interval *interval1 = elem1;
   const struct linear_scan_live_interval *interval2 = elem2;

   /* make unused elements appear at the end */
   if (!interval1->vrf)
      return (interval2->vrf) ? 1 : 0;
   else if (!interval2->vrf)
      return -1;

   /*
    * sort by startpoints first, and then by vrf; breaking ties by vrf
    * keeps VRFs that must be consecutive (e.g., SEND destinations)
    * adjacent, which linear_scan_run() relies on
    */
   if (interval1->startpoint != interval2->startpoint)
      return (interval1->startpoint - interval2->startpoint);
   else
      return (interval1->vrf - interval2->vrf);
}

/**
 * Prepare for linear scan.
 */
static bool
linear_scan_init(struct linear_scan *ls, int num_regs,
                 struct toy_compiler *tc)
{
   int num_intervals, i;

   memset(ls, 0, sizeof(*ls));

   /* this may be much larger than ls->num_vrfs... */
   num_intervals = tc->next_vrf;
   ls->intervals = CALLOC(num_intervals, sizeof(ls->intervals[0]));
   if (!ls->intervals)
      return false;

   linear_scan_init_live_intervals(ls, tc);
   /* sort intervals by startpoints */
   qsort(ls->intervals, num_intervals, sizeof(*ls->intervals),
         linear_scan_compare_live_intervals);

   ls->num_regs = num_regs;
   ls->num_free_regs = num_regs;

   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
   if (!ls->free_regs) {
      FREE(ls->intervals);
      return false;
   }

   /* add in reverse order as we will allocate from the tail */
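   /* e.g., num_regs = 4 gives { 3, 2, 1, 0 }, so r0 is handed out first */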
   for (i = 0; i < ls->num_regs; i++)
      ls->free_regs[i] = num_regs - i - 1;

   list_inithead(&ls->active_list);

   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
   if (!ls->vrf_mapping) {
      FREE(ls->intervals);
      FREE(ls->free_regs);
      return false;
   }

   return true;
}

/**
 * Allocate registers with linear scan.
 */
static void
linear_scan_allocation(struct toy_compiler *tc,
                       int start_grf, int end_grf,
                       int num_grf_per_vrf)
{
   const int num_grfs = end_grf - start_grf + 1;
   struct linear_scan ls;
   struct toy_inst *inst;

   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
      return;

   if (!linear_scan_run(&ls)) {
      linear_scan_cleanup(&ls);
      tc_fail(tc, "failed to allocate registers");
      return;
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
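         /*
          * e.g. (hypothetical): with num_grf_per_vrf = 2 and start_grf = 1,
          * a VRF mapped to physical reg 3 lands on GRF r7
          */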
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         /* map to GRF */
         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
      }
   }

   linear_scan_cleanup(&ls);
}

/**
 * Trivially allocate registers.
 */
static void
trivial_allocation(struct toy_compiler *tc,
                   int start_grf, int end_grf,
                   int num_grf_per_vrf)
{
   struct toy_inst *inst;
   int max_grf = -1;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      int i;

      if (inst->dst.file == TOY_FILE_VRF) {
         const uint32_t val32 = inst->dst.val32;
         int reg = val32 / TOY_REG_WIDTH;
         int subreg = val32 % TOY_REG_WIDTH;

         /*
          * VRF numbers start at 1; subtract one so that the first VRF
          * maps to start_grf
          */
         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->dst.file = TOY_FILE_GRF;
         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }

      for (i = 0; i < ARRAY_SIZE(inst->src); i++) {
         const uint32_t val32 = inst->src[i].val32;
         int reg, subreg;

         if (inst->src[i].file != TOY_FILE_VRF)
            continue;

         reg = val32 / TOY_REG_WIDTH;
         subreg = val32 % TOY_REG_WIDTH;

         reg = reg * num_grf_per_vrf + start_grf - 1;

         inst->src[i].file = TOY_FILE_GRF;
         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;

         if (reg > max_grf)
            max_grf = reg;
      }
   }

   if (max_grf + num_grf_per_vrf - 1 > end_grf)
      tc_fail(tc, "failed to allocate registers");
}

/**
 * Allocate GRF registers to VRF registers.
 */
void
toy_compiler_allocate_registers(struct toy_compiler *tc,
                                int start_grf, int end_grf,
                                int num_grf_per_vrf)
{
   /* trivial_allocation() is kept only as a fallback for debugging */
   if (true)
      linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
   else
      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
}