1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/*
2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Copyright © 2010 Intel Corporation
3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Permission is hereby granted, free of charge, to any person obtaining a
5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * copy of this software and associated documentation files (the "Software"),
6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to deal in the Software without restriction, including without limitation
7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * and/or sell copies of the Software, and to permit persons to whom the
9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software is furnished to do so, subject to the following conditions:
10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The above copyright notice and this permission notice (including the next
12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * paragraph) shall be included in all copies or substantial portions of the
13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Software.
14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * IN THE SOFTWARE.
22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Authors:
24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *    Eric Anholt <eric@anholt.net>
25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/** @file register_allocate.c
29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Graph-coloring register allocator.
31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The basic idea of graph coloring is to make a node in a graph for
33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * every thing that needs a register (color) number assigned, and make
34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * edges in the graph between nodes that interfere (can't be allocated
35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * to the same register at the same time).
36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * During the "simplify" process, any any node with fewer edges than
38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * there are registers means that that edge can get assigned a
39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register regardless of what its neighbors choose, so that node is
40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * pushed on a stack and removed (with its edges) from the graph.
41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * That likely causes other nodes to become trivially colorable as well.
42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Then during the "select" process, nodes are popped off of that
44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * stack, their edges restored, and assigned a color different from
45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * their neighbors.  Because they were pushed on the stack only when
46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * they were trivially colorable, any color chosen won't interfere
47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * with the registers to be popped later.
48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * The downside to most graph coloring is that real hardware often has
50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * limitations, like registers that need to be allocated to a node in
51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * pairs, or aligned on some boundary.  This implementation follows
52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the paper "Retargetable Graph-Coloring Register Allocation for
53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström.
54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * In this system, there are register classes each containing various
56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers, and registers may interfere with other registers.  For
57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * example, one might have a class of base registers, and a class of
58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * aligned register pairs that would each interfere with their pair of
59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the base registers.  Each node has a register class it needs to be
60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * assigned to.  Define p(B) to be the size of register class B, and
61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * q(B,C) to be the number of registers in B that the worst choice
62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * register in C could conflict with.  Then, this system replaces the
63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * basic graph coloring test of "fewer edges from this node than there
64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * are registers" with "For this node of class B, the sum of q(B,C)
65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * for each neighbor node of class C is less than pB".
66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * A nice feature of the pq test is that q(B,C) can be computed once
68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * up front and stored in a 2-dimensional array, so that the cost of
69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * coloring a node is constant with the number of registers.  We do
70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * this during ra_set_finalize().
71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include <ralloc.h>
74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/imports.h"
76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/macros.h"
77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "main/mtypes.h"
78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "register_allocate.h"
79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define NO_REG ~0
81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_reg {
83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GLboolean *conflicts;
84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int *conflict_list;
85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int conflict_list_size;
86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int num_conflicts;
87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_regs {
90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_reg *regs;
91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int count;
92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_class **classes;
94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int class_count;
95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_class {
98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GLboolean *regs;
99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /**
101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * p(B) in Runeson/Nyström paper.
102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *
103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * This is "how many regs are in the set."
104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int p;
106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /**
108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * q(B,C) (indexed by C, B is this register class) in
109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * Runeson/Nyström paper.  This is "how many registers of B could
110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * the worst choice register from C conflict with".
111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int *q;
113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_node {
116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /** @{
117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    *
118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * List of which nodes this node interferes with.  This should be
119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * symmetric with the other node.
120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GLboolean *adjacency;
122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int *adjacency_list;
123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int adjacency_count;
124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /** @} */
125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int class;
127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Register, if assigned, or NO_REG. */
129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int reg;
130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /**
132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * Set when the node is in the trivially colorable stack.  When
133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * set, the adjacency to this node is ignored, to implement the
134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * "remove the edge from the graph" in simplification without
135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * having to actually modify the adjacency_list.
136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GLboolean in_stack;
138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* For an implementation that needs register spilling, this is the
140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * approximate cost of spilling this node.
141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   float spill_cost;
143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_graph {
146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_regs *regs;
147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /**
148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * the variables that need register allocation.
149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_node *nodes;
151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int count; /**< count of nodes. */
152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int *stack;
154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int stack_count;
155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org};
156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Creates a set of registers for the allocator.
159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * mem_ctx is a ralloc context for the allocator.  The reg set may be freed
161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * using ralloc_free().
162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_regs *
164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_alloc_reg_set(void *mem_ctx, unsigned int count)
165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int i;
167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_regs *regs;
168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   regs = rzalloc(mem_ctx, struct ra_regs);
170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   regs->count = count;
171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   regs->regs = rzalloc_array(regs, struct ra_reg, count);
172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < count; i++) {
174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count);
175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->regs[i].conflicts[i] = GL_TRUE;
176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->regs[i].conflict_list_size = 4;
179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->regs[i].conflict_list[0] = i;
180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->regs[i].num_conflicts = 1;
181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return regs;
184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_reg *reg1 = &regs->regs[r1];
190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (reg1->conflict_list_size == reg1->num_conflicts) {
192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      reg1->conflict_list_size *= 2;
193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org				     unsigned int, reg1->conflict_list_size);
195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   reg1->conflict_list[reg1->num_conflicts++] = r2;
197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   reg1->conflicts[r2] = GL_TRUE;
198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!regs->regs[r1].conflicts[r2]) {
204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_add_conflict_list(regs, r1, r2);
205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_add_conflict_list(regs, r2, r1);
206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Adds a conflict between base_reg and reg, and also between reg and
211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * anything that base_reg conflicts with.
212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * This can simplify code for setting up multiple register classes
214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * which are aggregates of some base hardware registers, compared to
215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * explicitly using ra_add_reg_conflict.
216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_add_transitive_reg_conflict(struct ra_regs *regs,
219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			       unsigned int base_reg, unsigned int reg)
220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int i;
222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   ra_add_reg_conflict(regs, reg, base_reg);
224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) {
226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]);
227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgunsigned int
231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_alloc_reg_class(struct ra_regs *regs)
232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_class *class;
234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			    regs->class_count + 1);
237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class = rzalloc(regs, struct ra_class);
239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   regs->classes[regs->class_count] = class;
240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class->regs = rzalloc_array(class, GLboolean, regs->count);
242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return regs->class_count++;
244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_class *class = regs->classes[c];
250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class->regs[r] = GL_TRUE;
252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   class->p++;
253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Must be called after all conflicts and register classes have been
257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * set up and before the register set is used for allocation.
258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_set_finalize(struct ra_regs *regs)
261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int b, c;
263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (b = 0; b < regs->class_count; b++) {
265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count);
266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Compute, for each class B and C, how many regs of B an
269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * allocation to C could conflict with.
270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (b = 0; b < regs->class_count; b++) {
272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (c = 0; c < regs->class_count; c++) {
273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 unsigned int rc;
274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 int max_conflicts = 0;
275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 for (rc = 0; rc < regs->count; rc++) {
277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    int conflicts = 0;
278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    int i;
279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    if (!regs->classes[c]->regs[rc])
281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       continue;
282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       unsigned int rb = regs->regs[rc].conflict_list[i];
285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       if (regs->classes[b]->regs[rb])
286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		  conflicts++;
287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    }
288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    max_conflicts = MAX2(max_conflicts, conflicts);
289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 regs->classes[b]->q[c] = max_conflicts;
291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic void
296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n1].adjacency[n2] = GL_TRUE;
299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n1].adjacency_count++;
301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstruct ra_graph *
304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   struct ra_graph *g;
307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int i;
308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g = rzalloc(regs, struct ra_graph);
310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->regs = regs;
311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes = rzalloc_array(g, struct ra_node, count);
312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->count = count;
313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->stack = rzalloc_array(g, unsigned int, count);
315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < count; i++) {
317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count);
318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count);
319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[i].adjacency_count = 0;
320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_add_node_adjacency(g, i, i);
321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[i].reg = NO_REG;
322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return g;
325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_set_node_class(struct ra_graph *g,
329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		  unsigned int n, unsigned int class)
330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n].class = class;
332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_add_node_interference(struct ra_graph *g,
336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org			 unsigned int n1, unsigned int n2)
337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!g->nodes[n1].adjacency[n2]) {
339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_add_node_adjacency(g, n1, n2);
340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_add_node_adjacency(g, n2, n1);
341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic GLboolean pq_test(struct ra_graph *g, unsigned int n)
345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int j;
347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int q = 0;
348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n_class = g->nodes[n].class;
349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int n2 = g->nodes[n].adjacency_list[j];
352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int n2_class = g->nodes[n2].class;
353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (n != n2 && !g->nodes[n2].in_stack) {
355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 q += g->regs->classes[n_class]->q[n2_class];
356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return q < g->regs->classes[n_class]->p;
360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Simplifies the interference graph by pushing all
364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * trivially-colorable nodes into a stack of nodes to be colored,
365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * removing them from the graph, and rinsing and repeating.
366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE
368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * means that either spilling will be required, or optimistic coloring
369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * should be applied.
370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGLboolean
372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_simplify(struct ra_graph *g)
373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   GLboolean progress = GL_TRUE;
375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int i;
376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (progress) {
378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      progress = GL_FALSE;
379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (i = g->count - 1; i >= 0; i--) {
381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    continue;
383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (pq_test(g, i)) {
385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    g->stack[g->stack_count] = i;
386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    g->stack_count++;
387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    g->nodes[i].in_stack = GL_TRUE;
388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    progress = GL_TRUE;
389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < g->count; i++) {
394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (!g->nodes[i].in_stack)
395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 return GL_FALSE;
396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return GL_TRUE;
399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Pops nodes from the stack back into the graph, coloring them with
403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * registers as they go.
404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * If all nodes were trivially colorable, then this must succeed.  If
406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * not (optimistic coloring), then it may return GL_FALSE;
407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGLboolean
409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_select(struct ra_graph *g)
410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int i;
412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   while (g->stack_count != 0) {
414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int r;
415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      int n = g->stack[g->stack_count - 1];
416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      struct ra_class *c = g->regs->classes[g->nodes[n].class];
417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      /* Find the lowest-numbered reg which is not used by a member
419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       * of the graph adjacent to us.
420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org       */
421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      for (r = 0; r < g->regs->count; r++) {
422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (!c->regs[r])
423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    continue;
424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 /* Check if any of our neighbors conflict with this register choice. */
426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 for (i = 0; i < g->nodes[n].adjacency_count; i++) {
427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    unsigned int n2 = g->nodes[n].adjacency_list[i];
428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    if (!g->nodes[n2].in_stack &&
430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	       break;
432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    }
433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 }
434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 if (i == g->nodes[n].adjacency_count)
435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	    break;
436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (r == g->regs->count)
438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 return GL_FALSE;
439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[n].reg = r;
441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[n].in_stack = GL_FALSE;
442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->stack_count--;
443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return GL_TRUE;
446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
448f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
449f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Optimistic register coloring: Just push the remaining nodes
450f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * on the stack.  They'll be colored first in ra_select(), and
451f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * if they succeed then the locally-colorable nodes are still
452f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * locally-colorable and the rest of the register allocation
453f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * will succeed.
454f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
455f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
456f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_optimistic_color(struct ra_graph *g)
457f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
458f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int i;
459f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
460f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (i = 0; i < g->count; i++) {
461f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG)
462f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 continue;
463f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
464f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->stack[g->stack_count] = i;
465f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->stack_count++;
466f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      g->nodes[i].in_stack = GL_TRUE;
467f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
468f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
469f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
470f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgGLboolean
471f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_allocate_no_spills(struct ra_graph *g)
472f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
473f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   if (!ra_simplify(g)) {
474f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      ra_optimistic_color(g);
475f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
476f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return ra_select(g);
477f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
478f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
479f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgunsigned int
480f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_get_node_reg(struct ra_graph *g, unsigned int n)
481f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
482f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return g->nodes[n].reg;
483f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
484f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
485f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
486f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Forces a node to a specific register.  This can be used to avoid
487f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * creating a register class containing one node when handling data
488f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * that must live in a fixed location and is known to not conflict
489f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * with other forced register assignment (as is common with shader
490f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * input data).  These nodes do not end up in the stack during
491f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * ra_simplify(), and thus at ra_select() time it is as if they were
492f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the first popped off the stack and assigned their fixed locations.
493f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *
494f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Must be called before ra_simplify().
495f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
496f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
497f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg)
498f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
499f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n].reg = reg;
500f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n].in_stack = GL_FALSE;
501f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
502f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
503f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgstatic float
504f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_get_spill_benefit(struct ra_graph *g, unsigned int n)
505f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
506f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int j;
507f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   float benefit = 0;
508f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   int n_class = g->nodes[n].class;
509f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
510f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   /* Define the benefit of eliminating an interference between n, n2
511f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * through spilling as q(C, B) / p(C).  This is similar to the
512f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * "count number of edges" approach of traditional graph coloring,
513f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    * but takes classes into account.
514f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org    */
515f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
516f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      unsigned int n2 = g->nodes[n].adjacency_list[j];
517f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (n != n2) {
518f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 unsigned int n2_class = g->nodes[n2].class;
519f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 benefit += ((float)g->regs->classes[n_class]->q[n2_class] /
520f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org		     g->regs->classes[n_class]->p);
521f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
522f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
523f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
524f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return benefit;
525f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
526f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
527f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
528f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Returns a node number to be spilled according to the cost/benefit using
529f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * the pq test, or -1 if there are no spillable nodes.
530f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
531f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgint
532f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_get_best_spill_node(struct ra_graph *g)
533f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
534f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int best_node = -1;
535f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int best_benefit = 0.0;
536f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   unsigned int n;
537f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
538f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   for (n = 0; n < g->count; n++) {
539f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      float cost = g->nodes[n].spill_cost;
540f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      float benefit;
541f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
542f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (cost <= 0.0)
543f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 continue;
544f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
545f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      benefit = ra_get_spill_benefit(g, n);
546f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
547f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      if (benefit / cost > best_benefit) {
548f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 best_benefit = benefit / cost;
549f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org	 best_node = n;
550f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org      }
551f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   }
552f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
553f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   return best_node;
554f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
555f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org
556f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/**
557f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * Only nodes with a spill cost set (cost != 0.0) will be considered
558f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org * for register spilling.
559f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org */
560f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid
561f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
562f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{
563f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org   g->nodes[n].spill_cost = cost;
564f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org}
565