/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24#include <stdio.h>
25#include <stdlib.h>
26#include "gen_device_info.h"
27#include "compiler/shader_enums.h"
28
29static const struct gen_device_info gen_device_info_i965 = {
30   .gen = 4,
31   .has_negative_rhw_bug = true,
32   .num_slices = 1,
33   .max_vs_threads = 16,
34   .max_gs_threads = 2,
35   .max_wm_threads = 8 * 4,
36   .urb = {
37      .size = 256,
38   },
39};
40
41static const struct gen_device_info gen_device_info_g4x = {
42   .gen = 4,
43   .has_pln = true,
44   .has_compr4 = true,
45   .has_surface_tile_offset = true,
46   .is_g4x = true,
47   .num_slices = 1,
48   .max_vs_threads = 32,
49   .max_gs_threads = 2,
50   .max_wm_threads = 10 * 5,
51   .urb = {
52      .size = 384,
53   },
54};
55
56static const struct gen_device_info gen_device_info_ilk = {
57   .gen = 5,
58   .has_pln = true,
59   .has_compr4 = true,
60   .has_surface_tile_offset = true,
61   .num_slices = 1,
62   .max_vs_threads = 72,
63   .max_gs_threads = 32,
64   .max_wm_threads = 12 * 6,
65   .urb = {
66      .size = 1024,
67   },
68};
69
70static const struct gen_device_info gen_device_info_snb_gt1 = {
71   .gen = 6,
72   .gt = 1,
73   .has_hiz_and_separate_stencil = true,
74   .has_llc = true,
75   .has_pln = true,
76   .has_surface_tile_offset = true,
77   .needs_unlit_centroid_workaround = true,
78   .num_slices = 1,
79   .max_vs_threads = 24,
80   .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
81   .max_wm_threads = 40,
82   .urb = {
83      .size = 32,
84      .min_entries = {
85         [MESA_SHADER_VERTEX]   = 24,
86      },
87      .max_entries = {
88         [MESA_SHADER_VERTEX]   = 256,
89         [MESA_SHADER_GEOMETRY] = 256,
90      },
91   },
92};
93
94static const struct gen_device_info gen_device_info_snb_gt2 = {
95   .gen = 6,
96   .gt = 2,
97   .has_hiz_and_separate_stencil = true,
98   .has_llc = true,
99   .has_pln = true,
100   .has_surface_tile_offset = true,
101   .needs_unlit_centroid_workaround = true,
102   .num_slices = 1,
103   .max_vs_threads = 60,
104   .max_gs_threads = 60,
105   .max_wm_threads = 80,
106   .urb = {
107      .size = 64,
108      .min_entries = {
109         [MESA_SHADER_VERTEX]   = 24,
110      },
111      .max_entries = {
112         [MESA_SHADER_VERTEX]   = 256,
113         [MESA_SHADER_GEOMETRY] = 256,
114      },
115   },
116};
117
/*
 * Designated initializers shared by every gen 7 device table in this file.
 *
 * Expands to a comma-separated list with no trailing comma, so a table must
 * write "GEN7_FEATURES," before appending its own fields.  A field repeated
 * after the macro overrides the value given here (the byt table resets
 * .has_llc to false this way).
 */
#define GEN7_FEATURES                               \
   .gen = 7,                                        \
   .has_hiz_and_separate_stencil = true,            \
   .must_use_separate_stencil = true,               \
   .has_llc = true,                                 \
   .has_pln = true,                                 \
   .has_surface_tile_offset = true
125
126static const struct gen_device_info gen_device_info_ivb_gt1 = {
127   GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
128   .num_slices = 1,
129   .max_vs_threads = 36,
130   .max_tcs_threads = 36,
131   .max_tes_threads = 36,
132   .max_gs_threads = 36,
133   .max_wm_threads = 48,
134   .max_cs_threads = 36,
135   .urb = {
136      .size = 128,
137      .min_entries = {
138         [MESA_SHADER_VERTEX]    = 32,
139         [MESA_SHADER_TESS_EVAL] = 10,
140      },
141      .max_entries = {
142         [MESA_SHADER_VERTEX]    = 512,
143         [MESA_SHADER_TESS_CTRL] = 32,
144         [MESA_SHADER_TESS_EVAL] = 288,
145         [MESA_SHADER_GEOMETRY]  = 192,
146      },
147   },
148};
149
150static const struct gen_device_info gen_device_info_ivb_gt2 = {
151   GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
152   .num_slices = 1,
153   .max_vs_threads = 128,
154   .max_tcs_threads = 128,
155   .max_tes_threads = 128,
156   .max_gs_threads = 128,
157   .max_wm_threads = 172,
158   .max_cs_threads = 64,
159   .urb = {
160      .size = 256,
161      .min_entries = {
162         [MESA_SHADER_VERTEX]    = 32,
163         [MESA_SHADER_TESS_EVAL] = 10,
164      },
165      .max_entries = {
166         [MESA_SHADER_VERTEX]    = 704,
167         [MESA_SHADER_TESS_CTRL] = 64,
168         [MESA_SHADER_TESS_EVAL] = 448,
169         [MESA_SHADER_GEOMETRY]  = 320,
170      },
171   },
172};
173
174static const struct gen_device_info gen_device_info_byt = {
175   GEN7_FEATURES, .is_baytrail = true, .gt = 1,
176   .num_slices = 1,
177   .has_llc = false,
178   .max_vs_threads = 36,
179   .max_tcs_threads = 36,
180   .max_tes_threads = 36,
181   .max_gs_threads = 36,
182   .max_wm_threads = 48,
183   .max_cs_threads = 32,
184   .urb = {
185      .size = 128,
186      .min_entries = {
187         [MESA_SHADER_VERTEX]    = 32,
188         [MESA_SHADER_TESS_EVAL] = 10,
189      },
190      .max_entries = {
191         [MESA_SHADER_VERTEX]    = 512,
192         [MESA_SHADER_TESS_CTRL] = 32,
193         [MESA_SHADER_TESS_EVAL] = 288,
194         [MESA_SHADER_GEOMETRY]  = 192,
195      },
196   },
197};
198
/*
 * Designated initializers shared by the hsw_gt* tables: everything in
 * GEN7_FEATURES plus the Haswell flag, SIMD16 three-source support and the
 * resource streamer.  Like GEN7_FEATURES, it expands without a trailing
 * comma.
 */
#define HSW_FEATURES             \
   GEN7_FEATURES,                \
   .is_haswell = true,           \
   .supports_simd16_3src = true, \
   .has_resource_streamer = true
204
205static const struct gen_device_info gen_device_info_hsw_gt1 = {
206   HSW_FEATURES, .gt = 1,
207   .num_slices = 1,
208   .max_vs_threads = 70,
209   .max_tcs_threads = 70,
210   .max_tes_threads = 70,
211   .max_gs_threads = 70,
212   .max_wm_threads = 102,
213   .max_cs_threads = 70,
214   .urb = {
215      .size = 128,
216      .min_entries = {
217         [MESA_SHADER_VERTEX]    = 32,
218         [MESA_SHADER_TESS_EVAL] = 10,
219      },
220      .max_entries = {
221         [MESA_SHADER_VERTEX]    = 640,
222         [MESA_SHADER_TESS_CTRL] = 64,
223         [MESA_SHADER_TESS_EVAL] = 384,
224         [MESA_SHADER_GEOMETRY]  = 256,
225      },
226   },
227};
228
229static const struct gen_device_info gen_device_info_hsw_gt2 = {
230   HSW_FEATURES, .gt = 2,
231   .num_slices = 1,
232   .max_vs_threads = 280,
233   .max_tcs_threads = 256,
234   .max_tes_threads = 280,
235   .max_gs_threads = 256,
236   .max_wm_threads = 204,
237   .max_cs_threads = 70,
238   .urb = {
239      .size = 256,
240      .min_entries = {
241         [MESA_SHADER_VERTEX]    = 64,
242         [MESA_SHADER_TESS_EVAL] = 10,
243      },
244      .max_entries = {
245         [MESA_SHADER_VERTEX]    = 1664,
246         [MESA_SHADER_TESS_CTRL] = 128,
247         [MESA_SHADER_TESS_EVAL] = 960,
248         [MESA_SHADER_GEOMETRY]  = 640,
249      },
250   },
251};
252
253static const struct gen_device_info gen_device_info_hsw_gt3 = {
254   HSW_FEATURES, .gt = 3,
255   .num_slices = 2,
256   .max_vs_threads = 280,
257   .max_tcs_threads = 256,
258   .max_tes_threads = 280,
259   .max_gs_threads = 256,
260   .max_wm_threads = 408,
261   .max_cs_threads = 70,
262   .urb = {
263      .size = 512,
264      .min_entries = {
265         [MESA_SHADER_VERTEX]    = 64,
266         [MESA_SHADER_TESS_EVAL] = 10,
267      },
268      .max_entries = {
269         [MESA_SHADER_VERTEX]    = 1664,
270         [MESA_SHADER_TESS_CTRL] = 128,
271         [MESA_SHADER_TESS_EVAL] = 960,
272         [MESA_SHADER_GEOMETRY]  = 640,
273      },
274   },
275};
276
/*
 * Designated initializers shared by every gen 8 device table in this file:
 * feature flags plus default thread counts for the VS/TCS/TES/GS/WM stages.
 *
 * Expands without a trailing comma.  Tables may override any of these by
 * repeating the field after the macro (the chv table lowers all thread
 * counts and clears .has_llc).  max_cs_threads and the URB description are
 * not set here and must be supplied by each table.
 */
#define GEN8_FEATURES                               \
   .gen = 8,                                        \
   .has_hiz_and_separate_stencil = true,            \
   .has_resource_streamer = true,                   \
   .must_use_separate_stencil = true,               \
   .has_llc = true,                                 \
   .has_pln = true,                                 \
   .supports_simd16_3src = true,                    \
   .has_surface_tile_offset = true,                 \
   .max_vs_threads = 504,                           \
   .max_tcs_threads = 504,                          \
   .max_tes_threads = 504,                          \
   .max_gs_threads = 504,                           \
   .max_wm_threads = 384
291
292static const struct gen_device_info gen_device_info_bdw_gt1 = {
293   GEN8_FEATURES, .gt = 1,
294   .num_slices = 1,
295   .max_cs_threads = 42,
296   .urb = {
297      .size = 192,
298      .min_entries = {
299         [MESA_SHADER_VERTEX]    = 64,
300         [MESA_SHADER_TESS_EVAL] = 34,
301      },
302      .max_entries = {
303         [MESA_SHADER_VERTEX]    = 2560,
304         [MESA_SHADER_TESS_CTRL] = 504,
305         [MESA_SHADER_TESS_EVAL] = 1536,
306         [MESA_SHADER_GEOMETRY]  = 960,
307      },
308   }
309};
310
311static const struct gen_device_info gen_device_info_bdw_gt2 = {
312   GEN8_FEATURES, .gt = 2,
313   .num_slices = 1,
314   .max_cs_threads = 56,
315   .urb = {
316      .size = 384,
317      .min_entries = {
318         [MESA_SHADER_VERTEX]    = 64,
319         [MESA_SHADER_TESS_EVAL] = 34,
320      },
321      .max_entries = {
322         [MESA_SHADER_VERTEX]    = 2560,
323         [MESA_SHADER_TESS_CTRL] = 504,
324         [MESA_SHADER_TESS_EVAL] = 1536,
325         [MESA_SHADER_GEOMETRY]  = 960,
326      },
327   }
328};
329
330static const struct gen_device_info gen_device_info_bdw_gt3 = {
331   GEN8_FEATURES, .gt = 3,
332   .num_slices = 2,
333   .max_cs_threads = 56,
334   .urb = {
335      .size = 384,
336      .min_entries = {
337         [MESA_SHADER_VERTEX]    = 64,
338         [MESA_SHADER_TESS_EVAL] = 34,
339      },
340      .max_entries = {
341         [MESA_SHADER_VERTEX]    = 2560,
342         [MESA_SHADER_TESS_CTRL] = 504,
343         [MESA_SHADER_TESS_EVAL] = 1536,
344         [MESA_SHADER_GEOMETRY]  = 960,
345      },
346   }
347};
348
349static const struct gen_device_info gen_device_info_chv = {
350   GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
351   .has_llc = false,
352   .num_slices = 1,
353   .max_vs_threads = 80,
354   .max_tcs_threads = 80,
355   .max_tes_threads = 80,
356   .max_gs_threads = 80,
357   .max_wm_threads = 128,
358   .max_cs_threads = 6 * 7,
359   .urb = {
360      .size = 192,
361      .min_entries = {
362         [MESA_SHADER_VERTEX]    = 34,
363         [MESA_SHADER_TESS_EVAL] = 34,
364      },
365      .max_entries = {
366         [MESA_SHADER_VERTEX]    = 640,
367         [MESA_SHADER_TESS_CTRL] = 80,
368         [MESA_SHADER_TESS_EVAL] = 384,
369         [MESA_SHADER_GEOMETRY]  = 256,
370      },
371   }
372};
373
/*
 * Designated initializers shared by every gen 9 device table in this file:
 * feature flags, default thread counts and a default URB description.
 *
 * Expands without a trailing comma.  Tables override individual values by
 * repeating the field after the macro (for example ".urb.size = 192").
 * max_wm_threads is deliberately absent: gen_get_device_info() computes it
 * from num_slices for gen >= 9.
 */
#define GEN9_FEATURES                               \
   .gen = 9,                                        \
   .has_hiz_and_separate_stencil = true,            \
   .has_resource_streamer = true,                   \
   .must_use_separate_stencil = true,               \
   .has_llc = true,                                 \
   .has_pln = true,                                 \
   .supports_simd16_3src = true,                    \
   .has_surface_tile_offset = true,                 \
   .max_vs_threads = 336,                           \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 336,                          \
   .max_cs_threads = 56,                            \
   .urb = {                                         \
      .size = 384,                                  \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 1856,            \
         [MESA_SHADER_TESS_CTRL] = 672,             \
         [MESA_SHADER_TESS_EVAL] = 1120,            \
         [MESA_SHADER_GEOMETRY]  = 640,             \
      },                                            \
   }
401
/*
 * Designated initializers for the gen 9 "LP" tables (bxt and glk in this
 * file).
 *
 * Starts from GEN9_FEATURES and then overrides it: sets the Broxton flag,
 * GT1, no LLC, one slice, lower thread counts and a replacement URB
 * description.  Note that the glk tables use this macro too and therefore
 * also get .is_broxton set.  Expands without a trailing comma.
 */
#define GEN9_LP_FEATURES                           \
   GEN9_FEATURES,                                  \
   .is_broxton = true,                             \
   .gt = 1,                                        \
   .has_llc = false,                               \
   .num_slices = 1,                                \
   .max_vs_threads = 112,                          \
   .max_tcs_threads = 112,                         \
   .max_tes_threads = 112,                         \
   .max_gs_threads = 112,                          \
   .max_cs_threads = 6 * 6,                        \
   .urb = {                                        \
      .size = 192,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 34,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 704,            \
         [MESA_SHADER_TESS_CTRL] = 256,            \
         [MESA_SHADER_TESS_EVAL] = 416,            \
         [MESA_SHADER_GEOMETRY]  = 256,            \
      },                                           \
   }
426
/*
 * Designated initializers for the "2x6" variants of the gen 9 LP tables
 * (bxt_2x6 and glk_2x6).
 *
 * Starts from GEN9_LP_FEATURES and halves the VS/TCS/TES/GS thread counts
 * and the maximum URB entry counts, with a smaller URB size.
 * max_cs_threads is restated with the same value as in GEN9_LP_FEATURES.
 * Expands without a trailing comma.
 */
#define GEN9_LP_FEATURES_2X6                       \
   GEN9_LP_FEATURES,                               \
   .max_vs_threads = 56,                           \
   .max_tcs_threads = 56,                          \
   .max_tes_threads = 56,                          \
   .max_gs_threads = 56,                           \
   .max_cs_threads = 6 * 6,                        \
   .urb = {                                        \
      .size = 128,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 34,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 352,            \
         [MESA_SHADER_TESS_CTRL] = 128,            \
         [MESA_SHADER_TESS_EVAL] = 208,            \
         [MESA_SHADER_GEOMETRY]  = 128,            \
      },                                           \
   }
447
448static const struct gen_device_info gen_device_info_skl_gt1 = {
449   GEN9_FEATURES, .gt = 1,
450   .num_slices = 1,
451   .urb.size = 192,
452};
453
454static const struct gen_device_info gen_device_info_skl_gt2 = {
455   GEN9_FEATURES, .gt = 2,
456   .num_slices = 1,
457};
458
459static const struct gen_device_info gen_device_info_skl_gt3 = {
460   GEN9_FEATURES, .gt = 3,
461   .num_slices = 2,
462};
463
464static const struct gen_device_info gen_device_info_skl_gt4 = {
465   GEN9_FEATURES, .gt = 4,
466   .num_slices = 3,
467   /* From the "L3 Allocation and Programming" documentation:
468    *
469    * "URB is limited to 1008KB due to programming restrictions.  This is not a
470    * restriction of the L3 implementation, but of the FF and other clients.
471    * Therefore, in a GT4 implementation it is possible for the programmed
472    * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
473    * only 1008KB of this will be used."
474    */
475   .urb.size = 1008 / 3,
476};
477
478static const struct gen_device_info gen_device_info_bxt = {
479   GEN9_LP_FEATURES
480};
481
482static const struct gen_device_info gen_device_info_bxt_2x6 = {
483   GEN9_LP_FEATURES_2X6
484};
/*
 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
 * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
 */
489
490static const struct gen_device_info gen_device_info_kbl_gt1 = {
491   GEN9_FEATURES,
492   .is_kabylake = true,
493   .gt = 1,
494
495   .max_cs_threads = 7 * 6,
496   .urb.size = 192,
497   .num_slices = 1,
498};
499
500static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
501   GEN9_FEATURES,
502   .is_kabylake = true,
503   .gt = 1,
504
505   .max_cs_threads = 7 * 6,
506   .num_slices = 1,
507};
508
509static const struct gen_device_info gen_device_info_kbl_gt2 = {
510   GEN9_FEATURES,
511   .is_kabylake = true,
512   .gt = 2,
513
514   .num_slices = 1,
515};
516
517static const struct gen_device_info gen_device_info_kbl_gt3 = {
518   GEN9_FEATURES,
519   .is_kabylake = true,
520   .gt = 3,
521
522   .num_slices = 2,
523};
524
525static const struct gen_device_info gen_device_info_kbl_gt4 = {
526   GEN9_FEATURES,
527   .is_kabylake = true,
528   .gt = 4,
529
530   /*
531    * From the "L3 Allocation and Programming" documentation:
532    *
533    * "URB is limited to 1008KB due to programming restrictions.  This
534    *  is not a restriction of the L3 implementation, but of the FF and
535    *  other clients.  Therefore, in a GT4 implementation it is
536    *  possible for the programmed allocation of the L3 data array to
537    *  provide 3*384KB=1152KB for URB, but only 1008KB of this
538    *  will be used."
539    */
540   .urb.size = 1008 / 3,
541   .num_slices = 3,
542};
543
544static const struct gen_device_info gen_device_info_glk = {
545   GEN9_LP_FEATURES
546};
547
548static const struct gen_device_info gen_device_info_glk_2x6 = {
549   GEN9_LP_FEATURES_2X6
550};
551
552bool
553gen_get_device_info(int devid, struct gen_device_info *devinfo)
554{
555   switch (devid) {
556#undef CHIPSET
557#define CHIPSET(id, family, name) \
558      case id: *devinfo = gen_device_info_##family; break;
559#include "pci_ids/i965_pci_ids.h"
560   default:
561      fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid);
562      return false;
563   }
564
565   /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
566    *
567    * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
568    *  allocate scratch space enough so that each slice has 4 slices allowed."
569    *
570    * The equivalent internal documentation says that this programming note
571    * applies to all Gen9+ platforms.
572    *
573    * The hardware typically calculates the scratch space pointer by taking
574    * the base address, and adding per-thread-scratch-space * thread ID.
575    * Extra padding can be necessary depending how the thread IDs are
576    * calculated for a particular shader stage.
577    */
578   if (devinfo->gen >= 9) {
579      devinfo->max_wm_threads = 64 /* threads-per-PSD */
580                              * devinfo->num_slices
581                              * 4; /* effective subslices per slice */
582   }
583
584   return true;
585}
586
587const char *
588gen_get_device_name(int devid)
589{
590   switch (devid) {
591#undef CHIPSET
592#define CHIPSET(id, family, name) case id: return name;
593#include "pci_ids/i965_pci_ids.h"
594   default:
595      return NULL;
596   }
597}
598