1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "util/mesa-sha1.h"
25#include "util/hash_table.h"
26#include "util/debug.h"
27#include "anv_private.h"
28
29static size_t
30anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
31                    uint32_t key_size,
32                    uint32_t surface_count, uint32_t sampler_count)
33{
34   const uint32_t binding_data_size =
35      (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding);
36
37   return align_u32(sizeof(struct anv_shader_bin), 8) +
38          align_u32(prog_data_size, 8) +
39          align_u32(nr_params * sizeof(void *), 8) +
40          align_u32(sizeof(uint32_t) + key_size, 8) +
41          align_u32(binding_data_size, 8);
42}
43
struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   /* Create a ref-counted shader binary (initial refcount 1) in a single
    * host allocation: the anv_shader_bin struct followed by copies of the
    * prog_data, its param array, the cache key, and the binding tables.
    * The section order and 8-byte padding below must stay in sync with
    * anv_shader_bin_size().  Returns NULL on host allocation failure.
    */
   const size_t size =
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
      vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return NULL;

   shader->ref_cnt = 1;

   /* Upload the kernel into the instruction state pool and remember its
    * size so the binary can later be serialized.
    * NOTE(review): the state-pool allocation result is not checked here —
    * presumably the pool cannot fail; confirm before relying on that.
    */
   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;
   shader->bind_map = *bind_map;
   shader->prog_data_size = prog_data_size;

   /* Now we fill out the floating data at the end */
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   /* Section 1: the prog_data copy. */
   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   /* Section 2: the param array.  Only plain params are supported; the
    * pull/image param arrays would otherwise also need to be copied and
    * repointed here.
    */
   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   /* Section 3: the sized cache key (uint32_t size + payload). */
   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);

   /* Section 4: surface bindings immediately followed by sampler
    * bindings; the embedded bind_map is repointed at these copies so the
    * shader owns its binding data.
    */
   shader->bind_map.surface_to_descriptor = data;
   memcpy(data, bind_map->surface_to_descriptor,
          bind_map->surface_count * sizeof(struct anv_pipeline_binding));
   data += bind_map->surface_count * sizeof(struct anv_pipeline_binding);

   shader->bind_map.sampler_to_descriptor = data;
   memcpy(data, bind_map->sampler_to_descriptor,
          bind_map->sampler_count * sizeof(struct anv_pipeline_binding));

   return shader;
}
103
104void
105anv_shader_bin_destroy(struct anv_device *device,
106                       struct anv_shader_bin *shader)
107{
108   assert(shader->ref_cnt == 0);
109   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
110   vk_free(&device->alloc, shader);
111}
112
113static size_t
114anv_shader_bin_data_size(const struct anv_shader_bin *shader)
115{
116   return anv_shader_bin_size(shader->prog_data_size,
117                              shader->prog_data->nr_params, shader->key->size,
118                              shader->bind_map.surface_count,
119                              shader->bind_map.sampler_count) +
120          align_u32(shader->kernel_size, 8);
121}
122
123static void
124anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
125{
126   size_t struct_size =
127      anv_shader_bin_size(shader->prog_data_size,
128                          shader->prog_data->nr_params, shader->key->size,
129                          shader->bind_map.surface_count,
130                          shader->bind_map.sampler_count);
131
132   memcpy(data, shader, struct_size);
133   data += struct_size;
134
135   memcpy(data, shader->kernel.map, shader->kernel_size);
136}
137
138/* Remaining work:
139 *
140 * - Compact binding table layout so it's tight and not dependent on
141 *   descriptor set layout.
142 *
143 * - Review prog_data struct for size and cacheability: struct
144 *   brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8
145 *   bit quantities etc; param, pull_param, and image_params are pointers, we
 *   just need the compaction map.  Use bit fields for all bools, e.g.
147 *   dual_src_blend.
148 */
149
150static uint32_t
151shader_bin_key_hash_func(const void *void_key)
152{
153   const struct anv_shader_bin_key *key = void_key;
154   return _mesa_hash_data(key->data, key->size);
155}
156
157static bool
158shader_bin_key_compare_func(const void *void_a, const void *void_b)
159{
160   const struct anv_shader_bin_key *a = void_a, *b = void_b;
161   if (a->size != b->size)
162      return false;
163
164   return memcmp(a->data, b->data, a->size) == 0;
165}
166
167void
168anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
169                        struct anv_device *device,
170                        bool cache_enabled)
171{
172   cache->device = device;
173   pthread_mutex_init(&cache->mutex, NULL);
174
175   if (cache_enabled) {
176      cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func,
177                                             shader_bin_key_compare_func);
178   } else {
179      cache->cache = NULL;
180   }
181}
182
183void
184anv_pipeline_cache_finish(struct anv_pipeline_cache *cache)
185{
186   pthread_mutex_destroy(&cache->mutex);
187
188   if (cache->cache) {
189      /* This is a bit unfortunate.  In order to keep things from randomly
190       * going away, the shader cache has to hold a reference to all shader
191       * binaries it contains.  We unref them when we destroy the cache.
192       */
193      struct hash_entry *entry;
194      hash_table_foreach(cache->cache, entry)
195         anv_shader_bin_unref(cache->device, entry->data);
196
197      _mesa_hash_table_destroy(cache->cache, NULL);
198   }
199}
200
201void
202anv_hash_shader(unsigned char *hash, const void *key, size_t key_size,
203                struct anv_shader_module *module,
204                const char *entrypoint,
205                const struct anv_pipeline_layout *pipeline_layout,
206                const VkSpecializationInfo *spec_info)
207{
208   struct mesa_sha1 *ctx;
209
210   ctx = _mesa_sha1_init();
211   _mesa_sha1_update(ctx, key, key_size);
212   _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
213   _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
214   if (pipeline_layout) {
215      _mesa_sha1_update(ctx, pipeline_layout->sha1,
216                        sizeof(pipeline_layout->sha1));
217   }
218   /* hash in shader stage, pipeline layout? */
219   if (spec_info) {
220      _mesa_sha1_update(ctx, spec_info->pMapEntries,
221                        spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
222      _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
223   }
224   _mesa_sha1_final(ctx, hash);
225}
226
227static struct anv_shader_bin *
228anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
229                                 const void *key_data, uint32_t key_size)
230{
231   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
232   struct anv_shader_bin_key *key = (void *)vla;
233   key->size = key_size;
234   memcpy(key->data, key_data, key_size);
235
236   struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key);
237   if (entry)
238      return entry->data;
239   else
240      return NULL;
241}
242
243struct anv_shader_bin *
244anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
245                          const void *key_data, uint32_t key_size)
246{
247   if (!cache->cache)
248      return NULL;
249
250   pthread_mutex_lock(&cache->mutex);
251
252   struct anv_shader_bin *shader =
253      anv_pipeline_cache_search_locked(cache, key_data, key_size);
254
255   pthread_mutex_unlock(&cache->mutex);
256
257   /* We increment refcount before handing it to the caller */
258   if (shader)
259      anv_shader_bin_ref(shader);
260
261   return shader;
262}
263
264static struct anv_shader_bin *
265anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
266                              const void *key_data, uint32_t key_size,
267                              const void *kernel_data, uint32_t kernel_size,
268                              const struct brw_stage_prog_data *prog_data,
269                              uint32_t prog_data_size,
270                              const void *prog_data_param,
271                              const struct anv_pipeline_bind_map *bind_map)
272{
273   struct anv_shader_bin *shader =
274      anv_pipeline_cache_search_locked(cache, key_data, key_size);
275   if (shader)
276      return shader;
277
278   struct anv_shader_bin *bin =
279      anv_shader_bin_create(cache->device, key_data, key_size,
280                            kernel_data, kernel_size,
281                            prog_data, prog_data_size, prog_data_param,
282                            bind_map);
283   if (!bin)
284      return NULL;
285
286   _mesa_hash_table_insert(cache->cache, bin->key, bin);
287
288   return bin;
289}
290
291struct anv_shader_bin *
292anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
293                                 const void *key_data, uint32_t key_size,
294                                 const void *kernel_data, uint32_t kernel_size,
295                                 const struct brw_stage_prog_data *prog_data,
296                                 uint32_t prog_data_size,
297                                 const struct anv_pipeline_bind_map *bind_map)
298{
299   if (cache->cache) {
300      pthread_mutex_lock(&cache->mutex);
301
302      struct anv_shader_bin *bin =
303         anv_pipeline_cache_add_shader(cache, key_data, key_size,
304                                       kernel_data, kernel_size,
305                                       prog_data, prog_data_size,
306                                       prog_data->param, bind_map);
307
308      pthread_mutex_unlock(&cache->mutex);
309
310      /* We increment refcount before handing it to the caller */
311      anv_shader_bin_ref(bin);
312
313      return bin;
314   } else {
315      /* In this case, we're not caching it so the caller owns it entirely */
316      return anv_shader_bin_create(cache->device, key_data, key_size,
317                                   kernel_data, kernel_size,
318                                   prog_data, prog_data_size,
319                                   prog_data->param, bind_map);
320   }
321}
322
/* Leading header of the VkPipelineCache data blob, matching the layout
 * required by the Vulkan spec for vkGetPipelineCacheData.
 */
struct cache_header {
   uint32_t header_size;        /* size of this header in bytes */
   uint32_t header_version;     /* VK_PIPELINE_CACHE_HEADER_VERSION_ONE */
   uint32_t vendor_id;          /* PCI vendor ID (0x8086 for Intel) */
   uint32_t device_id;          /* PCI device ID the blob was produced on */
   uint8_t  uuid[VK_UUID_SIZE]; /* physical-device pipeline cache UUID */
};
330
static void
anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
                        const void *data, size_t size)
{
   /* Populate the cache from a serialized blob (the format written by
    * anv_GetPipelineCacheData).  The data is untrusted application input:
    * the header is validated against this device and each entry is
    * bounds-checked, silently stopping at the first one that doesn't fit.
    */
   struct anv_device *device = cache->device;
   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct cache_header header;

   if (cache->cache == NULL)
      return;

   /* Reject blobs that are truncated or were produced by a different
    * driver, vendor or device.
    */
   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0x8086)
      return;
   if (header.device_id != device->chipset_id)
      return;
   if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
      return;

   const void *end = data + size;
   const void *p = data + header.header_size;

   /* Count is the total number of valid entries */
   uint32_t count;
   if (p + sizeof(count) >= end)
      return;
   memcpy(&count, p, sizeof(count));
   p += align_u32(sizeof(count), 8);

   /* Each entry mirrors the layout produced by anv_shader_bin_write_data:
    * struct, prog_data, params, sized key, binding tables, kernel bytes —
    * every section padded to 8 bytes.
    */
   for (uint32_t i = 0; i < count; i++) {
      struct anv_shader_bin bin;
      if (p + sizeof(bin) > end)
         break;
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      /* prog_data is read in place; the bounds check after advancing
       * covers the bytes nr_params is read from below.
       */
      const struct brw_stage_prog_data *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);
      if (p > end)
         break;

      uint32_t param_size = prog_data->nr_params * sizeof(void *);
      const void *prog_data_param = p;
      p += align_u32(param_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
      const void *key_data = p + sizeof(key);
      p += align_u32(sizeof(key) + key.size, 8);

      /* We're going to memcpy this so getting rid of const is fine */
      struct anv_pipeline_binding *bindings = (void *)p;
      p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) *
                     sizeof(struct anv_pipeline_binding), 8);
      bin.bind_map.surface_to_descriptor = bindings;
      bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count;

      const void *kernel_data = p;
      p += align_u32(bin.kernel_size, 8);

      /* Final bounds check covers the param/binding/kernel sections read
       * by anv_pipeline_cache_add_shader below.
       */
      if (p > end)
         break;

      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    prog_data_param, &bin.bind_map);
   }
}
408
static bool
pipeline_cache_enabled(void)
{
   /* Read ANV_ENABLE_PIPELINE_CACHE once (default: enabled) and memoize
    * the answer.  Racing first calls all compute the same value, so the
    * non-atomic static is benign.  The empty parameter list `()` was an
    * old-style (unprototyped) declaration in C; `(void)` makes it a
    * proper prototype.
    */
   static int enabled = -1;
   if (enabled < 0)
      enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
   return enabled;
}
417
418VkResult anv_CreatePipelineCache(
419    VkDevice                                    _device,
420    const VkPipelineCacheCreateInfo*            pCreateInfo,
421    const VkAllocationCallbacks*                pAllocator,
422    VkPipelineCache*                            pPipelineCache)
423{
424   ANV_FROM_HANDLE(anv_device, device, _device);
425   struct anv_pipeline_cache *cache;
426
427   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
428   assert(pCreateInfo->flags == 0);
429
430   cache = vk_alloc2(&device->alloc, pAllocator,
431                       sizeof(*cache), 8,
432                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
433   if (cache == NULL)
434      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
435
436   anv_pipeline_cache_init(cache, device, pipeline_cache_enabled());
437
438   if (pCreateInfo->initialDataSize > 0)
439      anv_pipeline_cache_load(cache,
440                              pCreateInfo->pInitialData,
441                              pCreateInfo->initialDataSize);
442
443   *pPipelineCache = anv_pipeline_cache_to_handle(cache);
444
445   return VK_SUCCESS;
446}
447
448void anv_DestroyPipelineCache(
449    VkDevice                                    _device,
450    VkPipelineCache                             _cache,
451    const VkAllocationCallbacks*                pAllocator)
452{
453   ANV_FROM_HANDLE(anv_device, device, _device);
454   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
455
456   if (!cache)
457      return;
458
459   anv_pipeline_cache_finish(cache);
460
461   vk_free2(&device->alloc, pAllocator, cache);
462}
463
464VkResult anv_GetPipelineCacheData(
465    VkDevice                                    _device,
466    VkPipelineCache                             _cache,
467    size_t*                                     pDataSize,
468    void*                                       pData)
469{
470   ANV_FROM_HANDLE(anv_device, device, _device);
471   ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache);
472   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
473   struct cache_header *header;
474
475   if (pData == NULL) {
476      size_t size = align_u32(sizeof(*header), 8) +
477                    align_u32(sizeof(uint32_t), 8);
478
479      if (cache->cache) {
480         struct hash_entry *entry;
481         hash_table_foreach(cache->cache, entry)
482            size += anv_shader_bin_data_size(entry->data);
483      }
484
485      *pDataSize = size;
486      return VK_SUCCESS;
487   }
488
489   if (*pDataSize < sizeof(*header)) {
490      *pDataSize = 0;
491      return VK_INCOMPLETE;
492   }
493
494   void *p = pData, *end = pData + *pDataSize;
495   header = p;
496   header->header_size = sizeof(*header);
497   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
498   header->vendor_id = 0x8086;
499   header->device_id = device->chipset_id;
500   memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
501   p += align_u32(header->header_size, 8);
502
503   uint32_t *count = p;
504   p += align_u32(sizeof(*count), 8);
505   *count = 0;
506
507   VkResult result = VK_SUCCESS;
508   if (cache->cache) {
509      struct hash_entry *entry;
510      hash_table_foreach(cache->cache, entry) {
511         struct anv_shader_bin *shader = entry->data;
512         size_t data_size = anv_shader_bin_data_size(entry->data);
513         if (p + data_size > end) {
514            result = VK_INCOMPLETE;
515            break;
516         }
517
518         anv_shader_bin_write_data(shader, p);
519         p += data_size;
520
521         (*count)++;
522      }
523   }
524
525   *pDataSize = p - pData;
526
527   return result;
528}
529
530VkResult anv_MergePipelineCaches(
531    VkDevice                                    _device,
532    VkPipelineCache                             destCache,
533    uint32_t                                    srcCacheCount,
534    const VkPipelineCache*                      pSrcCaches)
535{
536   ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache);
537
538   if (!dst->cache)
539      return VK_SUCCESS;
540
541   for (uint32_t i = 0; i < srcCacheCount; i++) {
542      ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]);
543      if (!src->cache)
544         continue;
545
546      struct hash_entry *entry;
547      hash_table_foreach(src->cache, entry) {
548         struct anv_shader_bin *bin = entry->data;
549         if (_mesa_hash_table_search(dst->cache, bin->key))
550            continue;
551
552         anv_shader_bin_ref(bin);
553         _mesa_hash_table_insert(dst->cache, bin->key, bin);
554      }
555   }
556
557   return VK_SUCCESS;
558}
559