1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "util/mesa-sha1.h" 25#include "util/hash_table.h" 26#include "util/debug.h" 27#include "anv_private.h" 28 29static size_t 30anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params, 31 uint32_t key_size, 32 uint32_t surface_count, uint32_t sampler_count) 33{ 34 const uint32_t binding_data_size = 35 (surface_count + sampler_count) * sizeof(struct anv_pipeline_binding); 36 37 return align_u32(sizeof(struct anv_shader_bin), 8) + 38 align_u32(prog_data_size, 8) + 39 align_u32(nr_params * sizeof(void *), 8) + 40 align_u32(sizeof(uint32_t) + key_size, 8) + 41 align_u32(binding_data_size, 8); 42} 43 44struct anv_shader_bin * 45anv_shader_bin_create(struct anv_device *device, 46 const void *key_data, uint32_t key_size, 47 const void *kernel_data, uint32_t kernel_size, 48 const struct brw_stage_prog_data *prog_data, 49 uint32_t prog_data_size, const void *prog_data_param, 50 const struct anv_pipeline_bind_map *bind_map) 51{ 52 const size_t size = 53 anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size, 54 bind_map->surface_count, bind_map->sampler_count); 55 56 struct anv_shader_bin *shader = 57 vk_alloc(&device->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 58 if (!shader) 59 return NULL; 60 61 shader->ref_cnt = 1; 62 63 shader->kernel = 64 anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64); 65 memcpy(shader->kernel.map, kernel_data, kernel_size); 66 shader->kernel_size = kernel_size; 67 shader->bind_map = *bind_map; 68 shader->prog_data_size = prog_data_size; 69 70 /* Now we fill out the floating data at the end */ 71 void *data = shader; 72 data += align_u32(sizeof(struct anv_shader_bin), 8); 73 74 shader->prog_data = data; 75 struct brw_stage_prog_data *new_prog_data = data; 76 memcpy(data, prog_data, prog_data_size); 77 data += align_u32(prog_data_size, 8); 78 79 assert(prog_data->nr_pull_params == 0); 80 assert(prog_data->nr_image_params == 0); 81 new_prog_data->param = data; 82 uint32_t param_size = prog_data->nr_params * sizeof(void *); 83 memcpy(data, prog_data_param, param_size); 84 data += align_u32(param_size, 8); 85 86 shader->key = data; 87 struct anv_shader_bin_key *key = data; 88 key->size = key_size; 89 memcpy(key->data, key_data, key_size); 90 data += align_u32(sizeof(*key) + key_size, 8); 91 92 shader->bind_map.surface_to_descriptor = data; 93 memcpy(data, bind_map->surface_to_descriptor, 94 bind_map->surface_count * sizeof(struct anv_pipeline_binding)); 95 data += bind_map->surface_count * sizeof(struct anv_pipeline_binding); 96 97 shader->bind_map.sampler_to_descriptor = data; 98 memcpy(data, bind_map->sampler_to_descriptor, 99 bind_map->sampler_count * sizeof(struct anv_pipeline_binding)); 100 101 return shader; 102} 103 104void 105anv_shader_bin_destroy(struct anv_device *device, 106 struct anv_shader_bin *shader) 107{ 108 assert(shader->ref_cnt == 0); 109 anv_state_pool_free(&device->instruction_state_pool, shader->kernel); 110 vk_free(&device->alloc, shader); 111} 112 113static size_t 114anv_shader_bin_data_size(const struct anv_shader_bin *shader) 115{ 116 return anv_shader_bin_size(shader->prog_data_size, 117 shader->prog_data->nr_params, shader->key->size, 118 shader->bind_map.surface_count, 119 shader->bind_map.sampler_count) + 120 align_u32(shader->kernel_size, 8); 121} 122 123static void 124anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data) 125{ 126 size_t struct_size = 127 anv_shader_bin_size(shader->prog_data_size, 128 shader->prog_data->nr_params, shader->key->size, 129 shader->bind_map.surface_count, 130 shader->bind_map.sampler_count); 131 132 memcpy(data, shader, struct_size); 133 data += struct_size; 134 135 memcpy(data, shader->kernel.map, shader->kernel_size); 136} 137 138/* Remaining work: 139 * 140 * - Compact binding table layout so it's tight and not dependent on 141 * descriptor set layout. 142 * 143 * - Review prog_data struct for size and cacheability: struct 144 * brw_stage_prog_data has binding_table which uses a lot of uint32_t for 8 145 * bit quantities etc; param, pull_param, and image_params are pointers, we 146 * just need the compation map. use bit fields for all bools, eg 147 * dual_src_blend. 148 */ 149 150static uint32_t 151shader_bin_key_hash_func(const void *void_key) 152{ 153 const struct anv_shader_bin_key *key = void_key; 154 return _mesa_hash_data(key->data, key->size); 155} 156 157static bool 158shader_bin_key_compare_func(const void *void_a, const void *void_b) 159{ 160 const struct anv_shader_bin_key *a = void_a, *b = void_b; 161 if (a->size != b->size) 162 return false; 163 164 return memcmp(a->data, b->data, a->size) == 0; 165} 166 167void 168anv_pipeline_cache_init(struct anv_pipeline_cache *cache, 169 struct anv_device *device, 170 bool cache_enabled) 171{ 172 cache->device = device; 173 pthread_mutex_init(&cache->mutex, NULL); 174 175 if (cache_enabled) { 176 cache->cache = _mesa_hash_table_create(NULL, shader_bin_key_hash_func, 177 shader_bin_key_compare_func); 178 } else { 179 cache->cache = NULL; 180 } 181} 182 183void 184anv_pipeline_cache_finish(struct anv_pipeline_cache *cache) 185{ 186 pthread_mutex_destroy(&cache->mutex); 187 188 if (cache->cache) { 189 /* This is a bit unfortunate. In order to keep things from randomly 190 * going away, the shader cache has to hold a reference to all shader 191 * binaries it contains. We unref them when we destroy the cache. 192 */ 193 struct hash_entry *entry; 194 hash_table_foreach(cache->cache, entry) 195 anv_shader_bin_unref(cache->device, entry->data); 196 197 _mesa_hash_table_destroy(cache->cache, NULL); 198 } 199} 200 201void 202anv_hash_shader(unsigned char *hash, const void *key, size_t key_size, 203 struct anv_shader_module *module, 204 const char *entrypoint, 205 const struct anv_pipeline_layout *pipeline_layout, 206 const VkSpecializationInfo *spec_info) 207{ 208 struct mesa_sha1 *ctx; 209 210 ctx = _mesa_sha1_init(); 211 _mesa_sha1_update(ctx, key, key_size); 212 _mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1)); 213 _mesa_sha1_update(ctx, entrypoint, strlen(entrypoint)); 214 if (pipeline_layout) { 215 _mesa_sha1_update(ctx, pipeline_layout->sha1, 216 sizeof(pipeline_layout->sha1)); 217 } 218 /* hash in shader stage, pipeline layout? */ 219 if (spec_info) { 220 _mesa_sha1_update(ctx, spec_info->pMapEntries, 221 spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]); 222 _mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize); 223 } 224 _mesa_sha1_final(ctx, hash); 225} 226 227static struct anv_shader_bin * 228anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache, 229 const void *key_data, uint32_t key_size) 230{ 231 uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))]; 232 struct anv_shader_bin_key *key = (void *)vla; 233 key->size = key_size; 234 memcpy(key->data, key_data, key_size); 235 236 struct hash_entry *entry = _mesa_hash_table_search(cache->cache, key); 237 if (entry) 238 return entry->data; 239 else 240 return NULL; 241} 242 243struct anv_shader_bin * 244anv_pipeline_cache_search(struct anv_pipeline_cache *cache, 245 const void *key_data, uint32_t key_size) 246{ 247 if (!cache->cache) 248 return NULL; 249 250 pthread_mutex_lock(&cache->mutex); 251 252 struct anv_shader_bin *shader = 253 anv_pipeline_cache_search_locked(cache, key_data, key_size); 254 255 pthread_mutex_unlock(&cache->mutex); 256 257 /* We increment refcount before handing it to the caller */ 258 if (shader) 259 anv_shader_bin_ref(shader); 260 261 return shader; 262} 263 264static struct anv_shader_bin * 265anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache, 266 const void *key_data, uint32_t key_size, 267 const void *kernel_data, uint32_t kernel_size, 268 const struct brw_stage_prog_data *prog_data, 269 uint32_t prog_data_size, 270 const void *prog_data_param, 271 const struct anv_pipeline_bind_map *bind_map) 272{ 273 struct anv_shader_bin *shader = 274 anv_pipeline_cache_search_locked(cache, key_data, key_size); 275 if (shader) 276 return shader; 277 278 struct anv_shader_bin *bin = 279 anv_shader_bin_create(cache->device, key_data, key_size, 280 kernel_data, kernel_size, 281 prog_data, prog_data_size, prog_data_param, 282 bind_map); 283 if (!bin) 284 return NULL; 285 286 _mesa_hash_table_insert(cache->cache, bin->key, bin); 287 288 return bin; 289} 290 291struct anv_shader_bin * 292anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache, 293 const void *key_data, uint32_t key_size, 294 const void *kernel_data, uint32_t kernel_size, 295 const struct brw_stage_prog_data *prog_data, 296 uint32_t prog_data_size, 297 const struct anv_pipeline_bind_map *bind_map) 298{ 299 if (cache->cache) { 300 pthread_mutex_lock(&cache->mutex); 301 302 struct anv_shader_bin *bin = 303 anv_pipeline_cache_add_shader(cache, key_data, key_size, 304 kernel_data, kernel_size, 305 prog_data, prog_data_size, 306 prog_data->param, bind_map); 307 308 pthread_mutex_unlock(&cache->mutex); 309 310 /* We increment refcount before handing it to the caller */ 311 anv_shader_bin_ref(bin); 312 313 return bin; 314 } else { 315 /* In this case, we're not caching it so the caller owns it entirely */ 316 return anv_shader_bin_create(cache->device, key_data, key_size, 317 kernel_data, kernel_size, 318 prog_data, prog_data_size, 319 prog_data->param, bind_map); 320 } 321} 322 323struct cache_header { 324 uint32_t header_size; 325 uint32_t header_version; 326 uint32_t vendor_id; 327 uint32_t device_id; 328 uint8_t uuid[VK_UUID_SIZE]; 329}; 330 331static void 332anv_pipeline_cache_load(struct anv_pipeline_cache *cache, 333 const void *data, size_t size) 334{ 335 struct anv_device *device = cache->device; 336 struct anv_physical_device *pdevice = &device->instance->physicalDevice; 337 struct cache_header header; 338 339 if (cache->cache == NULL) 340 return; 341 342 if (size < sizeof(header)) 343 return; 344 memcpy(&header, data, sizeof(header)); 345 if (header.header_size < sizeof(header)) 346 return; 347 if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE) 348 return; 349 if (header.vendor_id != 0x8086) 350 return; 351 if (header.device_id != device->chipset_id) 352 return; 353 if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0) 354 return; 355 356 const void *end = data + size; 357 const void *p = data + header.header_size; 358 359 /* Count is the total number of valid entries */ 360 uint32_t count; 361 if (p + sizeof(count) >= end) 362 return; 363 memcpy(&count, p, sizeof(count)); 364 p += align_u32(sizeof(count), 8); 365 366 for (uint32_t i = 0; i < count; i++) { 367 struct anv_shader_bin bin; 368 if (p + sizeof(bin) > end) 369 break; 370 memcpy(&bin, p, sizeof(bin)); 371 p += align_u32(sizeof(struct anv_shader_bin), 8); 372 373 const struct brw_stage_prog_data *prog_data = p; 374 p += align_u32(bin.prog_data_size, 8); 375 if (p > end) 376 break; 377 378 uint32_t param_size = prog_data->nr_params * sizeof(void *); 379 const void *prog_data_param = p; 380 p += align_u32(param_size, 8); 381 382 struct anv_shader_bin_key key; 383 if (p + sizeof(key) > end) 384 break; 385 memcpy(&key, p, sizeof(key)); 386 const void *key_data = p + sizeof(key); 387 p += align_u32(sizeof(key) + key.size, 8); 388 389 /* We're going to memcpy this so getting rid of const is fine */ 390 struct anv_pipeline_binding *bindings = (void *)p; 391 p += align_u32((bin.bind_map.surface_count + bin.bind_map.sampler_count) * 392 sizeof(struct anv_pipeline_binding), 8); 393 bin.bind_map.surface_to_descriptor = bindings; 394 bin.bind_map.sampler_to_descriptor = bindings + bin.bind_map.surface_count; 395 396 const void *kernel_data = p; 397 p += align_u32(bin.kernel_size, 8); 398 399 if (p > end) 400 break; 401 402 anv_pipeline_cache_add_shader(cache, key_data, key.size, 403 kernel_data, bin.kernel_size, 404 prog_data, bin.prog_data_size, 405 prog_data_param, &bin.bind_map); 406 } 407} 408 409static bool 410pipeline_cache_enabled() 411{ 412 static int enabled = -1; 413 if (enabled < 0) 414 enabled = env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true); 415 return enabled; 416} 417 418VkResult anv_CreatePipelineCache( 419 VkDevice _device, 420 const VkPipelineCacheCreateInfo* pCreateInfo, 421 const VkAllocationCallbacks* pAllocator, 422 VkPipelineCache* pPipelineCache) 423{ 424 ANV_FROM_HANDLE(anv_device, device, _device); 425 struct anv_pipeline_cache *cache; 426 427 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO); 428 assert(pCreateInfo->flags == 0); 429 430 cache = vk_alloc2(&device->alloc, pAllocator, 431 sizeof(*cache), 8, 432 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 433 if (cache == NULL) 434 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 435 436 anv_pipeline_cache_init(cache, device, pipeline_cache_enabled()); 437 438 if (pCreateInfo->initialDataSize > 0) 439 anv_pipeline_cache_load(cache, 440 pCreateInfo->pInitialData, 441 pCreateInfo->initialDataSize); 442 443 *pPipelineCache = anv_pipeline_cache_to_handle(cache); 444 445 return VK_SUCCESS; 446} 447 448void anv_DestroyPipelineCache( 449 VkDevice _device, 450 VkPipelineCache _cache, 451 const VkAllocationCallbacks* pAllocator) 452{ 453 ANV_FROM_HANDLE(anv_device, device, _device); 454 ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); 455 456 if (!cache) 457 return; 458 459 anv_pipeline_cache_finish(cache); 460 461 vk_free2(&device->alloc, pAllocator, cache); 462} 463 464VkResult anv_GetPipelineCacheData( 465 VkDevice _device, 466 VkPipelineCache _cache, 467 size_t* pDataSize, 468 void* pData) 469{ 470 ANV_FROM_HANDLE(anv_device, device, _device); 471 ANV_FROM_HANDLE(anv_pipeline_cache, cache, _cache); 472 struct anv_physical_device *pdevice = &device->instance->physicalDevice; 473 struct cache_header *header; 474 475 if (pData == NULL) { 476 size_t size = align_u32(sizeof(*header), 8) + 477 align_u32(sizeof(uint32_t), 8); 478 479 if (cache->cache) { 480 struct hash_entry *entry; 481 hash_table_foreach(cache->cache, entry) 482 size += anv_shader_bin_data_size(entry->data); 483 } 484 485 *pDataSize = size; 486 return VK_SUCCESS; 487 } 488 489 if (*pDataSize < sizeof(*header)) { 490 *pDataSize = 0; 491 return VK_INCOMPLETE; 492 } 493 494 void *p = pData, *end = pData + *pDataSize; 495 header = p; 496 header->header_size = sizeof(*header); 497 header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE; 498 header->vendor_id = 0x8086; 499 header->device_id = device->chipset_id; 500 memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE); 501 p += align_u32(header->header_size, 8); 502 503 uint32_t *count = p; 504 p += align_u32(sizeof(*count), 8); 505 *count = 0; 506 507 VkResult result = VK_SUCCESS; 508 if (cache->cache) { 509 struct hash_entry *entry; 510 hash_table_foreach(cache->cache, entry) { 511 struct anv_shader_bin *shader = entry->data; 512 size_t data_size = anv_shader_bin_data_size(entry->data); 513 if (p + data_size > end) { 514 result = VK_INCOMPLETE; 515 break; 516 } 517 518 anv_shader_bin_write_data(shader, p); 519 p += data_size; 520 521 (*count)++; 522 } 523 } 524 525 *pDataSize = p - pData; 526 527 return result; 528} 529 530VkResult anv_MergePipelineCaches( 531 VkDevice _device, 532 VkPipelineCache destCache, 533 uint32_t srcCacheCount, 534 const VkPipelineCache* pSrcCaches) 535{ 536 ANV_FROM_HANDLE(anv_pipeline_cache, dst, destCache); 537 538 if (!dst->cache) 539 return VK_SUCCESS; 540 541 for (uint32_t i = 0; i < srcCacheCount; i++) { 542 ANV_FROM_HANDLE(anv_pipeline_cache, src, pSrcCaches[i]); 543 if (!src->cache) 544 continue; 545 546 struct hash_entry *entry; 547 hash_table_foreach(src->cache, entry) { 548 struct anv_shader_bin *bin = entry->data; 549 if (_mesa_hash_table_search(dst->cache, bin->key)) 550 continue; 551 552 anv_shader_bin_ref(bin); 553 _mesa_hash_table_insert(dst->cache, bin->key, bin); 554 } 555 } 556 557 return VK_SUCCESS; 558} 559