1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsovScript.h"
18
19#include "bcinfo/MetadataExtractor.h"
20#include "module.h"
21#include "rsContext.h"
22#include "rsDefines.h"
23#include "rsType.h"
24#include "rsUtils.h"
25#include "rsovAllocation.h"
26#include "rsovContext.h"
27#include "rsovCore.h"
28#include "spirit/file_utils.h"
29#include "spirit/instructions.h"
30#include "spirit/module.h"
31
32#include <fstream>
33#include <functional>
34#include <iostream>
35#include <sstream>
36#include <string>
37
extern "C" {
// Host pointer to a script's global-variable buffer. It is assigned in the
// RSoVScript constructor (from mGlobals->getHostPtr()) whenever a script
// reports a non-zero global size, so it refers to the buffer of the most
// recently constructed such script.
// NOTE(review): the C linkage presumably exists so that separately compiled
// code can refer to this symbol by its unmangled name -- confirm with the
// consumers of this symbol.
char*  __GPUBlock = nullptr;
}
41
42namespace android {
43namespace renderscript {
44namespace rsov {
45
46namespace {
// Per-allocation type metadata that is copied into a device-visible buffer
// (see MarshalTypeInfo()).
// Layout of this struct has to be the same as the struct in generated SPIR-V
// TODO: generate this file from some spec that is shared with the compiler
struct rsovTypeInfo {
  uint32_t element_size;  // TODO: not implemented
  uint32_t x_size;  // filled from Type::getDimX()
  uint32_t y_size;  // filled from Type::getDimY()
  uint32_t z_size;  // filled from Type::getDimZ()
};
55
// Path of the executable launched by compileBitcode(); it is used both as the
// program to execute and as argv[0].
const char *COMPILER_EXE_PATH = "/system/bin/rs2spirv";
57
58std::vector<const char *> setCompilerArgs(const char *bcFileName,
59                                          const char *cacheDir) {
60  rsAssert(bcFileName && cacheDir);
61
62  std::vector<const char *> args;
63
64  args.push_back(COMPILER_EXE_PATH);
65  args.push_back(bcFileName);
66
67  args.push_back(nullptr);
68  return args;
69}
70
// Writes `size` bytes starting at `bytes` to `filename` in binary mode.
// Failures to open or write the file are not reported to the caller.
void writeBytes(const char *filename, const char *bytes, size_t size) {
  std::ofstream out;
  out.open(filename, std::ios::binary);
  out.write(bytes, size);
  out.close();
}
76
77std::vector<uint32_t> readWords(const char *filename) {
78  std::ifstream ifs(filename, std::ios::binary);
79
80  ifs.seekg(0, ifs.end);
81  int length = ifs.tellg();
82  ifs.seekg(0, ifs.beg);
83
84  rsAssert(((length & 3) == 0) && "File size expected to be multiples of 4");
85
86  std::vector<uint32_t> spvWords(length / sizeof(uint32_t));
87
88  ifs.read((char *)(spvWords.data()), length);
89
90  ifs.close();
91
92  return spvWords;
93}
94
95std::vector<uint32_t> compileBitcode(const char *resName, const char *cacheDir,
96                                     const char *bitcode, size_t bitcodeSize,
97                                     std::vector<uint8_t> &modifiedBitcode) {
98  rsAssert(bitcode && bitcodeSize);
99
100  // TODO: Cache the generated code
101
102  std::string bcFileName(cacheDir);
103  bcFileName.append("/");
104  bcFileName.append(resName);
105  bcFileName.append(".bc");
106
107  writeBytes(bcFileName.c_str(), bitcode, bitcodeSize);
108
109  auto args = setCompilerArgs(bcFileName.c_str(), cacheDir);
110
111  if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
112    ALOGE("compiler command line failed");
113    return std::vector<uint32_t>();
114  }
115
116  ALOGV("compiler command line succeeded");
117
118  std::string spvFileName(cacheDir);
119  spvFileName.append("/");
120  spvFileName.append(resName);
121  spvFileName.append(".spv");
122
123  std::string modifiedBCFileName(cacheDir);
124  modifiedBCFileName.append("/").append(resName).append("_modified.bc");
125
126  args.pop_back();
127  args.push_back("-bc");
128  args.push_back(modifiedBCFileName.c_str());
129  args.push_back(nullptr);
130
131  if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
132    ALOGE("compiler command line to create modified bitcode failed");
133    return std::vector<uint32_t>();
134  }
135
136  modifiedBitcode = android::spirit::readFile<uint8_t>(modifiedBCFileName);
137
138  return readWords(spvFileName.c_str());
139}
140
// Splits `str` at every `delimiter` and appends each token, parsed as a
// decimal unsigned integer, to `*offsets`.
//
// An empty `str` appends nothing. Tokens are parsed with std::stoul so that
// the full uint32_t range is accepted (std::stoi would reject anything above
// INT_MAX). A token that is not a number still makes std::stoul report
// std::invalid_argument.
void splitOffsets(const std::string &str, char delimiter,
                  std::vector<uint32_t> *offsets) {
  std::stringstream ss(str);
  std::string tok;

  while (std::getline(ss, tok, delimiter)) {
    const uint32_t offset = static_cast<uint32_t>(std::stoul(tok));
    offsets->push_back(offset);
  }
}
151
152}  // anonymous namespace
153
154bool RSoVScript::isScriptCpuBacked(const Script *s) {
155  return s->mHal.info.mVersionMinor == CPU_SCRIPT_MAGIC_NUMBER;
156}
157
158void RSoVScript::initScriptOnCpu(Script *s, RsdCpuReference::CpuScript *cs) {
159  s->mHal.drv = cs;
160  s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
161  s->mHal.info.mVersionMinor = CPU_SCRIPT_MAGIC_NUMBER;
162}
163
164void RSoVScript::initScriptOnRSoV(Script *s, RSoVScript *rsovScript) {
165  s->mHal.drv = rsovScript;
166  s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
167  s->mHal.info.mVersionMinor = 0;
168}
169
170using android::spirit::Module;
171using android::spirit::Deserialize;
172
173RSoVScript::RSoVScript(RSoVContext *context, std::vector<uint32_t> &&spvWords,
174                       bcinfo::MetadataExtractor *ME,
175                       std::map<std::string, int> *GA2ID)
176    : mRSoV(context),
177      mDevice(context->getDevice()),
178      mSPIRVWords(std::move(spvWords)),
179      mME(ME),
180      mGlobalAllocationMetadata(nullptr),
181      mGAMapping(GA2ID) {
182  std::unique_ptr<Module> module(Deserialize<Module>(mSPIRVWords));
183
184  const std::string &strGlobalSize =
185      module->findStringOfPrefix(".rsov.GlobalSize:");
186  if (strGlobalSize.empty()) {
187    mGlobals.reset(new RSoVBuffer(context, 4));
188    return;
189  }
190  const size_t colonPosSize = strGlobalSize.find(':');
191  const std::string &strVal = strGlobalSize.substr(colonPosSize + 1);
192  const uint64_t globalSize = static_cast<uint64_t>(std::stol(strVal));
193  if (globalSize > 0) {
194    mGlobals.reset(new RSoVBuffer(context, globalSize));
195    __GPUBlock = mGlobals->getHostPtr();
196    const std::string &offsetStr =
197      module->findStringOfPrefix(".rsov.ExportedVars:");
198    const size_t colonPos = offsetStr.find(':');
199    splitOffsets(offsetStr.substr(colonPos + 1), ';', &mExportedVarOffsets);
200  }
201}
202
// Releases the objects this script owns through raw pointers: the companion
// CPU script and the bitcode metadata extractor handed to the constructor.
RSoVScript::~RSoVScript() {
  delete mCpuScript;
  delete mME;
}
207
// Intentionally a no-op. In rsovScriptInit() the companion CPU script's
// populateScript() is called on the Script instead.
void RSoVScript::populateScript(Script *) {
}
210
211void RSoVScript::invokeFunction(uint32_t slot, const void *params,
212                                size_t paramLength) {
213  getCpuScript()->invokeFunction(slot, params, paramLength);
214}
215
216int RSoVScript::invokeRoot() { return getCpuScript()->invokeRoot(); }
217
218void RSoVScript::invokeForEach(uint32_t slot, const Allocation **ains,
219                               uint32_t inLen, Allocation *aout,
220                               const void *usr, uint32_t usrLen,
221                               const RsScriptCall *sc) {
222  // TODO: Handle kernel without input Allocation
223  rsAssert(ains);
224  std::vector<RSoVAllocation *> inputAllocations(inLen);
225  for (uint32_t i = 0; i < inLen; ++i) {
226    inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
227  }
228  RSoVAllocation *outputAllocation =
229      static_cast<RSoVAllocation *>(aout->mHal.drv);
230  runForEach(slot, inLen, inputAllocations, outputAllocation);
231}
232
233void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
234                              uint32_t inLen, Allocation *aout,
235                              const RsScriptCall *sc) {
236  getCpuScript()->invokeReduce(slot, ains, inLen, aout, sc);
237}
238
239void RSoVScript::invokeInit() {
240  getCpuScript()->invokeInit();
241}
242
// Not implemented yet: currently does nothing.
void RSoVScript::invokeFreeChildren() {
  // TODO: implement this
}
246
247void RSoVScript::setGlobalVar(uint32_t slot, const void *data,
248                              size_t dataLength) {
249  char *basePtr = mGlobals->getHostPtr();
250  rsAssert(basePtr != nullptr);
251  const uint32_t offset = GetExportedVarOffset(slot);
252  memcpy(basePtr + offset, data, dataLength);
253}
254
255void RSoVScript::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
256  const char *basePtr = mGlobals->getHostPtr();
257  rsAssert(basePtr != nullptr);
258  const uint32_t offset = GetExportedVarOffset(slot);
259  memcpy(data, basePtr + offset, dataLength);
260}
261
262void RSoVScript::setGlobalVarWithElemDims(uint32_t slot, const void *data,
263                                          size_t dataLength, const Element *elem,
264                                          const uint32_t *dims,
265                                          size_t dimLength) {
266  char *basePtr = mGlobals->getHostPtr();
267  rsAssert(basePtr != nullptr);
268  const uint32_t offset = GetExportedVarOffset(slot);
269  char *destPtr = basePtr + offset;
270
271  // We want to look at dimension in terms of integer components,
272  // but dimLength is given in terms of bytes.
273  dimLength /= sizeof(int);
274
275  // Only a single dimension is currently supported.
276  rsAssert(dimLength == 1);
277  if (dimLength != 1) {
278    return;
279  }
280
281  // First do the increment loop.
282  size_t stride = elem->getSizeBytes();
283  const char *cVal = reinterpret_cast<const char *>(data);
284  for (uint32_t i = 0; i < dims[0]; i++) {
285    elem->incRefs(cVal);
286    cVal += stride;
287  }
288
289  // Decrement loop comes after (to prevent race conditions).
290  char *oldVal = destPtr;
291  for (uint32_t i = 0; i < dims[0]; i++) {
292    elem->decRefs(oldVal);
293    oldVal += stride;
294  }
295
296  memcpy(destPtr, data, dataLength);
297}
298
// Not implemented yet: only emits a verbose log line; `slot` and `data` are
// ignored.
void RSoVScript::setGlobalBind(uint32_t slot, Allocation *data) {
  ALOGV("%s succeeded.", __FUNCTION__);
  // TODO: implement this
}
303
// Forwards the object assignment to the companion CPU script.
// NOTE(review): this accesses mCpuScript directly whereas the invoke*
// methods go through getCpuScript() -- confirm the two are equivalent.
void RSoVScript::setGlobalObj(uint32_t slot, ObjectBase *obj) {
  mCpuScript->setGlobalObj(slot, obj);
  ALOGV("%s succeeded.", __FUNCTION__);
}
308
// Not implemented yet: always returns nullptr, whatever `ptr` is.
Allocation *RSoVScript::getAllocationForPointer(const void *ptr) const {
  // TODO: implement this
  return nullptr;
}
313
// Not implemented yet: always reports zero global entries.
int RSoVScript::getGlobalEntries() const {
  // TODO: implement this
  return 0;
}
318
// Not implemented yet: always returns nullptr, whatever `i` is.
const char *RSoVScript::getGlobalName(int i) const {
  // TODO: implement this
  return nullptr;
}
323
// Not implemented yet: always returns nullptr, whatever `i` is.
const void *RSoVScript::getGlobalAddress(int i) const {
  // TODO: implement this
  return nullptr;
}
328
// Not implemented yet: always returns 0, whatever `i` is.
size_t RSoVScript::getGlobalSize(int i) const {
  // TODO: implement this
  return 0;
}
333
// Not implemented yet: always returns 0, whatever `i` is.
uint32_t RSoVScript::getGlobalProperties(int i) const {
  // TODO: implement this
  return 0;
}
338
339void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
340  // TODO: kernels with zero output allocations
341  std::vector<VkDescriptorSetLayoutBinding> bindings(
342      inLen + 3, {
343                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
344                     .descriptorCount = 1,
345                     .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
346                 });
347  for (uint32_t i = 0; i < inLen + 3; i++) {
348    bindings[i].binding = i;
349  }
350
351  VkDescriptorSetLayoutCreateInfo descriptor_layout = {
352      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
353      .pNext = nullptr,
354      .flags = 0,
355      .bindingCount = inLen + 3,
356      .pBindings = bindings.data(),
357  };
358
359  VkResult res;
360
361  mDescLayout.resize(NUM_DESCRIPTOR_SETS);
362  res = vkCreateDescriptorSetLayout(mDevice, &descriptor_layout, NULL,
363                                    mDescLayout.data());
364  rsAssert(res == VK_SUCCESS);
365
366  /* Now use the descriptor layout to create a pipeline layout */
367  VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
368      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
369      .pNext = nullptr,
370      .pushConstantRangeCount = 0,
371      .pPushConstantRanges = nullptr,
372      .setLayoutCount = NUM_DESCRIPTOR_SETS,
373      .pSetLayouts = mDescLayout.data(),
374  };
375
376  res = vkCreatePipelineLayout(mDevice, &pPipelineLayoutCreateInfo, NULL,
377                               &mPipelineLayout);
378  rsAssert(res == VK_SUCCESS);
379}
380
381void RSoVScript::InitShader(uint32_t slot) {
382  VkResult res;
383
384  mShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
385  mShaderStage.pNext = nullptr;
386  mShaderStage.pSpecializationInfo = nullptr;
387  mShaderStage.flags = 0;
388  mShaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
389
390  const char **RSKernelNames = mME->getExportForEachNameList();
391  size_t RSKernelNum = mME->getExportForEachSignatureCount();
392  rsAssert(slot < RSKernelNum);
393  rsAssert(RSKernelNames);
394  rsAssert(RSKernelNames[slot]);
395  // ALOGV("slot = %d kernel name = %s", slot, RSKernelNames[slot]);
396  std::string entryName("entry_");
397  entryName.append(RSKernelNames[slot]);
398
399  mShaderStage.pName = strndup(entryName.c_str(), entryName.size());
400
401  VkShaderModuleCreateInfo moduleCreateInfo = {
402      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
403      .pNext = nullptr,
404      .flags = 0,
405      .codeSize = mSPIRVWords.size() * sizeof(unsigned int),
406      .pCode = mSPIRVWords.data(),
407  };
408  res = vkCreateShaderModule(mDevice, &moduleCreateInfo, NULL,
409                             &mShaderStage.module);
410  rsAssert(res == VK_SUCCESS);
411}
412
413void RSoVScript::InitDescriptorPool(uint32_t inLen) {
414  VkResult res;
415  // 1 global buffer, 1 global allocation metadata buffer, 1 output allocation,
416  // and inLen input allocations
417  VkDescriptorPoolSize type_count[] = {{
418      .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = inLen + 3,
419  }};
420
421  VkDescriptorPoolCreateInfo descriptor_pool = {
422      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
423      .pNext = nullptr,
424      .maxSets = 1,
425      .poolSizeCount = NELEM(type_count),
426      .pPoolSizes = type_count,
427  };
428
429  res = vkCreateDescriptorPool(mDevice, &descriptor_pool, NULL, &mDescPool);
430  rsAssert(res == VK_SUCCESS);
431}
432
433// Iterate through a list of global allocations that are used inside the module
434// and marshal their type information to a dedicated Vulkan Buffer
435void RSoVScript::MarshalTypeInfo(void) {
436  // Marshal global allocation metadata to the device
437  auto *cs = getCpuScript();
438  int nr_globals = mGAMapping->size();
439  if (mGlobalAllocationMetadata == nullptr) {
440    mGlobalAllocationMetadata.reset(
441        new RSoVBuffer(mRSoV, sizeof(struct rsovTypeInfo) * nr_globals));
442  }
443  struct rsovTypeInfo *mappedMetadata =
444      (struct rsovTypeInfo *)mGlobalAllocationMetadata->getHostPtr();
445  for (int i = 0; i < nr_globals; ++i) {
446    if (getGlobalRsType(cs->getGlobalProperties(i)) ==
447        RsDataType::RS_TYPE_ALLOCATION) {
448      ALOGV("global variable %d is an allocation!", i);
449      const void *host_buf;
450      cs->getGlobalVar(i, (void *)&host_buf, sizeof(host_buf));
451      if (!host_buf) continue;
452      const android::renderscript::Allocation *GA =
453          static_cast<const android::renderscript::Allocation *>(host_buf);
454      const android::renderscript::Type *T = GA->getType();
455      rsAssert(T);
456
457      auto global_it = mGAMapping->find(cs->getGlobalName(i));
458      rsAssert(global_it != (*mGAMapping).end());
459      int id = global_it->second;
460      ALOGV("global allocation %s is mapped to ID %d", cs->getGlobalName(i),
461            id);
462      // TODO: marshal other properties
463      mappedMetadata[id].x_size = T->getDimX();
464      mappedMetadata[id].y_size = T->getDimY();
465      mappedMetadata[id].z_size = T->getDimZ();
466    }
467  }
468}
469
470void RSoVScript::InitDescriptorSet(
471    const std::vector<RSoVAllocation *> &inputAllocations,
472    RSoVAllocation *outputAllocation) {
473  VkResult res;
474
475  VkDescriptorSetAllocateInfo alloc_info = {
476      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
477      .pNext = NULL,
478      .descriptorPool = mDescPool,
479      .descriptorSetCount = NUM_DESCRIPTOR_SETS,
480      .pSetLayouts = mDescLayout.data(),
481  };
482
483  mDescSet.resize(NUM_DESCRIPTOR_SETS);
484  res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
485  rsAssert(res == VK_SUCCESS);
486
487  std::vector<VkWriteDescriptorSet> writes{
488      // Global variables
489      {
490          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
491          .dstSet = mDescSet[0],
492          .dstBinding = 0,
493          .dstArrayElement = 0,
494          .descriptorCount = 1,
495          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
496          .pBufferInfo = mGlobals->getBufferInfo(),
497      },
498      // Metadata for global Allocations
499      {
500          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
501          .dstSet = mDescSet[0],
502          .dstBinding = 1,
503          .dstArrayElement = 0,
504          .descriptorCount = 1,
505          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
506          .pBufferInfo = mGlobalAllocationMetadata->getBufferInfo(),
507      },
508      // Output Allocation
509      {
510          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
511          .dstSet = mDescSet[0],
512          .dstBinding = 2,
513          .dstArrayElement = 0,
514          .descriptorCount = 1,
515          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
516          .pBufferInfo = outputAllocation->getBuffer()->getBufferInfo(),
517      },
518  };
519
520  // Input Allocations
521  for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
522    writes.push_back({
523        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
524        .dstSet = mDescSet[0],
525        .dstBinding = 3 + i,  // input allocations start from binding #3
526        .dstArrayElement = 0,
527        .descriptorCount = 1,
528        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
529        .pBufferInfo = inputAllocations[i]->getBuffer()->getBufferInfo(),
530    });
531  }
532
533  vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
534}
535
536void RSoVScript::InitPipeline() {
537  // DEPENDS on mShaderStage, i.e., InitShader()
538
539  VkResult res;
540
541  VkComputePipelineCreateInfo pipeline_info = {
542      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
543      .pNext = nullptr,
544      .layout = mPipelineLayout,
545      .basePipelineHandle = VK_NULL_HANDLE,
546      .basePipelineIndex = 0,
547      .flags = 0,
548      .stage = mShaderStage,
549  };
550  res = vkCreateComputePipelines(mDevice, VK_NULL_HANDLE, 1, &pipeline_info,
551                                 NULL, &mComputePipeline);
552  rsAssert(res == VK_SUCCESS);
553}
554
555void RSoVScript::runForEach(
556    uint32_t slot, uint32_t inLen,
557    const std::vector<RSoVAllocation *> &inputAllocations,
558    RSoVAllocation *outputAllocation) {
559  VkResult res;
560
561  InitShader(slot);
562  InitDescriptorPool(inLen);
563  InitDescriptorAndPipelineLayouts(inLen);
564  MarshalTypeInfo();
565  InitDescriptorSet(inputAllocations, outputAllocation);
566  // InitPipelineCache();
567  InitPipeline();
568
569  VkCommandBuffer cmd;
570
571  VkCommandBufferAllocateInfo cmd_info = {
572      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
573      .pNext = nullptr,
574      .commandPool = mRSoV->getCmdPool(),
575      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
576      .commandBufferCount = 1,
577  };
578
579  res = vkAllocateCommandBuffers(mDevice, &cmd_info, &cmd);
580  rsAssert(res == VK_SUCCESS);
581
582  VkCommandBufferBeginInfo cmd_buf_info = {
583      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
584      .pNext = nullptr,
585      .flags = 0,
586      .pInheritanceInfo = nullptr,
587  };
588
589  res = vkBeginCommandBuffer(cmd, &cmd_buf_info);
590  rsAssert(res == VK_SUCCESS);
591
592  vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mComputePipeline);
593
594  vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
595                          0, mDescSet.size(), mDescSet.data(), 0, nullptr);
596  // Assuming all input allocations are of the same dimensionality
597  const uint32_t width = inputAllocations[0]->getWidth();
598  const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
599  const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
600  vkCmdDispatch(cmd, width, height, depth);
601
602  res = vkEndCommandBuffer(cmd);
603  assert(res == VK_SUCCESS);
604
605  VkSubmitInfo submit_info = {
606      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
607      .commandBufferCount = 1,
608      .pCommandBuffers = &cmd,
609  };
610
611  VkFence fence;
612
613  VkFenceCreateInfo fenceInfo = {
614      .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
615      .pNext = nullptr,
616      .flags = 0,
617  };
618
619  vkCreateFence(mDevice, &fenceInfo, NULL, &fence);
620
621  vkQueueSubmit(mRSoV->getQueue(), 1, &submit_info, fence);
622
623  // Make sure command buffer is finished
624  do {
625    res = vkWaitForFences(mDevice, 1, &fence, VK_TRUE, 100000);
626  } while (res == VK_TIMEOUT);
627
628  rsAssert(res == VK_SUCCESS);
629
630  vkDestroyFence(mDevice, fence, NULL);
631
632  // TODO: shall we reuse command buffers?
633  VkCommandBuffer cmd_bufs[] = {cmd};
634  vkFreeCommandBuffers(mDevice, mRSoV->getCmdPool(), 1, cmd_bufs);
635
636  vkDestroyPipeline(mDevice, mComputePipeline, nullptr);
637  for (int i = 0; i < NUM_DESCRIPTOR_SETS; i++)
638    vkDestroyDescriptorSetLayout(mDevice, mDescLayout[i], nullptr);
639  vkDestroyPipelineLayout(mDevice, mPipelineLayout, nullptr);
640  vkFreeDescriptorSets(mDevice, mDescPool, NUM_DESCRIPTOR_SETS,
641                       mDescSet.data());
642  vkDestroyDescriptorPool(mDevice, mDescPool, nullptr);
643  free((void *)mShaderStage.pName);
644  vkDestroyShaderModule(mDevice, mShaderStage.module, nullptr);
645}
646
647}  // namespace rsov
648}  // namespace renderscript
649}  // namespace android
650
651using android::renderscript::Allocation;
652using android::renderscript::Context;
653using android::renderscript::Element;
654using android::renderscript::ObjectBase;
655using android::renderscript::RsdCpuReference;
656using android::renderscript::Script;
657using android::renderscript::ScriptC;
658using android::renderscript::rs_script;
659using android::renderscript::rsov::RSoVContext;
660using android::renderscript::rsov::RSoVScript;
661using android::renderscript::rsov::compileBitcode;
662
663namespace {
664// A class to parse global allocation metadata; essentially a subset of JSON
665// it would look like {"__RSoV_GA": {"g":42}}
666// The result is stored in a refence to a map<string, int>
667class ParseMD {
668 public:
669  ParseMD(std::string s, std::map<std::string, int> &map)
670      : mString(s), mMapping(map) {}
671
672  bool parse(void) {
673    // remove outermose two pairs of braces
674    mString = removeBraces(mString);
675    if (mString.empty()) {
676      return false;
677    }
678
679    mString = removeBraces(mString);
680    if (mString.empty()) {
681      return false;
682    }
683
684    // Now we are supposed to have a comma-separated list that looks like:
685    // "foo":42, "bar":56
686    split<','>(mString, [&](auto s) {
687      split<':'>(s, nullptr, [&](auto pair) {
688        rsAssert(pair.size() == 2);
689        std::string ga_name = removeQuotes(pair[0]);
690        int id = atoi(pair[1].c_str());
691        ALOGV("ParseMD: global allocation %s has ID %d", ga_name.c_str(), id);
692        mMapping[ga_name] = id;
693      });
694    });
695    return true;
696  }
697
698 private:
699  template <char L, char R>
700  static std::string removeMatching(const std::string &s) {
701    auto leftCBrace = s.find(L);
702    if (leftCBrace == std::string::npos) {
703      return "";
704    }
705    leftCBrace++;
706    return s.substr(leftCBrace, s.rfind(R) - leftCBrace);
707  }
708
709  static std::string removeBraces(const std::string &s) {
710    return removeMatching<'{', '}'>(s);
711  }
712
713  static std::string removeQuotes(const std::string &s) {
714    return removeMatching<'"', '"'>(s);
715  }
716
717  // Splitting a string, and call "each" and/or "all" with individal elements
718  // and a vector of all tokenized elements
719  template <char D>
720  static void split(const std::string &s,
721                    std::function<void(const std::string &)> each,
722                    std::function<void(const std::vector<const std::string> &)>
723                        all = nullptr) {
724    std::vector<const std::string> result;
725    for (std::string::size_type pos = 0; pos < s.size(); pos++) {
726      std::string::size_type begin = pos;
727
728      while (s[pos] != D && pos <= s.size()) pos++;
729      std::string found = s.substr(begin, pos - begin);
730      if (each) each(found);
731      if (all) result.push_back(found);
732    }
733    if (all) all(result);
734  }
735
736  std::string mString;
737  std::map<std::string, int> &mMapping;
738};
739
740}  // namespace
741
742class ExtractRSoVMD : public android::spirit::DoNothingVisitor {
743 public:
744  ExtractRSoVMD() : mGAMapping(new std::map<std::string, int>) {}
745
746  void visit(android::spirit::StringInst *s) {
747    ALOGV("ExtractRSoVMD: string = %s", s->mOperand1.c_str());
748    std::map<std::string, int> mapping;
749    ParseMD p(s->mOperand1, mapping);
750    if (p.parse()) {
751      *mGAMapping = std::move(mapping);
752    }
753  }
754
755  std::map<std::string, int> *takeMapping(void) { return mGAMapping.release(); }
756
757 private:
758  std::unique_ptr<std::map<std::string, int> > mGAMapping;
759};
760
761bool rsovScriptInit(const Context *rsc, ScriptC *script, char const *resName,
762                    char const *cacheDir, uint8_t const *bitcode,
763                    size_t bitcodeSize, uint32_t flags) {
764  RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv);
765
766  std::unique_ptr<bcinfo::MetadataExtractor> bitcodeMetadata(
767      new bcinfo::MetadataExtractor((const char *)bitcode, bitcodeSize));
768  if (!bitcodeMetadata || !bitcodeMetadata->extract()) {
769    ALOGE("Could not extract metadata from bitcode from %s", resName);
770    return false;
771  }
772
773  std::vector<uint8_t> modifiedBitcode;
774  auto spvWords =
775    compileBitcode(resName, cacheDir, (const char *)bitcode, bitcodeSize, modifiedBitcode);
776  if (!spvWords.empty() && !modifiedBitcode.empty()) {
777    // Extract compiler metadata on allocation->binding mapping
778    android::spirit::Module *module =
779        android::spirit::Deserialize<android::spirit::Module>(spvWords);
780    rsAssert(module);
781    ExtractRSoVMD ga_md;
782    module->accept(&ga_md);
783
784    RSoVScript *rsovScript =
785        new RSoVScript(hal->mRSoV, std::move(spvWords),
786                       bitcodeMetadata.release(), ga_md.takeMapping());
787    if (rsovScript) {
788      std::string modifiedResName(resName);
789      modifiedResName.append("_modified");
790      RsdCpuReference::CpuScript *cs = hal->mCpuRef->createScript(
791          script, modifiedResName.c_str(), cacheDir, modifiedBitcode.data(),
792          modifiedBitcode.size(), flags);
793      if (cs != nullptr) {
794        cs->populateScript(script);
795        rsovScript->setCpuScript(cs);
796        RSoVScript::initScriptOnRSoV(script, rsovScript);
797        return true;
798      }
799    }
800  }
801
802  ALOGD("Failed creating an RSoV script for %s", resName);
803  // Fall back to CPU driver instead
804
805  std::unique_ptr<RsdCpuReference::CpuScript> cs(hal->mCpuRef->createScript(
806      script, resName, cacheDir, bitcode, bitcodeSize, flags));
807  if (cs == nullptr) {
808    ALOGE("Failed creating a CPU script %p for %s (%p)", cs.get(), resName,
809          script);
810    return false;
811  }
812  cs->populateScript(script);
813
814  RSoVScript::initScriptOnCpu(script, cs.release());
815
816  return true;
817}
818
819bool rsovInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
820                       Element *e) {
821  RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
822  RsdCpuReference::CpuScript *cs = dc->mCpuRef->createIntrinsic(s, iid, e);
823  if (cs == nullptr) {
824    return false;
825  }
826  s->mHal.drv = cs;
827  cs->populateScript(s);
828  return true;
829}
830
831void rsovScriptInvokeForEach(const Context *rsc, Script *s, uint32_t slot,
832                             const Allocation *ain, Allocation *aout,
833                             const void *usr, size_t usrLen,
834                             const RsScriptCall *sc) {
835  if (ain == nullptr) {
836    rsovScriptInvokeForEachMulti(rsc, s, slot, nullptr, 0, aout, usr, usrLen,
837                                 sc);
838  } else {
839    const Allocation *ains[1] = {ain};
840
841    rsovScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, sc);
842  }
843}
844
845void rsovScriptInvokeForEachMulti(const Context *rsc, Script *s, uint32_t slot,
846                                  const Allocation **ains, size_t inLen,
847                                  Allocation *aout, const void *usr,
848                                  size_t usrLen, const RsScriptCall *sc) {
849  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
850  cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
851}
852
853int rsovScriptInvokeRoot(const Context *dc, Script *s) {
854  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
855  return cs->invokeRoot();
856}
857
858void rsovScriptInvokeInit(const Context *dc, Script *s) {
859  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
860  cs->invokeInit();
861}
862
863void rsovScriptInvokeFreeChildren(const Context *dc, Script *s) {
864  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
865  cs->invokeFreeChildren();
866}
867
868void rsovScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot,
869                              const void *params, size_t paramLength) {
870  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
871  cs->invokeFunction(slot, params, paramLength);
872}
873
874void rsovScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot,
875                            const Allocation **ains, size_t inLen,
876                            Allocation *aout, const RsScriptCall *sc) {
877  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
878  cs->invokeReduce(slot, ains, inLen, aout, sc);
879}
880
881void rsovScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
882                            void *data, size_t dataLength) {
883  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
884  cs->setGlobalVar(slot, data, dataLength);
885}
886
887void rsovScriptGetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
888                            void *data, size_t dataLength) {
889  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
890  cs->getGlobalVar(slot, data, dataLength);
891}
892
893void rsovScriptSetGlobalVarWithElemDims(
894    const Context *dc, const Script *s, uint32_t slot, void *data,
895    size_t dataLength, const android::renderscript::Element *elem,
896    const uint32_t *dims, size_t dimLength) {
897  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
898  cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
899}
900
901void rsovScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot,
902                             Allocation *data) {
903  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
904  cs->setGlobalBind(slot, data);
905}
906
907void rsovScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot,
908                            ObjectBase *data) {
909  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
910  cs->setGlobalObj(slot, data);
911}
912
913void rsovScriptDestroy(const Context *dc, Script *s) {
914  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
915  delete cs;
916  s->mHal.drv = nullptr;
917}
918
919Allocation *rsovScriptGetAllocationForPointer(
920    const android::renderscript::Context *dc,
921    const android::renderscript::Script *sc, const void *ptr) {
922  RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
923  return cs->getAllocationForPointer(ptr);
924}
925
// Refreshes the cached rs_script handle `obj` so that it points at `script`.
// On LP64 builds the handle carries three additional fields, which are
// cleared.
void rsovScriptUpdateCachedObject(const Context *rsc, const Script *script,
                                  rs_script *obj) {
  obj->p = script;
#ifdef __LP64__
  obj->unused1 = nullptr;
  obj->unused2 = nullptr;
  obj->unused3 = nullptr;
#endif
}
935