1/*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "GrCCPathParser.h"
9
10#include "GrCaps.h"
11#include "GrGpuCommandBuffer.h"
12#include "GrOnFlushResourceProvider.h"
13#include "GrOpFlushState.h"
14#include "SkMathPriv.h"
15#include "SkPath.h"
16#include "SkPathPriv.h"
17#include "SkPoint.h"
18#include "ccpr/GrCCGeometry.h"
19
20using TriangleInstance = GrCCCoverageProcessor::TriangleInstance;
21using CubicInstance = GrCCCoverageProcessor::CubicInstance;
22
23GrCCPathParser::GrCCPathParser(int maxTotalPaths, int maxPathPoints, int numSkPoints,
24                               int numSkVerbs)
25        : fLocalDevPtsBuffer(maxPathPoints + 1)  // Overallocate by one point to accomodate for
26                                                 // overflow with Sk4f. (See parsePath.)
27        , fGeometry(numSkPoints, numSkVerbs)
28        , fPathsInfo(maxTotalPaths)
29        , fScissorSubBatches(maxTotalPaths)
30        , fTotalPrimitiveCounts{PrimitiveTallies(), PrimitiveTallies()} {
31    // Batches decide what to draw by looking where the previous one ended. Define initial batches
32    // that "end" at the beginning of the data. These will not be drawn, but will only be be read by
33    // the first actual batch.
34    fScissorSubBatches.push_back() = {PrimitiveTallies(), SkIRect::MakeEmpty()};
35    fCoverageCountBatches.push_back() = {PrimitiveTallies(), fScissorSubBatches.count()};
36}
37
38void GrCCPathParser::parsePath(const SkMatrix& m, const SkPath& path, SkRect* devBounds,
39                               SkRect* devBounds45) {
40    const SkPoint* pts = SkPathPriv::PointData(path);
41    int numPts = path.countPoints();
42    SkASSERT(numPts + 1 <= fLocalDevPtsBuffer.count());
43
44    if (!numPts) {
45        devBounds->setEmpty();
46        devBounds45->setEmpty();
47        this->parsePath(path, nullptr);
48        return;
49    }
50
51    // m45 transforms path points into "45 degree" device space. A bounding box in this space gives
52    // the circumscribing octagon's diagonals. We could use SK_ScalarRoot2Over2, but an orthonormal
53    // transform is not necessary as long as the shader uses the correct inverse.
54    SkMatrix m45;
55    m45.setSinCos(1, 1);
56    m45.preConcat(m);
57
58    // X,Y,T are two parallel view matrices that accumulate two bounding boxes as they map points:
59    // device-space bounds and "45 degree" device-space bounds (| 1 -1 | * devCoords).
60    //                                                          | 1  1 |
61    Sk4f X = Sk4f(m.getScaleX(), m.getSkewY(), m45.getScaleX(), m45.getSkewY());
62    Sk4f Y = Sk4f(m.getSkewX(), m.getScaleY(), m45.getSkewX(), m45.getScaleY());
63    Sk4f T = Sk4f(m.getTranslateX(), m.getTranslateY(), m45.getTranslateX(), m45.getTranslateY());
64
65    // Map the path's points to device space and accumulate bounding boxes.
66    Sk4f devPt = SkNx_fma(Y, Sk4f(pts[0].y()), T);
67    devPt = SkNx_fma(X, Sk4f(pts[0].x()), devPt);
68    Sk4f topLeft = devPt;
69    Sk4f bottomRight = devPt;
70
71    // Store all 4 values [dev.x, dev.y, dev45.x, dev45.y]. We are only interested in the first two,
72    // and will overwrite [dev45.x, dev45.y] with the next point. This is why the dst buffer must
73    // be at least one larger than the number of points.
74    devPt.store(&fLocalDevPtsBuffer[0]);
75
76    for (int i = 1; i < numPts; ++i) {
77        devPt = SkNx_fma(Y, Sk4f(pts[i].y()), T);
78        devPt = SkNx_fma(X, Sk4f(pts[i].x()), devPt);
79        topLeft = Sk4f::Min(topLeft, devPt);
80        bottomRight = Sk4f::Max(bottomRight, devPt);
81        devPt.store(&fLocalDevPtsBuffer[i]);
82    }
83
84    SkPoint topLeftPts[2], bottomRightPts[2];
85    topLeft.store(topLeftPts);
86    bottomRight.store(bottomRightPts);
87    devBounds->setLTRB(topLeftPts[0].x(), topLeftPts[0].y(), bottomRightPts[0].x(),
88                       bottomRightPts[0].y());
89    devBounds45->setLTRB(topLeftPts[1].x(), topLeftPts[1].y(), bottomRightPts[1].x(),
90                         bottomRightPts[1].y());
91
92    this->parsePath(path, fLocalDevPtsBuffer.get());
93}
94
95void GrCCPathParser::parseDeviceSpacePath(const SkPath& deviceSpacePath) {
96    this->parsePath(deviceSpacePath, SkPathPriv::PointData(deviceSpacePath));
97}
98
99void GrCCPathParser::parsePath(const SkPath& path, const SkPoint* deviceSpacePts) {
100    SkASSERT(!fInstanceBuffer); // Can't call after finalize().
101    SkASSERT(!fParsingPath); // Call saveParsedPath() or discardParsedPath() for the last one first.
102    SkDEBUGCODE(fParsingPath = true);
103    SkASSERT(path.isEmpty() || deviceSpacePts);
104
105    fCurrPathPointsIdx = fGeometry.points().count();
106    fCurrPathVerbsIdx = fGeometry.verbs().count();
107    fCurrPathPrimitiveCounts = PrimitiveTallies();
108
109    fGeometry.beginPath();
110
111    if (path.isEmpty()) {
112        return;
113    }
114
115    int ptsIdx = 0;
116    bool insideContour = false;
117
118    for (SkPath::Verb verb : SkPathPriv::Verbs(path)) {
119        switch (verb) {
120            case SkPath::kMove_Verb:
121                this->endContourIfNeeded(insideContour);
122                fGeometry.beginContour(deviceSpacePts[ptsIdx]);
123                ++ptsIdx;
124                insideContour = true;
125                continue;
126            case SkPath::kClose_Verb:
127                this->endContourIfNeeded(insideContour);
128                insideContour = false;
129                continue;
130            case SkPath::kLine_Verb:
131                fGeometry.lineTo(deviceSpacePts[ptsIdx]);
132                ++ptsIdx;
133                continue;
134            case SkPath::kQuad_Verb:
135                fGeometry.quadraticTo(deviceSpacePts[ptsIdx], deviceSpacePts[ptsIdx + 1]);
136                ptsIdx += 2;
137                continue;
138            case SkPath::kCubic_Verb:
139                fGeometry.cubicTo(deviceSpacePts[ptsIdx], deviceSpacePts[ptsIdx + 1],
140                                  deviceSpacePts[ptsIdx + 2]);
141                ptsIdx += 3;
142                continue;
143            case SkPath::kConic_Verb:
144                SK_ABORT("Conics are not supported.");
145            default:
146                SK_ABORT("Unexpected path verb.");
147        }
148    }
149
150    this->endContourIfNeeded(insideContour);
151}
152
153void GrCCPathParser::endContourIfNeeded(bool insideContour) {
154    if (insideContour) {
155        fCurrPathPrimitiveCounts += fGeometry.endContour();
156    }
157}
158
159void GrCCPathParser::saveParsedPath(ScissorMode scissorMode, const SkIRect& clippedDevIBounds,
160                                    int16_t atlasOffsetX, int16_t atlasOffsetY) {
161    SkASSERT(fParsingPath);
162
163    fPathsInfo.push_back() = {scissorMode, atlasOffsetX, atlasOffsetY};
164    fTotalPrimitiveCounts[(int)scissorMode] += fCurrPathPrimitiveCounts;
165
166    if (ScissorMode::kScissored == scissorMode) {
167        fScissorSubBatches.push_back() = {fTotalPrimitiveCounts[(int)ScissorMode::kScissored],
168                                          clippedDevIBounds.makeOffset(atlasOffsetX, atlasOffsetY)};
169    }
170
171    SkDEBUGCODE(fParsingPath = false);
172}
173
174void GrCCPathParser::discardParsedPath() {
175    SkASSERT(fParsingPath);
176    fGeometry.resize_back(fCurrPathPointsIdx, fCurrPathVerbsIdx);
177    SkDEBUGCODE(fParsingPath = false);
178}
179
180GrCCPathParser::CoverageCountBatchID GrCCPathParser::closeCurrentBatch() {
181    SkASSERT(!fInstanceBuffer);
182    SkASSERT(!fCoverageCountBatches.empty());
183
184    int maxMeshes = 1 + fScissorSubBatches.count() -
185                        fCoverageCountBatches.back().fEndScissorSubBatchIdx;
186    fMaxMeshesPerDraw = SkTMax(fMaxMeshesPerDraw, maxMeshes);
187
188    fCoverageCountBatches.push_back() = {
189        fTotalPrimitiveCounts[(int)ScissorMode::kNonScissored],
190        fScissorSubBatches.count()
191    };
192    return fCoverageCountBatches.count() - 1;
193}
194
195// Emits a contour's triangle fan.
196//
197// Classic Redbook fanning would be the triangles: [0  1  2], [0  2  3], ..., [0  n-2  n-1].
198//
199// This function emits the triangle: [0  n/3  n*2/3], and then recurses on all three sides. The
200// advantage to this approach is that for a convex-ish contour, it generates larger triangles.
201// Classic fanning tends to generate long, skinny triangles, which are expensive to draw since they
202// have a longer perimeter to rasterize and antialias.
203//
204// The indices array indexes the fan's points (think: glDrawElements), and must have at least log3
205// elements past the end for this method to use as scratch space.
206//
207// Returns the next triangle instance after the final one emitted.
208static TriangleInstance* emit_recursive_fan(const SkTArray<SkPoint, true>& pts,
209                                            SkTArray<int32_t, true>& indices, int firstIndex,
210                                            int indexCount, const Sk2f& atlasOffset,
211                                            TriangleInstance out[]) {
212    if (indexCount < 3) {
213        return out;
214    }
215
216    int32_t oneThirdCount = indexCount / 3;
217    int32_t twoThirdsCount = (2 * indexCount) / 3;
218    out++->set(pts[indices[firstIndex]], pts[indices[firstIndex + oneThirdCount]],
219               pts[indices[firstIndex + twoThirdsCount]], atlasOffset);
220
221    out = emit_recursive_fan(pts, indices, firstIndex, oneThirdCount + 1, atlasOffset, out);
222    out = emit_recursive_fan(pts, indices, firstIndex + oneThirdCount,
223                             twoThirdsCount - oneThirdCount + 1, atlasOffset, out);
224
225    int endIndex = firstIndex + indexCount;
226    int32_t oldValue = indices[endIndex];
227    indices[endIndex] = indices[firstIndex];
228    out = emit_recursive_fan(pts, indices, firstIndex + twoThirdsCount,
229                             indexCount - twoThirdsCount + 1, atlasOffset, out);
230    indices[endIndex] = oldValue;
231
232    return out;
233}
234
235bool GrCCPathParser::finalize(GrOnFlushResourceProvider* onFlushRP) {
236    SkASSERT(!fParsingPath); // Call saveParsedPath() or discardParsedPath().
237    SkASSERT(fCoverageCountBatches.back().fEndNonScissorIndices == // Call closeCurrentBatch().
238             fTotalPrimitiveCounts[(int)ScissorMode::kNonScissored]);
239    SkASSERT(fCoverageCountBatches.back().fEndScissorSubBatchIdx == fScissorSubBatches.count());
240
241    // Here we build a single instance buffer to share with every internal batch.
242    //
243    // CCPR processs 3 different types of primitives: triangles, quadratics, cubics. Each primitive
244    // type is further divided into instances that require a scissor and those that don't. This
245    // leaves us with 3*2 = 6 independent instance arrays to build for the GPU.
246    //
247    // Rather than place each instance array in its own GPU buffer, we allocate a single
248    // megabuffer and lay them all out side-by-side. We can offset the "baseInstance" parameter in
249    // our draw calls to direct the GPU to the applicable elements within a given array.
250    //
251    // We already know how big to make each of the 6 arrays from fTotalPrimitiveCounts, so layout is
252    // straightforward. Start with triangles and quadratics. They both view the instance buffer as
253    // an array of TriangleInstance[], so we can begin at zero and lay them out one after the other.
254    fBaseInstances[0].fTriangles = 0;
255    fBaseInstances[1].fTriangles = fBaseInstances[0].fTriangles +
256                                   fTotalPrimitiveCounts[0].fTriangles;
257    fBaseInstances[0].fQuadratics = fBaseInstances[1].fTriangles +
258                                    fTotalPrimitiveCounts[1].fTriangles;
259    fBaseInstances[1].fQuadratics = fBaseInstances[0].fQuadratics +
260                                    fTotalPrimitiveCounts[0].fQuadratics;
261    int triEndIdx = fBaseInstances[1].fQuadratics + fTotalPrimitiveCounts[1].fQuadratics;
262
263    // Cubics view the same instance buffer as an array of CubicInstance[]. So, reinterpreting the
264    // instance data as CubicInstance[], we start them on the first index that will not overwrite
265    // previous TriangleInstance data.
266    int cubicBaseIdx =
267            GR_CT_DIV_ROUND_UP(triEndIdx * sizeof(TriangleInstance), sizeof(CubicInstance));
268    fBaseInstances[0].fCubics = cubicBaseIdx;
269    fBaseInstances[1].fCubics = fBaseInstances[0].fCubics + fTotalPrimitiveCounts[0].fCubics;
270    int cubicEndIdx = fBaseInstances[1].fCubics + fTotalPrimitiveCounts[1].fCubics;
271
272    fInstanceBuffer = onFlushRP->makeBuffer(kVertex_GrBufferType,
273                                            cubicEndIdx * sizeof(CubicInstance));
274    if (!fInstanceBuffer) {
275        return false;
276    }
277
278    TriangleInstance* triangleInstanceData = static_cast<TriangleInstance*>(fInstanceBuffer->map());
279    CubicInstance* cubicInstanceData = reinterpret_cast<CubicInstance*>(triangleInstanceData);
280    SkASSERT(cubicInstanceData);
281
282    PathInfo* currPathInfo = fPathsInfo.begin();
283    float atlasOffsetX = 0.0, atlasOffsetY = 0.0;
284    Sk2f atlasOffset;
285    int ptsIdx = -1;
286    PrimitiveTallies instanceIndices[2] = {fBaseInstances[0], fBaseInstances[1]};
287    PrimitiveTallies* currIndices = nullptr;
288    SkSTArray<256, int32_t, true> currFan;
289
290    const SkTArray<SkPoint, true>& pts = fGeometry.points();
291
292    // Expand the ccpr verbs into GPU instance buffers.
293    for (GrCCGeometry::Verb verb : fGeometry.verbs()) {
294        switch (verb) {
295            case GrCCGeometry::Verb::kBeginPath:
296                SkASSERT(currFan.empty());
297                currIndices = &instanceIndices[(int)currPathInfo->fScissorMode];
298                atlasOffsetX = static_cast<float>(currPathInfo->fAtlasOffsetX);
299                atlasOffsetY = static_cast<float>(currPathInfo->fAtlasOffsetY);
300                atlasOffset = {atlasOffsetX, atlasOffsetY};
301                ++currPathInfo;
302                continue;
303
304            case GrCCGeometry::Verb::kBeginContour:
305                SkASSERT(currFan.empty());
306                currFan.push_back(++ptsIdx);
307                continue;
308
309            case GrCCGeometry::Verb::kLineTo:
310                SkASSERT(!currFan.empty());
311                currFan.push_back(++ptsIdx);
312                continue;
313
314            case GrCCGeometry::Verb::kMonotonicQuadraticTo:
315                SkASSERT(!currFan.empty());
316                triangleInstanceData[currIndices->fQuadratics++].set(&pts[ptsIdx], atlasOffset);
317                currFan.push_back(ptsIdx += 2);
318                continue;
319
320            case GrCCGeometry::Verb::kMonotonicCubicTo:
321                SkASSERT(!currFan.empty());
322                cubicInstanceData[currIndices->fCubics++].set(&pts[ptsIdx], atlasOffsetX,
323                                                              atlasOffsetY);
324                currFan.push_back(ptsIdx += 3);
325                continue;
326
327            case GrCCGeometry::Verb::kEndClosedContour:  // endPt == startPt.
328                SkASSERT(!currFan.empty());
329                currFan.pop_back();
330            // fallthru.
331            case GrCCGeometry::Verb::kEndOpenContour:  // endPt != startPt.
332                if (currFan.count() >= 3) {
333                    int fanSize = currFan.count();
334                    // Reserve space for emit_recursive_fan. Technically this can grow to
335                    // fanSize + log3(fanSize), but we approximate with log2.
336                    currFan.push_back_n(SkNextLog2(fanSize));
337                    SkDEBUGCODE(TriangleInstance* end =)
338                            emit_recursive_fan(pts, currFan, 0, fanSize, atlasOffset,
339                                               triangleInstanceData + currIndices->fTriangles);
340                    currIndices->fTriangles += fanSize - 2;
341                    SkASSERT(triangleInstanceData + currIndices->fTriangles == end);
342                }
343                currFan.reset();
344                continue;
345        }
346    }
347
348    fInstanceBuffer->unmap();
349
350    SkASSERT(currPathInfo == fPathsInfo.end());
351    SkASSERT(ptsIdx == pts.count() - 1);
352    SkASSERT(instanceIndices[0].fTriangles == fBaseInstances[1].fTriangles);
353    SkASSERT(instanceIndices[1].fTriangles == fBaseInstances[0].fQuadratics);
354    SkASSERT(instanceIndices[0].fQuadratics == fBaseInstances[1].fQuadratics);
355    SkASSERT(instanceIndices[1].fQuadratics == triEndIdx);
356    SkASSERT(instanceIndices[0].fCubics == fBaseInstances[1].fCubics);
357    SkASSERT(instanceIndices[1].fCubics == cubicEndIdx);
358
359    fMeshesScratchBuffer.reserve(fMaxMeshesPerDraw);
360    fDynamicStatesScratchBuffer.reserve(fMaxMeshesPerDraw);
361
362    return true;
363}
364
365void GrCCPathParser::drawCoverageCount(GrOpFlushState* flushState, CoverageCountBatchID batchID,
366                                       const SkIRect& drawBounds) const {
367    using RenderPass = GrCCCoverageProcessor::RenderPass;
368
369    SkASSERT(fInstanceBuffer);
370
371    GrPipeline pipeline(flushState->drawOpArgs().fProxy, GrPipeline::ScissorState::kEnabled,
372                        SkBlendMode::kPlus);
373
374    // Triangles.
375    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleHulls,
376                         &PrimitiveTallies::fTriangles, drawBounds);
377    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleEdges,
378                         &PrimitiveTallies::fTriangles, drawBounds);  // Might get skipped.
379    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kTriangleCorners,
380                         &PrimitiveTallies::fTriangles, drawBounds);
381
382    // Quadratics.
383    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kQuadraticHulls,
384                         &PrimitiveTallies::fQuadratics, drawBounds);
385    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kQuadraticCorners,
386                         &PrimitiveTallies::fQuadratics, drawBounds);
387
388    // Cubics.
389    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kCubicHulls,
390                         &PrimitiveTallies::fCubics, drawBounds);
391    this->drawRenderPass(flushState, pipeline, batchID, RenderPass::kCubicCorners,
392                         &PrimitiveTallies::fCubics, drawBounds);
393}
394
395void GrCCPathParser::drawRenderPass(GrOpFlushState* flushState, const GrPipeline& pipeline,
396                                    CoverageCountBatchID batchID,
397                                    GrCCCoverageProcessor::RenderPass renderPass,
398                                    int PrimitiveTallies::*instanceType,
399                                    const SkIRect& drawBounds) const {
400    SkASSERT(pipeline.getScissorState().enabled());
401
402    if (!GrCCCoverageProcessor::DoesRenderPass(renderPass, flushState->caps())) {
403        return;
404    }
405
406    // Don't call reset(), as that also resets the reserve count.
407    fMeshesScratchBuffer.pop_back_n(fMeshesScratchBuffer.count());
408    fDynamicStatesScratchBuffer.pop_back_n(fDynamicStatesScratchBuffer.count());
409
410    GrCCCoverageProcessor proc(flushState->resourceProvider(), renderPass, flushState->caps());
411
412    SkASSERT(batchID > 0);
413    SkASSERT(batchID < fCoverageCountBatches.count());
414    const CoverageCountBatch& previousBatch = fCoverageCountBatches[batchID - 1];
415    const CoverageCountBatch& batch = fCoverageCountBatches[batchID];
416
417    if (int instanceCount = batch.fEndNonScissorIndices.*instanceType -
418                            previousBatch.fEndNonScissorIndices.*instanceType) {
419        SkASSERT(instanceCount > 0);
420        int baseInstance = fBaseInstances[(int)ScissorMode::kNonScissored].*instanceType +
421                           previousBatch.fEndNonScissorIndices.*instanceType;
422        proc.appendMesh(fInstanceBuffer.get(), instanceCount, baseInstance, &fMeshesScratchBuffer);
423        fDynamicStatesScratchBuffer.push_back().fScissorRect.setXYWH(0, 0, drawBounds.width(),
424                                                                     drawBounds.height());
425    }
426
427    SkASSERT(previousBatch.fEndScissorSubBatchIdx > 0);
428    SkASSERT(batch.fEndScissorSubBatchIdx <= fScissorSubBatches.count());
429    int baseScissorInstance = fBaseInstances[(int)ScissorMode::kScissored].*instanceType;
430    for (int i = previousBatch.fEndScissorSubBatchIdx; i < batch.fEndScissorSubBatchIdx; ++i) {
431        const ScissorSubBatch& previousSubBatch = fScissorSubBatches[i - 1];
432        const ScissorSubBatch& scissorSubBatch = fScissorSubBatches[i];
433        int startIndex = previousSubBatch.fEndPrimitiveIndices.*instanceType;
434        int instanceCount = scissorSubBatch.fEndPrimitiveIndices.*instanceType - startIndex;
435        if (!instanceCount) {
436            continue;
437        }
438        SkASSERT(instanceCount > 0);
439        proc.appendMesh(fInstanceBuffer.get(), instanceCount,
440                        baseScissorInstance + startIndex, &fMeshesScratchBuffer);
441        fDynamicStatesScratchBuffer.push_back().fScissorRect = scissorSubBatch.fScissor;
442    }
443
444    SkASSERT(fMeshesScratchBuffer.count() == fDynamicStatesScratchBuffer.count());
445    SkASSERT(fMeshesScratchBuffer.count() <= fMaxMeshesPerDraw);
446
447    if (!fMeshesScratchBuffer.empty()) {
448        SkASSERT(flushState->rtCommandBuffer());
449        flushState->rtCommandBuffer()->draw(pipeline, proc, fMeshesScratchBuffer.begin(),
450                                            fDynamicStatesScratchBuffer.begin(),
451                                            fMeshesScratchBuffer.count(), SkRect::Make(drawBounds));
452    }
453}
454