SkRasterPipeline.h revision 1f4a874addd7c039fb8b434181040cd6a8a35339
/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkRasterPipeline_DEFINED
#define SkRasterPipeline_DEFINED

#include "SkNx.h"
#include "SkTArray.h"
#include "SkTypes.h"

/**
 * SkRasterPipeline provides a cheap way to chain together a pixel processing pipeline.
 *
 * It's particularly designed for situations where the potential pipeline is extremely
 * combinatorial: {N dst formats} x {M source formats} x {K mask formats} x {C transfer modes} ...
 * No one wants to write specialized routines for all those combinations, and if we did, we'd
 * end up bloating our code size dramatically.  SkRasterPipeline stages can be chained together
 * at runtime, so we can scale this problem linearly rather than combinatorially.
 *
 * Each stage is represented by a function conforming to a common interface, SkRasterPipeline::Fn,
 * and by an arbitrary context pointer.  Fn's arguments, and its sometimes-custom calling
 * convention, are designed to maximize the amount of data we can pass along the pipeline cheaply.
 * On many machines all arguments stay in registers the entire time.
 *
 * The meanings of the arguments to Fn are sometimes fixed...
 *    - The Stage* always represents the current stage, mainly providing access to ctx().
 *    - The size_t is always the destination x coordinate.  If you need y, put it in your context.
 *    - By the time the shader's done, the first four vectors should hold source red,
 *      green, blue, and alpha, up to 4 pixels' worth each.
 *
 * ...and sometimes flexible:
 *    - In the shader, the first four vectors can be used for anything, e.g. sample coordinates.
 *    - The last four vectors are scratch registers that can be used to communicate between
 *      stages; transfer modes use these to hold the original destination pixel components.
 *
 * On some platforms the last four vectors are slower to work with than the other arguments.
 *
 * When done mutating its arguments and/or context, a stage can either:
 *   1) call st->next() with its mutated arguments, chaining to the next stage of the pipeline; or
 *   2) return, indicating the pipeline is complete for these pixels.
 *
 * Some obvious stages that typically return are those that write a color to a destination pointer,
 * but any stage can short-circuit the rest of the pipeline by returning instead of calling next().
 *
 * Most simple pipeline stages can use the SK_RASTER_STAGE macro to define a static EasyFn,
 * which simplifies the user interface a bit:
 *    - the context pointer is available directly as the first parameter;
 *    - instead of manually calling a next() function, just modify registers in place.
 *
 * To add an EasyFn stage to the pipeline, call append<fn>() instead of append(&fn).
 * For the last stage of a pipeline, there's a slight performance benefit to calling last<fn>()
 * instead.  A short usage sketch follows this comment block.
 */
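
// A minimal usage sketch.  The stage name scale_by_alpha below is hypothetical, shown only
// to illustrate the EasyFn flow described above:
//
//    SK_RASTER_STAGE(scale_by_alpha) {
//        r *= a;
//        g *= a;
//        b *= a;
//    }
//
//    SkRasterPipeline p;
//    p.last<scale_by_alpha>();    // last<fn>() because it's the final (and only) stage.
//    p.run(50);                   // Process 50 pixels, starting at x = 0.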

class SkRasterPipeline {
public:
    struct Stage;
    using Fn = void(SK_VECTORCALL *)(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
                                                     Sk4f,Sk4f,Sk4f,Sk4f);
    using EasyFn = void(void*, size_t, Sk4f&, Sk4f&, Sk4f&, Sk4f&,
                                       Sk4f&, Sk4f&, Sk4f&, Sk4f&);

    struct Stage {
        template <typename T>
        T ctx() { return static_cast<T>(fCtx); }

        void SK_VECTORCALL next(size_t x, Sk4f v0, Sk4f v1, Sk4f v2, Sk4f v3,
                                          Sk4f v4, Sk4f v5, Sk4f v6, Sk4f v7) {
            // Stages are logically a pipeline, and physically are contiguous in an array.
            // To get to the next stage, we just increment our pointer to the next array element.
            fNext(this+1, x, v0,v1,v2,v3, v4,v5,v6,v7);
        }

        // It makes next() a good bit cheaper if we hold the next function to call here,
        // rather than the logically simpler choice of the function implementing this stage.
        Fn fNext;
        void* fCtx;
    };
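
    // Sketch of a raw Fn stage (force_opaque is hypothetical, for illustration only).
    // A stage mutates its arguments and then either chains on via st->next() or returns
    // to end the pipeline early for these pixels:
    //
    //    static void SK_VECTORCALL force_opaque(SkRasterPipeline::Stage* st, size_t x,
    //                                           Sk4f r, Sk4f g, Sk4f b, Sk4f a,
    //                                           Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
    //        a = Sk4f(1.0f);
    //        st->next(x, r,g,b,a, dr,dg,db,da);    // Omit this call to short-circuit.
    //    }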


    SkRasterPipeline();

    // Run the pipeline constructed with append(), walking x through [x,x+n),
    // generally in 4-pixel steps, but sometimes 1 pixel at a time.
    void run(size_t x, size_t n);
    void run(size_t n) { this->run(0, n); }

    // Use this append() if your stage is sensitive to the number of pixels you're working with:
    //   - body will always be called for a full 4 pixels
    //   - tail will always be called for a single pixel
    // Typically this distinction only matters for stages that read or write memory.
    void append(Fn body, const void* body_ctx,
                Fn tail, const void* tail_ctx);

    // Most stages don't actually care whether they're working on 4 pixels or 1.
    void append(Fn fn, const void* ctx = nullptr) {
        this->append(fn, ctx, fn, ctx);
    }

    // Most 4-pixel and 1-pixel variants share the same context pointer.
    void append(Fn body, Fn tail, const void* ctx = nullptr) {
        this->append(body, ctx, tail, ctx);
    }
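
    // For example, given an SkRasterPipeline p, hypothetical Fns store_4_pixels and
    // store_1_pixel, and an assumed destination pointer dst, a memory-writing stage
    // could be appended as:
    //
    //    p.append(store_4_pixels,    // body: always handles a full 4 pixels
    //             store_1_pixel,     // tail: handles the remainder 1 pixel at a time
    //             dst);              // shared context for both variants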


    // Versions of append that can be used with static EasyFns (see SK_RASTER_STAGE).
    template <EasyFn body, EasyFn tail>
    void append(const void* body_ctx, const void* tail_ctx) {
        this->append(Easy<body>, body_ctx,
                     Easy<tail>, tail_ctx);
    }
    template <EasyFn body, EasyFn tail>
    void last(const void* body_ctx, const void* tail_ctx) {
        this->append(Last<body>, body_ctx,
                     Last<tail>, tail_ctx);
    }

    template <EasyFn fn>
    void append(const void* ctx = nullptr) { this->append<fn, fn>(ctx, ctx); }
    template <EasyFn fn>
    void last(const void* ctx = nullptr) { this->last<fn, fn>(ctx, ctx); }

    template <EasyFn body, EasyFn tail>
    void append(const void* ctx = nullptr) { this->append<body, tail>(ctx, ctx); }
    template <EasyFn body, EasyFn tail>
    void last(const void* ctx = nullptr) { this->last<body, tail>(ctx, ctx); }
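
    // For example (the stage names are hypothetical EasyFns defined with SK_RASTER_STAGE,
    // and src/dst are assumed buffers):
    //
    //    p.append<load_u8_body, load_u8_tail>(src);    // distinct 4-pixel and 1-pixel kernels
    //    p.last<store_u8_body, store_u8_tail>(dst);    // same idea, as the pipeline's final stage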


    // Append all stages to this pipeline.
    void extend(const SkRasterPipeline&);

private:
    using Stages = SkSTArray<10, Stage, /*MEM_COPY=*/true>;

    // This no-op default makes fBodyStart and fTailStart unconditionally safe to call,
    // and is always the last stage's fNext as a sort of safety net to make sure even a
    // buggy pipeline can't walk off its own end.
    static void SK_VECTORCALL JustReturn(Stage*, size_t, Sk4f,Sk4f,Sk4f,Sk4f,
                                                         Sk4f,Sk4f,Sk4f,Sk4f);

    template <EasyFn kernel>
    static void SK_VECTORCALL Easy(SkRasterPipeline::Stage* st, size_t x,
                                   Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                   Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
        kernel(st->ctx<void*>(), x, r,g,b,a, dr,dg,db,da);
        st->next(x, r,g,b,a, dr,dg,db,da);
    }

    template <EasyFn kernel>
    static void SK_VECTORCALL Last(SkRasterPipeline::Stage* st, size_t x,
                                   Sk4f  r, Sk4f  g, Sk4f  b, Sk4f  a,
                                   Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
        kernel(st->ctx<void*>(), x, r,g,b,a, dr,dg,db,da);
    }

    Stages fBody,
           fTail;
    Fn fBodyStart = &JustReturn,
       fTailStart = &JustReturn;
};

// These are always static, and we _really_ want them to inline.
// If you find yourself wanting a non-inline stage, write a SkRasterPipeline::Fn directly.
#define SK_RASTER_STAGE(name)                                       \
    static SK_ALWAYS_INLINE void name(void* ctx, size_t x,          \
                            Sk4f&  r, Sk4f&  g, Sk4f&  b, Sk4f&  a, \
                            Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da)
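
// For example, a hypothetical stage that forces a constant alpha, reading its value
// through the context pointer (assumed here to be a const float*):
//
//    SK_RASTER_STAGE(constant_alpha) {
//        a = Sk4f(*static_cast<const float*>(ctx));
//    }
//
//    SkRasterPipeline pipeline;
//    float alpha = 0.5f;
//    pipeline.append<constant_alpha>(&alpha);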

#endif//SkRasterPipeline_DEFINED