1/****************************************************************************
2* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
3*
4* Permission is hereby granted, free of charge, to any person obtaining a
5* copy of this software and associated documentation files (the "Software"),
6* to deal in the Software without restriction, including without limitation
7* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8* and/or sell copies of the Software, and to permit persons to whom the
9* Software is furnished to do so, subject to the following conditions:
10*
11* The above copyright notice and this permission notice (including the next
12* paragraph) shall be included in all copies or substantial portions of the
13* Software.
14*
15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21* IN THE SOFTWARE.
22*
* @file archrast.cpp
24*
25* @brief Definitions for archrast.
26*
27******************************************************************************/
28#include <atomic>
29
30#include "common/os.h"
31#include "archrast/archrast.h"
32#include "archrast/eventmanager.h"
33#include "gen_ar_eventhandlerfile.h"
34
35namespace ArchRast
36{
37    //////////////////////////////////////////////////////////////////////////
38    /// @brief struct that keeps track of depth and stencil event information
39    struct DepthStencilStats
40    {
41        uint32_t earlyZTestPassCount = 0;
42        uint32_t earlyZTestFailCount = 0;
43        uint32_t lateZTestPassCount = 0;
44        uint32_t lateZTestFailCount = 0;
45        uint32_t earlyStencilTestPassCount = 0;
46        uint32_t earlyStencilTestFailCount = 0;
47        uint32_t lateStencilTestPassCount = 0;
48        uint32_t lateStencilTestFailCount = 0;
49        uint32_t earlyZTestCount = 0;
50        uint32_t lateZTestCount = 0;
51        uint32_t earlyStencilTestCount = 0;
52        uint32_t lateStencilTestCount = 0;
53    };
54
55    struct CStats
56    {
57        uint32_t clippedVerts = 0;
58    };
59
60    struct TEStats
61    {
62        uint32_t inputPrims = 0;
63        //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
64    };
65
66    struct GSStats
67    {
68        uint32_t inputPrimCount;
69        uint32_t primGeneratedCount;
70        uint32_t vertsInput;
71    };
72
73    //////////////////////////////////////////////////////////////////////////
74    /// @brief Event handler that saves stat events to event files. This
75    ///        handler filters out unwanted events.
76    class EventHandlerStatsFile : public EventHandlerFile
77    {
78    public:
79        DepthStencilStats DSSingleSample = {};
80        DepthStencilStats DSSampleRate = {};
81        DepthStencilStats DSPixelRate = {};
82        DepthStencilStats DSNullPS = {};
83        DepthStencilStats DSOmZ = {};
84        CStats CS = {};
85        TEStats TS = {};
86        GSStats GS = {};
87
88        EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id) {}
89
90        // These are events that we're not interested in saving in stats event files.
91        virtual void Handle(Start& event) {}
92        virtual void Handle(End& event) {}
93
94        virtual void Handle(EarlyDepthStencilInfoSingleSample& event)
95        {
96            //earlyZ test compute
97            DSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
98            DSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
99            DSSingleSample.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
100
101            //earlyStencil test compute
102            DSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
103            DSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
104            DSSingleSample.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
105
106            //outputerMerger test compute
107            DSOmZ.earlyZTestPassCount += DSSingleSample.earlyZTestPassCount;
108            DSOmZ.earlyZTestFailCount += DSSingleSample.earlyZTestFailCount;
109            DSOmZ.earlyZTestCount += DSSingleSample.earlyZTestCount;
110            DSOmZ.earlyStencilTestPassCount += DSSingleSample.earlyStencilTestPassCount;
111            DSOmZ.earlyStencilTestFailCount += DSSingleSample.earlyStencilTestFailCount;
112            DSOmZ.earlyStencilTestCount += DSSingleSample.earlyStencilTestCount;
113        }
114
115        virtual void Handle(EarlyDepthStencilInfoSampleRate& event)
116        {
117            //earlyZ test compute
118            DSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
119            DSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
120            DSSampleRate.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
121
122            //earlyStencil test compute
123            DSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
124            DSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
125            DSSampleRate.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
126
127            //outputerMerger test compute
128            DSOmZ.earlyZTestPassCount += DSSampleRate.earlyZTestPassCount;
129            DSOmZ.earlyZTestFailCount += DSSampleRate.earlyZTestFailCount;
130            DSOmZ.earlyZTestCount += DSSampleRate.earlyZTestCount;
131            DSOmZ.earlyStencilTestPassCount += DSSampleRate.earlyStencilTestPassCount;
132            DSOmZ.earlyStencilTestFailCount += DSSampleRate.earlyStencilTestFailCount;
133            DSOmZ.earlyStencilTestCount += DSSampleRate.earlyStencilTestCount;
134        }
135
136        virtual void Handle(EarlyDepthStencilInfoNullPS& event)
137        {
138            //earlyZ test compute
139            DSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
140            DSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
141            DSNullPS.earlyZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
142
143            //earlyStencil test compute
144            DSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
145            DSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
146            DSNullPS.earlyStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
147
148            //outputerMerger test compute
149            DSOmZ.earlyZTestPassCount += DSNullPS.earlyZTestPassCount;
150            DSOmZ.earlyZTestFailCount += DSNullPS.earlyZTestFailCount;
151            DSOmZ.earlyZTestCount += DSNullPS.earlyZTestCount;
152            DSOmZ.earlyStencilTestPassCount += DSNullPS.earlyStencilTestPassCount;
153            DSOmZ.earlyStencilTestFailCount += DSNullPS.earlyStencilTestFailCount;
154            DSOmZ.earlyStencilTestCount += DSNullPS.earlyStencilTestCount;
155        }
156
157        virtual void Handle(LateDepthStencilInfoSingleSample& event)
158        {
159            //lateZ test compute
160            DSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
161            DSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
162            DSSingleSample.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
163
164            //lateStencil test compute
165            DSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
166            DSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
167            DSSingleSample.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
168
169            //outputerMerger test compute
170            DSOmZ.lateZTestPassCount += DSSingleSample.lateZTestPassCount;
171            DSOmZ.lateZTestFailCount += DSSingleSample.lateZTestFailCount;
172            DSOmZ.lateZTestCount += DSSingleSample.lateZTestCount;
173            DSOmZ.lateStencilTestPassCount += DSSingleSample.lateStencilTestPassCount;
174            DSOmZ.lateStencilTestFailCount += DSSingleSample.lateStencilTestFailCount;
175            DSOmZ.lateStencilTestCount += DSSingleSample.lateStencilTestCount;
176        }
177
178        virtual void Handle(LateDepthStencilInfoSampleRate& event)
179        {
180            //lateZ test compute
181            DSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
182            DSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
183            DSSampleRate.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
184
185            //lateStencil test compute
186            DSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
187            DSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
188            DSSampleRate.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
189
190            //outputerMerger test compute
191            DSOmZ.lateZTestPassCount += DSSampleRate.lateZTestPassCount;
192            DSOmZ.lateZTestFailCount += DSSampleRate.lateZTestFailCount;
193            DSOmZ.lateZTestCount += DSSampleRate.lateZTestCount;
194            DSOmZ.lateStencilTestPassCount += DSSampleRate.lateStencilTestPassCount;
195            DSOmZ.lateStencilTestFailCount += DSSampleRate.lateStencilTestFailCount;
196            DSOmZ.lateStencilTestCount += DSSampleRate.lateStencilTestCount;
197        }
198
199        virtual void Handle(LateDepthStencilInfoNullPS& event)
200        {
201            //lateZ test compute
202            DSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
203            DSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
204            DSNullPS.lateZTestCount += (_mm_popcnt_u32(event.data.depthPassMask) + _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask));
205
206            //lateStencil test compute
207            DSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
208            DSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
209            DSNullPS.lateStencilTestCount += (_mm_popcnt_u32(event.data.stencilPassMask) + _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask));
210
211            //outputerMerger test compute
212            DSOmZ.lateZTestPassCount += DSNullPS.lateZTestPassCount;
213            DSOmZ.lateZTestFailCount += DSNullPS.lateZTestFailCount;
214            DSOmZ.lateZTestCount += DSNullPS.lateZTestCount;
215            DSOmZ.lateStencilTestPassCount += DSNullPS.lateStencilTestPassCount;
216            DSOmZ.lateStencilTestFailCount += DSNullPS.lateStencilTestFailCount;
217            DSOmZ.lateStencilTestCount += DSNullPS.lateStencilTestCount;
218        }
219
220        virtual void Handle(EarlyDepthInfoPixelRate& event)
221        {
222            //earlyZ test compute
223            DSPixelRate.earlyZTestCount += _mm_popcnt_u32(event.data.activeLanes);
224            DSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
225            DSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
226
227            //outputerMerger test compute
228            DSOmZ.earlyZTestPassCount += DSPixelRate.earlyZTestPassCount;
229            DSOmZ.earlyZTestFailCount += DSPixelRate.earlyZTestFailCount;
230            DSOmZ.earlyZTestCount += DSPixelRate.earlyZTestCount;
231        }
232
233
234        virtual void Handle(LateDepthInfoPixelRate& event)
235        {
236            //lateZ test compute
237            DSPixelRate.lateZTestCount += _mm_popcnt_u32(event.data.activeLanes);
238            DSPixelRate.lateZTestPassCount += event.data.depthPassCount;
239            DSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
240
241            //outputerMerger test compute
242            DSOmZ.lateZTestPassCount += DSPixelRate.lateZTestPassCount;
243            DSOmZ.lateZTestFailCount += DSPixelRate.lateZTestFailCount;
244            DSOmZ.lateZTestCount += DSPixelRate.lateZTestCount;
245
246        }
247
248
249        virtual void Handle(BackendDrawEndEvent& event)
250        {
251            //singleSample
252            EventHandlerFile::Handle(EarlyZSingleSample(event.data.drawId, DSSingleSample.earlyZTestPassCount, DSSingleSample.earlyZTestFailCount, DSSingleSample.earlyZTestCount));
253            EventHandlerFile::Handle(LateZSingleSample(event.data.drawId, DSSingleSample.lateZTestPassCount, DSSingleSample.lateZTestFailCount, DSSingleSample.lateZTestCount));
254            EventHandlerFile::Handle(EarlyStencilSingleSample(event.data.drawId, DSSingleSample.earlyStencilTestPassCount, DSSingleSample.earlyStencilTestFailCount, DSSingleSample.earlyStencilTestCount));
255            EventHandlerFile::Handle(LateStencilSingleSample(event.data.drawId, DSSingleSample.lateStencilTestPassCount, DSSingleSample.lateStencilTestFailCount, DSSingleSample.lateStencilTestCount));
256
257            //sampleRate
258            EventHandlerFile::Handle(EarlyZSampleRate(event.data.drawId, DSSampleRate.earlyZTestPassCount, DSSampleRate.earlyZTestFailCount, DSSampleRate.earlyZTestCount));
259            EventHandlerFile::Handle(LateZSampleRate(event.data.drawId, DSSampleRate.lateZTestPassCount, DSSampleRate.lateZTestFailCount, DSSampleRate.lateZTestCount));
260            EventHandlerFile::Handle(EarlyStencilSampleRate(event.data.drawId, DSSampleRate.earlyStencilTestPassCount, DSSampleRate.earlyStencilTestFailCount, DSSampleRate.earlyStencilTestCount));
261            EventHandlerFile::Handle(LateStencilSampleRate(event.data.drawId, DSSampleRate.lateStencilTestPassCount, DSSampleRate.lateStencilTestFailCount, DSSampleRate.lateStencilTestCount));
262
263            //pixelRate
264            EventHandlerFile::Handle(EarlyZPixelRate(event.data.drawId, DSPixelRate.earlyZTestPassCount, DSPixelRate.earlyZTestFailCount, DSPixelRate.earlyZTestCount));
265            EventHandlerFile::Handle(LateZPixelRate(event.data.drawId, DSPixelRate.lateZTestPassCount, DSPixelRate.lateZTestFailCount, DSPixelRate.lateZTestCount));
266
267
268            //NullPS
269            EventHandlerFile::Handle(EarlyZNullPS(event.data.drawId, DSNullPS.earlyZTestPassCount, DSNullPS.earlyZTestFailCount, DSNullPS.earlyZTestCount));
270            EventHandlerFile::Handle(EarlyStencilNullPS(event.data.drawId, DSNullPS.earlyStencilTestPassCount, DSNullPS.earlyStencilTestFailCount, DSNullPS.earlyStencilTestCount));
271
272            //OmZ
273            EventHandlerFile::Handle(EarlyOmZ(event.data.drawId, DSOmZ.earlyZTestPassCount, DSOmZ.earlyZTestFailCount, DSOmZ.earlyZTestCount));
274            EventHandlerFile::Handle(EarlyOmStencil(event.data.drawId, DSOmZ.earlyStencilTestPassCount, DSOmZ.earlyStencilTestFailCount, DSOmZ.earlyStencilTestCount));
275            EventHandlerFile::Handle(LateOmZ(event.data.drawId, DSOmZ.lateZTestPassCount, DSOmZ.lateZTestFailCount, DSOmZ.lateZTestCount));
276            EventHandlerFile::Handle(LateOmStencil(event.data.drawId, DSOmZ.lateStencilTestPassCount, DSOmZ.lateStencilTestFailCount, DSOmZ.lateStencilTestCount));
277
278            //Reset Internal Counters
279            DSSingleSample = {};
280            DSSampleRate = {};
281            DSPixelRate = {};
282            DSNullPS = {};
283            DSOmZ = {};
284        }
285
286        virtual void Handle(FrontendDrawEndEvent& event)
287        {
288            //Clipper
289            EventHandlerFile::Handle(VertsClipped(event.data.drawId, CS.clippedVerts));
290
291            //Tesselator
292            EventHandlerFile::Handle(TessPrims(event.data.drawId, TS.inputPrims));
293
294            //Geometry Shader
295            EventHandlerFile::Handle(GSInputPrims(event.data.drawId, GS.inputPrimCount));
296            EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, GS.primGeneratedCount));
297            EventHandlerFile::Handle(GSVertsInput(event.data.drawId, GS.vertsInput));
298
299            //Reset Internal Counters
300            CS = {};
301            TS = {};
302            GS = {};
303        }
304
305        virtual void Handle(GSPrimInfo& event)
306        {
307            GS.inputPrimCount += event.data.inputPrimCount;
308            GS.primGeneratedCount += event.data.primGeneratedCount;
309            GS.vertsInput += event.data.vertsInput;
310        }
311
312        virtual void Handle(ClipVertexCount& event)
313        {
314            CS.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
315        }
316
317        virtual void Handle(TessPrimCount& event)
318        {
319            TS.inputPrims += event.data.primCount;
320        }
321    };
322
323    static EventManager* FromHandle(HANDLE hThreadContext)
324    {
325        return reinterpret_cast<EventManager*>(hThreadContext);
326    }
327
328    // Construct an event manager and associate a handler with it.
329    HANDLE CreateThreadContext(AR_THREAD type)
330    {
331        // Can we assume single threaded here?
332        static std::atomic<uint32_t> counter(0);
333        uint32_t id = counter.fetch_add(1);
334
335        EventManager* pManager = new EventManager();
336        EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
337
338        if (pManager && pHandler)
339        {
340            pManager->Attach(pHandler);
341
342            if (type == AR_THREAD::API)
343            {
344                pHandler->Handle(ThreadStartApiEvent());
345            }
346            else
347            {
348                pHandler->Handle(ThreadStartWorkerEvent());
349            }
350            pHandler->MarkHeader();
351
352            return pManager;
353        }
354
355        SWR_ASSERT(0, "Failed to register thread.");
356        return nullptr;
357    }
358
359    void DestroyThreadContext(HANDLE hThreadContext)
360    {
361        EventManager* pManager = FromHandle(hThreadContext);
362        SWR_ASSERT(pManager != nullptr);
363
364        delete pManager;
365    }
366
367    // Dispatch event for this thread.
368    void Dispatch(HANDLE hThreadContext, Event& event)
369    {
370        EventManager* pManager = FromHandle(hThreadContext);
371        SWR_ASSERT(pManager != nullptr);
372
373        pManager->Dispatch(event);
374    }
375}
376