1c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/**************************************************************************** 2c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 4c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Permission is hereby granted, free of charge, to any person obtaining a 5c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* copy of this software and associated documentation files (the "Software"), 6c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* to deal in the Software without restriction, including without limitation 7c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* the rights to use, copy, modify, merge, publish, distribute, sublicense, 8c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* and/or sell copies of the Software, and to permit persons to whom the 9c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Software is furnished to do so, subject to the following conditions: 10c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 11c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* The above copyright notice and this permission notice (including the next 12c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* paragraph) shall be included in all copies or substantial portions of the 13c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* Software. 14c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 15c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* IN THE SOFTWARE. 22c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 23c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* @file frontend.cpp 24c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 25c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* @brief Implementation for Frontend which handles vertex processing, 26c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* primitive assembly, clipping, binning, etc. 27c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley* 28c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley******************************************************************************/ 29c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 30c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "api.h" 31c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "frontend.h" 32c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "backend.h" 33c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "context.h" 34c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "rdtsc_core.h" 35c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "utils.h" 36c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "threads.h" 37c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "pa.h" 38c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "clip.h" 39c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "tilemgr.h" 40c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#include "tessellator.h" 410487377dcec9122173c963360f8d302d071d3434Tim Rowley#include <limits> 42c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 43c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 44c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Helper macro to generate a bitmask 45c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic INLINE uint32_t GenMask(uint32_t numBits) 46c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 47c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(numBits <= (sizeof(uint32_t) * 8), "Too many bits (%d) for %s", numBits, __FUNCTION__); 48c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return ((1U << numBits) - 1); 49c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 50c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 51c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 52c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief FE handler for SwrSync. 53c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pContext - pointer to SWR context. 54c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 55c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 56c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pUserData - Pointer to user data passed back to sync callback. 57c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @todo This should go away when we switch this to use compute threading. 58c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyvoid ProcessSync( 59c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_CONTEXT *pContext, 60c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 61c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 62c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void *pUserData) 63c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 64c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BE_WORK work; 65c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley work.type = SYNC; 66c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley work.pfnWork = ProcessSyncBE; 67c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 68c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley MacroTileMgr *pTileMgr = pDC->pTileMgr; 69c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pTileMgr->enqueue(0, 0, &work); 70c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 71c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 72c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 7392ec820244710e1b13267d8e93f3a81d7114080eTim Rowley/// @brief FE handler for SwrDestroyContext. 7492ec820244710e1b13267d8e93f3a81d7114080eTim Rowley/// @param pContext - pointer to SWR context. 7592ec820244710e1b13267d8e93f3a81d7114080eTim Rowley/// @param pDC - pointer to draw context. 7692ec820244710e1b13267d8e93f3a81d7114080eTim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 7792ec820244710e1b13267d8e93f3a81d7114080eTim Rowley/// @param pUserData - Pointer to user data passed back to sync callback. 7892ec820244710e1b13267d8e93f3a81d7114080eTim Rowleyvoid ProcessShutdown( 7992ec820244710e1b13267d8e93f3a81d7114080eTim Rowley SWR_CONTEXT *pContext, 8092ec820244710e1b13267d8e93f3a81d7114080eTim Rowley DRAW_CONTEXT *pDC, 8192ec820244710e1b13267d8e93f3a81d7114080eTim Rowley uint32_t workerId, 8292ec820244710e1b13267d8e93f3a81d7114080eTim Rowley void *pUserData) 8392ec820244710e1b13267d8e93f3a81d7114080eTim Rowley{ 8492ec820244710e1b13267d8e93f3a81d7114080eTim Rowley BE_WORK work; 8592ec820244710e1b13267d8e93f3a81d7114080eTim Rowley work.type = SHUTDOWN; 8692ec820244710e1b13267d8e93f3a81d7114080eTim Rowley work.pfnWork = ProcessShutdownBE; 8792ec820244710e1b13267d8e93f3a81d7114080eTim Rowley 8892ec820244710e1b13267d8e93f3a81d7114080eTim Rowley MacroTileMgr *pTileMgr = pDC->pTileMgr; 8992ec820244710e1b13267d8e93f3a81d7114080eTim Rowley // Enqueue at least 1 work item for each worker thread 9092ec820244710e1b13267d8e93f3a81d7114080eTim Rowley // account for number of numa nodes 9192ec820244710e1b13267d8e93f3a81d7114080eTim Rowley uint32_t numNumaNodes = pContext->threadPool.numaMask + 1; 9292ec820244710e1b13267d8e93f3a81d7114080eTim Rowley 9392ec820244710e1b13267d8e93f3a81d7114080eTim Rowley for (uint32_t i = 0; i < pContext->threadPool.numThreads; ++i) 9492ec820244710e1b13267d8e93f3a81d7114080eTim Rowley { 9592ec820244710e1b13267d8e93f3a81d7114080eTim Rowley for (uint32_t n = 0; n < numNumaNodes; ++n) 9692ec820244710e1b13267d8e93f3a81d7114080eTim Rowley { 9792ec820244710e1b13267d8e93f3a81d7114080eTim Rowley pTileMgr->enqueue(i, n, &work); 9892ec820244710e1b13267d8e93f3a81d7114080eTim Rowley } 9992ec820244710e1b13267d8e93f3a81d7114080eTim Rowley } 10092ec820244710e1b13267d8e93f3a81d7114080eTim Rowley} 10192ec820244710e1b13267d8e93f3a81d7114080eTim Rowley 10292ec820244710e1b13267d8e93f3a81d7114080eTim Rowley////////////////////////////////////////////////////////////////////////// 103c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief FE handler for SwrClearRenderTarget. 104c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pContext - pointer to SWR context. 105c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 106c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 107c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pUserData - Pointer to user data passed back to clear callback. 108c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @todo This should go away when we switch this to use compute threading. 109c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyvoid ProcessClear( 110c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_CONTEXT *pContext, 111c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 112c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 113c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void *pUserData) 114c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 1150ff57446e3786243c6d752c91be2108595f2663eTim Rowley CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData; 116c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley MacroTileMgr *pTileMgr = pDC->pTileMgr; 117c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 118c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // queue a clear to each macro tile 1190ff57446e3786243c6d752c91be2108595f2663eTim Rowley // compute macro tile bounds for the specified rect 1200ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM; 1210ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM; 1220ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM; 1230ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM; 124c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 125c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BE_WORK work; 126c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley work.type = CLEAR; 127c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley work.pfnWork = ProcessClearBE; 1280ff57446e3786243c6d752c91be2108595f2663eTim Rowley work.desc.clear = *pDesc; 129c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1300ff57446e3786243c6d752c91be2108595f2663eTim Rowley for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y) 131c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1320ff57446e3786243c6d752c91be2108595f2663eTim Rowley for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x) 133c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 134c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pTileMgr->enqueue(x, y, &work); 135c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 136c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 137c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 138c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 139c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 140c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief FE handler for SwrStoreTiles. 141c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pContext - pointer to SWR context. 142c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 143c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 144c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pUserData - Pointer to user data passed back to callback. 145c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @todo This should go away when we switch this to use compute threading. 146c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyvoid ProcessStoreTiles( 147c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_CONTEXT *pContext, 148c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 149c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 150c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void *pUserData) 151c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 1522f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEProcessStoreTiles, pDC->drawId); 153c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley MacroTileMgr *pTileMgr = pDC->pTileMgr; 1540ff57446e3786243c6d752c91be2108595f2663eTim Rowley STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData; 155c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 156c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // queue a store to each macro tile 1570ff57446e3786243c6d752c91be2108595f2663eTim Rowley // compute macro tile bounds for the specified rect 1580ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM; 1590ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM; 1600ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM; 1610ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM; 162c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 163c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // store tiles 164c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BE_WORK work; 165c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley work.type = STORETILES; 166a907b7a5f74169906c04e9702f3c8fda99636c56Tim Rowley work.pfnWork = ProcessStoreTilesBE; 1670ff57446e3786243c6d752c91be2108595f2663eTim Rowley work.desc.storeTiles = *pDesc; 168c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1690ff57446e3786243c6d752c91be2108595f2663eTim Rowley for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y) 170c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1710ff57446e3786243c6d752c91be2108595f2663eTim Rowley for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x) 172c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 173c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pTileMgr->enqueue(x, y, &work); 174c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 175c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 176c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1772f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEProcessStoreTiles, 0); 178c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 179c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 180c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 181c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief FE handler for SwrInvalidateTiles. 182c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pContext - pointer to SWR context. 183c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 184c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 185c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pUserData - Pointer to user data passed back to callback. 186c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @todo This should go away when we switch this to use compute threading. 187e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowleyvoid ProcessDiscardInvalidateTiles( 188c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_CONTEXT *pContext, 189c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 190c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 191c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void *pUserData) 192c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 1932f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEProcessInvalidateTiles, pDC->drawId); 1940ff57446e3786243c6d752c91be2108595f2663eTim Rowley DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData; 195c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley MacroTileMgr *pTileMgr = pDC->pTileMgr; 196c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1970ff57446e3786243c6d752c91be2108595f2663eTim Rowley // compute macro tile bounds for the specified rect 1980ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM; 1990ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileXMax = (pDesc->rect.xmax / KNOB_MACROTILE_X_DIM) - 1; 2000ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileYMin = (pDesc->rect.ymin + KNOB_MACROTILE_Y_DIM - 1) / KNOB_MACROTILE_Y_DIM; 2010ff57446e3786243c6d752c91be2108595f2663eTim Rowley uint32_t macroTileYMax = (pDesc->rect.ymax / KNOB_MACROTILE_Y_DIM) - 1; 202e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowley 2030ff57446e3786243c6d752c91be2108595f2663eTim Rowley if (pDesc->fullTilesOnly == false) 204e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowley { 205e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowley // include partial tiles 2060ff57446e3786243c6d752c91be2108595f2663eTim Rowley macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM; 2070ff57446e3786243c6d752c91be2108595f2663eTim Rowley macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM; 2080ff57446e3786243c6d752c91be2108595f2663eTim Rowley macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM; 2090ff57446e3786243c6d752c91be2108595f2663eTim Rowley macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM; 210e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowley } 211c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 2120ff57446e3786243c6d752c91be2108595f2663eTim Rowley SWR_ASSERT(macroTileXMax <= KNOB_NUM_HOT_TILES_X); 2130ff57446e3786243c6d752c91be2108595f2663eTim Rowley SWR_ASSERT(macroTileYMax <= KNOB_NUM_HOT_TILES_Y); 214fee56fda6fd78f7fb10b0e8fced0a604ca43f0c0Tim Rowley 2150ff57446e3786243c6d752c91be2108595f2663eTim Rowley macroTileXMax = std::min<int32_t>(macroTileXMax, KNOB_NUM_HOT_TILES_X); 2160ff57446e3786243c6d752c91be2108595f2663eTim Rowley macroTileYMax = std::min<int32_t>(macroTileYMax, KNOB_NUM_HOT_TILES_Y); 217fee56fda6fd78f7fb10b0e8fced0a604ca43f0c0Tim Rowley 218c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // load tiles 219c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley BE_WORK work; 220e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowley work.type = DISCARDINVALIDATETILES; 221e374d2d24b0d755c9380da0eb33e4151b1ad145fTim Rowley work.pfnWork = ProcessDiscardInvalidateTilesBE; 2220ff57446e3786243c6d752c91be2108595f2663eTim Rowley work.desc.discardInvalidateTiles = *pDesc; 223c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 2240ff57446e3786243c6d752c91be2108595f2663eTim Rowley for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x) 225c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 2260ff57446e3786243c6d752c91be2108595f2663eTim Rowley for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y) 227c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 228c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pTileMgr->enqueue(x, y, &work); 229c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 230c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 231c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 2322f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEProcessInvalidateTiles, 0); 233c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 234c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 235c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 236c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Computes the number of primitives given the number of verts. 237c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param mode - primitive topology for draw operation. 238c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param numPrims - number of vertices or indices for draw. 239c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @todo Frontend needs to be refactored. This will go in appropriate place then. 240c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyuint32_t GetNumPrims( 241c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PRIMITIVE_TOPOLOGY mode, 242c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numPrims) 243c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 244c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (mode) 245c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 246c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST: return numPrims; 247c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_LIST: return numPrims / 3; 248c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_STRIP: return numPrims < 3 ? 0 : numPrims - 2; 249c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_FAN: return numPrims < 3 ? 0 : numPrims - 2; 250c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_DISC: return numPrims < 2 ? 0 : numPrims - 1; 251c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_QUAD_LIST: return numPrims / 4; 252c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_QUAD_STRIP: return numPrims < 4 ? 0 : (numPrims - 2) / 2; 253c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP: return numPrims < 2 ? 0 : numPrims - 1; 254c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST: return numPrims / 2; 255c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LOOP: return numPrims; 256c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_RECT_LIST: return numPrims / 3; 257c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST_ADJ: return numPrims / 4; 258c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LISTSTRIP_ADJ: return numPrims < 3 ? 0 : numPrims - 3; 259c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_LIST_ADJ: return numPrims / 6; 260c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_ADJ: return numPrims < 4 ? 0 : (numPrims / 2) - 2; 261c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 262c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_1: 263c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_2: 264c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_3: 265c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_4: 266c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_5: 267c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_6: 268c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_7: 269c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_8: 270c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_9: 271c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_10: 272c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_11: 273c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_12: 274c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_13: 275c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_14: 276c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_15: 277c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_16: 278c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_17: 279c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_18: 280c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_19: 281c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_20: 282c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_21: 283c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_22: 284c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_23: 285c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_24: 286c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_25: 287c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_26: 288c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_27: 289c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_28: 290c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_29: 291c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_30: 292c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_31: 293c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_32: 294c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return numPrims / (mode - TOP_PATCHLIST_BASE); 295c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 296c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POLYGON: 297c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST_BF: 298c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_CONT: 299c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_BF: 300c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_CONT_BF: 301c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_FAN_NOSTIPPLE: 302c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_REVERSE: 303c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_BASE: 304c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_UNKNOWN: 305c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Unsupported topology: %d", mode); 306c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return 0; 307c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 308c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 309c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return 0; 310c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 311c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 312c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 313c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Computes the number of verts given the number of primitives. 314c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param mode - primitive topology for draw operation. 315c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param numPrims - number of primitives for draw. 316c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyuint32_t GetNumVerts( 317c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PRIMITIVE_TOPOLOGY mode, 318c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numPrims) 319c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 320c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (mode) 321c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 322c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST: return numPrims; 323c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_LIST: return numPrims * 3; 324c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_STRIP: return numPrims ? numPrims + 2 : 0; 325c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_FAN: return numPrims ? numPrims + 2 : 0; 326c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_DISC: return numPrims ? numPrims + 1 : 0; 327c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_QUAD_LIST: return numPrims * 4; 328c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_QUAD_STRIP: return numPrims ? numPrims * 2 + 2 : 0; 329c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP: return numPrims ? numPrims + 1 : 0; 330c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST: return numPrims * 2; 331c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LOOP: return numPrims; 332c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_RECT_LIST: return numPrims * 3; 333c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST_ADJ: return numPrims * 4; 334c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LISTSTRIP_ADJ: return numPrims ? numPrims + 3 : 0; 335c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_LIST_ADJ: return numPrims * 6; 336c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_ADJ: return numPrims ? (numPrims + 2) * 2 : 0; 337c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 338c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_1: 339c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_2: 340c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_3: 341c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_4: 342c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_5: 343c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_6: 344c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_7: 345c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_8: 346c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_9: 347c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_10: 348c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_11: 349c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_12: 350c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_13: 351c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_14: 352c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_15: 353c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_16: 354c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_17: 355c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_18: 356c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_19: 357c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_20: 358c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_21: 359c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_22: 360c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_23: 361c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_24: 362c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_25: 363c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_26: 364c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_27: 365c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_28: 366c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_29: 367c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_30: 368c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_31: 369c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_32: 370c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return numPrims * (mode - TOP_PATCHLIST_BASE); 371c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 372c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POLYGON: 373c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST_BF: 374c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_CONT: 375c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_BF: 376c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_CONT_BF: 377c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_FAN_NOSTIPPLE: 378c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_REVERSE: 379c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_BASE: 380c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_UNKNOWN: 381c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Unsupported topology: %d", mode); 382c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return 0; 383c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 384c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 385c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return 0; 386c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 387c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 388c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 389c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Return number of verts per primitive. 390c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param topology - topology 391c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param includeAdjVerts - include adjacent verts in primitive vertices 392c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim RowleyINLINE uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts) 393c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 394c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numVerts = 0; 395c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (topology) 396c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 397c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST: 398c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST_BF: 399c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley numVerts = 1; 400c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 401c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST: 402c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP: 403c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST_ADJ: 404c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LOOP: 405c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_CONT: 406c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP_BF: 407c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LISTSTRIP_ADJ: 408c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley numVerts = 2; 409c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 410c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_LIST: 411c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_STRIP: 412c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_FAN: 413c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_LIST_ADJ: 414c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_ADJ: 415c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_REVERSE: 416c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_RECT_LIST: 417c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley numVerts = 3; 418c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 419c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_QUAD_LIST: 420c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_QUAD_STRIP: 421c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley numVerts = 4; 422c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 423c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_1: 424c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_2: 425c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_3: 426c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_4: 427c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_5: 428c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_6: 429c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_7: 430c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_8: 431c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_9: 432c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_10: 433c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_11: 434c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_12: 435c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_13: 436c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_14: 437c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_15: 438c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_16: 439c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_17: 440c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_18: 441c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_19: 442c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_20: 443c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_21: 444c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_22: 445c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_23: 446c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_24: 447c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_25: 448c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_26: 449c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_27: 450c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_28: 451c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_29: 452c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_30: 453c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_31: 454c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_PATCHLIST_32: 455c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley numVerts = topology - TOP_PATCHLIST_BASE; 456c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 457c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 458c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(false, "Unsupported topology: %d", topology); 459c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 460c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 461c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 462c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (includeAdjVerts) 463c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 464c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (topology) 465c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 466c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LISTSTRIP_ADJ: 467c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST_ADJ: numVerts = 4; break; 468c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_STRIP_ADJ: 469c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRI_LIST_ADJ: numVerts = 6; break; 470c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: break; 471c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 472c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 473c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 474c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return numVerts; 475c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 476c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 477c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 478c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Generate mask from remaining work. 479c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param numWorkItems - Number of items being worked on by a SIMD. 480c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic INLINE simdscalari GenerateMask(uint32_t numItemsRemaining) 481c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 482c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numActive = (numItemsRemaining >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : numItemsRemaining; 483c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0; 484c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return _simd_castps_si(vMask(mask)); 485c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 486c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 487c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 488c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief StreamOut - Streams vertex data out to SO buffers. 489c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// Generally, we are only streaming out a SIMDs worth of triangles. 490c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 491c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 492c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param numPrims - Number of prims to streamout (e.g. points, lines, tris) 493c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic void StreamOut( 494c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT* pDC, 495c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PA_STATE& pa, 496c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 497c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t* pPrimData, 498c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t streamIndex) 499c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 5002f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley SWR_CONTEXT *pContext = pDC->pContext; 5012f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley 5022f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEStreamout, pDC->drawId); 503c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 504c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const API_STATE& state = GetApiState(pDC); 505c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_STREAMOUT_STATE &soState = state.soState; 506c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 507c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false); 508c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 509c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each vertex. 510c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t primDataDwordVertexStride = (KNOB_NUM_ATTRIBUTES * sizeof(float) * 4) / sizeof(uint32_t); 511c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 512c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_STREAMOUT_CONTEXT soContext = { 0 }; 513c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 514c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Setup buffer state pointers. 515c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 516c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 517c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley soContext.pBuffer[i] = &state.soBuffer[i]; 518c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 519c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 520c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numPrims = pa.NumPrims(); 521c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t primIndex = 0; primIndex < numPrims; ++primIndex) 522c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 523c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DWORD slot = 0; 524c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t soMask = soState.streamMasks[streamIndex]; 525c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 526c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Write all entries into primitive data buffer for SOS. 527c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley while (_BitScanForward(&slot, soMask)) 528c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 529c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley __m128 attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide) 530c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t paSlot = slot + VERTEX_ATTRIB_START_SLOT; 531c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pa.AssembleSingle(paSlot, primIndex, attrib); 532c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 533c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Attribute offset is relative offset from start of vertex. 534c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Note that attributes start at slot 1 in the PA buffer. We need to write this 535c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // to prim data starting at slot 0. Which is why we do (slot - 1). 536c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Also note: GL works slightly differently, and needs slot 0 537c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t primDataAttribOffset = slot * sizeof(float) * 4 / sizeof(uint32_t); 538c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 539c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Store each vertex's attrib at appropriate locations in pPrimData buffer. 540c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t v = 0; v < soVertsPerPrim; ++v) 541c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 542c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t* pPrimDataAttrib = pPrimData + primDataAttribOffset + (v * primDataDwordVertexStride); 543c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 544c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley _mm_store_ps((float*)pPrimDataAttrib, attrib[v]); 545c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 546c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley soMask &= ~(1 << slot); 547c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 548c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 549c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Update pPrimData pointer 550c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley soContext.pPrimData = pPrimData; 551c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 552c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Call SOS 553c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr, "Trying to execute uninitialized streamout jit function."); 554c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley state.pfnSoFunc[streamIndex](soContext); 555c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 556c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 557c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Update SO write offset. The driver provides memory for the update. 558c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 559c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 560c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.soBuffer[i].pWriteOffset) 561c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 562c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *state.soBuffer[i].pWriteOffset = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t); 5637cf187d08ae6a64c959de1cdf9004f5fb2fd097aTim Rowley } 564c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 5657cf187d08ae6a64c959de1cdf9004f5fb2fd097aTim Rowley if (state.soBuffer[i].soWriteEnable) 5667cf187d08ae6a64c959de1cdf9004f5fb2fd097aTim Rowley { 5677cf187d08ae6a64c959de1cdf9004f5fb2fd097aTim Rowley pDC->dynState.SoWriteOffset[i] = soContext.pBuffer[i]->streamOffset * sizeof(uint32_t); 5687cf187d08ae6a64c959de1cdf9004f5fb2fd097aTim Rowley pDC->dynState.SoWriteOffsetDirty[i] = true; 569c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 570c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 571c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 5724e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(SoPrimStorageNeeded[streamIndex], soContext.numPrimStorageNeeded); 5734e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(SoNumPrimsWritten[streamIndex], soContext.numPrimsWritten); 574c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 5752f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEStreamout, 1); 576c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 577c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 578c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 579c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Computes number of invocations. The current index represents 580c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// the start of the SIMD. The max index represents how much work 5810ff57446e3786243c6d752c91be2108595f2663eTim Rowley/// items are remaining. If there is less then a SIMD's xmin of work 582c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// then return the remaining amount of work. 583c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param curIndex - The start index for the SIMD. 584c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param maxIndex - The last index for all work items. 585c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic INLINE uint32_t GetNumInvocations( 586c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t curIndex, 587c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t maxIndex) 588c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 589c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t remainder = (maxIndex - curIndex); 590c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return (remainder >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : remainder; 591c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 592c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 593c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 594c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Converts a streamId buffer to a cut buffer for the given stream id. 595c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// The geometry shader will loop over each active streamout buffer, assembling 596c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// primitives for the downstream stages. When multistream output is enabled, 597c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// the generated stream ID buffer from the GS needs to be converted to a cut 598c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// buffer for the primitive assembler. 599c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param stream - stream id to generate the cut buffer for 600c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pStreamIdBase - pointer to the stream ID buffer 601c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param numEmittedVerts - Number of total verts emitted by the GS 602c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pCutBuffer - output buffer to write cuts to 603c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyvoid ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t numEmittedVerts, uint8_t *pCutBuffer) 604c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 605c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(stream < MAX_SO_STREAMS); 606c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 607c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numInputBytes = (numEmittedVerts * 2 + 7) / 8; 608c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U); 609c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 610c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t b = 0; b < numOutputBytes; ++b) 611c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 612c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t curInputByte = pStreamIdBase[2*b]; 613c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t outByte = 0; 614c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 615c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 616c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if ((curInputByte & 0x3) != stream) 617c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 618c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley outByte |= (1 << i); 619c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 620c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley curInputByte >>= 2; 621c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 622c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 623c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley curInputByte = pStreamIdBase[2 * b + 1]; 624c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < 4; ++i) 625c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 626c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if ((curInputByte & 0x3) != stream) 627c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 628c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley outByte |= (1 << (i + 4)); 629c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 630c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley curInputByte >>= 2; 631c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 632c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 633c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *pCutBuffer++ = outByte; 634c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 635c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 636c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 637bef222db22365c2518110d30cd1227625a86195bTim RowleyTHREAD SWR_GS_CONTEXT tlsGsContext; 638bef222db22365c2518110d30cd1227625a86195bTim Rowley 639c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 640c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Implements GS stage. 641c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 642c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 643c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pa - The primitive assembly object. 644c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pGsOut - output stream for GS 645c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleytemplate < 64627cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasStreamOutT, 64727cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasRastT> 648c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic void GeometryShaderStage( 649c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 650c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 651c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PA_STATE& pa, 652c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pGsOut, 653c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pCutBuffer, 654c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pStreamCutBuffer, 655c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t* pSoPrimData, 656c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdscalari primID) 657c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 6582f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley SWR_CONTEXT *pContext = pDC->pContext; 6592f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley 6602f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEGeometryShader, pDC->drawId); 661c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 662c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const API_STATE& state = GetApiState(pDC); 663c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_GS_STATE* pState = &state.gsState; 664c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 665c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(pGsOut != nullptr, "GS output buffer should be initialized"); 666c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(pCutBuffer != nullptr, "GS output cut buffer should be initialized"); 667c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 668bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.pStream = (uint8_t*)pGsOut; 669bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.pCutOrStreamIdBuffer = (uint8_t*)pCutBuffer; 670bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.PrimitiveID = primID; 671c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 672c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true); 673c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvector attrib[MAX_ATTRIBUTES]; 674c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 675c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // assemble all attributes for the input primitive 676c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot) 677c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 678c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot; 679c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pa.Assemble(attribSlot, attrib); 680c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 681c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < numVertsPerPrim; ++i) 682c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 683bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.vert[i].attrib[attribSlot] = attrib[i]; 684c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 685c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 686c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 687c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // assemble position 688c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pa.Assemble(VERTEX_POSITION_SLOT, attrib); 689c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < numVertsPerPrim; ++i) 690c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 691bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.vert[i].attrib[VERTEX_POSITION_SLOT] = attrib[i]; 692c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 693c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 694c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t vertexStride = sizeof(simdvertex); 695c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t numSimdBatches = (state.gsState.maxNumVerts + KNOB_SIMD_WIDTH - 1) / KNOB_SIMD_WIDTH; 696c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t inputPrimStride = numSimdBatches * vertexStride; 697c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t instanceStride = inputPrimStride * KNOB_SIMD_WIDTH; 698c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t cutPrimStride; 699c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t cutInstanceStride; 700c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 701c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (pState->isSingleStream) 702c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 703c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley cutPrimStride = (state.gsState.maxNumVerts + 7) / 8; 704c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley cutInstanceStride = cutPrimStride * KNOB_SIMD_WIDTH; 705c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 706c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 707c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 708c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley cutPrimStride = AlignUp(state.gsState.maxNumVerts * 2 / 8, 4); 709c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley cutInstanceStride = cutPrimStride * KNOB_SIMD_WIDTH; 710c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 711c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 712c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // record valid prims from the frontend to avoid over binning the newly generated 713c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // prims from the GS 714c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numInputPrims = pa.NumPrims(); 715c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 716c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t instance = 0; instance < pState->instanceCount; ++instance) 717c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 718bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.InstanceID = instance; 719bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.mask = GenerateMask(numInputPrims); 720c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 721c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // execute the geometry shader 722bef222db22365c2518110d30cd1227625a86195bTim Rowley state.pfnGsFunc(GetPrivateState(pDC), &tlsGsContext); 723c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 724bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.pStream += instanceStride; 725bef222db22365c2518110d30cd1227625a86195bTim Rowley tlsGsContext.pCutOrStreamIdBuffer += cutInstanceStride; 726c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 727c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 728c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // set up new binner and state for the GS output topology 729c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PFN_PROCESS_PRIMS pfnClipFunc = nullptr; 73027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasRastT::value) 731c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 732c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (pState->outputTopology) 733c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 734c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles; break; 735c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_STRIP: pfnClipFunc = ClipLines; break; 736c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break; 737c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: SWR_ASSERT(false, "Unexpected GS output topology: %d", pState->outputTopology); 738c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 739c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 740c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 741c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // foreach input prim: 742c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // - setup a new PA based on the emitted verts for that prim 743c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // - loop over the new verts, calling PA to assemble each prim 744bef222db22365c2518110d30cd1227625a86195bTim Rowley uint32_t* pVertexCount = (uint32_t*)&tlsGsContext.vertexCount; 745c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t* pPrimitiveId = (uint32_t*)&primID; 746c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 747c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t totalPrimsGenerated = 0; 748c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t inputPrim = 0; inputPrim < numInputPrims; ++inputPrim) 749c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 750c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t* pInstanceBase = (uint8_t*)pGsOut + inputPrim * inputPrimStride; 751c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t* pCutBufferBase = (uint8_t*)pCutBuffer + inputPrim * cutPrimStride; 752c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t instance = 0; instance < pState->instanceCount; ++instance) 753c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 754c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numEmittedVerts = pVertexCount[inputPrim]; 755c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (numEmittedVerts == 0) 756c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 757c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley continue; 758c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 759c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 760c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t* pBase = pInstanceBase + instance * instanceStride; 761c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t* pCutBase = pCutBufferBase + instance * cutInstanceStride; 762c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 763efdaf5fa3e74ca4f3d9217dc6955aef6dc698a68Tim Rowley uint32_t numAttribs = state.feNumAttributes; 764c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 765c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream) 766c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 767c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley bool processCutVerts = false; 768c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 769c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint8_t* pCutBuffer = pCutBase; 770c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 771c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // assign default stream ID, only relevant when GS is outputting a single stream 772c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t streamID = 0; 773c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (pState->isSingleStream) 774c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 775c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley processCutVerts = true; 776c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley streamID = pState->singleStreamID; 777c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (streamID != stream) continue; 778c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 779c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 780c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 781c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // early exit if this stream is not enabled for streamout 78227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasStreamOutT::value && !state.soState.streamEnable[stream]) 783c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 784c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley continue; 785c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 786c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 787c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // multi-stream output, need to translate StreamID buffer to a cut buffer 788c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley ProcessStreamIdBuffer(stream, pCutBase, numEmittedVerts, (uint8_t*)pStreamCutBuffer); 789c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pCutBuffer = (uint8_t*)pStreamCutBuffer; 790c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley processCutVerts = false; 791c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 792c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 793c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PA_STATE_CUT gsPa(pDC, pBase, numEmittedVerts, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts); 794c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 795c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley while (gsPa.GetNextStreamOutput()) 796c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 797c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley do 798c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 799c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley bool assemble = gsPa.Assemble(VERTEX_POSITION_SLOT, attrib); 800c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 801c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (assemble) 802c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 803c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley totalPrimsGenerated += gsPa.NumPrims(); 804c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 80527cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasStreamOutT::value) 806c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 807c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley StreamOut(pDC, gsPa, workerId, pSoPrimData, stream); 808c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 809c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 81027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasRastT::value && state.soState.streamToRasterizer == stream) 811c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 812c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdscalari vPrimId; 813c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // pull primitiveID from the GS output if available 814c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.gsState.emitsPrimitiveID) 815c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 816c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvector primIdAttrib[3]; 817c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gsPa.Assemble(VERTEX_PRIMID_SLOT, primIdAttrib); 818c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vPrimId = _simd_castps_si(primIdAttrib[0].x); 819c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 820c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 821c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 822c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]); 823c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 824c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 82592621ac5d526e73469c43d524068315a81bbc869Tim Rowley // use viewport array index if GS declares it as an output attribute. Otherwise use index 0. 82692621ac5d526e73469c43d524068315a81bbc869Tim Rowley simdscalari vViewPortIdx; 82792621ac5d526e73469c43d524068315a81bbc869Tim Rowley if (state.gsState.emitsViewportArrayIndex) 82892621ac5d526e73469c43d524068315a81bbc869Tim Rowley { 82992621ac5d526e73469c43d524068315a81bbc869Tim Rowley simdvector vpiAttrib[3]; 83092621ac5d526e73469c43d524068315a81bbc869Tim Rowley gsPa.Assemble(VERTEX_VIEWPORT_ARRAY_INDEX_SLOT, vpiAttrib); 83192621ac5d526e73469c43d524068315a81bbc869Tim Rowley 83292621ac5d526e73469c43d524068315a81bbc869Tim Rowley // OOB indices => forced to zero. 83392621ac5d526e73469c43d524068315a81bbc869Tim Rowley simdscalari vNumViewports = _simd_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS); 834b311bdf92d65a0704faf8e89caa506d6d18c07e6Tim Rowley simdscalari vClearMask = _simd_cmplt_epi32(_simd_castps_si(vpiAttrib[0].x), vNumViewports); 835b311bdf92d65a0704faf8e89caa506d6d18c07e6Tim Rowley vpiAttrib[0].x = _simd_and_ps(_simd_castsi_ps(vClearMask), vpiAttrib[0].x); 83692621ac5d526e73469c43d524068315a81bbc869Tim Rowley 83792621ac5d526e73469c43d524068315a81bbc869Tim Rowley vViewPortIdx = _simd_castps_si(vpiAttrib[0].x); 83892621ac5d526e73469c43d524068315a81bbc869Tim Rowley } 83992621ac5d526e73469c43d524068315a81bbc869Tim Rowley else 84092621ac5d526e73469c43d524068315a81bbc869Tim Rowley { 84192621ac5d526e73469c43d524068315a81bbc869Tim Rowley vViewPortIdx = _simd_set1_epi32(0); 84292621ac5d526e73469c43d524068315a81bbc869Tim Rowley } 84392621ac5d526e73469c43d524068315a81bbc869Tim Rowley 84492621ac5d526e73469c43d524068315a81bbc869Tim Rowley pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewPortIdx); 845c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 846c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 847c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } while (gsPa.NextPrim()); 848c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 849c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 850c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 851c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 852c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 853c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // update GS pipeline stats 8544e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount); 8554e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated); 856cd8d840ce1a8abab490d1e8c7bafa2cbb5399c4fTim Rowley AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims)); 8572f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEGeometryShader, 1); 858c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 859c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 860c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 861c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Allocate GS buffers 862c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 863c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param state - API state 864c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param ppGsOut - pointer to GS output buffer allocation 865c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param ppCutBuffer - pointer to GS output cut buffer allocation 866c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state, void** ppGsOut, void** ppCutBuffer, 867c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void **ppStreamCutBuffer) 868c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 869ec9d4c4b372df773e4453c228b938e7c6c526c4cTim Rowley auto pArena = pDC->pArena; 870c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(pArena != nullptr); 871c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.gsState.gsEnable); 872c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // allocate arena space to hold GS output verts 873c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // @todo pack attribs 874c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // @todo support multiple streams 875c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t vertexStride = sizeof(simdvertex); 876c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t numSimdBatches = (state.gsState.maxNumVerts + KNOB_SIMD_WIDTH - 1) / KNOB_SIMD_WIDTH; 877c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t size = state.gsState.instanceCount * numSimdBatches * vertexStride * KNOB_SIMD_WIDTH; 878c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *ppGsOut = pArena->AllocAligned(size, KNOB_SIMD_WIDTH * sizeof(float)); 879c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 880c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t cutPrimStride = (state.gsState.maxNumVerts + 7) / 8; 881c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t streamIdPrimStride = AlignUp(state.gsState.maxNumVerts * 2 / 8, 4); 882c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t cutBufferSize = cutPrimStride * state.gsState.instanceCount * KNOB_SIMD_WIDTH; 883c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t streamIdSize = streamIdPrimStride * state.gsState.instanceCount * KNOB_SIMD_WIDTH; 884c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 885c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // allocate arena space to hold cut or streamid buffer, which is essentially a bitfield sized to the 886c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // maximum vertex output as defined by the GS state, per SIMD lane, per GS instance 887c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 888c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // allocate space for temporary per-stream cut buffer if multi-stream is enabled 889c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (state.gsState.isSingleStream) 890c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 891c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *ppCutBuffer = pArena->AllocAligned(cutBufferSize, KNOB_SIMD_WIDTH * sizeof(float)); 892c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *ppStreamCutBuffer = nullptr; 893c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 894c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 895c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 896c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *ppCutBuffer = pArena->AllocAligned(streamIdSize, KNOB_SIMD_WIDTH * sizeof(float)); 897c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *ppStreamCutBuffer = pArena->AllocAligned(cutBufferSize, KNOB_SIMD_WIDTH * sizeof(float)); 898c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 899c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 900c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 901c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 902c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 903c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Contains all data generated by the HS and passed to the 904c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// tessellator and DS. 905c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystruct TessellationThreadLocalData 906c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 907c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_HS_CONTEXT hsContext; 908c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley ScalarPatch patchData[KNOB_SIMD_WIDTH]; 909c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pTxCtx; 910c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley size_t tsCtxSize; 911c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 912c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdscalar* pDSOutput; 913c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley size_t numDSOutputVectors; 914c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley}; 915c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 916c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim RowleyTHREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr; 917c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 918c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 919c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Allocate tessellation data for this worker thread. 920c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim RowleyINLINE 921c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic void AllocateTessellationData(SWR_CONTEXT* pContext) 922c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 923c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley /// @TODO - Don't use thread local storage. Use Worker local storage instead. 924c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (gt_pTessellationThreadData == nullptr) 925c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 926c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gt_pTessellationThreadData = (TessellationThreadLocalData*) 9274997169779069692c0e64df2dfb89185ae48f193Tim Rowley AlignedMalloc(sizeof(TessellationThreadLocalData), 64); 928c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley memset(gt_pTessellationThreadData, 0, sizeof(*gt_pTessellationThreadData)); 929c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 930c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 931c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 932c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 933c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief Implements Tessellation Stages. 934c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 935c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. Even thread has a unique id. 936c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pa - The primitive assembly object. 937c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pGsOut - output stream for GS 938c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleytemplate < 93927cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasGeometryShaderT, 94027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasStreamOutT, 94127cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasRastT> 942c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleystatic void TessellationStages( 943c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 944c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 945c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PA_STATE& pa, 946c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pGsOut, 947c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pCutBuffer, 948c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pCutStreamBuffer, 949c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t* pSoPrimData, 950c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdscalari primID) 951c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 9522f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley SWR_CONTEXT *pContext = pDC->pContext; 953c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const API_STATE& state = GetApiState(pDC); 954c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const SWR_TS_STATE& tsState = state.tsState; 955c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 956c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(gt_pTessellationThreadData); 957c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 958c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley HANDLE tsCtx = TSInitCtx( 959c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.domain, 960c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.partitioning, 961c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.tsOutputTopology, 962c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gt_pTessellationThreadData->pTxCtx, 963c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gt_pTessellationThreadData->tsCtxSize); 964c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (tsCtx == nullptr) 965c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 9664997169779069692c0e64df2dfb89185ae48f193Tim Rowley gt_pTessellationThreadData->pTxCtx = AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64); 967c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsCtx = TSInitCtx( 968c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.domain, 969c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.partitioning, 970c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.tsOutputTopology, 971c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gt_pTessellationThreadData->pTxCtx, 972c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gt_pTessellationThreadData->tsCtxSize); 973c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 974c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(tsCtx); 975c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 976c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PFN_PROCESS_PRIMS pfnClipFunc = nullptr; 97727cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasRastT::value) 978c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 979c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (tsState.postDSTopology) 980c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 981c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles; break; 982c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_LINE_LIST: pfnClipFunc = ClipLines; break; 983c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break; 984c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: SWR_ASSERT(false, "Unexpected DS output topology: %d", tsState.postDSTopology); 985c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 986c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 987c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 988c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext; 989c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley hsContext.pCPout = gt_pTessellationThreadData->patchData; 990c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley hsContext.PrimitiveID = primID; 991c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 992c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, false); 993c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Max storage for one attribute for an entire simdprimitive 994c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvector simdattrib[MAX_NUM_VERTS_PER_PRIM]; 995c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 996c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // assemble all attributes for the input primitives 997c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t slot = 0; slot < tsState.numHsInputAttribs; ++slot) 998c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 999c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t attribSlot = VERTEX_ATTRIB_START_SLOT + slot; 1000c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pa.Assemble(attribSlot, simdattrib); 1001c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1002c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t i = 0; i < numVertsPerPrim; ++i) 1003c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1004c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley hsContext.vert[i].attrib[attribSlot] = simdattrib[i]; 1005c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1006c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1007c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1008c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#if defined(_DEBUG) 1009c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley memset(hsContext.pCPout, 0x90, sizeof(ScalarPatch) * KNOB_SIMD_WIDTH); 1010c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1011c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1012c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numPrims = pa.NumPrims(); 1013c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley hsContext.mask = GenerateMask(numPrims); 1014c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1015c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Run the HS 10162f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEHullShader, pDC->drawId); 1017c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley state.pfnHsFunc(GetPrivateState(pDC), &hsContext); 10182f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEHullShader, 0); 1019c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 10204e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(HsInvocations, numPrims); 1021c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1022c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const uint32_t* pPrimId = (const uint32_t*)&primID; 1023c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1024c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t p = 0; p < numPrims; ++p) 1025c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1026c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Run Tessellator 1027c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_TS_TESSELLATED_DATA tsData = { 0 }; 10282f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FETessellation, pDC->drawId); 1029c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData); 1030cd8d840ce1a8abab490d1e8c7bafa2cbb5399c4fTim Rowley AR_EVENT(TessPrimCount(1)); 10312f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FETessellation, 0); 1032c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1033c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (tsData.NumPrimitives == 0) 1034c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1035c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley continue; 1036c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1037c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(tsData.NumDomainPoints); 1038c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1039c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Allocate DS Output memory 1040c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH; 1041c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.numDsOutputAttribs; 1042c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors; 1043c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (requiredDSOutputVectors > gt_pTessellationThreadData->numDSOutputVectors) 1044c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 10454997169779069692c0e64df2dfb89185ae48f193Tim Rowley AlignedFree(gt_pTessellationThreadData->pDSOutput); 10464997169779069692c0e64df2dfb89185ae48f193Tim Rowley gt_pTessellationThreadData->pDSOutput = (simdscalar*)AlignedMalloc(requiredAllocSize, 64); 1047c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley gt_pTessellationThreadData->numDSOutputVectors = requiredDSOutputVectors; 1048c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1049c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(gt_pTessellationThreadData->pDSOutput); 1050c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(gt_pTessellationThreadData->numDSOutputVectors >= requiredDSOutputVectors); 1051c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1052c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#if defined(_DEBUG) 1053c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley memset(gt_pTessellationThreadData->pDSOutput, 0x90, requiredAllocSize); 1054c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1055c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1056c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Run Domain Shader 1057c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_DS_CONTEXT dsContext; 1058c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.PrimitiveID = pPrimId[p]; 1059c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.pCpIn = &hsContext.pCPout[p]; 1060c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU; 1061c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV; 1062c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput; 1063c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.vectorStride = requiredDSVectorInvocations; 1064c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1065c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t dsInvocations = 0; 1066c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1067c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (dsContext.vectorOffset = 0; dsContext.vectorOffset < requiredDSVectorInvocations; ++dsContext.vectorOffset) 1068c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1069c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations); 1070c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 10712f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEDomainShader, pDC->drawId); 1072c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley state.pfnDsFunc(GetPrivateState(pDC), &dsContext); 10732f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEDomainShader, 0); 1074c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1075c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsInvocations += KNOB_SIMD_WIDTH; 1076c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 10774e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints); 1078c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1079c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PA_TESS tessPa( 1080c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pDC, 1081c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.pOutputData, 1082c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley dsContext.vectorStride, 1083c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.numDsOutputAttribs, 1084c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsData.ppIndices, 1085c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsData.NumPrimitives, 1086c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tsState.postDSTopology); 1087c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1088c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley while (tessPa.HasWork()) 1089c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 109027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasGeometryShaderT::value) 1091c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1092c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GeometryShaderStage<HasStreamOutT, HasRastT>( 1093c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pDC, workerId, tessPa, pGsOut, pCutBuffer, pCutStreamBuffer, pSoPrimData, 1094c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley _simd_set1_epi32(dsContext.PrimitiveID)); 1095c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1096c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1097c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 109827cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasStreamOutT::value) 1099c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1100c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley StreamOut(pDC, tessPa, workerId, pSoPrimData, 0); 1101c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1102c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 110327cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasRastT::value) 1104c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1105c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvector prim[3]; // Only deal with triangles, lines, or points 11062f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEPAAssemble, pDC->drawId); 11072be7c3e780977678eb423e75cf063f92f7d03916Tim Rowley#if SWR_ENABLE_ASSERTS 11082be7c3e780977678eb423e75cf063f92f7d03916Tim Rowley bool assemble = 11092be7c3e780977678eb423e75cf063f92f7d03916Tim Rowley#endif 11102be7c3e780977678eb423e75cf063f92f7d03916Tim Rowley tessPa.Assemble(VERTEX_POSITION_SLOT, prim); 11112f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEPAAssemble, 1); 1112c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(assemble); 1113c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1114c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(pfnClipFunc); 1115c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pfnClipFunc(pDC, tessPa, workerId, prim, 111692621ac5d526e73469c43d524068315a81bbc869Tim Rowley GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), _simd_set1_epi32(0)); 1117c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1118c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1119c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1120c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley tessPa.NextPrim(); 1121c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1122c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } // while (tessPa.HasWork()) 1123c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } // for (uint32_t p = 0; p < numPrims; ++p) 1124c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1125c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley TSDestroyCtx(tsCtx); 1126c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 1127c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1128c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley////////////////////////////////////////////////////////////////////////// 1129c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @brief FE handler for SwrDraw. 1130c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @tparam IsIndexedT - Is indexed drawing enabled 1131c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @tparam HasTessellationT - Is tessellation enabled 113227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley/// @tparam HasGeometryShaderT::value - Is the geometry shader stage enabled 1133c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @tparam HasStreamOutT - Is stream-out enabled 1134c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @tparam HasRastT - Is rasterization enabled 1135c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pContext - pointer to SWR context. 1136c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pDC - pointer to draw context. 1137c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param workerId - thread's worker id. 1138c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley/// @param pUserData - Pointer to DRAW_WORK 1139c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleytemplate < 114027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename IsIndexedT, 1141c8835a592471a0238e296f6529b5dadb431cc622Bruce Cherniak typename IsCutIndexEnabledT, 114227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasTessellationT, 114327cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasGeometryShaderT, 114427cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasStreamOutT, 114527cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typename HasRastT> 1146c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowleyvoid ProcessDraw( 1147c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_CONTEXT *pContext, 1148c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_CONTEXT *pDC, 1149c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t workerId, 1150c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void *pUserData) 1151c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley{ 1152c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1153c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#if KNOB_ENABLE_TOSS_POINTS 1154c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (KNOB_TOSS_QUEUE_FE) 1155c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1156c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley return; 1157c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1158c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1159c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 11602f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEProcessDraw, pDC->drawId); 1161c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1162c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley DRAW_WORK& work = *(DRAW_WORK*)pUserData; 1163c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const API_STATE& state = GetApiState(pDC); 1164c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley __m256i vScale = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); 1165c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_VS_CONTEXT vsContext; 1166c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvertex vin; 1167c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1168c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley int indexSize = 0; 1169c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t endVertex = work.numVerts; 1170c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1171c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley const int32_t* pLastRequestedIndex = nullptr; 117227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (IsIndexedT::value) 1173c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1174c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley switch (work.type) 1175c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1176c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case R32_UINT: 1177c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley indexSize = sizeof(uint32_t); 1178c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pLastRequestedIndex = &(work.pIB[endVertex]); 1179c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 1180c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case R16_UINT: 1181c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley indexSize = sizeof(uint16_t); 1182c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // nasty address offset to last index 1183c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pLastRequestedIndex = (int32_t*)(&(((uint16_t*)work.pIB)[endVertex])); 1184c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 1185c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley case R8_UINT: 1186c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley indexSize = sizeof(uint8_t); 1187c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // nasty address offset to last index 1188c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pLastRequestedIndex = (int32_t*)(&(((uint8_t*)work.pIB)[endVertex])); 1189c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley break; 1190c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley default: 1191c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(0); 1192c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1193c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1194c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1195c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1196c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // No cuts, prune partial primitives. 1197c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley endVertex = GetNumVerts(state.topology, GetNumPrims(state.topology, work.numVerts)); 1198c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1199c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1200c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_FETCH_CONTEXT fetchInfo = { 0 }; 1201c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.pStreams = &state.vertexBuffers[0]; 1202c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.StartInstance = work.startInstance; 1203c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.StartVertex = 0; 1204c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1205c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vsContext.pVin = &vin; 1206c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 120727cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (IsIndexedT::value) 1208c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1209c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.BaseVertex = work.baseVertex; 1210c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1211c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // if the entire index buffer isn't being consumed, set the last index 1212c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // so that fetches < a SIMD wide will be masked off 1213542d7dec7b8748b164150bd0818e880ed31918e3Tim Rowley fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size); 1214c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (pLastRequestedIndex < fetchInfo.pLastIndex) 1215c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1216c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.pLastIndex = pLastRequestedIndex; 1217c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1218c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1219c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1220c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1221c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.StartVertex = work.startVertex; 1222c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1223c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 12244198520a82463aa392762ec156231b95bf2685acTim Rowley#if defined(KNOB_ENABLE_RDTSC) || defined(KNOB_ENABLE_AR) 1225c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t numPrims = GetNumPrims(state.topology, work.numVerts); 1226c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1227c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1228c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pGsOut = nullptr; 1229c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pCutBuffer = nullptr; 1230c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley void* pStreamCutBuffer = nullptr; 123127cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasGeometryShaderT::value) 1232c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1233c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley AllocateGsBuffers(pDC, state, &pGsOut, &pCutBuffer, &pStreamCutBuffer); 1234c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1235c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 123627cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasTessellationT::value) 1237c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1238c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.tsState.tsEnable == true); 1239c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.pfnHsFunc != nullptr); 1240c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.pfnDsFunc != nullptr); 1241c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1242c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley AllocateTessellationData(pContext); 1243c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1244c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1245c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1246c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.tsState.tsEnable == false); 1247c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.pfnHsFunc == nullptr); 1248c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(state.pfnDsFunc == nullptr); 1249c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1250c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1251c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // allocate space for streamout input prim data 1252c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t* pSoPrimData = nullptr; 125327cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasStreamOutT::value) 1254c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1255c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pSoPrimData = (uint32_t*)pDC->pArena->AllocAligned(4096, 16); 1256c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1257c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1258c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // choose primitive assembler 1259c8835a592471a0238e296f6529b5dadb431cc622Bruce Cherniak PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts); 1260c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley PA_STATE& pa = paFactory.GetPA(); 1261c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1262c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley /// @todo: temporarily move instance loop in the FE to ensure SO ordering 1263c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++) 1264c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1265c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdscalari vIndex; 1266c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley uint32_t i = 0; 1267c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 126827cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (IsIndexedT::value) 1269c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1270c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.pIndices = work.pIB; 1271c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1272c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1273c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1274c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vIndex = _simd_add_epi32(_simd_set1_epi32(work.startVertexID), vScale); 1275c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.pIndices = (const int32_t*)&vIndex; 1276c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1277c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1278c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley fetchInfo.CurInstance = instanceNum; 1279c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vsContext.InstanceID = instanceNum; 1280c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1281c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley while (pa.HasWork()) 1282c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1283c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // PaGetNextVsOutput currently has the side effect of updating some PA state machine state. 1284c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // So we need to keep this outside of (i < endVertex) check. 1285c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdmask* pvCutIndices = nullptr; 128627cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (IsIndexedT::value) 1287c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1288c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pvCutIndices = &pa.GetNextVsIndices(); 1289c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1290c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1291c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvertex& vout = pa.GetNextVsOutput(); 1292c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vsContext.pVout = &vout; 1293c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1294c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (i < endVertex) 1295c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1296c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1297c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // 1. Execute FS/VS for a single SIMD. 12982f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEFetchShader, pDC->drawId); 1299c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley state.pfnFetchFunc(fetchInfo, vin); 13002f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEFetchShader, 0); 1301c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1302c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // forward fetch generated vertex IDs to the vertex shader 1303c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vsContext.VertexID = fetchInfo.VertexID; 1304c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1305c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // Setup active mask for vertex shader. 1306c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vsContext.mask = GenerateMask(endVertex - i); 1307c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1308c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // forward cut mask to the PA 130927cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (IsIndexedT::value) 1310c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1311c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley *pvCutIndices = _simd_movemask_ps(_simd_castsi_ps(fetchInfo.CutMask)); 1312c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1313c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 13144e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(IaVertices, GetNumInvocations(i, endVertex)); 1315c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1316c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#if KNOB_ENABLE_TOSS_POINTS 1317c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (!KNOB_TOSS_FETCH) 1318c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1319c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 13202f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEVertexShader, pDC->drawId); 1321c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley state.pfnVertexFunc(GetPrivateState(pDC), &vsContext); 13222f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEVertexShader, 0); 1323c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 13244e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(VsInvocations, GetNumInvocations(i, endVertex)); 1325c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1326c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1327c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1328c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // 2. Assemble primitives given the last two SIMD. 1329c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley do 1330c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1331c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley simdvector prim[MAX_NUM_VERTS_PER_PRIM]; 1332c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // PaAssemble returns false if there is not enough verts to assemble. 13332f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_BEGIN(FEPAAssemble, pDC->drawId); 1334c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley bool assemble = pa.Assemble(VERTEX_POSITION_SLOT, prim); 13352f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEPAAssemble, 1); 1336c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1337c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#if KNOB_ENABLE_TOSS_POINTS 1338c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (!KNOB_TOSS_FETCH) 1339c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1340c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1341c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#if KNOB_ENABLE_TOSS_POINTS 1342c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (!KNOB_TOSS_VS) 1343c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley#endif 1344c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1345c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley if (assemble) 1346c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 13474e8763cb0904c30d1962cf5ad52fe3a87be7b4bdTim Rowley UPDATE_STAT_FE(IaPrimitives, pa.NumPrims()); 1348c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 134927cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasTessellationT::value) 1350c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1351c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>( 1352c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID)); 1353c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 135427cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley else if (HasGeometryShaderT::value) 1355c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1356c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley GeometryShaderStage<HasStreamOutT, HasRastT>( 1357c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pDC, workerId, pa, pGsOut, pCutBuffer, pStreamCutBuffer, pSoPrimData, pa.GetPrimID(work.startPrimID)); 1358c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1359c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1360c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1361c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley // If streamout is enabled then stream vertices out to memory. 136227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasStreamOutT::value) 1363c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1364c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley StreamOut(pDC, pa, workerId, pSoPrimData, 0); 1365c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1366c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 136727cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (HasRastT::value) 1368c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1369c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley SWR_ASSERT(pDC->pState->pfnProcessPrims); 1370c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim, 137192621ac5d526e73469c43d524068315a81bbc869Tim Rowley GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), _simd_set1_epi32(0)); 1372c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1373c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1374c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1375c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1376c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1377c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } while (pa.NextPrim()); 1378c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 1379c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley i += KNOB_SIMD_WIDTH; 138027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley if (IsIndexedT::value) 1381c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1382542d7dec7b8748b164150bd0818e880ed31918e3Tim Rowley fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize); 1383c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1384c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley else 1385c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley { 1386c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley vIndex = _simd_add_epi32(vIndex, _simd_set1_epi32(KNOB_SIMD_WIDTH)); 1387c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1388c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1389c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley pa.Reset(); 1390c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley } 1391c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley 13925912552947cc84e794543589388d18aaf4ff6c86Tim Rowley 13932f86a9577adf5c43e892f899224d0f73ff1d37c2Tim Rowley AR_END(FEProcessDraw, numPrims * work.numInstances); 1394c6e67f5a9373e916a8d2333585cb5787aa5f7bb7Tim Rowley} 139527cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley 139627cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowleystruct FEDrawChooser 139727cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley{ 139827cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley typedef PFN_FE_WORK_FUNC FuncType; 139927cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley 140027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley template <typename... ArgsB> 140127cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley static FuncType GetFunc() 140227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley { 140327cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley return ProcessDraw<ArgsB...>; 140427cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley } 140527cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley}; 140627cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley 140727cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley 140827cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley// Selector for correct templated Draw front-end function 140927cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim RowleyPFN_FE_WORK_FUNC GetProcessDrawFunc( 141027cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley bool IsIndexed, 1411c8835a592471a0238e296f6529b5dadb431cc622Bruce Cherniak bool IsCutIndexEnabled, 141227cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley bool HasTessellation, 141327cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley bool HasGeometryShader, 141427cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley bool HasStreamOut, 141527cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley bool HasRasterization) 141627cc5924ea95d5a00ddb9d5c6ffb8853c92b1f4eTim Rowley{ 1417c8835a592471a0238e296f6529b5dadb431cc622Bruce Cherniak return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization); 1418ada27b503eab3c53d9ec1bca2cef48c5353e81f9Tim Rowley}