/*
 * Copyright 2011 Christoph Bumiller
 * Copyright 2015 Samuel Pitoiset
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#define NV50_PUSH_EXPLICIT_SPACE_CHECKING

#include "nv50/nv50_context.h"
#include "nv50/nv50_query_hw.h"
#include "nv50/nv50_query_hw_metric.h"
#include "nv50/nv50_query_hw_sm.h"
#include "nv_object.xml.h"

#define NV50_HW_QUERY_STATE_READY   0
#define NV50_HW_QUERY_STATE_ACTIVE  1
#define NV50_HW_QUERY_STATE_ENDED   2
#define NV50_HW_QUERY_STATE_FLUSHED 3

/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
 * (since we use only a single GPU channel per screen) will not work properly.
 *
 * The first is not that big of an issue because OpenGL does not allow nested
 * queries anyway.
 */

#define NV50_HW_QUERY_ALLOC_SPACE 256
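
/* Each query owns a small slice of GART memory from the screen's
 * suballocator; the GPU writes its reports there and the CPU reads them back
 * through a persistent mapping (hq->data).  Rough lifecycle of that storage:
 *
 *    nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);  create
 *    ...                                   GPU writes reports via QUERY_GET
 *    nv50_hw_query_allocate(nv50, q, 0);                          release
 *
 * Releasing with size == 0 defers nouveau_mm_free() to a fence when the GPU
 * may still be writing to the old buffer.
 */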

bool
nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q,
                       int size)
{
   struct nv50_screen *screen = nv50->screen;
   struct nv50_hw_query *hq = nv50_hw_query(q);
   int ret;

   if (hq->bo) {
      nouveau_bo_ref(NULL, &hq->bo);
      if (hq->mm) {
         if (hq->state == NV50_HW_QUERY_STATE_READY)
            nouveau_mm_free(hq->mm);
         else
            nouveau_fence_work(screen->base.fence.current,
                               nouveau_mm_free_work, hq->mm);
      }
   }
   if (size) {
      hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size,
                                   &hq->bo, &hq->base_offset);
      if (!hq->bo)
         return false;
      hq->offset = hq->base_offset;

      ret = nouveau_bo_map(hq->bo, 0, screen->base.client);
      if (ret) {
         nv50_hw_query_allocate(nv50, q, 0);
         return false;
      }
      hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset);
   }
   return true;
}

static void
nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q,
                  unsigned offset, uint32_t get)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   offset += hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
   BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, get);
}

static inline void
nv50_hw_query_update(struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->is64bit) {
      if (nouveau_fence_signalled(hq->fence))
         hq->state = NV50_HW_QUERY_STATE_READY;
   } else {
      if (hq->data[0] == hq->sequence)
         hq->state = NV50_HW_QUERY_STATE_READY;
   }
}

static void
nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->funcs && hq->funcs->destroy_query) {
      hq->funcs->destroy_query(nv50, hq);
      return;
   }

   nv50_hw_query_allocate(nv50, q, 0);
   nouveau_fence_ref(NULL, &hq->fence);
   FREE(hq);
}
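
/* A 32-bit occlusion query slot holds two {u32 sequence, u32 count, u64 time}
 * reports: one written at end_query (offset 0x00) and one describing the
 * situation at begin_query (offset 0x10).  For non-nested queries the second
 * report is initialized on the CPU below (sequence + 1, count 0); for nested
 * ones it is written with QUERY_GET.  nv50_hw_get_query_result() then returns
 * data[1] - data[5], the samples passed between begin and end.
 */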

static boolean
nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->funcs && hq->funcs->begin_query)
      return hq->funcs->begin_query(nv50, hq);

   /* For occlusion queries we have to change the storage, because a previous
    * query might set the initial render condition to false even *after* we re-
    * initialized it to true.
    */
   if (hq->rotate) {
      hq->offset += hq->rotate;
      hq->data += hq->rotate / sizeof(*hq->data);
      if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
         nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);

      /* XXX: can we do this with the GPU, and sync with respect to a previous
       * query ?
       */
      hq->data[0] = hq->sequence; /* initialize sequence */
      hq->data[1] = 1; /* initial render condition = true */
      hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */
      hq->data[5] = 0;
   }
   if (!hq->is64bit)
      hq->data[0] = hq->sequence++; /* the previously used one */

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      hq->nesting = nv50->screen->num_occlusion_queries_active++;
      if (hq->nesting) {
         nv50_hw_query_get(push, q, 0x10, 0x0100f002);
      } else {
         PUSH_SPACE(push, 4);
         BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
         PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 1);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0x10, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x20, 0x05805002);
      nv50_hw_query_get(push, q, 0x30, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0x10, 0x00005002);
      break;
   default:
      assert(0);
      return false;
   }
   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
   return true;
}
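
/* Counters are sampled twice: nv50_hw_begin_query() stored the running
 * values at the "start" offsets (0x10 and up), and ending the query stores
 * them again at offset 0.  nv50_hw_get_query_result() reports the
 * difference, e.g. for PRIMITIVES_GENERATED:
 *
 *    res64[0] = data64[0] - data64[2];
 */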

static void
nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nv50_hw_query *hq = nv50_hw_query(q);

   if (hq->funcs && hq->funcs->end_query) {
      hq->funcs->end_query(nv50, hq);
      return;
   }

   hq->state = NV50_HW_QUERY_STATE_ENDED;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      nv50_hw_query_get(push, q, 0, 0x0100f002);
      if (--nv50->screen->num_occlusion_queries_active == 0) {
         PUSH_SPACE(push, 2);
         BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
         PUSH_DATA (push, 0);
      }
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      nv50_hw_query_get(push, q, 0, 0x06805002);
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      nv50_hw_query_get(push, q, 0, 0x05805002);
      break;
   case PIPE_QUERY_SO_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x05805002);
      nv50_hw_query_get(push, q, 0x10, 0x06805002);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
      nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
      nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
      nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
      nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
      nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
      break;
   case PIPE_QUERY_TIMESTAMP:
      hq->sequence++;
      /* fall through */
   case PIPE_QUERY_TIME_ELAPSED:
      nv50_hw_query_get(push, q, 0, 0x00005002);
      break;
   case PIPE_QUERY_GPU_FINISHED:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      hq->sequence++;
      nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* This query is not issued on GPU because disjoint is forced to false */
      hq->state = NV50_HW_QUERY_STATE_READY;
      break;
   default:
      assert(0);
      break;
   }
   if (hq->is64bit)
      nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
}

static boolean
nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
                         boolean wait, union pipe_query_result *result)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   uint64_t *res64 = (uint64_t *)result;
   uint32_t *res32 = (uint32_t *)result;
   uint8_t *res8 = (uint8_t *)result;
   uint64_t *data64 = (uint64_t *)hq->data;
   int i;

   if (hq->funcs && hq->funcs->get_query_result)
      return hq->funcs->get_query_result(nv50, hq, wait, result);

   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nv50_hw_query_update(q);

   if (hq->state != NV50_HW_QUERY_STATE_READY) {
      if (!wait) {
         /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
         if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
            hq->state = NV50_HW_QUERY_STATE_FLUSHED;
            PUSH_KICK(nv50->base.pushbuf);
         }
         return false;
      }
      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
         return false;
   }
   hq->state = NV50_HW_QUERY_STATE_READY;

   switch (q->type) {
   case PIPE_QUERY_GPU_FINISHED:
      res8[0] = true;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
      res64[0] = hq->data[1] - hq->data[5];
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      res8[0] = hq->data[1] != hq->data[5];
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
   case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
      res64[0] = data64[0] - data64[2];
      break;
   case PIPE_QUERY_SO_STATISTICS:
      res64[0] = data64[0] - data64[4];
      res64[1] = data64[2] - data64[6];
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 8; ++i)
         res64[i] = data64[i * 2] - data64[16 + i * 2];
      break;
   case PIPE_QUERY_TIMESTAMP:
      res64[0] = data64[1];
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      res64[0] = 1000000000;
      res8[8] = false;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      res64[0] = data64[1] - data64[3];
      break;
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      res32[0] = hq->data[1];
      break;
   default:
      assert(0);
      return false;
   }

   return true;
}

static const struct nv50_query_funcs hw_query_funcs = {
   .destroy_query = nv50_hw_destroy_query,
   .begin_query = nv50_hw_begin_query,
   .end_query = nv50_hw_end_query,
   .get_query_result = nv50_hw_get_query_result,
};

struct nv50_query *
nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned index)
{
   struct nv50_hw_query *hq;
   struct nv50_query *q;

   hq = nv50_hw_sm_create_query(nv50, type);
   if (hq) {
      hq->base.funcs = &hw_query_funcs;
      return (struct nv50_query *)hq;
   }

   hq = nv50_hw_metric_create_query(nv50, type);
   if (hq) {
      hq->base.funcs = &hw_query_funcs;
      return (struct nv50_query *)hq;
   }

   hq = CALLOC_STRUCT(nv50_hw_query);
   if (!hq)
      return NULL;

   q = &hq->base;
   q->funcs = &hw_query_funcs;
   q->type = type;

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      hq->rotate = 32;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_PIPELINE_STATISTICS:
      hq->is64bit = true;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
   case PIPE_QUERY_GPU_FINISHED:
   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
      break;
   default:
      debug_printf("invalid query type: %u\n", type);
      FREE(q);
      return NULL;
   }

   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
      FREE(hq);
      return NULL;
   }

   if (hq->rotate) {
      /* we advance before query_begin ! */
      hq->offset -= hq->rotate;
      hq->data -= hq->rotate / sizeof(*hq->data);
   }

   return q;
}

int
nv50_hw_get_driver_query_info(struct nv50_screen *screen, unsigned id,
                              struct pipe_driver_query_info *info)
{
   int num_hw_sm_queries = 0, num_hw_metric_queries = 0;

   num_hw_sm_queries = nv50_hw_sm_get_driver_query_info(screen, 0, NULL);
   num_hw_metric_queries =
      nv50_hw_metric_get_driver_query_info(screen, 0, NULL);

   if (!info)
      return num_hw_sm_queries + num_hw_metric_queries;

   if (id < num_hw_sm_queries)
      return nv50_hw_sm_get_driver_query_info(screen, id, info);

   return nv50_hw_metric_get_driver_query_info(screen,
                                               id - num_hw_sm_queries, info);
}
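
/* The helpers below feed query state back into the command stream:
 * nv50_hw_query_pushbuf_submit() reads a result word through the CPU
 * mapping (waiting on the BO if it is not ready yet) and pushes it as a
 * method argument, while nv84_hw_query_fifo_wait() emits a semaphore
 * acquire so that the channel itself stalls until the query's sequence
 * number has been written, e.g. for a waiting render condition on nv84+.
 */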

void
nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                             struct nv50_query *q, unsigned result_offset)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);

   nv50_hw_query_update(q);
   if (hq->state != NV50_HW_QUERY_STATE_READY)
      nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client);
   hq->state = NV50_HW_QUERY_STATE_READY;

   BEGIN_NV04(push, SUBC_3D(method), 1);
   PUSH_DATA (push, hq->data[result_offset / 4]);
}

void
nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
{
   struct nv50_hw_query *hq = nv50_hw_query(q);
   unsigned offset = hq->offset;

   PUSH_SPACE(push, 5);
   PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
   BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
   PUSH_DATAh(push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->bo->offset + offset);
   PUSH_DATA (push, hq->sequence);
   PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
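
/* For reference, the static entry points above are reached through the
 * funcs table rather than called directly; a query's life roughly looks
 * like:
 *
 *    q = nv50_hw_create_query(nv50, PIPE_QUERY_OCCLUSION_COUNTER, 0);
 *    q->funcs->begin_query(nv50, q);
 *    ... draw calls ...
 *    q->funcs->end_query(nv50, q);
 *    q->funcs->get_query_result(nv50, q, true, &result);
 *    q->funcs->destroy_query(nv50, q);
 */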