1/*
2 * Copyright (C) 2011 Francisco Jerez.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 */
26
27#include <fcntl.h>
28#include <stdio.h>
29#include <sys/stat.h>
30#include <inttypes.h>
31#include "pipe/p_state.h"
32#include "pipe/p_context.h"
33#include "pipe/p_screen.h"
34#include "pipe/p_defines.h"
35#include "pipe/p_shader_tokens.h"
36#include "util/u_memory.h"
37#include "util/u_inlines.h"
38#include "util/u_sampler.h"
39#include "util/u_format.h"
40#include "tgsi/tgsi_text.h"
41#include "pipe-loader/pipe_loader.h"
42
/* Capacity of every per-slot array kept in struct context. */
#define MAX_RESOURCES 4

/*
 * Bookkeeping shared by all tests in this file.  The init_*() helpers fill
 * the members in and the matching destroy_*() helpers release them again.
 */
struct context {
        /* Device handle filled in by pipe_loader_probe(). */
        struct pipe_loader_device *dev;
        /* Screen created from `dev`. */
        struct pipe_screen *screen;
        /* Context created from `screen`. */
        struct pipe_context *pipe;
        /* Compute state object returned by create_compute_state(). */
        void *hwcs;
        /* Sampler state objects returned by create_sampler_state(). */
        void *hwsmp[MAX_RESOURCES];
        /* Resources created by init_tex(), indexed by slot. */
        struct pipe_resource *tex[MAX_RESOURCES];
        /* Per-slot "rw" flag recorded by init_tex(). */
        bool tex_rw[MAX_RESOURCES];
        /* Sampler views created by init_sampler_views(). */
        struct pipe_sampler_view *view[MAX_RESOURCES];
        /* Surfaces created by init_compute_resources(). */
        struct pipe_surface *surf[MAX_RESOURCES];
};
56
/*
 * Query compute capability `c` through ctx->screen->get_compute_param()
 * and print the values it stored as a list of unsigned 64-bit integers,
 * prefixed with the capability's name.
 *
 * A `struct context *ctx` must be in scope where the macro is expanded.
 * The first argument `p` is not used; it is kept so existing call sites
 * keep compiling.
 *
 * The return value of get_compute_param() is used as a byte count.  A
 * non-positive count prints an empty list, and the number of values
 * printed never exceeds the capacity of the local buffer, so the macro
 * itself cannot read past the end of it.
 */
#define DUMP_COMPUTE_PARAM(p, c) do {                                   \
                uint64_t dcp_val[4] = { 0 };                            \
                const size_t dcp_cap =                                  \
                        sizeof(dcp_val) / sizeof(*dcp_val);             \
                size_t dcp_i, dcp_cnt;                                  \
                int dcp_ret;                                            \
                                                                        \
                dcp_ret = ctx->screen->get_compute_param(ctx->screen, c, dcp_val); \
                dcp_cnt = (dcp_ret > 0 ?                                \
                           (size_t)dcp_ret / sizeof(*dcp_val) : 0);     \
                if (dcp_cnt > dcp_cap)                                  \
                        dcp_cnt = dcp_cap;                              \
                                                                        \
                printf("%s: {", #c);                                    \
                                                                        \
                for (dcp_i = 0; dcp_i < dcp_cnt; ++dcp_i)               \
                        printf(" %"PRIu64, dcp_val[dcp_i]);             \
                                                                        \
                printf(" }\n");                                         \
        } while (0)
69
70static void init_ctx(struct context *ctx)
71{
72        int ret;
73
74        ret = pipe_loader_probe(&ctx->dev, 1);
75        assert(ret);
76
77        ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR);
78        assert(ctx->screen);
79
80        ctx->pipe = ctx->screen->context_create(ctx->screen, NULL);
81        assert(ctx->pipe);
82
83        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);
84        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);
85        DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
86}
87
88static void destroy_ctx(struct context *ctx)
89{
90        ctx->pipe->destroy(ctx->pipe);
91        ctx->screen->destroy(ctx->screen);
92        pipe_loader_release(&ctx->dev, 1);
93        FREE(ctx);
94}
95
/*
 * Run a shader source through the C preprocessor.
 *
 * A small header defining RGLOBAL, RLOCAL, RPRIVATE and RINPUT is
 * prepended to `src`, and the result is piped into
 * "cpp -P -nostdinc -undef <defs>", whose output is redirected into a
 * temporary file that is then read back.
 *
 * ctx:  unused, kept for interface compatibility.
 * src:  NUL-terminated source text.
 * defs: extra command line arguments for cpp, or NULL for none.  The
 *       string is pasted into a shell command unescaped.
 *
 * Returns a NUL-terminated buffer obtained from malloc(); the caller
 * releases it with free().  As elsewhere in this file, failures are
 * reported through assert().
 */
static char *
preprocess_prog(struct context *ctx, const char *src, const char *defs)
{
        static const char header[] =
                "#define RGLOBAL        RES[32767]\n"
                "#define RLOCAL         RES[32766]\n"
                "#define RPRIVATE       RES[32765]\n"
                "#define RINPUT         RES[32764]\n";
        const size_t header_len = sizeof(header) - 1;
        const size_t src_len = strlen(src);
        char cmd[512];
        char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";
        struct stat st;
        size_t written;
        ssize_t got;
        char *buf;
        FILE *p;
        int fd, ret;

        (void)ctx;

        /* Open a temporary file */
        fd = mkstemp(tmp);
        assert(fd >= 0);

        /* A truncated command line must not be handed to the shell. */
        ret = snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",
                       defs ? defs : "", tmp);
        assert(ret > 0 && (size_t)ret < sizeof(cmd));

        /* Preprocess */
        p = popen(cmd, "w");
        assert(p);

        /* Counting bytes (size 1) keeps the check valid for empty input. */
        written = fwrite(header, 1, header_len, p);
        assert(written == header_len);
        written = fwrite(src, 1, src_len, p);
        assert(written == src_len);

        ret = pclose(p);
        assert(!ret);

        /* Read back; fd still points at offset 0 of the file cpp filled. */
        ret = fstat(fd, &st);
        assert(!ret);

        buf = malloc(st.st_size + 1);
        assert(buf);

        got = read(fd, buf, st.st_size);
        assert(got == st.st_size);
        buf[got] = 0;

        /* Clean up */
        close(fd);
        unlink(tmp);

        return buf;
}
139
140static void init_prog(struct context *ctx, unsigned local_sz,
141                      unsigned private_sz, unsigned input_sz,
142                      const char *src, const char *defs)
143{
144        struct pipe_context *pipe = ctx->pipe;
145        struct tgsi_token prog[1024];
146        struct pipe_compute_state cs = {
147                .prog = prog,
148                .req_local_mem = local_sz,
149                .req_private_mem = private_sz,
150                .req_input_mem = input_sz
151        };
152        char *psrc = preprocess_prog(ctx, src, defs);
153        int ret;
154
155        ret = tgsi_text_translate(psrc, prog, Elements(prog));
156        assert(ret);
157        free(psrc);
158
159        ctx->hwcs = pipe->create_compute_state(pipe, &cs);
160        assert(ctx->hwcs);
161
162        pipe->bind_compute_state(pipe, ctx->hwcs);
163}
164
165static void destroy_prog(struct context *ctx)
166{
167        struct pipe_context *pipe = ctx->pipe;
168
169        pipe->delete_compute_state(pipe, ctx->hwcs);
170        ctx->hwcs = NULL;
171}
172
173static void init_tex(struct context *ctx, int slot,
174                     enum pipe_texture_target target, bool rw,
175                     enum pipe_format format, int w, int h,
176                     void (*init)(void *, int, int, int))
177{
178        struct pipe_context *pipe = ctx->pipe;
179        struct pipe_resource **tex = &ctx->tex[slot];
180        struct pipe_resource ttex = {
181                .target = target,
182                .format = format,
183                .width0 = w,
184                .height0 = h,
185                .depth0 = 1,
186                .array_size = 1,
187                .bind = (PIPE_BIND_SAMPLER_VIEW |
188                         PIPE_BIND_COMPUTE_RESOURCE |
189                         PIPE_BIND_GLOBAL)
190        };
191        int dx = util_format_get_blocksize(format);
192        int dy = util_format_get_stride(format, w);
193        int nx = (target == PIPE_BUFFER ? (w / dx) :
194                  util_format_get_nblocksx(format, w));
195        int ny = (target == PIPE_BUFFER ? 1 :
196                  util_format_get_nblocksy(format, h));
197        struct pipe_transfer *xfer;
198        char *map;
199        int x, y;
200
201        *tex = ctx->screen->resource_create(ctx->screen, &ttex);
202        assert(*tex);
203
204        xfer = pipe->get_transfer(pipe, *tex, 0, PIPE_TRANSFER_WRITE,
205                                  &(struct pipe_box) { .width = w,
206                                                  .height = h,
207                                                  .depth = 1 });
208        assert(xfer);
209
210        map = pipe->transfer_map(pipe, xfer);
211        assert(map);
212
213        for (y = 0; y < ny; ++y) {
214                for (x = 0; x < nx; ++x) {
215                        init(map + y * dy + x * dx, slot, x, y);
216                }
217        }
218
219        pipe->transfer_unmap(pipe, xfer);
220        pipe->transfer_destroy(pipe, xfer);
221
222        ctx->tex_rw[slot] = rw;
223}
224
/*
 * Default comparison used by check_tex(): true when the first `sz` bytes
 * at `x` and `y` are identical.
 */
static bool default_check(void *x, void *y, int sz)
{
        return memcmp(x, y, sz) == 0;
}
228
229static void check_tex(struct context *ctx, int slot,
230                      void (*expect)(void *, int, int, int),
231                      bool (*check)(void *, void *, int))
232{
233        struct pipe_context *pipe = ctx->pipe;
234        struct pipe_resource *tex = ctx->tex[slot];
235        int dx = util_format_get_blocksize(tex->format);
236        int dy = util_format_get_stride(tex->format, tex->width0);
237        int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :
238                  util_format_get_nblocksx(tex->format, tex->width0));
239        int ny = (tex->target == PIPE_BUFFER ? 1 :
240                  util_format_get_nblocksy(tex->format, tex->height0));
241        struct pipe_transfer *xfer;
242        char *map;
243        int x, y, i;
244        int err = 0;
245
246        if (!check)
247                check = default_check;
248
249        xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ,
250                                  &(struct pipe_box) { .width = tex->width0,
251                                        .height = tex->height0,
252                                        .depth = 1 });
253        assert(xfer);
254
255        map = pipe->transfer_map(pipe, xfer);
256        assert(map);
257
258        for (y = 0; y < ny; ++y) {
259                for (x = 0; x < nx; ++x) {
260                        uint32_t exp[4];
261                        uint32_t *res = (uint32_t *)(map + y * dy + x * dx);
262
263                        expect(exp, slot, x, y);
264                        if (check(res, exp, dx) || (++err) > 20)
265                                continue;
266
267                        if (dx < 4) {
268                                uint32_t u = 0, v = 0;
269
270                                for (i = 0; i < dx; i++) {
271                                        u |= ((uint8_t *)exp)[i] << (8 * i);
272                                        v |= ((uint8_t *)res)[i] << (8 * i);
273                                }
274                                printf("(%d, %d): got 0x%x, expected 0x%x\n",
275                                       x, y, v, u);
276                        } else {
277                                for (i = 0; i < dx / 4; i++) {
278                                        printf("(%d, %d)[%d]: got 0x%x/%f,"
279                                               " expected 0x%x/%f\n", x, y, i,
280                                               res[i], ((float *)res)[i],
281                                               exp[i], ((float *)exp)[i]);
282                                }
283                        }
284                }
285        }
286
287        pipe->transfer_unmap(pipe, xfer);
288        pipe->transfer_destroy(pipe, xfer);
289
290        if (err)
291                printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);
292        else
293                printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);
294}
295
296static void destroy_tex(struct context *ctx)
297{
298        int i;
299
300        for (i = 0; i < MAX_RESOURCES; ++i) {
301                if (ctx->tex[i])
302                        pipe_resource_reference(&ctx->tex[i], NULL);
303        }
304}
305
306static void init_sampler_views(struct context *ctx, const int *slots)
307{
308        struct pipe_context *pipe = ctx->pipe;
309        struct pipe_sampler_view tview;
310        int i;
311
312        for (i = 0; *slots >= 0; ++i, ++slots) {
313                u_sampler_view_default_template(&tview, ctx->tex[*slots],
314                                                ctx->tex[*slots]->format);
315
316                ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],
317                                                         &tview);
318                assert(ctx->view[i]);
319        }
320
321        pipe->set_compute_sampler_views(pipe, 0, i, ctx->view);
322}
323
324static void destroy_sampler_views(struct context *ctx)
325{
326        struct pipe_context *pipe = ctx->pipe;
327        int i;
328
329        pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL);
330
331        for (i = 0; i < MAX_RESOURCES; ++i) {
332                if (ctx->view[i]) {
333                        pipe->sampler_view_destroy(pipe, ctx->view[i]);
334                        ctx->view[i] = NULL;
335                }
336        }
337}
338
339static void init_compute_resources(struct context *ctx, const int *slots)
340{
341        struct pipe_context *pipe = ctx->pipe;
342        int i;
343
344        for (i = 0; *slots >= 0; ++i, ++slots) {
345                struct pipe_surface tsurf = {
346                        .format = ctx->tex[*slots]->format,
347                        .usage = ctx->tex[*slots]->bind,
348                        .writable = ctx->tex_rw[*slots]
349                };
350
351                if (ctx->tex[*slots]->target == PIPE_BUFFER)
352                        tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;
353
354                ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],
355                                                    &tsurf);
356                assert(ctx->surf[i]);
357        }
358
359        pipe->set_compute_resources(pipe, 0, i, ctx->surf);
360}
361
362static void destroy_compute_resources(struct context *ctx)
363{
364        struct pipe_context *pipe = ctx->pipe;
365        int i;
366
367        pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);
368
369        for (i = 0; i < MAX_RESOURCES; ++i) {
370                if (ctx->surf[i]) {
371                        pipe->surface_destroy(pipe, ctx->surf[i]);
372                        ctx->surf[i] = NULL;
373                }
374        }
375}
376
377static void init_sampler_states(struct context *ctx, int n)
378{
379        struct pipe_context *pipe = ctx->pipe;
380        struct pipe_sampler_state smp = {
381                .normalized_coords = 1,
382        };
383        int i;
384
385        for (i = 0; i < n; ++i) {
386                ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);
387                assert(ctx->hwsmp[i]);
388        }
389
390        pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp);
391}
392
393static void destroy_sampler_states(struct context *ctx)
394{
395        struct pipe_context *pipe = ctx->pipe;
396        int i;
397
398        pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL);
399
400        for (i = 0; i < MAX_RESOURCES; ++i) {
401                if (ctx->hwsmp[i]) {
402                        pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);
403                        ctx->hwsmp[i] = NULL;
404                }
405        }
406}
407
408static void init_globals(struct context *ctx, const int *slots,
409                         uint32_t **handles)
410{
411        struct pipe_context *pipe = ctx->pipe;
412        struct pipe_resource *res[MAX_RESOURCES];
413        int i;
414
415        for (i = 0; *slots >= 0; ++i, ++slots)
416                res[i] = ctx->tex[*slots];
417
418        pipe->set_global_binding(pipe, 0, i, res, handles);
419}
420
421static void destroy_globals(struct context *ctx)
422{
423        struct pipe_context *pipe = ctx->pipe;
424
425        pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);
426}
427
428static void launch_grid(struct context *ctx, const uint *block_layout,
429                        const uint *grid_layout, uint32_t pc,
430                        const void *input)
431{
432        struct pipe_context *pipe = ctx->pipe;
433
434        pipe->launch_grid(pipe, block_layout, grid_layout, pc, input);
435}
436
437static void test_system_values(struct context *ctx)
438{
439        const char *src = "COMP\n"
440                "DCL RES[0], BUFFER, RAW, WR\n"
441                "DCL SV[0], BLOCK_ID[0]\n"
442                "DCL SV[1], BLOCK_SIZE[0]\n"
443                "DCL SV[2], GRID_SIZE[0]\n"
444                "DCL SV[3], THREAD_ID[0]\n"
445                "DCL TEMP[0], LOCAL\n"
446                "DCL TEMP[1], LOCAL\n"
447                "IMM UINT32 { 64, 0, 0, 0 }\n"
448                "IMM UINT32 { 16, 0, 0, 0 }\n"
449                "IMM UINT32 { 0, 0, 0, 0 }\n"
450                "\n"
451                "BGNSUB"
452                "  UMUL TEMP[0], SV[0], SV[1]\n"
453                "  UADD TEMP[0], TEMP[0], SV[3]\n"
454                "  UMUL TEMP[1], SV[1], SV[2]\n"
455                "  UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"
456                "  UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"
457                "  UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"
458                "  UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"
459                "  UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"
460                "  UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
461                "  STORE RES[0].xyzw, TEMP[0], SV[0]\n"
462                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
463                "  STORE RES[0].xyzw, TEMP[0], SV[1]\n"
464                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
465                "  STORE RES[0].xyzw, TEMP[0], SV[2]\n"
466                "  UADD TEMP[0].x, TEMP[0], IMM[1]\n"
467                "  STORE RES[0].xyzw, TEMP[0], SV[3]\n"
468                "  RET\n"
469                "ENDSUB\n";
470        void init(void *p, int s, int x, int y) {
471                *(uint32_t *)p = 0xdeadbeef;
472        }
473        void expect(void *p, int s, int x, int y) {
474                int id = x / 16, sv = (x % 16) / 4, c = x % 4;
475                int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };
476                int bsz[] = { 4, 3, 5, 1};
477                int gsz[] = { 5, 4, 1, 1};
478
479                switch (sv) {
480                case 0:
481                        *(uint32_t *)p = tid[c] / bsz[c];
482                        break;
483                case 1:
484                        *(uint32_t *)p = bsz[c];
485                        break;
486                case 2:
487                        *(uint32_t *)p = gsz[c];
488                        break;
489                case 3:
490                        *(uint32_t *)p = tid[c] % bsz[c];
491                        break;
492                }
493        }
494
495        printf("- %s\n", __func__);
496
497        init_prog(ctx, 0, 0, 0, src, NULL);
498        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
499                 76800, 0, init);
500        init_compute_resources(ctx, (int []) { 0, -1 });
501        launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);
502        check_tex(ctx, 0, expect, NULL);
503        destroy_compute_resources(ctx);
504        destroy_tex(ctx);
505        destroy_prog(ctx);
506}
507
508static void test_resource_access(struct context *ctx)
509{
510        const char *src = "COMP\n"
511                "DCL RES[0], BUFFER, RAW, WR\n"
512                "DCL RES[1], 2D, RAW, WR\n"
513                "DCL SV[0], BLOCK_ID[0]\n"
514                "DCL TEMP[0], LOCAL\n"
515                "DCL TEMP[1], LOCAL\n"
516                "IMM UINT32 { 15, 0, 0, 0 }\n"
517                "IMM UINT32 { 16, 1, 0, 0 }\n"
518                "\n"
519                "    BGNSUB\n"
520                "       UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"
521                "       AND TEMP[0].x, TEMP[0], IMM[0]\n"
522                "       UMUL TEMP[0].x, TEMP[0], IMM[1]\n"
523                "       LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"
524                "       UMUL TEMP[1], SV[0], IMM[1]\n"
525                "       STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"
526                "       RET\n"
527                "    ENDSUB\n";
528        void init0(void *p, int s, int x, int y) {
529                *(float *)p = 8.0 - (float)x;
530        }
531        void init1(void *p, int s, int x, int y) {
532                *(uint32_t *)p = 0xdeadbeef;
533        }
534        void expect(void *p, int s, int x, int y) {
535                *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f);
536        }
537
538        printf("- %s\n", __func__);
539
540        init_prog(ctx, 0, 0, 0, src, NULL);
541        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
542                 256, 0, init0);
543        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
544                 60, 12, init1);
545        init_compute_resources(ctx, (int []) { 0, 1, -1 });
546        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);
547        check_tex(ctx, 1, expect, NULL);
548        destroy_compute_resources(ctx);
549        destroy_tex(ctx);
550        destroy_prog(ctx);
551}
552
553static void test_function_calls(struct context *ctx)
554{
555        const char *src = "COMP\n"
556                "DCL RES[0], 2D, RAW, WR\n"
557                "DCL SV[0], BLOCK_ID[0]\n"
558                "DCL SV[1], BLOCK_SIZE[0]\n"
559                "DCL SV[2], GRID_SIZE[0]\n"
560                "DCL SV[3], THREAD_ID[0]\n"
561                "DCL TEMP[0]\n"
562                "DCL TEMP[1]\n"
563                "DCL TEMP[2], LOCAL\n"
564                "IMM UINT32 { 0, 11, 22, 33 }\n"
565                "IMM FLT32 { 11, 33, 55, 99 }\n"
566                "IMM UINT32 { 4, 1, 0, 0 }\n"
567                "IMM UINT32 { 12, 0, 0, 0 }\n"
568                "\n"
569                "00: BGNSUB\n"
570                "01:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
571                "02:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
572                "03:  USLT TEMP[0].x, TEMP[0], IMM[0]\n"
573                "04:  RET\n"
574                "05: ENDSUB\n"
575                "06: BGNSUB\n"
576                "07:  UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"
577                "08:  UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"
578                "09:  USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"
579                "10:  IF TEMP[0].xxxx\n"
580                "11:   CAL :0\n"
581                "12:  ENDIF\n"
582                "13:  RET\n"
583                "14: ENDSUB\n"
584                "15: BGNSUB\n"
585                "16:  UMUL TEMP[2], SV[0], SV[1]\n"
586                "17:  UADD TEMP[2], TEMP[2], SV[3]\n"
587                "18:  UMUL TEMP[2], TEMP[2], IMM[2]\n"
588                "00:  MOV TEMP[1].x, IMM[2].wwww\n"
589                "19:  LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"
590                "20:  CAL :6\n"
591                "21:  STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"
592                "22:  RET\n"
593                "23: ENDSUB\n";
594        void init(void *p, int s, int x, int y) {
595                *(uint32_t *)p = 15 * y + x;
596        }
597        void expect(void *p, int s, int x, int y) {
598                *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;
599        }
600
601        printf("- %s\n", __func__);
602
603        init_prog(ctx, 0, 0, 0, src, NULL);
604        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
605                 15, 12, init);
606        init_compute_resources(ctx, (int []) { 0, -1 });
607        launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);
608        check_tex(ctx, 0, expect, NULL);
609        destroy_compute_resources(ctx);
610        destroy_tex(ctx);
611        destroy_prog(ctx);
612}
613
614static void test_input_global(struct context *ctx)
615{
616        const char *src = "COMP\n"
617                "DCL SV[0], THREAD_ID[0]\n"
618                "DCL TEMP[0], LOCAL\n"
619                "DCL TEMP[1], LOCAL\n"
620                "IMM UINT32 { 8, 0, 0, 0 }\n"
621                "\n"
622                "    BGNSUB\n"
623                "       UMUL TEMP[0], SV[0], IMM[0]\n"
624                "       LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"
625                "       LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"
626                "       UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"
627                "       STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"
628                "       RET\n"
629                "    ENDSUB\n";
630        void init(void *p, int s, int x, int y) {
631                *(uint32_t *)p = 0xdeadbeef;
632        }
633        void expect(void *p, int s, int x, int y) {
634                *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);
635        }
636        uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,
637                              0x10005, 0x10006, 0x10007, 0x10008 };
638
639        printf("- %s\n", __func__);
640
641        init_prog(ctx, 0, 0, 32, src, NULL);
642        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
643        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
644        init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
645        init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init);
646        init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },
647                     (uint32_t *[]){ &input[1], &input[3],
648                                     &input[5], &input[7] });
649        launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);
650        check_tex(ctx, 0, expect, NULL);
651        check_tex(ctx, 1, expect, NULL);
652        check_tex(ctx, 2, expect, NULL);
653        check_tex(ctx, 3, expect, NULL);
654        destroy_globals(ctx);
655        destroy_tex(ctx);
656        destroy_prog(ctx);
657}
658
659static void test_private(struct context *ctx)
660{
661        const char *src = "COMP\n"
662                "DCL RES[0], BUFFER, RAW, WR\n"
663                "DCL SV[0], BLOCK_ID[0]\n"
664                "DCL SV[1], BLOCK_SIZE[0]\n"
665                "DCL SV[2], THREAD_ID[0]\n"
666                "DCL TEMP[0], LOCAL\n"
667                "DCL TEMP[1], LOCAL\n"
668                "DCL TEMP[2], LOCAL\n"
669                "IMM UINT32 { 128, 0, 0, 0 }\n"
670                "IMM UINT32 { 4, 0, 0, 0 }\n"
671                "\n"
672                "    BGNSUB\n"
673                "       UMUL TEMP[0].x, SV[0], SV[1]\n"
674                "       UADD TEMP[0].x, TEMP[0], SV[2]\n"
675                "       MOV TEMP[1].x, IMM[0].wwww\n"
676                "       BGNLOOP\n"
677                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
678                "               IF TEMP[2]\n"
679                "                       BRK\n"
680                "               ENDIF\n"
681                "               UDIV TEMP[2].x, TEMP[1], IMM[1]\n"
682                "               UADD TEMP[2].x, TEMP[2], TEMP[0]\n"
683                "               STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"
684                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
685                "       ENDLOOP\n"
686                "       MOV TEMP[1].x, IMM[0].wwww\n"
687                "       UMUL TEMP[0].x, TEMP[0], IMM[0]\n"
688                "       BGNLOOP\n"
689                "               USEQ TEMP[2].x, TEMP[1], IMM[0]\n"
690                "               IF TEMP[2]\n"
691                "                       BRK\n"
692                "               ENDIF\n"
693                "               LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"
694                "               STORE RES[0].x, TEMP[0], TEMP[2]\n"
695                "               UADD TEMP[0].x, TEMP[0], IMM[1]\n"
696                "               UADD TEMP[1].x, TEMP[1], IMM[1]\n"
697                "       ENDLOOP\n"
698                "       RET\n"
699                "    ENDSUB\n";
700        void init(void *p, int s, int x, int y) {
701                *(uint32_t *)p = 0xdeadbeef;
702        }
703        void expect(void *p, int s, int x, int y) {
704                *(uint32_t *)p = (x / 32) + x % 32;
705        }
706
707        printf("- %s\n", __func__);
708
709        init_prog(ctx, 0, 128, 0, src, NULL);
710        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
711                 32768, 0, init);
712        init_compute_resources(ctx, (int []) { 0, -1 });
713        launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
714        check_tex(ctx, 0, expect, NULL);
715        destroy_compute_resources(ctx);
716        destroy_tex(ctx);
717        destroy_prog(ctx);
718}
719
720static void test_local(struct context *ctx)
721{
722        const char *src = "COMP\n"
723                "DCL RES[0], BUFFER, RAW, WR\n"
724                "DCL SV[0], BLOCK_ID[0]\n"
725                "DCL SV[1], BLOCK_SIZE[0]\n"
726                "DCL SV[2], THREAD_ID[0]\n"
727                "DCL TEMP[0], LOCAL\n"
728                "DCL TEMP[1], LOCAL\n"
729                "DCL TEMP[2], LOCAL\n"
730                "IMM UINT32 { 1, 0, 0, 0 }\n"
731                "IMM UINT32 { 2, 0, 0, 0 }\n"
732                "IMM UINT32 { 4, 0, 0, 0 }\n"
733                "IMM UINT32 { 32, 0, 0, 0 }\n"
734                "IMM UINT32 { 128, 0, 0, 0 }\n"
735                "\n"
736                "    BGNSUB\n"
737                "       UMUL TEMP[0].x, SV[2], IMM[2]\n"
738                "       STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"
739                "       MFENCE RLOCAL\n"
740                "       USLT TEMP[1].x, SV[2], IMM[3]\n"
741                "       IF TEMP[1]\n"
742                "               UADD TEMP[1].x, TEMP[0], IMM[4]\n"
743                "               BGNLOOP\n"
744                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
745                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
746                "                       IF TEMP[2]\n"
747                "                               BRK\n"
748                "                       ENDIF\n"
749                "               ENDLOOP\n"
750                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
751                "               MFENCE RLOCAL\n"
752                "               BGNLOOP\n"
753                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
754                "                       USEQ TEMP[2].x, TEMP[2], IMM[1]\n"
755                "                       IF TEMP[2]\n"
756                "                               BRK\n"
757                "                       ENDIF\n"
758                "               ENDLOOP\n"
759                "       ELSE\n"
760                "               UADD TEMP[1].x, TEMP[0], -IMM[4]\n"
761                "               BGNLOOP\n"
762                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
763                "                       USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"
764                "                       IF TEMP[2]\n"
765                "                               BRK\n"
766                "                       ENDIF\n"
767                "               ENDLOOP\n"
768                "               STORE RLOCAL.x, TEMP[0], IMM[0]\n"
769                "               MFENCE RLOCAL\n"
770                "               BGNLOOP\n"
771                "                       LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"
772                "                       USEQ TEMP[2].x, TEMP[2], IMM[0]\n"
773                "                       IF TEMP[2]\n"
774                "                               BRK\n"
775                "                       ENDIF\n"
776                "               ENDLOOP\n"
777                "               STORE RLOCAL.x, TEMP[0], IMM[1]\n"
778                "               MFENCE RLOCAL\n"
779                "       ENDIF\n"
780                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
781                "       UMUL TEMP[1].x, TEMP[1], IMM[2]\n"
782                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
783                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
784                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
785                "       RET\n"
786                "    ENDSUB\n";
787        void init(void *p, int s, int x, int y) {
788                *(uint32_t *)p = 0xdeadbeef;
789        }
790        void expect(void *p, int s, int x, int y) {
791                *(uint32_t *)p = x & 0x20 ? 2 : 1;
792        }
793
794        printf("- %s\n", __func__);
795
796        init_prog(ctx, 256, 0, 0, src, NULL);
797        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
798                 4096, 0, init);
799        init_compute_resources(ctx, (int []) { 0, -1 });
800        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
801        check_tex(ctx, 0, expect, NULL);
802        destroy_compute_resources(ctx);
803        destroy_tex(ctx);
804        destroy_prog(ctx);
805}
806
807static void test_sample(struct context *ctx)
808{
809        const char *src = "COMP\n"
810                "DCL SVIEW[0], 2D, FLOAT\n"
811                "DCL RES[0], 2D, RAW, WR\n"
812                "DCL SAMP[0]\n"
813                "DCL SV[0], BLOCK_ID[0]\n"
814                "DCL TEMP[0], LOCAL\n"
815                "DCL TEMP[1], LOCAL\n"
816                "IMM UINT32 { 16, 1, 0, 0 }\n"
817                "IMM FLT32 { 128, 32, 0, 0 }\n"
818                "\n"
819                "    BGNSUB\n"
820                "       I2F TEMP[1], SV[0]\n"
821                "       DIV TEMP[1], TEMP[1], IMM[1]\n"
822                "       SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"
823                "       UMUL TEMP[0], SV[0], IMM[0]\n"
824                "       STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"
825                "       RET\n"
826                "    ENDSUB\n";
827        void init(void *p, int s, int x, int y) {
828                *(float *)p = s ? 1 : x * y;
829        }
830        void expect(void *p, int s, int x, int y) {
831                switch (x % 4) {
832                case 0:
833                        *(float *)p = x / 4 * y;
834                        break;
835                case 1:
836                case 2:
837                        *(float *)p = 0;
838                        break;
839                case 3:
840                        *(float *)p = 1;
841                        break;
842                }
843        }
844
845        printf("- %s\n", __func__);
846
847        init_prog(ctx, 0, 0, 0, src, NULL);
848        init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
849                 128, 32, init);
850        init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
851                 512, 32, init);
852        init_compute_resources(ctx, (int []) { 1, -1 });
853        init_sampler_views(ctx, (int []) { 0, -1 });
854        init_sampler_states(ctx, 2);
855        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);
856        check_tex(ctx, 1, expect, NULL);
857        destroy_sampler_states(ctx);
858        destroy_sampler_views(ctx);
859        destroy_compute_resources(ctx);
860        destroy_tex(ctx);
861        destroy_prog(ctx);
862}
863
864static void test_many_kern(struct context *ctx)
865{
866        const char *src = "COMP\n"
867                "DCL RES[0], BUFFER, RAW, WR\n"
868                "DCL TEMP[0], LOCAL\n"
869                "IMM UINT32 { 0, 1, 2, 3 }\n"
870                "IMM UINT32 { 4, 0, 0, 0 }\n"
871                "\n"
872                "    BGNSUB\n"
873                "       UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"
874                "       STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"
875                "       RET\n"
876                "    ENDSUB\n"
877                "    BGNSUB\n"
878                "       UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"
879                "       STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"
880                "       RET\n"
881                "    ENDSUB\n"
882                "    BGNSUB\n"
883                "       UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"
884                "       STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"
885                "       RET\n"
886                "    ENDSUB\n"
887                "    BGNSUB\n"
888                "       UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"
889                "       STORE RES[0].x, TEMP[0], IMM[0].wwww\n"
890                "       RET\n"
891                "    ENDSUB\n";
892        void init(void *p, int s, int x, int y) {
893                *(uint32_t *)p = 0xdeadbeef;
894        }
895        void expect(void *p, int s, int x, int y) {
896                *(uint32_t *)p = x;
897        }
898
899        printf("- %s\n", __func__);
900
901        init_prog(ctx, 0, 0, 0, src, NULL);
902        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
903                 16, 0, init);
904        init_compute_resources(ctx, (int []) { 0, -1 });
905        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
906        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);
907        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);
908        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);
909        check_tex(ctx, 0, expect, NULL);
910        destroy_compute_resources(ctx);
911        destroy_tex(ctx);
912        destroy_prog(ctx);
913}
914
915static void test_constant(struct context *ctx)
916{
917        const char *src = "COMP\n"
918                "DCL RES[0], BUFFER, RAW\n"
919                "DCL RES[1], BUFFER, RAW, WR\n"
920                "DCL SV[0], BLOCK_ID[0]\n"
921                "DCL TEMP[0], LOCAL\n"
922                "DCL TEMP[1], LOCAL\n"
923                "IMM UINT32 { 4, 0, 0, 0 }\n"
924                "\n"
925                "    BGNSUB\n"
926                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
927                "       LOAD TEMP[1].x, RES[0], TEMP[0]\n"
928                "       STORE RES[1].x, TEMP[0], TEMP[1]\n"
929                "       RET\n"
930                "    ENDSUB\n";
931        void init(void *p, int s, int x, int y) {
932                *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x;
933        }
934        void expect(void *p, int s, int x, int y) {
935                *(float *)p = 8.0 - (float)x;
936        }
937
938        printf("- %s\n", __func__);
939
940        init_prog(ctx, 0, 0, 0, src, NULL);
941        init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
942                 256, 0, init);
943        init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
944                 256, 0, init);
945        init_compute_resources(ctx, (int []) { 0, 1, -1 });
946        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
947        check_tex(ctx, 1, expect, NULL);
948        destroy_compute_resources(ctx);
949        destroy_tex(ctx);
950        destroy_prog(ctx);
951}
952
953static void test_resource_indirect(struct context *ctx)
954{
955        const char *src = "COMP\n"
956                "DCL RES[0], BUFFER, RAW, WR\n"
957                "DCL RES[1..3], BUFFER, RAW\n"
958                "DCL SV[0], BLOCK_ID[0]\n"
959                "DCL TEMP[0], LOCAL\n"
960                "DCL TEMP[1], LOCAL\n"
961                "IMM UINT32 { 4, 0, 0, 0 }\n"
962                "\n"
963                "    BGNSUB\n"
964                "       UMUL TEMP[0].x, SV[0], IMM[0]\n"
965                "       LOAD TEMP[1].x, RES[1], TEMP[0]\n"
966                "       LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"
967                "       STORE RES[0].x, TEMP[0], TEMP[1]\n"
968                "       RET\n"
969                "    ENDSUB\n";
970        void init(void *p, int s, int x, int y) {
971                *(uint32_t *)p = s == 0 ? 0xdeadbeef :
972                   s == 1 ? x % 2 :
973                   s == 2 ? 2 * x :
974                   2 * x + 1;
975        }
976        void expect(void *p, int s, int x, int y) {
977           *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0);
978        }
979
980        printf("- %s\n", __func__);
981
982        init_prog(ctx, 0, 0, 0, src, NULL);
983        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
984                 256, 0, init);
985        init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
986                 256, 0, init);
987        init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
988                 256, 0, init);
989        init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,
990                 256, 0, init);
991        init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });
992        launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);
993        check_tex(ctx, 0, expect, NULL);
994        destroy_compute_resources(ctx);
995        destroy_tex(ctx);
996        destroy_prog(ctx);
997}
998
999enum pipe_format surface_fmts[] = {
1000        PIPE_FORMAT_B8G8R8A8_UNORM,
1001        PIPE_FORMAT_B8G8R8X8_UNORM,
1002        PIPE_FORMAT_A8R8G8B8_UNORM,
1003        PIPE_FORMAT_X8R8G8B8_UNORM,
1004        PIPE_FORMAT_X8R8G8B8_UNORM,
1005        PIPE_FORMAT_L8_UNORM,
1006        PIPE_FORMAT_A8_UNORM,
1007        PIPE_FORMAT_I8_UNORM,
1008        PIPE_FORMAT_L8A8_UNORM,
1009        PIPE_FORMAT_R32_FLOAT,
1010        PIPE_FORMAT_R32G32_FLOAT,
1011        PIPE_FORMAT_R32G32B32A32_FLOAT,
1012        PIPE_FORMAT_R32_UNORM,
1013        PIPE_FORMAT_R32G32_UNORM,
1014        PIPE_FORMAT_R32G32B32A32_UNORM,
1015        PIPE_FORMAT_R32_SNORM,
1016        PIPE_FORMAT_R32G32_SNORM,
1017        PIPE_FORMAT_R32G32B32A32_SNORM,
1018        PIPE_FORMAT_R8_UINT,
1019        PIPE_FORMAT_R8G8_UINT,
1020        PIPE_FORMAT_R8G8B8A8_UINT,
1021        PIPE_FORMAT_R8_SINT,
1022        PIPE_FORMAT_R8G8_SINT,
1023        PIPE_FORMAT_R8G8B8A8_SINT,
1024        PIPE_FORMAT_R32_UINT,
1025        PIPE_FORMAT_R32G32_UINT,
1026        PIPE_FORMAT_R32G32B32A32_UINT,
1027        PIPE_FORMAT_R32_SINT,
1028        PIPE_FORMAT_R32G32_SINT,
1029        PIPE_FORMAT_R32G32B32A32_SINT
1030};
1031
1032static void test_surface_ld(struct context *ctx)
1033{
1034        const char *src = "COMP\n"
1035                "DCL RES[0], 2D\n"
1036                "DCL RES[1], 2D, RAW, WR\n"
1037                "DCL SV[0], BLOCK_ID[0]\n"
1038                "DCL TEMP[0], LOCAL\n"
1039                "DCL TEMP[1], LOCAL\n"
1040                "IMM UINT32 { 16, 1, 0, 0 }\n"
1041                "\n"
1042                "    BGNSUB\n"
1043                "       LOAD TEMP[1], RES[0], SV[0]\n"
1044                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1045                "       STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"
1046                "       RET\n"
1047                "    ENDSUB\n";
1048        int i = 0;
1049        void init0f(void *p, int s, int x, int y) {
1050                float v[] = { 1.0, -.75, .50, -.25 };
1051                util_format_write_4f(surface_fmts[i], v, 0,
1052                                     p, 0, 0, 0, 1, 1);
1053        }
1054        void init0i(void *p, int s, int x, int y) {
1055                int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1056                util_format_write_4i(surface_fmts[i], v, 0,
1057                                     p, 0, 0, 0, 1, 1);
1058        }
1059        void init1(void *p, int s, int x, int y) {
1060                *(uint32_t *)p = 0xdeadbeef;
1061        }
1062        void expectf(void *p, int s, int x, int y) {
1063                float v[4], w[4];
1064                init0f(v, s, x / 4, y);
1065                util_format_read_4f(surface_fmts[i], w, 0,
1066                                    v, 0, 0, 0, 1, 1);
1067                *(float *)p = w[x % 4];
1068        }
1069        void expecti(void *p, int s, int x, int y) {
1070                int32_t v[4], w[4];
1071                init0i(v, s, x / 4, y);
1072                util_format_read_4i(surface_fmts[i], w, 0,
1073                                    v, 0, 0, 0, 1, 1);
1074                *(uint32_t *)p = w[x % 4];
1075        }
1076
1077        printf("- %s\n", __func__);
1078
1079        init_prog(ctx, 0, 0, 0, src, NULL);
1080
1081        for (i = 0; i < Elements(surface_fmts); i++) {
1082                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1083
1084                printf("   - %s\n", util_format_name(surface_fmts[i]));
1085
1086                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],
1087                         128, 32, (is_int ? init0i : init0f));
1088                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1089                         512, 32, init1);
1090                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1091                init_sampler_states(ctx, 2);
1092                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1093                            NULL);
1094                check_tex(ctx, 1, (is_int ? expecti : expectf), NULL);
1095                destroy_sampler_states(ctx);
1096                destroy_compute_resources(ctx);
1097                destroy_tex(ctx);
1098        }
1099
1100        destroy_prog(ctx);
1101}
1102
1103static void test_surface_st(struct context *ctx)
1104{
1105        const char *src = "COMP\n"
1106                "DCL RES[0], 2D, RAW\n"
1107                "DCL RES[1], 2D, WR\n"
1108                "DCL SV[0], BLOCK_ID[0]\n"
1109                "DCL TEMP[0], LOCAL\n"
1110                "DCL TEMP[1], LOCAL\n"
1111                "IMM UINT32 { 16, 1, 0, 0 }\n"
1112                "\n"
1113                "    BGNSUB\n"
1114                "       UMUL TEMP[0], SV[0], IMM[0]\n"
1115                "       LOAD TEMP[1], RES[0], TEMP[0]\n"
1116                "       STORE RES[1], SV[0], TEMP[1]\n"
1117                "       RET\n"
1118                "    ENDSUB\n";
1119        int i = 0;
1120        void init0f(void *p, int s, int x, int y) {
1121                float v[] = { 1.0, -.75, 0.5, -.25 };
1122                *(float *)p = v[x % 4];
1123        }
1124        void init0i(void *p, int s, int x, int y) {
1125                int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };
1126                *(int32_t *)p = v[x % 4];
1127        }
1128        void init1(void *p, int s, int x, int y) {
1129                memset(p, 1, util_format_get_blocksize(surface_fmts[i]));
1130        }
1131        void expectf(void *p, int s, int x, int y) {
1132                float vf[4];
1133                int j;
1134
1135                for (j = 0; j < 4; j++)
1136                        init0f(&vf[j], s, 4 * x + j, y);
1137                util_format_write_4f(surface_fmts[i], vf, 0,
1138                                     p, 0, 0, 0, 1, 1);
1139        }
1140        void expects(void *p, int s, int x, int y) {
1141                int32_t v[4];
1142                int j;
1143
1144                for (j = 0; j < 4; j++)
1145                        init0i(&v[j], s, 4 * x + j, y);
1146                util_format_write_4i(surface_fmts[i], v, 0,
1147                                     p, 0, 0, 0, 1, 1);
1148        }
1149        void expectu(void *p, int s, int x, int y) {
1150                uint32_t v[4];
1151                int j;
1152
1153                for (j = 0; j < 4; j++)
1154                        init0i(&v[j], s, 4 * x + j, y);
1155                util_format_write_4ui(surface_fmts[i], v, 0,
1156                                      p, 0, 0, 0, 1, 1);
1157        }
1158        bool check(void *x, void *y, int sz) {
1159                int j;
1160
1161                if (util_format_is_float(surface_fmts[i])) {
1162                        return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;
1163
1164                } else if ((sz % 4) == 0) {
1165                        for (j = 0; j < sz / 4; j++)
1166                                if (abs(((uint32_t *)x)[j] -
1167                                        ((uint32_t *)y)[j]) > 1)
1168                                        return false;
1169                        return true;
1170                } else {
1171                        return !memcmp(x, y, sz);
1172                }
1173        }
1174
1175        printf("- %s\n", __func__);
1176
1177        init_prog(ctx, 0, 0, 0, src, NULL);
1178
1179        for (i = 0; i < Elements(surface_fmts); i++) {
1180                bool is_signed = (util_format_description(surface_fmts[i])
1181                                  ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);
1182                bool is_int = util_format_is_pure_integer(surface_fmts[i]);
1183
1184                printf("   - %s\n", util_format_name(surface_fmts[i]));
1185
1186                init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,
1187                         512, 32, (is_int ? init0i : init0f));
1188                init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],
1189                         128, 32, init1);
1190                init_compute_resources(ctx, (int []) { 0, 1, -1 });
1191                init_sampler_states(ctx, 2);
1192                launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,
1193                            NULL);
1194                check_tex(ctx, 1, (is_int && is_signed ? expects :
1195                                   is_int && !is_signed ? expectu :
1196                                   expectf), check);
1197                destroy_sampler_states(ctx);
1198                destroy_compute_resources(ctx);
1199                destroy_tex(ctx);
1200        }
1201
1202        destroy_prog(ctx);
1203}
1204
1205static void test_barrier(struct context *ctx)
1206{
1207        const char *src = "COMP\n"
1208                "DCL RES[0], BUFFER, RAW, WR\n"
1209                "DCL SV[0], BLOCK_ID[0]\n"
1210                "DCL SV[1], BLOCK_SIZE[0]\n"
1211                "DCL SV[2], THREAD_ID[0]\n"
1212                "DCL TEMP[0], LOCAL\n"
1213                "DCL TEMP[1], LOCAL\n"
1214                "DCL TEMP[2], LOCAL\n"
1215                "DCL TEMP[3], LOCAL\n"
1216                "IMM UINT32 { 1, 0, 0, 0 }\n"
1217                "IMM UINT32 { 4, 0, 0, 0 }\n"
1218                "IMM UINT32 { 32, 0, 0, 0 }\n"
1219                "\n"
1220                "    BGNSUB\n"
1221                "       UMUL TEMP[0].x, SV[2], IMM[1]\n"
1222                "       MOV TEMP[1].x, IMM[0].wwww\n"
1223                "       BGNLOOP\n"
1224                "               BARRIER\n"
1225                "               STORE RLOCAL.x, TEMP[0], TEMP[1]\n"
1226                "               BARRIER\n"
1227                "               MOV TEMP[2].x, IMM[0].wwww\n"
1228                "               BGNLOOP\n"
1229                "                       UMUL TEMP[3].x, TEMP[2], IMM[1]\n"
1230                "                       LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"
1231                "                       USNE TEMP[3].x, TEMP[3], TEMP[1]\n"
1232                "                       IF TEMP[3]\n"
1233                "                               END\n"
1234                "                       ENDIF\n"
1235                "                       UADD TEMP[2].x, TEMP[2], IMM[0]\n"
1236                "                       USEQ TEMP[3].x, TEMP[2], SV[1]\n"
1237                "                       IF TEMP[3]\n"
1238                "                               BRK\n"
1239                "                       ENDIF\n"
1240                "               ENDLOOP\n"
1241                "               UADD TEMP[1].x, TEMP[1], IMM[0]\n"
1242                "               USEQ TEMP[2].x, TEMP[1], IMM[2]\n"
1243                "               IF TEMP[2]\n"
1244                "                       BRK\n"
1245                "               ENDIF\n"
1246                "       ENDLOOP\n"
1247                "       UMUL TEMP[1].x, SV[0], SV[1]\n"
1248                "       UMUL TEMP[1].x, TEMP[1], IMM[1]\n"
1249                "       UADD TEMP[1].x, TEMP[1], TEMP[0]\n"
1250                "       LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"
1251                "       STORE RES[0].x, TEMP[1], TEMP[0]\n"
1252                "       RET\n"
1253                "    ENDSUB\n";
1254        void init(void *p, int s, int x, int y) {
1255                *(uint32_t *)p = 0xdeadbeef;
1256        }
1257        void expect(void *p, int s, int x, int y) {
1258                *(uint32_t *)p = 31;
1259        }
1260
1261        printf("- %s\n", __func__);
1262
1263        init_prog(ctx, 256, 0, 0, src, NULL);
1264        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1265                 4096, 0, init);
1266        init_compute_resources(ctx, (int []) { 0, -1 });
1267        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1268        check_tex(ctx, 0, expect, NULL);
1269        destroy_compute_resources(ctx);
1270        destroy_tex(ctx);
1271        destroy_prog(ctx);
1272}
1273
1274static void test_atom_ops(struct context *ctx, bool global)
1275{
1276        const char *src = "COMP\n"
1277                "#ifdef TARGET_GLOBAL\n"
1278                "#define target RES[0]\n"
1279                "#else\n"
1280                "#define target RLOCAL\n"
1281                "#endif\n"
1282                ""
1283                "DCL RES[0], BUFFER, RAW, WR\n"
1284                "#define threadid SV[0]\n"
1285                "DCL threadid, THREAD_ID[0]\n"
1286                ""
1287                "#define offset TEMP[0]\n"
1288                "DCL offset, LOCAL\n"
1289                "#define tmp TEMP[1]\n"
1290                "DCL tmp, LOCAL\n"
1291                ""
1292                "#define k0 IMM[0]\n"
1293                "IMM UINT32 { 0, 0, 0, 0 }\n"
1294                "#define k1 IMM[1]\n"
1295                "IMM UINT32 { 1, 0, 0, 0 }\n"
1296                "#define k2 IMM[2]\n"
1297                "IMM UINT32 { 2, 0, 0, 0 }\n"
1298                "#define k3 IMM[3]\n"
1299                "IMM UINT32 { 3, 0, 0, 0 }\n"
1300                "#define k4 IMM[4]\n"
1301                "IMM UINT32 { 4, 0, 0, 0 }\n"
1302                "#define k5 IMM[5]\n"
1303                "IMM UINT32 { 5, 0, 0, 0 }\n"
1304                "#define k6 IMM[6]\n"
1305                "IMM UINT32 { 6, 0, 0, 0 }\n"
1306                "#define k7 IMM[7]\n"
1307                "IMM UINT32 { 7, 0, 0, 0 }\n"
1308                "#define k8 IMM[8]\n"
1309                "IMM UINT32 { 8, 0, 0, 0 }\n"
1310                "#define k9 IMM[9]\n"
1311                "IMM UINT32 { 9, 0, 0, 0 }\n"
1312                "#define korig IMM[10].xxxx\n"
1313                "#define karg IMM[10].yyyy\n"
1314                "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"
1315                "\n"
1316                "    BGNSUB\n"
1317                "       UMUL offset.x, threadid, k4\n"
1318                "       STORE target.x, offset, korig\n"
1319                "       USEQ tmp.x, threadid, k0\n"
1320                "       IF tmp\n"
1321                "               ATOMUADD tmp.x, target, offset, karg\n"
1322                "               ATOMUADD tmp.x, target, offset, tmp\n"
1323                "       ENDIF\n"
1324                "       USEQ tmp.x, threadid, k1\n"
1325                "       IF tmp\n"
1326                "               ATOMXCHG tmp.x, target, offset, karg\n"
1327                "               ATOMXCHG tmp.x, target, offset, tmp\n"
1328                "       ENDIF\n"
1329                "       USEQ tmp.x, threadid, k2\n"
1330                "       IF tmp\n"
1331                "               ATOMCAS tmp.x, target, offset, korig, karg\n"
1332                "               ATOMCAS tmp.x, target, offset, tmp, k0\n"
1333                "       ENDIF\n"
1334                "       USEQ tmp.x, threadid, k3\n"
1335                "       IF tmp\n"
1336                "               ATOMAND tmp.x, target, offset, karg\n"
1337                "               ATOMAND tmp.x, target, offset, tmp\n"
1338                "       ENDIF\n"
1339                "       USEQ tmp.x, threadid, k4\n"
1340                "       IF tmp\n"
1341                "               ATOMOR tmp.x, target, offset, karg\n"
1342                "               ATOMOR tmp.x, target, offset, tmp\n"
1343                "       ENDIF\n"
1344                "       USEQ tmp.x, threadid, k5\n"
1345                "       IF tmp\n"
1346                "               ATOMXOR tmp.x, target, offset, karg\n"
1347                "               ATOMXOR tmp.x, target, offset, tmp\n"
1348                "       ENDIF\n"
1349                "       USEQ tmp.x, threadid, k6\n"
1350                "       IF tmp\n"
1351                "               ATOMUMIN tmp.x, target, offset, karg\n"
1352                "               ATOMUMIN tmp.x, target, offset, tmp\n"
1353                "       ENDIF\n"
1354                "       USEQ tmp.x, threadid, k7\n"
1355                "       IF tmp\n"
1356                "               ATOMUMAX tmp.x, target, offset, karg\n"
1357                "               ATOMUMAX tmp.x, target, offset, tmp\n"
1358                "       ENDIF\n"
1359                "       USEQ tmp.x, threadid, k8\n"
1360                "       IF tmp\n"
1361                "               ATOMIMIN tmp.x, target, offset, karg\n"
1362                "               ATOMIMIN tmp.x, target, offset, tmp\n"
1363                "       ENDIF\n"
1364                "       USEQ tmp.x, threadid, k9\n"
1365                "       IF tmp\n"
1366                "               ATOMIMAX tmp.x, target, offset, karg\n"
1367                "               ATOMIMAX tmp.x, target, offset, tmp\n"
1368                "       ENDIF\n"
1369                "#ifdef TARGET_LOCAL\n"
1370                "       LOAD tmp.x, RLOCAL, offset\n"
1371                "       STORE RES[0].x, offset, tmp\n"
1372                "#endif\n"
1373                "       RET\n"
1374                "    ENDSUB\n";
1375
1376        void init(void *p, int s, int x, int y) {
1377                *(uint32_t *)p = 0xbad;
1378        }
1379        void expect(void *p, int s, int x, int y) {
1380                switch (x) {
1381                case 0:
1382                        *(uint32_t *)p = 0xce6c8eef;
1383                        break;
1384                case 1:
1385                        *(uint32_t *)p = 0xdeadbeef;
1386                        break;
1387                case 2:
1388                        *(uint32_t *)p = 0x11111111;
1389                        break;
1390                case 3:
1391                        *(uint32_t *)p = 0x10011001;
1392                        break;
1393                case 4:
1394                        *(uint32_t *)p = 0xdfbdbfff;
1395                        break;
1396                case 5:
1397                        *(uint32_t *)p = 0x11111111;
1398                        break;
1399                case 6:
1400                        *(uint32_t *)p = 0x11111111;
1401                        break;
1402                case 7:
1403                        *(uint32_t *)p = 0xdeadbeef;
1404                        break;
1405                case 8:
1406                        *(uint32_t *)p = 0xdeadbeef;
1407                        break;
1408                case 9:
1409                        *(uint32_t *)p = 0x11111111;
1410                        break;
1411                }
1412        }
1413
1414        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1415
1416        init_prog(ctx, 40, 0, 0, src,
1417                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1418        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1419                 40, 0, init);
1420        init_compute_resources(ctx, (int []) { 0, -1 });
1421        launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);
1422        check_tex(ctx, 0, expect, NULL);
1423        destroy_compute_resources(ctx);
1424        destroy_tex(ctx);
1425        destroy_prog(ctx);
1426}
1427
1428static void test_atom_race(struct context *ctx, bool global)
1429{
1430        const char *src = "COMP\n"
1431                "#ifdef TARGET_GLOBAL\n"
1432                "#define target RES[0]\n"
1433                "#else\n"
1434                "#define target RLOCAL\n"
1435                "#endif\n"
1436                ""
1437                "DCL RES[0], BUFFER, RAW, WR\n"
1438                ""
1439                "#define blockid SV[0]\n"
1440                "DCL blockid, BLOCK_ID[0]\n"
1441                "#define blocksz SV[1]\n"
1442                "DCL blocksz, BLOCK_SIZE[0]\n"
1443                "#define threadid SV[2]\n"
1444                "DCL threadid, THREAD_ID[0]\n"
1445                ""
1446                "#define offset TEMP[0]\n"
1447                "DCL offset, LOCAL\n"
1448                "#define arg TEMP[1]\n"
1449                "DCL arg, LOCAL\n"
1450                "#define count TEMP[2]\n"
1451                "DCL count, LOCAL\n"
1452                "#define vlocal TEMP[3]\n"
1453                "DCL vlocal, LOCAL\n"
1454                "#define vshared TEMP[4]\n"
1455                "DCL vshared, LOCAL\n"
1456                "#define last TEMP[5]\n"
1457                "DCL last, LOCAL\n"
1458                "#define tmp0 TEMP[6]\n"
1459                "DCL tmp0, LOCAL\n"
1460                "#define tmp1 TEMP[7]\n"
1461                "DCL tmp1, LOCAL\n"
1462                ""
1463                "#define k0 IMM[0]\n"
1464                "IMM UINT32 { 0, 0, 0, 0 }\n"
1465                "#define k1 IMM[1]\n"
1466                "IMM UINT32 { 1, 0, 0, 0 }\n"
1467                "#define k4 IMM[2]\n"
1468                "IMM UINT32 { 4, 0, 0, 0 }\n"
1469                "#define k32 IMM[3]\n"
1470                "IMM UINT32 { 32, 0, 0, 0 }\n"
1471                "#define k128 IMM[4]\n"
1472                "IMM UINT32 { 128, 0, 0, 0 }\n"
1473                "#define kdeadcafe IMM[5]\n"
1474                "IMM UINT32 { 3735931646, 0, 0, 0 }\n"
1475                "#define kallowed_set IMM[6]\n"
1476                "IMM UINT32 { 559035650, 0, 0, 0 }\n"
1477                "#define k11111111 IMM[7]\n"
1478                "IMM UINT32 { 286331153, 0, 0, 0 }\n"
1479                "\n"
1480                "    BGNSUB\n"
1481                "       MOV offset.x, threadid\n"
1482                "#ifdef TARGET_GLOBAL\n"
1483                "       UMUL tmp0.x, blockid, blocksz\n"
1484                "       UADD offset.x, offset, tmp0\n"
1485                "#endif\n"
1486                "       UMUL offset.x, offset, k4\n"
1487                "       USLT tmp0.x, threadid, k32\n"
1488                "       STORE target.x, offset, k0\n"
1489                "       BARRIER\n"
1490                "       IF tmp0\n"
1491                "               MOV vlocal.x, k0\n"
1492                "               MOV arg.x, kdeadcafe\n"
1493                "               BGNLOOP\n"
1494                "                       INEG arg.x, arg\n"
1495                "                       ATOMUADD vshared.x, target, offset, arg\n"
1496                "                       SFENCE target\n"
1497                "                       USNE tmp0.x, vshared, vlocal\n"
1498                "                       IF tmp0\n"
1499                "                               BRK\n"
1500                "                       ENDIF\n"
1501                "                       UADD vlocal.x, vlocal, arg\n"
1502                "               ENDLOOP\n"
1503                "               UADD vlocal.x, vshared, arg\n"
1504                "               LOAD vshared.x, target, offset\n"
1505                "               USEQ tmp0.x, vshared, vlocal\n"
1506                "               STORE target.x, offset, tmp0\n"
1507                "       ELSE\n"
1508                "               UADD offset.x, offset, -k128\n"
1509                "               MOV count.x, k0\n"
1510                "               MOV last.x, k0\n"
1511                "               BGNLOOP\n"
1512                "                       LOAD vshared.x, target, offset\n"
1513                "                       USEQ tmp0.x, vshared, kallowed_set.xxxx\n"
1514                "                       USEQ tmp1.x, vshared, kallowed_set.yyyy\n"
1515                "                       OR tmp0.x, tmp0, tmp1\n"
1516                "                       IF tmp0\n"
1517                "                               USEQ tmp0.x, vshared, last\n"
1518                "                               IF tmp0\n"
1519                "                                       CONT\n"
1520                "                               ENDIF\n"
1521                "                               MOV last.x, vshared\n"
1522                "                       ELSE\n"
1523                "                               END\n"
1524                "                       ENDIF\n"
1525                "                       UADD count.x, count, k1\n"
1526                "                       USEQ tmp0.x, count, k128\n"
1527                "                       IF tmp0\n"
1528                "                               BRK\n"
1529                "                       ENDIF\n"
1530                "               ENDLOOP\n"
1531                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1532                "               UADD offset.x, offset, k128\n"
1533                "               ATOMXCHG tmp0.x, target, offset, k11111111\n"
1534                "               SFENCE target\n"
1535                "       ENDIF\n"
1536                "#ifdef TARGET_LOCAL\n"
1537                "       LOAD tmp0.x, RLOCAL, offset\n"
1538                "       UMUL tmp1.x, blockid, blocksz\n"
1539                "       UMUL tmp1.x, tmp1, k4\n"
1540                "       UADD offset.x, offset, tmp1\n"
1541                "       STORE RES[0].x, offset, tmp0\n"
1542                "#endif\n"
1543                "       RET\n"
1544                "    ENDSUB\n";
1545
1546        void init(void *p, int s, int x, int y) {
1547                *(uint32_t *)p = 0xdeadbeef;
1548        }
1549        void expect(void *p, int s, int x, int y) {
1550                *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff;
1551        }
1552
1553        printf("- %s (%s)\n", __func__, global ? "global" : "local");
1554
1555        init_prog(ctx, 256, 0, 0, src,
1556                  (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));
1557        init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,
1558                 4096, 0, init);
1559        init_compute_resources(ctx, (int []) { 0, -1 });
1560        launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);
1561        check_tex(ctx, 0, expect, NULL);
1562        destroy_compute_resources(ctx);
1563        destroy_tex(ctx);
1564        destroy_prog(ctx);
1565}
1566
1567int main(int argc, char *argv[])
1568{
1569        struct context *ctx = CALLOC_STRUCT(context);
1570
1571        init_ctx(ctx);
1572        test_system_values(ctx);
1573        test_resource_access(ctx);
1574        test_function_calls(ctx);
1575        test_input_global(ctx);
1576        test_private(ctx);
1577        test_local(ctx);
1578        test_sample(ctx);
1579        test_many_kern(ctx);
1580        test_constant(ctx);
1581        test_resource_indirect(ctx);
1582        test_surface_ld(ctx);
1583        test_surface_st(ctx);
1584        test_barrier(ctx);
1585        test_atom_ops(ctx, true);
1586        test_atom_race(ctx, true);
1587        test_atom_ops(ctx, false);
1588        test_atom_race(ctx, false);
1589        destroy_ctx(ctx);
1590
1591        return 0;
1592}
1593