1/*
2 * Copyright 2012 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 * Authors: Ben Skeggs
23 *
24 */
25
/* Parameter list shared by the "possible" and "execute" functions of every
 * transfer method in this file: the context, the requested filter, and the
 * source and destination rectangles.
 */
#define XFER_ARGS                                                              \
   struct nv30_context *nv30,                                                  \
   enum nv30_transfer_filter filter,                                           \
   struct nv30_rect *src,                                                      \
   struct nv30_rect *dst
29
30#include "util/u_math.h"
31
32#include "nouveau/nv_object.xml.h"
33#include "nouveau/nv_m2mf.xml.h"
34#include "nv01_2d.xml.h"
35#include "nv30-40_3d.xml.h"
36
37#include "nv30_context.h"
38#include "nv30_transfer.h"
39
40/* Various helper functions to transfer different types of data in a number
41 * of different ways.
42 */
43
44static INLINE boolean
45nv30_transfer_scaled(struct nv30_rect *src, struct nv30_rect *dst)
46{
47   if (src->x1 - src->x0 != dst->x1 - dst->x0)
48      return TRUE;
49   if (src->y1 - src->y0 != dst->y1 - dst->y0)
50      return TRUE;
51   return FALSE;
52}
53
54static INLINE boolean
55nv30_transfer_blit(XFER_ARGS)
56{
57   if (nv30->screen->eng3d->oclass < NV40_3D_CLASS)
58      return FALSE;
59   if (dst->offset & 63 || dst->pitch & 63 || dst->d > 1)
60      return FALSE;
61   if (dst->w < 2 || dst->h < 2)
62      return FALSE;
63   if (dst->cpp > 4 || (dst->cpp == 1 && !dst->pitch))
64      return FALSE;
65   if (src->cpp > 4)
66      return FALSE;
67   return TRUE;
68}
69
70static INLINE struct nouveau_heap *
71nv30_transfer_rect_vertprog(struct nv30_context *nv30)
72{
73   struct nouveau_heap *heap = nv30->screen->vp_exec_heap;
74   struct nouveau_heap *vp;
75
76   vp = nv30->blit_vp;
77   if (!vp) {
78      if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp)) {
79         while (heap->next && heap->size < 2) {
80            struct nouveau_heap **evict = heap->next->priv;
81            nouveau_heap_free(evict);
82         }
83
84         if (nouveau_heap_alloc(heap, 2, &nv30->blit_vp, &nv30->blit_vp))
85            return NULL;
86      }
87
88      vp = nv30->blit_vp;
89      if (vp) {
90         struct nouveau_pushbuf *push = nv30->base.pushbuf;
91
92         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_FROM_ID), 1);
93         PUSH_DATA (push, vp->start);
94         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
95         PUSH_DATA (push, 0x401f9c6c); /* mov o[hpos], a[0]; */
96         PUSH_DATA (push, 0x0040000d);
97         PUSH_DATA (push, 0x8106c083);
98         PUSH_DATA (push, 0x6041ff80);
99         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_INST(0)), 4);
100         PUSH_DATA (push, 0x401f9c6c); /* mov o[tex0], a[8]; end; */
101         PUSH_DATA (push, 0x0040080d);
102         PUSH_DATA (push, 0x8106c083);
103         PUSH_DATA (push, 0x6041ff9d);
104      }
105   }
106
107   return vp;
108}
109
110
111static INLINE struct nv04_resource *
112nv30_transfer_rect_fragprog(struct nv30_context *nv30)
113{
114   struct nv04_resource *fp = nv04_resource(nv30->blit_fp);
115   struct pipe_context *pipe = &nv30->base.pipe;
116
117   if (!fp) {
118      nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4);
119      if (nv30->blit_fp) {
120         struct pipe_transfer *transfer;
121         u32 *map = pipe_buffer_map(pipe, nv30->blit_fp,
122                                    PIPE_TRANSFER_WRITE, &transfer);
123         if (map) {
124            map[0] = 0x17009e00; /* texr r0, i[tex0], texture[0]; end; */
125            map[1] = 0x1c9dc801;
126            map[2] = 0x0001c800;
127            map[3] = 0x3fe1c800;
128            map[4] = 0x01401e81; /* end; */
129            map[5] = 0x1c9dc800;
130            map[6] = 0x0001c800;
131            map[7] = 0x0001c800;
132            pipe_buffer_unmap(pipe, transfer);
133         }
134
135         fp = nv04_resource(nv30->blit_fp);
136         nouveau_buffer_migrate(&nv30->base, fp, NOUVEAU_BO_VRAM);
137      }
138   }
139
140   return fp;
141}
142
143static void
144nv30_transfer_rect_blit(XFER_ARGS)
145{
146   struct nv04_resource *fp = nv30_transfer_rect_fragprog(nv30);
147   struct nouveau_heap *vp = nv30_transfer_rect_vertprog(nv30);
148   struct nouveau_pushbuf *push = nv30->base.pushbuf;
149   struct nouveau_pushbuf_refn refs[] = {
150      { fp->bo, fp->domain | NOUVEAU_BO_RD },
151      { src->bo, src->domain | NOUVEAU_BO_RD },
152      { dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR },
153   };
154   u32 texfmt, texswz;
155   u32 format, stride;
156
157   if (nouveau_pushbuf_space(push, 512, 8, 0) ||
158       nouveau_pushbuf_refn (push, refs, sizeof(refs) / sizeof(refs[0])))
159      return;
160
161   /* various switches depending on cpp of the transfer */
162   switch (dst->cpp) {
163   case 4:
164      format = NV30_3D_RT_FORMAT_COLOR_A8R8G8B8 |
165               NV30_3D_RT_FORMAT_ZETA_Z24S8;
166      texfmt = NV40_3D_TEX_FORMAT_FORMAT_A8R8G8B8;
167      texswz = 0x0000aae4;
168      break;
169   case 2:
170      format = NV30_3D_RT_FORMAT_COLOR_R5G6B5 |
171               NV30_3D_RT_FORMAT_ZETA_Z16;
172      texfmt = NV40_3D_TEX_FORMAT_FORMAT_R5G6B5;
173      texswz = 0x0000a9e4;
174      break;
175   case 1:
176      format = NV30_3D_RT_FORMAT_COLOR_B8 |
177               NV30_3D_RT_FORMAT_ZETA_Z16;
178      texfmt = NV40_3D_TEX_FORMAT_FORMAT_L8;
179      texswz = 0x0000aaff;
180      break;
181   default:
182      assert(0);
183      return;
184   }
185
186   /* render target */
187   if (!dst->pitch) {
188      format |= NV30_3D_RT_FORMAT_TYPE_SWIZZLED;
189      format |= util_logbase2(dst->w) << 16;
190      format |= util_logbase2(dst->h) << 24;
191      stride  = 64;
192   } else {
193      format |= NV30_3D_RT_FORMAT_TYPE_LINEAR;
194      stride  = dst->pitch;
195   }
196
197   BEGIN_NV04(push, NV30_3D(VIEWPORT_HORIZ), 2);
198   PUSH_DATA (push, dst->w << 16);
199   PUSH_DATA (push, dst->h << 16);
200   BEGIN_NV04(push, NV30_3D(RT_HORIZ), 5);
201   PUSH_DATA (push, dst->w << 16);
202   PUSH_DATA (push, dst->h << 16);
203   PUSH_DATA (push, format);
204   PUSH_DATA (push, stride);
205   PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
206   BEGIN_NV04(push, NV30_3D(RT_ENABLE), 1);
207   PUSH_DATA (push, NV30_3D_RT_ENABLE_COLOR0);
208
209   nv30->dirty |= NV30_NEW_FRAMEBUFFER;
210
211   /* viewport state */
212   BEGIN_NV04(push, NV30_3D(VIEWPORT_TRANSLATE_X), 8);
213   PUSH_DATAf(push, 0.0);
214   PUSH_DATAf(push, 0.0);
215   PUSH_DATAf(push, 0.0);
216   PUSH_DATAf(push, 0.0);
217   PUSH_DATAf(push, 1.0);
218   PUSH_DATAf(push, 1.0);
219   PUSH_DATAf(push, 1.0);
220   PUSH_DATAf(push, 1.0);
221   BEGIN_NV04(push, NV30_3D(DEPTH_RANGE_NEAR), 2);
222   PUSH_DATAf(push, 0.0);
223   PUSH_DATAf(push, 1.0);
224
225   nv30->dirty |= NV30_NEW_VIEWPORT;
226
227   /* blend state */
228   BEGIN_NV04(push, NV30_3D(COLOR_LOGIC_OP_ENABLE), 1);
229   PUSH_DATA (push, 0);
230   BEGIN_NV04(push, NV30_3D(DITHER_ENABLE), 1);
231   PUSH_DATA (push, 0);
232   BEGIN_NV04(push, NV30_3D(BLEND_FUNC_ENABLE), 1);
233   PUSH_DATA (push, 0);
234   BEGIN_NV04(push, NV30_3D(COLOR_MASK), 1);
235   PUSH_DATA (push, 0x01010101);
236
237   nv30->dirty |= NV30_NEW_BLEND;
238
239   /* depth-stencil-alpha state */
240   BEGIN_NV04(push, NV30_3D(DEPTH_WRITE_ENABLE), 2);
241   PUSH_DATA (push, 0);
242   PUSH_DATA (push, 0);
243   BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(0)), 1);
244   PUSH_DATA (push, 0);
245   BEGIN_NV04(push, NV30_3D(STENCIL_ENABLE(1)), 1);
246   PUSH_DATA (push, 0);
247   BEGIN_NV04(push, NV30_3D(ALPHA_FUNC_ENABLE), 1);
248   PUSH_DATA (push, 0);
249
250   nv30->dirty |= NV30_NEW_ZSA;
251
252   /* rasterizer state */
253   BEGIN_NV04(push, NV30_3D(SHADE_MODEL), 1);
254   PUSH_DATA (push, NV30_3D_SHADE_MODEL_FLAT);
255   BEGIN_NV04(push, NV30_3D(CULL_FACE_ENABLE), 1);
256   PUSH_DATA (push, 0);
257   BEGIN_NV04(push, NV30_3D(POLYGON_MODE_FRONT), 2);
258   PUSH_DATA (push, NV30_3D_POLYGON_MODE_FRONT_FILL);
259   PUSH_DATA (push, NV30_3D_POLYGON_MODE_BACK_FILL);
260   BEGIN_NV04(push, NV30_3D(POLYGON_OFFSET_FILL_ENABLE), 1);
261   PUSH_DATA (push, 0);
262   BEGIN_NV04(push, NV30_3D(POLYGON_STIPPLE_ENABLE), 1);
263   PUSH_DATA (push, 0);
264
265   nv30->state.scissor_off = 0;
266   nv30->dirty |= NV30_NEW_RASTERIZER;
267
268   /* vertex program */
269   BEGIN_NV04(push, NV30_3D(VP_START_FROM_ID), 1);
270   PUSH_DATA (push, vp->start);
271   BEGIN_NV04(push, NV40_3D(VP_ATTRIB_EN), 2);
272   PUSH_DATA (push, 0x00000101); /* attrib: 0, 8 */
273   PUSH_DATA (push, 0x00004000); /* result: hpos, tex0 */
274   BEGIN_NV04(push, NV30_3D(ENGINE), 1);
275   PUSH_DATA (push, 0x00000103);
276   BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
277   PUSH_DATA (push, 0x00000000);
278
279   nv30->dirty |= NV30_NEW_VERTPROG;
280   nv30->dirty |= NV30_NEW_CLIP;
281
282   /* fragment program */
283   BEGIN_NV04(push, NV30_3D(FP_ACTIVE_PROGRAM), 1);
284   PUSH_RELOC(push, fp->bo, fp->offset, fp->domain |
285                    NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
286                    NV30_3D_FP_ACTIVE_PROGRAM_DMA0,
287                    NV30_3D_FP_ACTIVE_PROGRAM_DMA1);
288   BEGIN_NV04(push, NV30_3D(FP_CONTROL), 1);
289   PUSH_DATA (push, 0x02000000);
290
291   nv30->state.fragprog = NULL;
292   nv30->dirty |= NV30_NEW_FRAGPROG;
293
294   /* texture */
295   texfmt |= 1 << NV40_3D_TEX_FORMAT_MIPMAP_COUNT__SHIFT;
296   texfmt |= NV30_3D_TEX_FORMAT_NO_BORDER;
297   texfmt |= NV40_3D_TEX_FORMAT_RECT;
298   texfmt |= 0x00008000;
299   if (src->d < 2)
300      texfmt |= NV30_3D_TEX_FORMAT_DIMS_2D;
301   else
302      texfmt |= NV30_3D_TEX_FORMAT_DIMS_3D;
303   if (src->pitch)
304      texfmt |= NV40_3D_TEX_FORMAT_LINEAR;
305
306   BEGIN_NV04(push, NV30_3D(TEX_OFFSET(0)), 8);
307   PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
308   PUSH_RELOC(push, src->bo, texfmt, NOUVEAU_BO_OR,
309                    NV30_3D_TEX_FORMAT_DMA0, NV30_3D_TEX_FORMAT_DMA1);
310   PUSH_DATA (push, NV30_3D_TEX_WRAP_S_CLAMP_TO_EDGE |
311                    NV30_3D_TEX_WRAP_T_CLAMP_TO_EDGE |
312                    NV30_3D_TEX_WRAP_R_CLAMP_TO_EDGE);
313   PUSH_DATA (push, NV40_3D_TEX_ENABLE_ENABLE);
314   PUSH_DATA (push, texswz);
315   switch (filter) {
316   case BILINEAR:
317      PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_LINEAR |
318                       NV30_3D_TEX_FILTER_MAG_LINEAR | 0x00002000);
319      break;
320   default:
321      PUSH_DATA (push, NV30_3D_TEX_FILTER_MIN_NEAREST |
322                       NV30_3D_TEX_FILTER_MAG_NEAREST | 0x00002000);
323      break;
324   }
325   PUSH_DATA (push, (src->w << 16) | src->h);
326   PUSH_DATA (push, 0x00000000);
327   BEGIN_NV04(push, NV40_3D(TEX_SIZE1(0)), 1);
328   PUSH_DATA (push, 0x00100000 | src->pitch);
329   BEGIN_NV04(push, SUBC_3D(0x0b40), 1);
330   PUSH_DATA (push, src->d < 2 ? 0x00000001 : 0x00000000);
331   BEGIN_NV04(push, NV40_3D(TEX_CACHE_CTL), 1);
332   PUSH_DATA (push, 1);
333
334   nv30->fragprog.dirty_samplers |= 1;
335   nv30->dirty |= NV30_NEW_FRAGTEX;
336
337   /* blit! */
338   BEGIN_NV04(push, NV30_3D(SCISSOR_HORIZ), 2);
339   PUSH_DATA (push, (dst->x1 - dst->x0) << 16 | dst->x0);
340   PUSH_DATA (push, (dst->y1 - dst->y0) << 16 | dst->y0);
341   BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
342   PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_QUADS);
343   BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
344   PUSH_DATAf(push, src->x0);
345   PUSH_DATAf(push, src->y0);
346   PUSH_DATAf(push, src->z);
347   BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
348   PUSH_DATA (push, (dst->y0 << 16) | dst->x0);
349   BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
350   PUSH_DATAf(push, src->x1);
351   PUSH_DATAf(push, src->y0);
352   PUSH_DATAf(push, src->z);
353   BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
354   PUSH_DATA (push, (dst->y0 << 16) | dst->x1);
355   BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
356   PUSH_DATAf(push, src->x1);
357   PUSH_DATAf(push, src->y1);
358   PUSH_DATAf(push, src->z);
359   BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
360   PUSH_DATA (push, (dst->y1 << 16) | dst->x1);
361   BEGIN_NV04(push, NV30_3D(VTX_ATTR_3F(8)), 3);
362   PUSH_DATAf(push, src->x0);
363   PUSH_DATAf(push, src->y1);
364   PUSH_DATAf(push, src->z);
365   BEGIN_NV04(push, NV30_3D(VTX_ATTR_2I(0)), 1);
366   PUSH_DATA (push, (dst->y1 << 16) | dst->x0);
367   BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
368   PUSH_DATA (push, NV30_3D_VERTEX_BEGIN_END_STOP);
369}
370
371static boolean
372nv30_transfer_sifm(XFER_ARGS)
373{
374   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
375      return FALSE;
376
377   if (src->d > 1 || dst->d > 1)
378      return FALSE;
379
380   if (dst->offset & 63)
381      return FALSE;
382
383   if (!dst->pitch) {
384      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
385         return FALSE;
386   } else {
387      if (dst->domain != NOUVEAU_BO_VRAM)
388         return FALSE;
389      if (dst->pitch & 63)
390         return FALSE;
391   }
392
393   return TRUE;
394}
395
396static void
397nv30_transfer_rect_sifm(XFER_ARGS)
398
399{
400   struct nouveau_pushbuf *push = nv30->base.pushbuf;
401   struct nouveau_pushbuf_refn refs[] = {
402      { src->bo, src->domain | NOUVEAU_BO_RD },
403      { dst->bo, dst->domain | NOUVEAU_BO_WR },
404   };
405   struct nv04_fifo *fifo = push->channel->data;
406   unsigned si_fmt, si_arg;
407   unsigned ss_fmt;
408
409   switch (dst->cpp) {
410   case 4: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_A8R8G8B8; break;
411   case 2: ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_R5G6B5; break;
412   default:
413      ss_fmt = NV04_SURFACE_SWZ_FORMAT_COLOR_Y8;
414      break;
415   }
416
417   switch (src->cpp) {
418   case 4: si_fmt = NV03_SIFM_COLOR_FORMAT_A8R8G8B8; break;
419   case 2: si_fmt = NV03_SIFM_COLOR_FORMAT_R5G6B5; break;
420   default:
421      si_fmt = NV03_SIFM_COLOR_FORMAT_AY8;
422      break;
423   }
424
425   if (filter == NEAREST) {
426      si_arg  = NV03_SIFM_FORMAT_ORIGIN_CENTER;
427      si_arg |= NV03_SIFM_FORMAT_FILTER_POINT_SAMPLE;
428   } else {
429      si_arg  = NV03_SIFM_FORMAT_ORIGIN_CORNER;
430      si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
431   }
432
433   if (nouveau_pushbuf_space(push, 32, 6, 0) ||
434       nouveau_pushbuf_refn (push, refs, 2))
435      return;
436
437   if (dst->pitch) {
438      BEGIN_NV04(push, NV04_SF2D(DMA_IMAGE_SOURCE), 2);
439      PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
440      PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
441      BEGIN_NV04(push, NV04_SF2D(FORMAT), 4);
442      PUSH_DATA (push, ss_fmt);
443      PUSH_DATA (push, dst->pitch << 16 | dst->pitch);
444      PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
445      PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
446      BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
447      PUSH_DATA (push, nv30->screen->surf2d->handle);
448   } else {
449      BEGIN_NV04(push, NV04_SSWZ(DMA_IMAGE), 1);
450      PUSH_RELOC(push, dst->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
451      BEGIN_NV04(push, NV04_SSWZ(FORMAT), 2);
452      PUSH_DATA (push, ss_fmt | (util_logbase2(dst->w) << 16) |
453                                (util_logbase2(dst->h) << 24));
454      PUSH_RELOC(push, dst->bo, dst->offset, NOUVEAU_BO_LOW, 0, 0);
455      BEGIN_NV04(push, NV05_SIFM(SURFACE), 1);
456      PUSH_DATA (push, nv30->screen->swzsurf->handle);
457   }
458
459   BEGIN_NV04(push, NV03_SIFM(DMA_IMAGE), 1);
460   PUSH_RELOC(push, src->bo, 0, NOUVEAU_BO_OR, fifo->vram, fifo->gart);
461   BEGIN_NV04(push, NV03_SIFM(COLOR_FORMAT), 8);
462   PUSH_DATA (push, si_fmt);
463   PUSH_DATA (push, NV03_SIFM_OPERATION_SRCCOPY);
464   PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
465   PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
466   PUSH_DATA (push, (           dst->y0  << 16) |            dst->x0);
467   PUSH_DATA (push, ((dst->y1 - dst->y0) << 16) | (dst->x1 - dst->x0));
468   PUSH_DATA (push, ((src->x1 - src->x0) << 20) / (dst->x1 - dst->x0));
469   PUSH_DATA (push, ((src->y1 - src->y0) << 20) / (dst->y1 - dst->y0));
470   BEGIN_NV04(push, NV03_SIFM(SIZE), 4);
471   PUSH_DATA (push, align(src->h, 2) << 16 | align(src->w, 2));
472   PUSH_DATA (push, src->pitch | si_arg);
473   PUSH_RELOC(push, src->bo, src->offset, NOUVEAU_BO_LOW, 0, 0);
474   PUSH_DATA (push, (src->y0 << 20) | src->x0 << 4);
475}
476
/* The NOP+OFFSET_OUT sequence after each M2MF transfer *is* actually required
 * to prevent some odd things from happening; this is easily reproduced by
 * attempting conditional rendering some time after an M2MF transfer has been
 * done.  0x1e98 will fail with a DMA_W_PROTECTION (assuming that name is
 * still accurate on nv4x) error.
 */
483
484static boolean
485nv30_transfer_m2mf(XFER_ARGS)
486{
487   if (!src->pitch || !dst->pitch)
488      return FALSE;
489   if (nv30_transfer_scaled(src, dst))
490      return FALSE;
491   return TRUE;
492}
493
494static void
495nv30_transfer_rect_m2mf(XFER_ARGS)
496{
497   struct nouveau_pushbuf *push = nv30->base.pushbuf;
498   struct nouveau_pushbuf_refn refs[] = {
499      { src->bo, src->domain | NOUVEAU_BO_RD },
500      { dst->bo, dst->domain | NOUVEAU_BO_WR },
501   };
502   struct nv04_fifo *fifo = push->channel->data;
503   unsigned src_offset = src->offset;
504   unsigned dst_offset = dst->offset;
505   unsigned w = dst->x1 - dst->x0;
506   unsigned h = dst->y1 - dst->y0;
507
508   src_offset += (src->y0 * src->pitch) + (src->x0 * src->cpp);
509   dst_offset += (dst->y0 * dst->pitch) + (dst->x0 * dst->cpp);
510
511   BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
512   PUSH_DATA (push, (src->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
513   PUSH_DATA (push, (dst->domain == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
514
515   while (h) {
516      unsigned lines = (h > 2047) ? 2047 : h;
517
518      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
519          nouveau_pushbuf_refn (push, refs, 2))
520         return;
521
522      BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
523      PUSH_RELOC(push, src->bo, src_offset, NOUVEAU_BO_LOW, 0, 0);
524      PUSH_RELOC(push, dst->bo, dst_offset, NOUVEAU_BO_LOW, 0, 0);
525      PUSH_DATA (push, src->pitch);
526      PUSH_DATA (push, dst->pitch);
527      PUSH_DATA (push, w * src->cpp);
528      PUSH_DATA (push, lines);
529      PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
530                       NV03_M2MF_FORMAT_OUTPUT_INC_1);
531      PUSH_DATA (push, 0x00000000);
532      BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
533      PUSH_DATA (push, 0x00000000);
534      BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
535      PUSH_DATA (push, 0x00000000);
536
537      h -= lines;
538      src_offset += src->pitch * lines;
539      dst_offset += dst->pitch * lines;
540   }
541}
542
543static boolean
544nv30_transfer_cpu(XFER_ARGS)
545{
546   if (nv30_transfer_scaled(src, dst))
547      return FALSE;
548   return TRUE;
549}
550
551static char *
552linear_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
553{
554   return base + (y * rect->pitch) + (x * rect->cpp);
555}
556
/* Spread the bits of 'v' apart so that bit n of the input lands on bit 2n
 * of the result, then shift the result left by 's'.  Bits of 'v' that do
 * not survive the masks below are discarded.
 */
static INLINE unsigned
swizzle2d(unsigned v, unsigned s)
{
   static const struct {
      unsigned shift;
      unsigned mask;
   } steps[] = {
      { 8, 0x00ff00ff },
      { 4, 0x0f0f0f0f },
      { 2, 0x33333333 },
      { 1, 0x55555555 },
   };
   unsigned i;

   for (i = 0; i < sizeof(steps) / sizeof(steps[0]); i++)
      v = (v | (v << steps[i].shift)) & steps[i].mask;

   return v << s;
}
566
567static char *
568swizzle2d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
569{
570   unsigned k = util_logbase2(MIN2(rect->w, rect->h));
571   unsigned km = (1 << k) - 1;
572   unsigned nx = rect->w >> k;
573   unsigned tx = x >> k;
574   unsigned ty = y >> k;
575   unsigned m;
576
577   m  = swizzle2d(x & km, 0);
578   m |= swizzle2d(y & km, 1);
579   m += ((ty * nx) + tx) << k << k;
580
581   return base + (m * rect->cpp);
582}
583
584static char *
585swizzle3d_ptr(struct nv30_rect *rect, char *base, int x, int y, int z)
586{
587   unsigned w = rect->w >> 1;
588   unsigned h = rect->h >> 1;
589   unsigned d = rect->d >> 1;
590   unsigned i = 0, o;
591   unsigned v = 0;
592
593   do {
594      o = i;
595      if (w) {
596         v |= (x & 1) << i++;
597         x >>= 1;
598         w >>= 1;
599      }
600      if (h) {
601         v |= (y & 1) << i++;
602         y >>= 1;
603         h >>= 1;
604      }
605      if (d) {
606         v |= (z & 1) << i++;
607         z >>= 1;
608         d >>= 1;
609      }
610   } while(o != i);
611
612   return base + (v * rect->cpp);
613}
614
615typedef char *(*get_ptr_t)(struct nv30_rect *, char *, int, int, int);
616
617static INLINE get_ptr_t
618get_ptr(struct nv30_rect *rect)
619{
620   if (rect->pitch)
621      return linear_ptr;
622
623   if (rect->d <= 1)
624      return swizzle2d_ptr;
625
626   return swizzle3d_ptr;
627}
628
629static void
630nv30_transfer_rect_cpu(XFER_ARGS)
631{
632   get_ptr_t sp = get_ptr(src);
633   get_ptr_t dp = get_ptr(dst);
634   char *srcmap, *dstmap;
635   int x, y;
636
637   nouveau_bo_map(src->bo, NOUVEAU_BO_RD, nv30->base.client);
638   nouveau_bo_map(dst->bo, NOUVEAU_BO_WR, nv30->base.client);
639   srcmap = src->bo->map + src->offset;
640   dstmap = dst->bo->map + dst->offset;
641
642   for (y = 0; y < (dst->y1 - dst->y0); y++) {
643      for (x = 0; x < (dst->x1 - dst->x0); x++) {
644         memcpy(dp(dst, dstmap, dst->x0 + x, dst->y0 + y, dst->z),
645                sp(src, srcmap, src->x0 + x, src->y0 + y, src->z), dst->cpp);
646      }
647   }
648}
649
650void
651nv30_transfer_rect(struct nv30_context *nv30, enum nv30_transfer_filter filter,
652                   struct nv30_rect *src, struct nv30_rect *dst)
653{
654   static const struct {
655      char *name;
656      boolean (*possible)(XFER_ARGS);
657      void (*execute)(XFER_ARGS);
658   } *method, methods[] = {
659      { "m2mf", nv30_transfer_m2mf, nv30_transfer_rect_m2mf },
660      { "sifm", nv30_transfer_sifm, nv30_transfer_rect_sifm },
661      { "blit", nv30_transfer_blit, nv30_transfer_rect_blit },
662      { "rect", nv30_transfer_cpu, nv30_transfer_rect_cpu },
663      {}
664   };
665
666   method = methods - 1;
667   while ((++method)->possible) {
668      if (method->possible(nv30, filter, src, dst)) {
669         method->execute(nv30, filter, src, dst);
670         return;
671      }
672   }
673
674   assert(0);
675}
676
/* Placeholder for pushing 'size' bytes of 'data' into 'bo' at 'offset'.
 *
 * Not implemented: no argument is used and the only effect is a diagnostic
 * message on stderr.
 */
void
nv30_transfer_push_data(struct nouveau_context *nv,
                        struct nouveau_bo *bo, unsigned offset, unsigned domain,
                        unsigned size, void *data)
{
   /* use ifc, or scratch + copy_data? */
   static const char message[] = "nv30: push_data not implemented\n";

   fputs(message, stderr);
}
685
686void
687nv30_transfer_copy_data(struct nouveau_context *nv,
688                        struct nouveau_bo *dst, unsigned d_off, unsigned d_dom,
689                        struct nouveau_bo *src, unsigned s_off, unsigned s_dom,
690                        unsigned size)
691{
692   struct nv04_fifo *fifo = nv->screen->channel->data;
693   struct nouveau_pushbuf_refn refs[] = {
694      { src, s_dom | NOUVEAU_BO_RD },
695      { dst, d_dom | NOUVEAU_BO_WR },
696   };
697   struct nouveau_pushbuf *push = nv->pushbuf;
698   unsigned pages, lines;
699
700   pages = size >> 12;
701   size -= (pages << 12);
702
703   BEGIN_NV04(push, NV03_M2MF(DMA_BUFFER_IN), 2);
704   PUSH_DATA (push, (s_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
705   PUSH_DATA (push, (d_dom == NOUVEAU_BO_VRAM) ? fifo->vram : fifo->gart);
706
707   while (pages) {
708      lines  = (pages > 2047) ? 2047 : pages;
709      pages -= lines;
710
711      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
712          nouveau_pushbuf_refn (push, refs, 2))
713         return;
714
715      BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
716      PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
717      PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
718      PUSH_DATA (push, 4096);
719      PUSH_DATA (push, 4096);
720      PUSH_DATA (push, 4096);
721      PUSH_DATA (push, lines);
722      PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
723                       NV03_M2MF_FORMAT_OUTPUT_INC_1);
724      PUSH_DATA (push, 0x00000000);
725      BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
726      PUSH_DATA (push, 0x00000000);
727      BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
728      PUSH_DATA (push, 0x00000000);
729
730      s_off += (lines << 12);
731      d_off += (lines << 12);
732   }
733
734   if (size) {
735      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
736          nouveau_pushbuf_refn (push, refs, 2))
737         return;
738
739      BEGIN_NV04(push, NV03_M2MF(OFFSET_IN), 8);
740      PUSH_RELOC(push, src, s_off, NOUVEAU_BO_LOW, 0, 0);
741      PUSH_RELOC(push, dst, d_off, NOUVEAU_BO_LOW, 0, 0);
742      PUSH_DATA (push, size);
743      PUSH_DATA (push, size);
744      PUSH_DATA (push, size);
745      PUSH_DATA (push, 1);
746      PUSH_DATA (push, NV03_M2MF_FORMAT_INPUT_INC_1 |
747                       NV03_M2MF_FORMAT_OUTPUT_INC_1);
748      PUSH_DATA (push, 0x00000000);
749      BEGIN_NV04(push, NV04_GRAPH(M2MF, NOP), 1);
750      PUSH_DATA (push, 0x00000000);
751      BEGIN_NV04(push, NV03_M2MF(OFFSET_OUT), 1);
752      PUSH_DATA (push, 0x00000000);
753   }
754}
755