/*
 * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
2788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
2888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora#include "radeon_tile.h"
2988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
3088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora#include <stdint.h>
3188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora#include <string.h>
3288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
338014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora#include "main/macros.h"
348014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora#include "radeon_debug.h"
3588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
3688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora#define MICRO_TILE_SIZE 32
3788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
3888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencorastatic void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
3988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                  void * const dst, unsigned dst_pitch,
4088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                  unsigned width, unsigned height)
4188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora{
4288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned row; /* current source row */
4388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned col; /* current source column */
4488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned k; /* number of processed tiles */
4588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tile_width = 8, tile_height = 4;
4688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
4788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
4888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    k = 0;
4988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    for (row = 0; row < height; row += tile_height)
5088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    {
5188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
5288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        {
5388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint8_t *src2 = (uint8_t *)src + src_pitch * row + col;
5488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint8_t *dst2 = (uint8_t *)dst + row * dst_pitch +
5588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
5688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            unsigned j;
5788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
5888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
5988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
6088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
6188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint8_t));
6288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                dst2 += tile_width;
6388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                src2 += src_pitch;
6488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
6588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        }
6688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    }
6788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora}
6888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
6988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencorastatic void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
7088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   void * const dst, unsigned dst_pitch,
7188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   unsigned width, unsigned height)
7288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora{
7388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned row; /* current source row */
7488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned col; /* current source column */
7588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned k; /* number of processed tiles */
7688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tile_width = 4, tile_height = 4;
7788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
7888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
7988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    k = 0;
8088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    for (row = 0; row < height; row += tile_height)
8188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    {
8288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
8388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        {
8488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
8588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
8688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
8788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            unsigned j;
8888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
8988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
9088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
9188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
9288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint16_t));
9388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                dst2 += tile_width;
9488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                src2 += src_pitch;
9588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
9688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        }
9788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    }
9888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora}
9988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
10088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencorastatic void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
10188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   void * const dst, unsigned dst_pitch,
10288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   unsigned width, unsigned height)
10388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora{
10488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned row; /* current source row */
10588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned col; /* current source column */
10688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned k; /* number of processed tiles */
10788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tile_width = 8, tile_height = 2;
10888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
10988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
11088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    k = 0;
11188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    for (row = 0; row < height; row += tile_height)
11288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    {
11388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
11488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        {
11588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint16_t *src2 = (uint16_t *)src + src_pitch * row + col;
11688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint16_t *dst2 = (uint16_t *)dst + row * dst_pitch +
11788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
11888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            unsigned j;
11988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
12088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
12188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
12288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
12388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint16_t));
12488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                dst2 += tile_width;
12588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                src2 += src_pitch;
12688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
12788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        }
12888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    }
12988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora}
13088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
13188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencorastatic void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
13288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   void * const dst, unsigned dst_pitch,
13388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   unsigned width, unsigned height)
13488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora{
13588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned row; /* current source row */
13688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned col; /* current source column */
13788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned k; /* number of processed tiles */
13888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tile_width = 4, tile_height = 2;
13988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
14088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
14188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    k = 0;
14288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    for (row = 0; row < height; row += tile_height)
14388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    {
14488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
14588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        {
14688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint32_t *src2 = (uint32_t *)src + src_pitch * row + col;
14788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint32_t *dst2 = (uint32_t *)dst + row * dst_pitch +
14888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
14988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            unsigned j;
15088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
15188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
15288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
15388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
15488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint32_t));
15588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                dst2 += tile_width;
15688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                src2 += src_pitch;
15788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
15888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        }
15988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    }
16088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora}
16188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
16288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencorastatic void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
16388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   void * const dst, unsigned dst_pitch,
16488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                                   unsigned width, unsigned height)
16588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora{
16688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned row; /* current source row */
16788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned col; /* current source column */
16888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    unsigned k; /* number of processed tiles */
16988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tile_width = 2, tile_height = 2;
17088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
17188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
17288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    k = 0;
17388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    for (row = 0; row < height; row += tile_height)
17488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    {
17588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
17688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        {
17788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint64_t *src2 = (uint64_t *)src + src_pitch * row + col;
17888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            uint64_t *dst2 = (uint64_t *)dst + row * dst_pitch +
17988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
18088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            unsigned j;
18188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
18288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
18388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
18488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
18588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint64_t));
18688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                dst2 += tile_width;
18788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                src2 += src_pitch;
18888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
18988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        }
19088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    }
19188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora}
19288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
/*
 * Copy an image of 128-bit (16-byte) elements row by row.
 *
 * With 1x1 tiles every element is its own tile, so the copy is a plain
 * per-row copy from src to dst.
 *
 * src_pitch and dst_pitch are strides counted in 16-byte elements (not
 * bytes); width and height are the image dimensions in elements / rows.
 * Only the first `width` elements of each row are written; any padding
 * between `width` and `dst_pitch` is left untouched.
 */
static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                    void * dst, unsigned dst_pitch,
                                    unsigned width, unsigned height)
{
    const unsigned elem_size = 16; /* sizeof(uint128_t) */
    /* byte pointers: arithmetic on void * is not standard C */
    const uint8_t *src_row = src;
    uint8_t *dst_row = dst;
    unsigned j;

    /*
     * Exactly one copy per image row.  Each memcpy already moves a whole
     * row, so iterating over the columns as well would copy width * height
     * rows and run past the end of both buffers.
     */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, width * elem_size);
        dst_row += dst_pitch * elem_size;
        src_row += src_pitch * elem_size;
    }
}
21088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
21188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencoravoid tile_image(const void * src, unsigned src_pitch,
21288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                void *dst, unsigned dst_pitch,
21388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                gl_format format, unsigned width, unsigned height)
21488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora{
21588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    assert(src_pitch >= width);
21688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    assert(dst_pitch >= width);
21788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora
2188014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
2198014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
2208014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
2218014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora
22288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    switch (_mesa_get_format_bytes(format))
22388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    {
22488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        case 16:
22588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            micro_tile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
22688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            break;
22788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        case 8:
22888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            micro_tile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
22988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            break;
23088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        case 4:
23188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            micro_tile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
23288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            break;
23388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        case 2:
23488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
23588a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
23688a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                micro_tile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
23788a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
23888a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            else
23988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            {
24088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora                micro_tile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
24188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            }
24288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            break;
24388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        case 1:
24488a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            micro_tile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
2455fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            break;
2465fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        default:
2475fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            assert(0);
2485fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            break;
2495fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    }
2505fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora}
2515fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2525fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencorastatic void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
2535fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                    void * const dst, unsigned dst_pitch,
2545fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                    unsigned width, unsigned height)
2555fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora{
2565fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned row; /* current destination row */
2575fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned col; /* current destination column */
2585fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned k; /* current tile number */
2595fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tile_width = 8, tile_height = 4;
2605fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
2615fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2625fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(src_pitch % tile_width == 0);
2635fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2645fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    k = 0;
2655fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    for (row = 0; row < height; row += tile_height)
2665fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    {
2675fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
2685fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        {
2695fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint8_t *src2 = (uint8_t *)src + row * src_pitch +
2705fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint8_t);
2715fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint8_t *dst2 = (uint8_t *)dst + dst_pitch * row + col;
2725fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            unsigned j;
2735fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2745fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
2755fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
2765fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
2775fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint8_t));
2785fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                dst2 += dst_pitch;
2795fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                src2 += tile_width;
2805fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
2815fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        }
2825fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    }
2835fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora}
2845fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2855fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencorastatic void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
2865fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     void * const dst, unsigned dst_pitch,
2875fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     unsigned width, unsigned height)
2885fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora{
2895fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned row; /* current destination row */
2905fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned col; /* current destination column */
2915fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned k; /* current tile number */
2925fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tile_width = 8, tile_height = 2;
2935fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
2945fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2955fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(src_pitch % tile_width == 0);
2965fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
2975fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    k = 0;
2985fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    for (row = 0; row < height; row += tile_height)
2995fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    {
3005fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
3015fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        {
3025fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint16_t *src2 = (uint16_t *)src + row * src_pitch +
3035fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
3045fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
3055fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            unsigned j;
3065fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3075fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
3085fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
3095fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
3105fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint16_t));
3115fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                dst2 += dst_pitch;
3125fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                src2 += tile_width;
3135fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
3145fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        }
3155fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    }
3165fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora}
3175fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3185fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencorastatic void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
3195fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     void * const dst, unsigned dst_pitch,
3205fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     unsigned width, unsigned height)
3215fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora{
3225fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned row; /* current destination row */
3235fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned col; /* current destination column */
3245fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned k; /* current tile number */
3255fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tile_width = 4, tile_height = 4;
3265fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
3275fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3285fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(src_pitch % tile_width == 0);
3295fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3305fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    k = 0;
3315fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    for (row = 0; row < height; row += tile_height)
3325fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    {
3335fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
3345fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        {
3355fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint16_t *src2 = (uint16_t *)src + row * src_pitch +
3365fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint16_t);
3375fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint16_t *dst2 = (uint16_t *)dst + dst_pitch * row + col;
3385fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            unsigned j;
3395fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3405fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
3415fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
3425fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
3435fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint16_t));
3445fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                dst2 += dst_pitch;
3455fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                src2 += tile_width;
3465fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
3475fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        }
3485fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    }
3495fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora}
3505fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3515fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencorastatic void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
3525fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     void * const dst, unsigned dst_pitch,
3535fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     unsigned width, unsigned height)
3545fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora{
3555fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned row; /* current destination row */
3565fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned col; /* current destination column */
3575fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned k; /* current tile number */
3585fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tile_width = 4, tile_height = 2;
3595fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
3605fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3615fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(src_pitch % tile_width == 0);
3625fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3635fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    k = 0;
3645fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    for (row = 0; row < height; row += tile_height)
3655fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    {
3665fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
3675fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        {
3685fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint32_t *src2 = (uint32_t *)src + row * src_pitch +
3695fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint32_t);
3705fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint32_t *dst2 = (uint32_t *)dst + dst_pitch * row + col;
3715fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            unsigned j;
3725fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3735fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
3745fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
3755fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
3765fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint32_t));
3775fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                dst2 += dst_pitch;
3785fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                src2 += tile_width;
3795fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
3805fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        }
3815fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    }
3825fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora}
3835fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3845fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencorastatic void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
3855fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     void * const dst, unsigned dst_pitch,
3865fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                                     unsigned width, unsigned height)
3875fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora{
3885fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned row; /* current destination row */
3895fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned col; /* current destination column */
3905fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    unsigned k; /* current tile number */
3915fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tile_width = 2, tile_height = 2;
3925fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    const unsigned tiles_in_row = (width + (tile_width - 1)) / tile_width;
3935fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3945fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(src_pitch % tile_width == 0);
3955fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
3965fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    k = 0;
3975fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    for (row = 0; row < height; row += tile_height)
3985fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    {
3995fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        for (col = 0; col < width; col += tile_width, ++k)
4005fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        {
4015fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint64_t *src2 = (uint64_t *)src + row * src_pitch +
4025fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                             (k % tiles_in_row) * MICRO_TILE_SIZE / sizeof(uint64_t);
4035fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            uint64_t *dst2 = (uint64_t *)dst + dst_pitch * row + col;
4045fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            unsigned j;
4055fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
4065fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            for (j = 0; j < MIN2(tile_height, height - row); ++j)
4075fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
4085fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                unsigned columns = MIN2(tile_width, width - col);
4095fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                memcpy(dst2, src2, columns * sizeof(uint64_t));
4105fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                dst2 += dst_pitch;
4115fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                src2 += tile_width;
4125fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
4135fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        }
4145fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    }
4155fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora}
4165fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
/**
 * Untile an image of 128-bit (16-byte) elements.
 *
 * The tile for this element size is 1x1, so no reordering is needed: every
 * image row is copied as one contiguous run of `width` elements.
 *
 * \param src        first element of the source image
 * \param src_pitch  distance between source rows, in 16-byte elements
 * \param dst        first element of the destination image
 * \param dst_pitch  distance between destination rows, in 16-byte elements
 * \param width      number of elements copied per row
 * \param height     number of rows copied
 */
static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
                                      void * dst, unsigned dst_pitch,
                                      unsigned width, unsigned height)
{
    const size_t elem_size = 16; /* sizeof(uint128_t) */
    /* Byte pointers: arithmetic on void * is not valid ISO C. */
    const unsigned char *src_row = src;
    unsigned char *dst_row = dst;
    unsigned j;

    /* Exactly one copy and one pitch step per row.  Stepping the pointers
     * once per element as well would visit width * height rows and run
     * past the end of both buffers. */
    for (j = 0; j < height; ++j)
    {
        memcpy(dst_row, src_row, width * elem_size);
        dst_row += dst_pitch * elem_size;
        src_row += src_pitch * elem_size;
    }
}
4345fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
4355fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencoravoid untile_image(const void * src, unsigned src_pitch,
4365fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                  void *dst, unsigned dst_pitch,
4375fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                  gl_format format, unsigned width, unsigned height)
4385fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora{
4395fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(src_pitch >= width);
4405fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    assert(dst_pitch >= width);
4415fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora
4428014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
4438014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
4448014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
4458014b5f06853a5926ee772af232abdc9cd4e90c4Maciej Cencora
4465fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    switch (_mesa_get_format_bytes(format))
4475fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora    {
4485fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        case 16:
4495fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            micro_untile_1_x_1_128bit(src, src_pitch, dst, dst_pitch, width, height);
4505fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            break;
4515fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        case 8:
4525fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            micro_untile_2_x_2_64bit(src, src_pitch, dst, dst_pitch, width, height);
4535fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            break;
4545fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        case 4:
4555fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            micro_untile_4_x_2_32bit(src, src_pitch, dst, dst_pitch, width, height);
4565fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            break;
4575fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        case 2:
4585fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
4595fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
4605fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                micro_untile_4_x_4_16bit(src, src_pitch, dst, dst_pitch, width, height);
4615fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
4625fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            else
4635fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            {
4645fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora                micro_untile_8_x_2_16bit(src, src_pitch, dst, dst_pitch, width, height);
4655fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            }
4665fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            break;
4675fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora        case 1:
4685fefca5dbae9a5b00a3d624da0b5e582c00ad280Maciej Cencora            micro_untile_8_x_4_8bit(src, src_pitch, dst, dst_pitch, width, height);
46988a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            break;
47088a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora        default:
47188a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            assert(0);
47288a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora            break;
47388a99bb305186fd0eaaae9bd0dbfa4c45f14cac7Maciej Cencora    }
47465faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora}
47565faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora
47665faa27aa639e7352708a30105db3318f22f5f93Maciej Cencoravoid get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
47765faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora{
47865faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora    switch (_mesa_get_format_bytes(format))
47965faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora    {
48065faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora        case 16:
48165faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_width = 1;
48265faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_height = 1;
48365faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            break;
48465faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora        case 8:
48565faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_width = 2;
48665faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_height = 2;
48765faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            break;
48865faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora        case 4:
48965faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_width = 4;
49065faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_height = 2;
49165faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            break;
49265faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora        case 2:
49365faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
49465faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            {
49565faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora                *block_width = 4;
49665faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora                *block_height = 4;
49765faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            }
49865faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            else
49965faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            {
50065faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora                *block_width = 8;
50165faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora                *block_height = 2;
50265faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            }
50365faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            break;
50465faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora        case 1:
50565faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_width = 8;
50665faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            *block_height = 4;
50765faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            break;
50865faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora        default:
50965faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            assert(0);
51065faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora            break;
51165faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora    }
51265faa27aa639e7352708a30105db3318f22f5f93Maciej Cencora}
513