1/* 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include <stdlib.h> 12 13#include "./vpx_dsp_rtcd.h" 14#include "vpx/vpx_integer.h" 15#include "vpx_dsp/mips/common_dspr2.h" 16#include "vpx_dsp/mips/loopfilter_filters_dspr2.h" 17#include "vpx_dsp/mips/loopfilter_macros_dspr2.h" 18#include "vpx_dsp/mips/loopfilter_masks_dspr2.h" 19#include "vpx_mem/vpx_mem.h" 20 21#if HAVE_DSPR2 22void vpx_lpf_horizontal_16_dspr2(unsigned char *s, 23 int pitch, 24 const uint8_t *blimit, 25 const uint8_t *limit, 26 const uint8_t *thresh, 27 int count) { 28 uint32_t mask; 29 uint32_t hev, flat, flat2; 30 uint8_t i; 31 uint8_t *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0; 32 uint8_t *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7; 33 uint32_t thresh_vec, flimit_vec, limit_vec; 34 uint32_t uflimit, ulimit, uthresh; 35 uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; 36 uint32_t p1_f0, p0_f0, q0_f0, q1_f0; 37 uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l; 38 uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l; 39 uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r; 40 uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r; 41 uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1; 42 uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1; 43 44 uflimit = *blimit; 45 ulimit = *limit; 46 uthresh = *thresh; 47 48 /* create quad-byte */ 49 __asm__ __volatile__ ( 50 "replv.qb %[thresh_vec], %[uthresh] \n\t" 51 "replv.qb %[flimit_vec], %[uflimit] \n\t" 52 "replv.qb %[limit_vec], %[ulimit] \n\t" 53 54 : [thresh_vec] "=&r" (thresh_vec), [flimit_vec] "=&r" (flimit_vec), 55 [limit_vec] "=r" (limit_vec) 56 : [uthresh] "r" (uthresh), [uflimit] "r" (uflimit), [ulimit] "r" (ulimit) 57 ); 58 59 /* prefetch data for store */ 60 prefetch_store(s); 61 62 for (i = 0; i < (2 * count); i++) { 63 sp7 = s - (pitch << 3); 64 sp6 = sp7 + pitch; 65 sp5 = sp6 + pitch; 66 sp4 = sp5 + pitch; 67 sp3 = sp4 + pitch; 68 sp2 = sp3 + pitch; 69 sp1 = sp2 + pitch; 70 sp0 = sp1 + pitch; 71 sq0 = s; 72 sq1 = s + pitch; 73 sq2 = sq1 + pitch; 74 sq3 = sq2 + pitch; 75 sq4 = sq3 + pitch; 76 sq5 = sq4 + pitch; 77 sq6 = sq5 + pitch; 78 sq7 = sq6 + pitch; 79 80 __asm__ __volatile__ ( 81 "lw %[p7], (%[sp7]) \n\t" 82 "lw %[p6], (%[sp6]) \n\t" 83 "lw %[p5], (%[sp5]) \n\t" 84 "lw %[p4], (%[sp4]) \n\t" 85 "lw %[p3], (%[sp3]) \n\t" 86 "lw %[p2], (%[sp2]) \n\t" 87 "lw %[p1], (%[sp1]) \n\t" 88 "lw %[p0], (%[sp0]) \n\t" 89 90 : [p3] "=&r" (p3), [p2] "=&r" (p2), [p1] "=&r" (p1), [p0] "=&r" (p0), 91 [p7] "=&r" (p7), [p6] "=&r" (p6), [p5] "=&r" (p5), [p4] "=&r" (p4) 92 : [sp3] "r" (sp3), [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 93 [sp4] "r" (sp4), [sp5] "r" (sp5), [sp6] "r" (sp6), [sp7] "r" (sp7) 94 ); 95 96 __asm__ __volatile__ ( 97 "lw %[q0], (%[sq0]) \n\t" 98 "lw %[q1], (%[sq1]) \n\t" 99 "lw %[q2], (%[sq2]) \n\t" 100 "lw %[q3], (%[sq3]) \n\t" 101 "lw %[q4], (%[sq4]) \n\t" 102 "lw %[q5], (%[sq5]) \n\t" 103 "lw %[q6], (%[sq6]) \n\t" 104 "lw %[q7], (%[sq7]) \n\t" 105 106 : [q3] "=&r" (q3), [q2] "=&r" (q2), [q1] "=&r" (q1), [q0] "=&r" (q0), 107 [q7] "=&r" (q7), [q6] "=&r" (q6), [q5] "=&r" (q5), [q4] "=&r" (q4) 108 : [sq3] "r" (sq3), [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0), 109 [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6), [sq7] "r" (sq7) 110 ); 111 112 filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, 113 p1, p0, p3, p2, q0, q1, q2, q3, 114 &hev, &mask, &flat); 115 116 flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); 117 118 /* f0 */ 119 if (((flat2 == 0) && (flat == 0) && (mask != 0)) || 120 ((flat2 != 0) && (flat == 0) && (mask != 0))) { 121 filter1_dspr2(mask, hev, p1, p0, q0, q1, 122 &p1_f0, &p0_f0, &q0_f0, &q1_f0); 123 124 __asm__ __volatile__ ( 125 "sw %[p1_f0], (%[sp1]) \n\t" 126 "sw %[p0_f0], (%[sp0]) \n\t" 127 "sw %[q0_f0], (%[sq0]) \n\t" 128 "sw %[q1_f0], (%[sq1]) \n\t" 129 130 : 131 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), 132 [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0), 133 [sp1] "r" (sp1), [sp0] "r" (sp0), 134 [sq0] "r" (sq0), [sq1] "r" (sq1) 135 ); 136 } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) && 137 (mask == 0xFFFFFFFF)) { 138 /* f2 */ 139 PACK_LEFT_0TO3() 140 PACK_LEFT_4TO7() 141 wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, 142 &p3_l, &p2_l, &p1_l, &p0_l, 143 &q0_l, &q1_l, &q2_l, &q3_l, 144 &q4_l, &q5_l, &q6_l, &q7_l); 145 146 PACK_RIGHT_0TO3() 147 PACK_RIGHT_4TO7() 148 wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, 149 &p3_r, &p2_r, &p1_r, &p0_r, 150 &q0_r, &q1_r, &q2_r, &q3_r, 151 &q4_r, &q5_r, &q6_r, &q7_r); 152 153 COMBINE_LEFT_RIGHT_0TO2() 154 COMBINE_LEFT_RIGHT_3TO6() 155 156 __asm__ __volatile__ ( 157 "sw %[p6], (%[sp6]) \n\t" 158 "sw %[p5], (%[sp5]) \n\t" 159 "sw %[p4], (%[sp4]) \n\t" 160 "sw %[p3], (%[sp3]) \n\t" 161 "sw %[p2], (%[sp2]) \n\t" 162 "sw %[p1], (%[sp1]) \n\t" 163 "sw %[p0], (%[sp0]) \n\t" 164 165 : 166 : [p6] "r" (p6), [p5] "r" (p5), [p4] "r" (p4), [p3] "r" (p3), 167 [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0), 168 [sp6] "r" (sp6), [sp5] "r" (sp5), [sp4] "r" (sp4), [sp3] "r" (sp3), 169 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0) 170 ); 171 172 __asm__ __volatile__ ( 173 "sw %[q6], (%[sq6]) \n\t" 174 "sw %[q5], (%[sq5]) \n\t" 175 "sw %[q4], (%[sq4]) \n\t" 176 "sw %[q3], (%[sq3]) \n\t" 177 "sw %[q2], (%[sq2]) \n\t" 178 "sw %[q1], (%[sq1]) \n\t" 179 "sw %[q0], (%[sq0]) \n\t" 180 181 : 182 : [q6] "r" (q6), [q5] "r" (q5), [q4] "r" (q4), [q3] "r" (q3), 183 [q2] "r" (q2), [q1] "r" (q1), [q0] "r" (q0), 184 [sq6] "r" (sq6), [sq5] "r" (sq5), [sq4] "r" (sq4), [sq3] "r" (sq3), 185 [sq2] "r" (sq2), [sq1] "r" (sq1), [sq0] "r" (sq0) 186 ); 187 } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { 188 /* f1 */ 189 /* left 2 element operation */ 190 PACK_LEFT_0TO3() 191 mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, 192 &q0_l, &q1_l, &q2_l, &q3_l); 193 194 /* right 2 element operation */ 195 PACK_RIGHT_0TO3() 196 mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, 197 &q0_r, &q1_r, &q2_r, &q3_r); 198 199 COMBINE_LEFT_RIGHT_0TO2() 200 201 __asm__ __volatile__ ( 202 "sw %[p2], (%[sp2]) \n\t" 203 "sw %[p1], (%[sp1]) \n\t" 204 "sw %[p0], (%[sp0]) \n\t" 205 "sw %[q0], (%[sq0]) \n\t" 206 "sw %[q1], (%[sq1]) \n\t" 207 "sw %[q2], (%[sq2]) \n\t" 208 209 : 210 : [p2] "r" (p2), [p1] "r" (p1), [p0] "r" (p0), 211 [q0] "r" (q0), [q1] "r" (q1), [q2] "r" (q2), 212 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 213 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 214 ); 215 } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) { 216 /* f0+f1 */ 217 filter1_dspr2(mask, hev, p1, p0, q0, q1, 218 &p1_f0, &p0_f0, &q0_f0, &q1_f0); 219 220 /* left 2 element operation */ 221 PACK_LEFT_0TO3() 222 mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, 223 &q0_l, &q1_l, &q2_l, &q3_l); 224 225 /* right 2 element operation */ 226 PACK_RIGHT_0TO3() 227 mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, 228 &q0_r, &q1_r, &q2_r, &q3_r); 229 230 if (mask & flat & 0x000000FF) { 231 __asm__ __volatile__ ( 232 "sb %[p2_r], (%[sp2]) \n\t" 233 "sb %[p1_r], (%[sp1]) \n\t" 234 "sb %[p0_r], (%[sp0]) \n\t" 235 "sb %[q0_r], (%[sq0]) \n\t" 236 "sb %[q1_r], (%[sq1]) \n\t" 237 "sb %[q2_r], (%[sq2]) \n\t" 238 239 : 240 : [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), [p0_r] "r" (p0_r), 241 [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r), 242 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 243 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 244 ); 245 } else if (mask & 0x000000FF) { 246 __asm__ __volatile__ ( 247 "sb %[p1_f0], (%[sp1]) \n\t" 248 "sb %[p0_f0], (%[sp0]) \n\t" 249 "sb %[q0_f0], (%[sq0]) \n\t" 250 "sb %[q1_f0], (%[sq1]) \n\t" 251 252 : 253 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), 254 [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0), 255 [sp1] "r" (sp1), [sp0] "r" (sp0), 256 [sq0] "r" (sq0), [sq1] "r" (sq1) 257 ); 258 } 259 260 __asm__ __volatile__ ( 261 "srl %[p2_r], %[p2_r], 16 \n\t" 262 "srl %[p1_r], %[p1_r], 16 \n\t" 263 "srl %[p0_r], %[p0_r], 16 \n\t" 264 "srl %[q0_r], %[q0_r], 16 \n\t" 265 "srl %[q1_r], %[q1_r], 16 \n\t" 266 "srl %[q2_r], %[q2_r], 16 \n\t" 267 "srl %[p1_f0], %[p1_f0], 8 \n\t" 268 "srl %[p0_f0], %[p0_f0], 8 \n\t" 269 "srl %[q0_f0], %[q0_f0], 8 \n\t" 270 "srl %[q1_f0], %[q1_f0], 8 \n\t" 271 272 : [p2_r] "+r" (p2_r), [p1_r] "+r" (p1_r), [p0_r] "+r" (p0_r), 273 [q0_r] "+r" (q0_r), [q1_r] "+r" (q1_r), [q2_r] "+r" (q2_r), 274 [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0), 275 [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0) 276 : 277 ); 278 279 if (mask & flat & 0x0000FF00) { 280 __asm__ __volatile__ ( 281 "sb %[p2_r], +1(%[sp2]) \n\t" 282 "sb %[p1_r], +1(%[sp1]) \n\t" 283 "sb %[p0_r], +1(%[sp0]) \n\t" 284 "sb %[q0_r], +1(%[sq0]) \n\t" 285 "sb %[q1_r], +1(%[sq1]) \n\t" 286 "sb %[q2_r], +1(%[sq2]) \n\t" 287 288 : 289 : [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), [p0_r] "r" (p0_r), 290 [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r), 291 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 292 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 293 ); 294 } else if (mask & 0x0000FF00) { 295 __asm__ __volatile__ ( 296 "sb %[p1_f0], +1(%[sp1]) \n\t" 297 "sb %[p0_f0], +1(%[sp0]) \n\t" 298 "sb %[q0_f0], +1(%[sq0]) \n\t" 299 "sb %[q1_f0], +1(%[sq1]) \n\t" 300 301 : 302 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), 303 [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0), 304 [sp1] "r" (sp1), [sp0] "r" (sp0), 305 [sq0] "r" (sq0), [sq1] "r" (sq1) 306 ); 307 } 308 309 __asm__ __volatile__ ( 310 "srl %[p1_f0], %[p1_f0], 8 \n\t" 311 "srl %[p0_f0], %[p0_f0], 8 \n\t" 312 "srl %[q0_f0], %[q0_f0], 8 \n\t" 313 "srl %[q1_f0], %[q1_f0], 8 \n\t" 314 315 : [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0), 316 [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0) 317 : 318 ); 319 320 if (mask & flat & 0x00FF0000) { 321 __asm__ __volatile__ ( 322 "sb %[p2_l], +2(%[sp2]) \n\t" 323 "sb %[p1_l], +2(%[sp1]) \n\t" 324 "sb %[p0_l], +2(%[sp0]) \n\t" 325 "sb %[q0_l], +2(%[sq0]) \n\t" 326 "sb %[q1_l], +2(%[sq1]) \n\t" 327 "sb %[q2_l], +2(%[sq2]) \n\t" 328 329 : 330 : [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), [p0_l] "r" (p0_l), 331 [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l), 332 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 333 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 334 ); 335 } else if (mask & 0x00FF0000) { 336 __asm__ __volatile__ ( 337 "sb %[p1_f0], +2(%[sp1]) \n\t" 338 "sb %[p0_f0], +2(%[sp0]) \n\t" 339 "sb %[q0_f0], +2(%[sq0]) \n\t" 340 "sb %[q1_f0], +2(%[sq1]) \n\t" 341 342 : 343 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), 344 [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0), 345 [sp1] "r" (sp1), [sp0] "r" (sp0), 346 [sq0] "r" (sq0), [sq1] "r" (sq1) 347 ); 348 } 349 350 __asm__ __volatile__ ( 351 "srl %[p2_l], %[p2_l], 16 \n\t" 352 "srl %[p1_l], %[p1_l], 16 \n\t" 353 "srl %[p0_l], %[p0_l], 16 \n\t" 354 "srl %[q0_l], %[q0_l], 16 \n\t" 355 "srl %[q1_l], %[q1_l], 16 \n\t" 356 "srl %[q2_l], %[q2_l], 16 \n\t" 357 "srl %[p1_f0], %[p1_f0], 8 \n\t" 358 "srl %[p0_f0], %[p0_f0], 8 \n\t" 359 "srl %[q0_f0], %[q0_f0], 8 \n\t" 360 "srl %[q1_f0], %[q1_f0], 8 \n\t" 361 362 : [p2_l] "+r" (p2_l), [p1_l] "+r" (p1_l), [p0_l] "+r" (p0_l), 363 [q0_l] "+r" (q0_l), [q1_l] "+r" (q1_l), [q2_l] "+r" (q2_l), 364 [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0), 365 [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0) 366 : 367 ); 368 369 if (mask & flat & 0xFF000000) { 370 __asm__ __volatile__ ( 371 "sb %[p2_l], +3(%[sp2]) \n\t" 372 "sb %[p1_l], +3(%[sp1]) \n\t" 373 "sb %[p0_l], +3(%[sp0]) \n\t" 374 "sb %[q0_l], +3(%[sq0]) \n\t" 375 "sb %[q1_l], +3(%[sq1]) \n\t" 376 "sb %[q2_l], +3(%[sq2]) \n\t" 377 378 : 379 : [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), [p0_l] "r" (p0_l), 380 [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l), 381 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 382 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 383 ); 384 } else if (mask & 0xFF000000) { 385 __asm__ __volatile__ ( 386 "sb %[p1_f0], +3(%[sp1]) \n\t" 387 "sb %[p0_f0], +3(%[sp0]) \n\t" 388 "sb %[q0_f0], +3(%[sq0]) \n\t" 389 "sb %[q1_f0], +3(%[sq1]) \n\t" 390 391 : 392 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), 393 [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0), 394 [sp1] "r" (sp1), [sp0] "r" (sp0), 395 [sq0] "r" (sq0), [sq1] "r" (sq1) 396 ); 397 } 398 } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) { 399 /* f0 + f1 + f2 */ 400 /* f0 function */ 401 filter1_dspr2(mask, hev, p1, p0, q0, q1, 402 &p1_f0, &p0_f0, &q0_f0, &q1_f0); 403 404 /* f1 function */ 405 /* left 2 element operation */ 406 PACK_LEFT_0TO3() 407 mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, 408 q0_l, q1_l, q2_l, q3_l, 409 &p2_l_f1, &p1_l_f1, &p0_l_f1, 410 &q0_l_f1, &q1_l_f1, &q2_l_f1); 411 412 /* right 2 element operation */ 413 PACK_RIGHT_0TO3() 414 mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, 415 q0_r, q1_r, q2_r, q3_r, 416 &p2_r_f1, &p1_r_f1, &p0_r_f1, 417 &q0_r_f1, &q1_r_f1, &q2_r_f1); 418 419 /* f2 function */ 420 PACK_LEFT_4TO7() 421 wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, 422 &p3_l, &p2_l, &p1_l, &p0_l, 423 &q0_l, &q1_l, &q2_l, &q3_l, 424 &q4_l, &q5_l, &q6_l, &q7_l); 425 426 PACK_RIGHT_4TO7() 427 wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, 428 &p3_r, &p2_r, &p1_r, &p0_r, 429 &q0_r, &q1_r, &q2_r, &q3_r, 430 &q4_r, &q5_r, &q6_r, &q7_r); 431 432 if (mask & flat & flat2 & 0x000000FF) { 433 __asm__ __volatile__ ( 434 "sb %[p6_r], (%[sp6]) \n\t" 435 "sb %[p5_r], (%[sp5]) \n\t" 436 "sb %[p4_r], (%[sp4]) \n\t" 437 "sb %[p3_r], (%[sp3]) \n\t" 438 "sb %[p2_r], (%[sp2]) \n\t" 439 "sb %[p1_r], (%[sp1]) \n\t" 440 "sb %[p0_r], (%[sp0]) \n\t" 441 442 : 443 : [p6_r] "r" (p6_r), [p5_r] "r" (p5_r), [p4_r] "r" (p4_r), 444 [p3_r] "r" (p3_r), [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), 445 [sp6] "r" (sp6), [sp5] "r" (sp5), [sp4] "r" (sp4), 446 [sp3] "r" (sp3), [sp2] "r" (sp2), [sp1] "r" (sp1), 447 [p0_r] "r" (p0_r), [sp0] "r" (sp0) 448 ); 449 450 __asm__ __volatile__ ( 451 "sb %[q0_r], (%[sq0]) \n\t" 452 "sb %[q1_r], (%[sq1]) \n\t" 453 "sb %[q2_r], (%[sq2]) \n\t" 454 "sb %[q3_r], (%[sq3]) \n\t" 455 "sb %[q4_r], (%[sq4]) \n\t" 456 "sb %[q5_r], (%[sq5]) \n\t" 457 "sb %[q6_r], (%[sq6]) \n\t" 458 459 : 460 : [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r), 461 [q3_r] "r" (q3_r), [q4_r] "r" (q4_r), [q5_r] "r" (q5_r), 462 [q6_r] "r" (q6_r), 463 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2), 464 [sq3] "r" (sq3), [sq4] "r" (sq4), [sq5] "r" (sq5), 465 [sq6] "r" (sq6) 466 ); 467 } else if (mask & flat & 0x000000FF) { 468 __asm__ __volatile__ ( 469 "sb %[p2_r_f1], (%[sp2]) \n\t" 470 "sb %[p1_r_f1], (%[sp1]) \n\t" 471 "sb %[p0_r_f1], (%[sp0]) \n\t" 472 "sb %[q0_r_f1], (%[sq0]) \n\t" 473 "sb %[q1_r_f1], (%[sq1]) \n\t" 474 "sb %[q2_r_f1], (%[sq2]) \n\t" 475 476 : 477 : [p2_r_f1] "r" (p2_r_f1), [p1_r_f1] "r" (p1_r_f1), 478 [p0_r_f1] "r" (p0_r_f1), [q0_r_f1] "r" (q0_r_f1), 479 [q1_r_f1] "r" (q1_r_f1), [q2_r_f1] "r" (q2_r_f1), 480 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 481 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 482 ); 483 } else if (mask & 0x000000FF) { 484 __asm__ __volatile__ ( 485 "sb %[p1_f0], (%[sp1]) \n\t" 486 "sb %[p0_f0], (%[sp0]) \n\t" 487 "sb %[q0_f0], (%[sq0]) \n\t" 488 "sb %[q1_f0], (%[sq1]) \n\t" 489 490 : 491 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0), 492 [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0), 493 [sq0] "r" (sq0), [sq1] "r" (sq1) 494 ); 495 } 496 497 __asm__ __volatile__ ( 498 "srl %[p6_r], %[p6_r], 16 \n\t" 499 "srl %[p5_r], %[p5_r], 16 \n\t" 500 "srl %[p4_r], %[p4_r], 16 \n\t" 501 "srl %[p3_r], %[p3_r], 16 \n\t" 502 "srl %[p2_r], %[p2_r], 16 \n\t" 503 "srl %[p1_r], %[p1_r], 16 \n\t" 504 "srl %[p0_r], %[p0_r], 16 \n\t" 505 "srl %[q0_r], %[q0_r], 16 \n\t" 506 "srl %[q1_r], %[q1_r], 16 \n\t" 507 "srl %[q2_r], %[q2_r], 16 \n\t" 508 "srl %[q3_r], %[q3_r], 16 \n\t" 509 "srl %[q4_r], %[q4_r], 16 \n\t" 510 "srl %[q5_r], %[q5_r], 16 \n\t" 511 "srl %[q6_r], %[q6_r], 16 \n\t" 512 513 : [q0_r] "+r" (q0_r), [q1_r] "+r" (q1_r), [q2_r] "+r" (q2_r), 514 [q3_r] "+r" (q3_r), [q4_r] "+r" (q4_r), [q5_r] "+r" (q5_r), 515 [p6_r] "+r" (p6_r), [p5_r] "+r" (p5_r), [p4_r] "+r" (p4_r), 516 [p3_r] "+r" (p3_r), [p2_r] "+r" (p2_r), [p1_r] "+r" (p1_r), 517 [q6_r] "+r" (q6_r), [p0_r] "+r" (p0_r) 518 : 519 ); 520 521 __asm__ __volatile__ ( 522 "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t" 523 "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t" 524 "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t" 525 "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t" 526 "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t" 527 "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t" 528 "srl %[p1_f0], %[p1_f0], 8 \n\t" 529 "srl %[p0_f0], %[p0_f0], 8 \n\t" 530 "srl %[q0_f0], %[q0_f0], 8 \n\t" 531 "srl %[q1_f0], %[q1_f0], 8 \n\t" 532 533 : [p2_r_f1] "+r" (p2_r_f1), [p1_r_f1] "+r" (p1_r_f1), 534 [p0_r_f1] "+r" (p0_r_f1), [q0_r_f1] "+r" (q0_r_f1), 535 [q1_r_f1] "+r" (q1_r_f1), [q2_r_f1] "+r" (q2_r_f1), 536 [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0), 537 [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0) 538 : 539 ); 540 541 if (mask & flat & flat2 & 0x0000FF00) { 542 __asm__ __volatile__ ( 543 "sb %[p6_r], +1(%[sp6]) \n\t" 544 "sb %[p5_r], +1(%[sp5]) \n\t" 545 "sb %[p4_r], +1(%[sp4]) \n\t" 546 "sb %[p3_r], +1(%[sp3]) \n\t" 547 "sb %[p2_r], +1(%[sp2]) \n\t" 548 "sb %[p1_r], +1(%[sp1]) \n\t" 549 "sb %[p0_r], +1(%[sp0]) \n\t" 550 551 : 552 : [p6_r] "r" (p6_r), [p5_r] "r" (p5_r), [p4_r] "r" (p4_r), 553 [p3_r] "r" (p3_r), [p2_r] "r" (p2_r), [p1_r] "r" (p1_r), 554 [p0_r] "r" (p0_r), [sp6] "r" (sp6), [sp5] "r" (sp5), 555 [sp4] "r" (sp4), [sp3] "r" (sp3), 556 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0) 557 ); 558 559 __asm__ __volatile__ ( 560 "sb %[q0_r], +1(%[sq0]) \n\t" 561 "sb %[q1_r], +1(%[sq1]) \n\t" 562 "sb %[q2_r], +1(%[sq2]) \n\t" 563 "sb %[q3_r], +1(%[sq3]) \n\t" 564 "sb %[q4_r], +1(%[sq4]) \n\t" 565 "sb %[q5_r], +1(%[sq5]) \n\t" 566 "sb %[q6_r], +1(%[sq6]) \n\t" 567 568 : 569 : [q0_r] "r" (q0_r), [q1_r] "r" (q1_r), [q2_r] "r" (q2_r), 570 [q3_r] "r" (q3_r), [q4_r] "r" (q4_r), [q5_r] "r" (q5_r), 571 [q6_r] "r" (q6_r), [sq0] "r" (sq0), [sq1] "r" (sq1), 572 [sq2] "r" (sq2), [sq3] "r" (sq3), 573 [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6) 574 ); 575 } else if (mask & flat & 0x0000FF00) { 576 __asm__ __volatile__ ( 577 "sb %[p2_r_f1], +1(%[sp2]) \n\t" 578 "sb %[p1_r_f1], +1(%[sp1]) \n\t" 579 "sb %[p0_r_f1], +1(%[sp0]) \n\t" 580 "sb %[q0_r_f1], +1(%[sq0]) \n\t" 581 "sb %[q1_r_f1], +1(%[sq1]) \n\t" 582 "sb %[q2_r_f1], +1(%[sq2]) \n\t" 583 584 : 585 : [p2_r_f1] "r" (p2_r_f1), [p1_r_f1] "r" (p1_r_f1), 586 [p0_r_f1] "r" (p0_r_f1), [q0_r_f1] "r" (q0_r_f1), 587 [q1_r_f1] "r" (q1_r_f1), [q2_r_f1] "r" (q2_r_f1), 588 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 589 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 590 ); 591 } else if (mask & 0x0000FF00) { 592 __asm__ __volatile__ ( 593 "sb %[p1_f0], +1(%[sp1]) \n\t" 594 "sb %[p0_f0], +1(%[sp0]) \n\t" 595 "sb %[q0_f0], +1(%[sq0]) \n\t" 596 "sb %[q1_f0], +1(%[sq1]) \n\t" 597 598 : 599 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0), 600 [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0), 601 [sq0] "r" (sq0), [sq1] "r" (sq1) 602 ); 603 } 604 605 __asm__ __volatile__ ( 606 "srl %[p1_f0], %[p1_f0], 8 \n\t" 607 "srl %[p0_f0], %[p0_f0], 8 \n\t" 608 "srl %[q0_f0], %[q0_f0], 8 \n\t" 609 "srl %[q1_f0], %[q1_f0], 8 \n\t" 610 611 : [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0), 612 [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0) 613 : 614 ); 615 616 if (mask & flat & flat2 & 0x00FF0000) { 617 __asm__ __volatile__ ( 618 "sb %[p6_l], +2(%[sp6]) \n\t" 619 "sb %[p5_l], +2(%[sp5]) \n\t" 620 "sb %[p4_l], +2(%[sp4]) \n\t" 621 "sb %[p3_l], +2(%[sp3]) \n\t" 622 "sb %[p2_l], +2(%[sp2]) \n\t" 623 "sb %[p1_l], +2(%[sp1]) \n\t" 624 "sb %[p0_l], +2(%[sp0]) \n\t" 625 626 : 627 : [p6_l] "r" (p6_l), [p5_l] "r" (p5_l), [p4_l] "r" (p4_l), 628 [p3_l] "r" (p3_l), [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), 629 [p0_l] "r" (p0_l), [sp6] "r" (sp6), [sp5] "r" (sp5), 630 [sp4] "r" (sp4), [sp3] "r" (sp3), 631 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0) 632 ); 633 634 __asm__ __volatile__ ( 635 "sb %[q0_l], +2(%[sq0]) \n\t" 636 "sb %[q1_l], +2(%[sq1]) \n\t" 637 "sb %[q2_l], +2(%[sq2]) \n\t" 638 "sb %[q3_l], +2(%[sq3]) \n\t" 639 "sb %[q4_l], +2(%[sq4]) \n\t" 640 "sb %[q5_l], +2(%[sq5]) \n\t" 641 "sb %[q6_l], +2(%[sq6]) \n\t" 642 643 : 644 : [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), [q2_l] "r" (q2_l), 645 [q3_l] "r" (q3_l), [q4_l] "r" (q4_l), [q5_l] "r" (q5_l), 646 [q6_l] "r" (q6_l), [sq0] "r" (sq0), [sq1] "r" (sq1), 647 [sq2] "r" (sq2), [sq3] "r" (sq3), 648 [sq4] "r" (sq4), [sq5] "r" (sq5), [sq6] "r" (sq6) 649 ); 650 } else if (mask & flat & 0x00FF0000) { 651 __asm__ __volatile__ ( 652 "sb %[p2_l_f1], +2(%[sp2]) \n\t" 653 "sb %[p1_l_f1], +2(%[sp1]) \n\t" 654 "sb %[p0_l_f1], +2(%[sp0]) \n\t" 655 "sb %[q0_l_f1], +2(%[sq0]) \n\t" 656 "sb %[q1_l_f1], +2(%[sq1]) \n\t" 657 "sb %[q2_l_f1], +2(%[sq2]) \n\t" 658 659 : 660 : [p2_l_f1] "r" (p2_l_f1), [p1_l_f1] "r" (p1_l_f1), 661 [p0_l_f1] "r" (p0_l_f1), [q0_l_f1] "r" (q0_l_f1), 662 [q1_l_f1] "r" (q1_l_f1), [q2_l_f1] "r" (q2_l_f1), 663 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 664 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 665 ); 666 } else if (mask & 0x00FF0000) { 667 __asm__ __volatile__ ( 668 "sb %[p1_f0], +2(%[sp1]) \n\t" 669 "sb %[p0_f0], +2(%[sp0]) \n\t" 670 "sb %[q0_f0], +2(%[sq0]) \n\t" 671 "sb %[q1_f0], +2(%[sq1]) \n\t" 672 673 : 674 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), [q0_f0] "r" (q0_f0), 675 [q1_f0] "r" (q1_f0), [sp1] "r" (sp1), [sp0] "r" (sp0), 676 [sq0] "r" (sq0), [sq1] "r" (sq1) 677 ); 678 } 679 680 __asm__ __volatile__ ( 681 "srl %[p6_l], %[p6_l], 16 \n\t" 682 "srl %[p5_l], %[p5_l], 16 \n\t" 683 "srl %[p4_l], %[p4_l], 16 \n\t" 684 "srl %[p3_l], %[p3_l], 16 \n\t" 685 "srl %[p2_l], %[p2_l], 16 \n\t" 686 "srl %[p1_l], %[p1_l], 16 \n\t" 687 "srl %[p0_l], %[p0_l], 16 \n\t" 688 "srl %[q0_l], %[q0_l], 16 \n\t" 689 "srl %[q1_l], %[q1_l], 16 \n\t" 690 "srl %[q2_l], %[q2_l], 16 \n\t" 691 "srl %[q3_l], %[q3_l], 16 \n\t" 692 "srl %[q4_l], %[q4_l], 16 \n\t" 693 "srl %[q5_l], %[q5_l], 16 \n\t" 694 "srl %[q6_l], %[q6_l], 16 \n\t" 695 696 : [q0_l] "+r" (q0_l), [q1_l] "+r" (q1_l), [q2_l] "+r" (q2_l), 697 [q3_l] "+r" (q3_l), [q4_l] "+r" (q4_l), [q5_l] "+r" (q5_l), 698 [q6_l] "+r" (q6_l), [p6_l] "+r" (p6_l), [p5_l] "+r" (p5_l), 699 [p4_l] "+r" (p4_l), [p3_l] "+r" (p3_l), [p2_l] "+r" (p2_l), 700 [p1_l] "+r" (p1_l), [p0_l] "+r" (p0_l) 701 : 702 ); 703 704 __asm__ __volatile__ ( 705 "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t" 706 "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t" 707 "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t" 708 "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t" 709 "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t" 710 "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t" 711 "srl %[p1_f0], %[p1_f0], 8 \n\t" 712 "srl %[p0_f0], %[p0_f0], 8 \n\t" 713 "srl %[q0_f0], %[q0_f0], 8 \n\t" 714 "srl %[q1_f0], %[q1_f0], 8 \n\t" 715 716 : [p2_l_f1] "+r" (p2_l_f1), [p1_l_f1] "+r" (p1_l_f1), 717 [p0_l_f1] "+r" (p0_l_f1), [q0_l_f1] "+r" (q0_l_f1), 718 [q1_l_f1] "+r" (q1_l_f1), [q2_l_f1] "+r" (q2_l_f1), 719 [p1_f0] "+r" (p1_f0), [p0_f0] "+r" (p0_f0), 720 [q0_f0] "+r" (q0_f0), [q1_f0] "+r" (q1_f0) 721 : 722 ); 723 724 if (mask & flat & flat2 & 0xFF000000) { 725 __asm__ __volatile__ ( 726 "sb %[p6_l], +3(%[sp6]) \n\t" 727 "sb %[p5_l], +3(%[sp5]) \n\t" 728 "sb %[p4_l], +3(%[sp4]) \n\t" 729 "sb %[p3_l], +3(%[sp3]) \n\t" 730 "sb %[p2_l], +3(%[sp2]) \n\t" 731 "sb %[p1_l], +3(%[sp1]) \n\t" 732 "sb %[p0_l], +3(%[sp0]) \n\t" 733 734 : 735 : [p6_l] "r" (p6_l), [p5_l] "r" (p5_l), [p4_l] "r" (p4_l), 736 [p3_l] "r" (p3_l), [p2_l] "r" (p2_l), [p1_l] "r" (p1_l), 737 [p0_l] "r" (p0_l), [sp6] "r" (sp6), [sp5] "r" (sp5), 738 [sp4] "r" (sp4), [sp3] "r" (sp3), [sp2] "r" (sp2), 739 [sp1] "r" (sp1), [sp0] "r" (sp0) 740 ); 741 742 __asm__ __volatile__ ( 743 "sb %[q0_l], +3(%[sq0]) \n\t" 744 "sb %[q1_l], +3(%[sq1]) \n\t" 745 "sb %[q2_l], +3(%[sq2]) \n\t" 746 "sb %[q3_l], +3(%[sq3]) \n\t" 747 "sb %[q4_l], +3(%[sq4]) \n\t" 748 "sb %[q5_l], +3(%[sq5]) \n\t" 749 "sb %[q6_l], +3(%[sq6]) \n\t" 750 751 : 752 : [q0_l] "r" (q0_l), [q1_l] "r" (q1_l), 753 [q2_l] "r" (q2_l), [q3_l] "r" (q3_l), 754 [q4_l] "r" (q4_l), [q5_l] "r" (q5_l), 755 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2), 756 [sq3] "r" (sq3), [sq4] "r" (sq4), [sq5] "r" (sq5), 757 [q6_l] "r" (q6_l), [sq6] "r" (sq6) 758 ); 759 } else if (mask & flat & 0xFF000000) { 760 __asm__ __volatile__ ( 761 "sb %[p2_l_f1], +3(%[sp2]) \n\t" 762 "sb %[p1_l_f1], +3(%[sp1]) \n\t" 763 "sb %[p0_l_f1], +3(%[sp0]) \n\t" 764 "sb %[q0_l_f1], +3(%[sq0]) \n\t" 765 "sb %[q1_l_f1], +3(%[sq1]) \n\t" 766 "sb %[q2_l_f1], +3(%[sq2]) \n\t" 767 768 : 769 : [p2_l_f1] "r" (p2_l_f1), [p1_l_f1] "r" (p1_l_f1), 770 [p0_l_f1] "r" (p0_l_f1), [q0_l_f1] "r" (q0_l_f1), 771 [q1_l_f1] "r" (q1_l_f1), [q2_l_f1] "r" (q2_l_f1), 772 [sp2] "r" (sp2), [sp1] "r" (sp1), [sp0] "r" (sp0), 773 [sq0] "r" (sq0), [sq1] "r" (sq1), [sq2] "r" (sq2) 774 ); 775 } else if (mask & 0xFF000000) { 776 __asm__ __volatile__ ( 777 "sb %[p1_f0], +3(%[sp1]) \n\t" 778 "sb %[p0_f0], +3(%[sp0]) \n\t" 779 "sb %[q0_f0], +3(%[sq0]) \n\t" 780 "sb %[q1_f0], +3(%[sq1]) \n\t" 781 782 : 783 : [p1_f0] "r" (p1_f0), [p0_f0] "r" (p0_f0), 784 [q0_f0] "r" (q0_f0), [q1_f0] "r" (q1_f0), 785 [sp1] "r" (sp1), [sp0] "r" (sp0), 786 [sq0] "r" (sq0), [sq1] "r" (sq1) 787 ); 788 } 789 } 790 791 s = s + 4; 792 } 793} 794#endif // #if HAVE_DSPR2 795