; Copyright (c) 2011 The Chromium Authors. All rights reserved.
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.

%include "media/base/simd/media_export.asm"
%include "third_party/x86inc/x86inc.asm"

;
; This file uses MMX instructions.
;
; NOTE(review): the routine in this file also issues movntq, which Intel
; documents under the SSE feature flag rather than base MMX -- confirm that
; the CPU directive below and the caller's runtime CPU check both cover it.
;
  SECTION_TEXT
  CPU       MMX

;-----------------------------------------------------------------------
; void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
;                                       const uint8* u_buf,
;                                       const uint8* v_buf,
;                                       uint8* rgb_buf,
;                                       ptrdiff_t width,
;                                       ptrdiff_t source_dx);
;
; NOTE(review): the PROLOGUE below loads SEVEN arguments; the seventh is used
; as the base pointer of the conversion lookup table. The C prototype above
; lists only six -- confirm the real declaration and the table's C type.
;
; Converts one row of YUV samples to 32-bit pixels while scaling it
; horizontally with linear interpolation.
;
; Fixed point: x and source_dx are 16.16 values.
;   luma   index = x >> 16, fraction = x & 0xffff
;   chroma index = x >> 17, fraction = x & 0x1fffe
;
; Lookup table layout as indexed by this routine (8-byte entries):
;   TABLE + 0    + 8 * y   contribution of the Y sample
;   TABLE + 2048 + 8 * u   contribution of the U sample
;   TABLE + 4096 + 8 * v   contribution of the V sample
; The three contributions are summed with signed saturation, shifted right
; arithmetically by 6 and packed to bytes with unsigned saturation.
;
; Two output pixels (8 bytes) are produced per loop iteration; when only one
; pixel remains, a single 4-byte pixel is written instead.
;
; NOTE(review): no emms is executed before returning; presumably the caller
; clears the MMX state -- confirm.
;-----------------------------------------------------------------------
%define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
  EXPORT    SYMBOL
  align     function_align

mangle(SYMBOL):
  %assign   stack_offset 0
  ; Declared here but not referenced by the code below; the table pointer
  ; actually used is the one passed in as argument 7.
  extern    mangle(kCoefficientsRgbY)

; Parameters are in the following order:
; 1. Y plane
; 2. U plane
; 3. V plane
; 4. ARGB frame
; 5. Width
; 6. Source dx
; 7. Conversion lookup table

PROLOGUE  7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1

; Register roles for the body of the function.
%define     TABLEq     r10               ; base of the conversion table
%define     Xq         r11               ; current source position (16.16)
%define     INDEXq     r12               ; integer sample index / next x
%define     COMPRd     r13d              ; right-hand sample of a pair
%define     COMPRq     r13
%define     FRACTIONq  r14               ; interpolation weight
%define     COMPL      R1                ; left-hand sample; reuses the
%define     COMPLq     R1q               ; register that carried argument 7
%define     COMPLd     R1d               ; once it is copied to TABLEq

  ; Preserve the extra registers used above (r12-r14 are callee-saved).
  PUSH      TABLEq
  PUSH      Xq
  PUSH      INDEXq
  PUSH      COMPRq
  PUSH      FRACTIONq

; Restores the registers pushed above, in reverse order.
%macro EPILOGUE 0
  POP       FRACTIONq
  POP       COMPRq
  POP       INDEXq
  POP       Xq
  POP       TABLEq
%endmacro

  mov       TABLEq, R1q                  ; free R1 for use as COMPL

  imul      WIDTHq, SOURCE_DXq           ; source_width = width * source_dx
  xor       Xq, Xq                       ; x = 0
  cmp       SOURCE_DXq, 0x20000          ; source_dx >= 2.0 ?
  jl        .lscaleend
  mov       Xq, 0x8000                   ; x = 0.5 for 1/2 or less
  jmp       .lscaleend

.lscaleloop:
  ; Interpolate U
  mov       INDEXq, Xq
  sar       INDEXq, 0x11                 ; chroma index = x >> 17
  movzx     COMPLd, BYTE [Uq + INDEXq]
  movzx     COMPRd, BYTE [Uq + INDEXq + 1]
  mov       FRACTIONq, Xq
  and       FRACTIONq, 0x1fffe           ; weight of the right sample
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0x1fffe           ; weight of the left sample
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17                   ; back to an 8-bit sample value
  movq      mm0, [TABLEq + 2048 + 8 * COMPLq]

  ; Interpolate V
  movzx     COMPLd, BYTE [Vq + INDEXq]
  movzx     COMPRd, BYTE [Vq + INDEXq + 1]
  ; Trick here to imul COMPL first then COMPR.
  ; FRACTIONq still holds the left weight from the U step, so using it
  ; first avoids recomputing the fraction. Saves two instructions. :)
  imul      COMPLq, FRACTIONq
  xor       FRACTIONq, 0x1fffe           ; back to the right weight
  imul      COMPRq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 17
  paddsw    mm0, [TABLEq + 4096 + 8 * COMPLq]   ; mm0 = U + V contribution

  ; Interpolate first Y1.
  lea       INDEXq, [Xq + SOURCE_DXq]    ; INDEXq now points to next pixel.
                                         ; Xq points to current pixel.
  mov       FRACTIONq, Xq
  sar       Xq, 0x10                     ; luma index = x >> 16
  movzx     COMPLd, BYTE [Yq + Xq]
  movzx     COMPRd, BYTE [Yq + Xq + 1]
  and       FRACTIONq, 0xffff            ; weight of the right sample
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0xffff            ; weight of the left sample
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm1, [TABLEq + 8 * COMPLq]

  ; Interpolate Y2 if available.
  cmp       INDEXq, WIDTHq
  jge       .lscalelastpixel

  lea       Xq, [INDEXq + SOURCE_DXq]    ; Xq points to next pixel.
                                         ; INDEXq points to current pixel.
  mov       FRACTIONq, INDEXq
  sar       INDEXq, 0x10
  movzx     COMPLd, BYTE [Yq + INDEXq]
  movzx     COMPRd, BYTE [Yq + INDEXq + 1]
  and       FRACTIONq, 0xffff
  imul      COMPRq, FRACTIONq
  xor       FRACTIONq, 0xffff
  imul      COMPLq, FRACTIONq
  add       COMPLq, COMPRq
  shr       COMPLq, 16
  movq      mm2, [TABLEq + 8 * COMPLq]

  ; Combine luma with the shared chroma term and emit two pixels.
  paddsw    mm1, mm0
  paddsw    mm2, mm0
  psraw     mm1, 0x6
  psraw     mm2, 0x6
  packuswb  mm1, mm2
  movntq    [ARGBq], mm1                 ; non-temporal 8-byte store
  add       ARGBq, 0x8

.lscaleend:
  cmp       Xq, WIDTHq                   ; more source positions left?
  jl        .lscaleloop
  jmp       .epilogue

.lscalelastpixel:
  ; Only one pixel remains: emit it alone as a 4-byte store.
  paddsw    mm1, mm0
  psraw     mm1, 6
  packuswb  mm1, mm1
  movd      [ARGBq], mm1

.epilogue:
  EPILOGUE
  RET