@-----------------------------------------------------------------------------
@ csc_ARGB8888_to_YUV420SP_NEON
@
@ Syntax : GNU as, ARM (armv7-a) with NEON.
@ ABI    : AAPCS (32-bit ARM).
@
@ Converts a 32-bit-per-pixel RGB image into a planar Y buffer plus an
@ interleaved (U,V) buffer that is sub-sampled 2x2.
@
@ In:   r0      = pDstY    destination luma plane, nWidth bytes per row
@       r1      = pDstUV   destination chroma plane, U then V, one pair per
@                          2x2 block of source pixels
@       r2      = pSrcRGB  source pixels, 4 bytes each:
@                          byte 0 = B, byte 1 = G, byte 2 = R, byte 3 unused
@       r3      = nWidth   pixels per row
@       [sp]    = nHeight  rows (compared as a signed value)
@ Out:  nothing returned in registers.
@ Clobbers: flags, q0-q3, q8-q15 (all caller-saved).  r4-r12, lr and
@       d8-d15 are saved on entry and restored on exit.
@
@ Arithmetic (all in 16-bit lanes / 16-bit halves, no saturation needed):
@       Y = ( 66*R + 129*G +  25*B + 0x1080) >> 8
@       U = (0x8080 + 112*B -  38*R -  74*G) >> 8
@       V = (0x8080 + 112*R -  94*G -  18*B) >> 8
@ Chroma is computed from the even-column pixel of the first row of each
@ row pair; the other three pixels of the 2x2 block are not averaged in.
@
@ Work is done two rows at a time.  Within a row pair, 16 pixels per row are
@ handled per NEON iteration, and any remainder is handled two pixels at a
@ time by scalar code.
@
@ NOTE(review): the scalar tail and the row loop both step by 2, so an odd
@ nWidth or nHeight processes one extra pixel / row.  Callers presumably
@ pass even dimensions - confirm.
@
@ Register roles inside the function:
@       r4      pDstY2   = luma pointer for the second row of the pair
@       r5      pSrcRGB2 = source pointer for the second row of the pair
@       r6      scratch (32-bit constants)
@       r7      scalar accumulator (Y pair / U)
@       r8      pixels still to do in the current row
@       r9      scalar: first pixel of the pair
@       r10     scalar: extracted colour channel(s)
@       r11     scalar: multiplier
@       r12     nHeight countdown; also scalar scratch (second pixel / V)
@       r14     scalar scratch
@       q0-q3   de-interleaved source bytes (d0/d4 = B, d1/d5 = G, d2/d6 = R)
@       q4-q6   R, G, B as 16-bit lanes
@       q7, q8  results / temporaries
@       q9      0x8080 chroma bias        q10     0x1080 luma bias
@       q11-q13 38, 74, 112               q14,q15 94,18 then 66,129
@-----------------------------------------------------------------------------

    .arch   armv7-a
    .fpu    neon
    .text

    .equ CACHE_LINE_SIZE,   32
    .equ PRE_LOAD_OFFSET,   6
    .equ PRELOAD_DISTANCE,  (CACHE_LINE_SIZE * PRE_LOAD_OFFSET)

    @ Stack layout after the prologue: 10 core registers + 8 D registers.
    .equ SAVED_CORE_BYTES,  (10 * 4)
    .equ SAVED_NEON_BYTES,  (8 * 8)
    .equ ARG_NHEIGHT,       (SAVED_CORE_BYTES + SAVED_NEON_BYTES)

    @ Conversion coefficients (8-bit fixed point).
    .equ COEF_Y_R,          66
    .equ COEF_Y_G,          129
    .equ COEF_Y_B,          25
    .equ COEF_U_R,          38          @ subtracted
    .equ COEF_U_G,          74          @ subtracted
    .equ COEF_UV_MAX,       112         @ U: times B, V: times R
    .equ COEF_V_G,          94          @ subtracted
    .equ COEF_V_B,          18          @ subtracted
    .equ CHROMA_BIAS,       0x8080      @ (128 << 8) + 128
    .equ LUMA_BIAS_PAIR,    0x10801080  @ ((16 << 8) + 128) in both halfwords

@-----------------------------------------------------------------------------
@ NEON helpers
@-----------------------------------------------------------------------------

    @ Load 16 pixels from [\src] and de-interleave the four byte channels.
    @ Advances \src by 64 bytes.
    .macro  CSC_LOAD16 src
    vld4.8      {d0, d1, d2, d3}, [\src]!
    vld4.8      {d4, d5, d6, d7}, [\src]!
    .endm

    @ Copy the 16 R / G / B bytes into q4 / q5 / q6.  Viewed as 16-bit lanes,
    @ each lane then holds an even pixel in its low byte and the following
    @ odd pixel in its high byte.
    .macro  CSC_UNPACK_RGB
    vmov        d8,  d2                 @ q4 = R
    vmov        d9,  d6
    vmov        d10, d1                 @ q5 = G
    vmov        d11, d5
    vmov        d12, d0                 @ q6 = B
    vmov        d13, d4
    .endm

    @ Keep only the even pixels (low byte of every 16-bit lane).
    .macro  CSC_KEEP_EVEN
    vand.u16    q4, #0x00FF
    vand.u16    q5, #0x00FF
    vand.u16    q6, #0x00FF
    .endm

    @ Keep only the odd pixels (high byte of every 16-bit lane).
    .macro  CSC_KEEP_ODD
    vshr.u16    q4, q4, #8
    vshr.u16    q5, q5, #8
    vshr.u16    q6, q6, #8
    .endm

    @ \acc = (66*R + 129*G + 25*B + 0x1080) >> 8 for the 8 lanes in q4/q5/q6.
    @ Expects q14 = 66, q15 = 129, q8 = 25, q10 = 0x1080.
    .macro  CSC_LUMA8 acc
    vmul.u16    \acc, q4, q14
    vmla.u16    \acc, q5, q15
    vmla.u16    \acc, q6, q8
    vadd.u16    \acc, \acc, q10
    vshr.u16    \acc, \acc, #8
    .endm

    @ Compute 16 luma bytes from the pixels currently held in d0-d7 and
    @ store them to [\dst], advancing \dst by 16.
    @ If \unpacked is 1, q4-q6 already hold the even pixels.
    .macro  CSC_LUMA16 dst, unpacked
    .if \unpacked == 0
    CSC_UNPACK_RGB
    CSC_KEEP_EVEN
    .endif
    CSC_LUMA8   q7                      @ even pixels
    CSC_UNPACK_RGB
    CSC_KEEP_ODD
    CSC_LUMA8   q0                      @ odd pixels (d0-d7 no longer needed)
    vtrn.8      q7, q0                  @ q7 = Y0,Y1,Y2,...,Y15
    vst1.8      {q7}, [\dst]!
    .endm

@-----------------------------------------------------------------------------
@ Scalar helper
@-----------------------------------------------------------------------------

    @ Read two pixels from [\src] (advancing it by 8) and write their two
    @ luma bytes to [\dst] (advancing it by 2).  Both pixels are processed
    @ at once: pixel 0 lives in bits 0-15 of the accumulator and pixel 1 in
    @ bits 16-31; the largest per-pixel sum is below 0x10000 so the halves
    @ never carry into each other.
    @ Leaves pixel 0 in r9.  Clobbers r6, r7, r10, r11, r12, r14.
    .macro  CSC_LUMA_PAIR src, dst
    ldr         r9,  [\src], #4         @ pixel 0
    ldr         r12, [\src], #4         @ pixel 1

    mov         r10, r9, lsr #16
    and         r10, r10, #0x000000FF   @ R0
    and         r14, r12, #0x00FF0000   @ R1 << 16
    add         r10, r10, r14
    mov         r11, #COEF_Y_R
    mul         r7, r10, r11            @ acc  = 66 * R

    mov         r10, r9, lsr #8
    and         r10, r10, #0x000000FF   @ G0
    mov         r14, r12, lsl #8
    and         r14, r14, #0x00FF0000   @ G1 << 16
    add         r10, r10, r14
    mov         r11, #COEF_Y_G
    mla         r7, r10, r11, r7        @ acc += 129 * G

    and         r10, r9, #0x000000FF    @ B0
    mov         r14, r12, lsl #16
    and         r14, r14, #0x00FF0000   @ B1 << 16
    add         r10, r10, r14
    mov         r11, #COEF_Y_B
    mla         r7, r10, r11, r7        @ acc += 25 * B

    ldr         r6, =LUMA_BIAS_PAIR
    add         r7, r7, r6
    mov         r7, r7, lsr #8
    strb        r7, [\dst], #1          @ Y of pixel 0
    mov         r7, r7, lsr #16
    strb        r7, [\dst], #1          @ Y of pixel 1
    .endm

@-----------------------------------------------------------------------------

    .global csc_ARGB8888_to_YUV420SP_NEON
    .type   csc_ARGB8888_to_YUV420SP_NEON, %function
csc_ARGB8888_to_YUV420SP_NEON:
    .fnstart

    stmfd       sp!, {r4-r12, r14}      @ save core registers
    .save       {r4-r12, r14}
    vpush       {d8-d15}                @ q4-q7 are callee-saved under AAPCS
    .vsave      {d8-d15}

    ldr         r12, [sp, #ARG_NHEIGHT] @ r12 = nHeight
    cmp         r12, #0
    ble         .Lcsc_done              @ nothing to convert

    add         r4, r0, r3              @ pDstY2   = pDstY   + nWidth
    add         r5, r2, r3, lsl #2      @ pSrcRGB2 = pSrcRGB + nWidth * 4

    @ Constants that stay live for the whole function.
    vmov.u16    q9,  #CHROMA_BIAS
    vmov.u16    q10, #0x1000            @ q10 = 0x1080, built in two steps
    vorr.u16    q10, #0x0080
    vmov.u16    q11, #COEF_U_R
    vmov.u16    q12, #COEF_U_G
    vmov.u16    q13, #COEF_UV_MAX

.Lcsc_row_pair:
    stmfd       sp!, {r12}              @ r12 is reused as scratch by the tail

    subs        r8, r3, #16             @ r8 = nWidth - 16
    bmi         .Lcsc_tail_setup        @ fewer than 16 pixels: scalar only

    @ ---- 16 pixels of both rows per iteration ------------------------------
.Lcsc_neon16:
    pld         [r2, #PRELOAD_DISTANCE]
    CSC_LOAD16  r2
    CSC_UNPACK_RGB
    CSC_KEEP_EVEN

    @ Chroma from the even pixels of the first row.
    vmov.u16    q14, #COEF_V_G          @ q14/q15 are shared with the luma
    vmov.u16    q15, #COEF_V_B          @ coefficients, so reload each pass

    vmov        q8, q9                  @ U  = 0x8080
    vmla.u16    q8, q6, q13             @ U += 112 * B
    vmls.u16    q8, q4, q11             @ U -=  38 * R
    vmls.u16    q8, q5, q12             @ U -=  74 * G
    vshr.u16    q8, q8, #8

    vmov        q7, q9                  @ V  = 0x8080
    vmla.u16    q7, q4, q13             @ V += 112 * R
    vmls.u16    q7, q5, q14             @ V -=  94 * G
    vmls.u16    q7, q6, q15             @ V -=  18 * B
    vshr.u16    q7, q7, #8

    vtrn.8      q8, q7                  @ q8 = U0,V0,U1,V1,...,U7,V7
    vst1.8      {q8}, [r1]!

    @ Luma of the first row (q4-q6 still hold its even pixels).
    vmov.u16    q14, #COEF_Y_R
    vmov.u16    q15, #COEF_Y_G
    vmov.u16    q8,  #COEF_Y_B
    CSC_LUMA16  r0, 1

    @ Luma of the second row.
    pld         [r5, #PRELOAD_DISTANCE]
    CSC_LOAD16  r5
    CSC_LUMA16  r4, 0

    subs        r8, r8, #16
    bpl         .Lcsc_neon16            @ at least 16 more pixels remain

    @ ---- remaining (nWidth mod 16) pixels, two at a time -------------------
.Lcsc_tail_setup:
    adds        r8, r8, #16             @ r8 = pixels left in this row
    beq         .Lcsc_row_pair_done

.Lcsc_scalar2:
    CSC_LUMA_PAIR r2, r0                @ first row; leaves pixel 0 in r9

    @ Chroma from pixel 0 of the first row: r7 = U, r12 = V.
    ldr         r7, =CHROMA_BIAS
    mov         r12, r7

    and         r10, r9, #0x000000FF    @ B
    mov         r11, #COEF_UV_MAX
    mla         r7, r10, r11, r7        @ U += 112 * B
    mov         r11, #COEF_V_B
    mls         r12, r10, r11, r12      @ V -=  18 * B

    mov         r10, r9, lsr #16
    and         r10, r10, #0x000000FF   @ R
    mov         r11, #COEF_U_R
    mls         r7, r10, r11, r7        @ U -=  38 * R
    mov         r11, #COEF_UV_MAX
    mla         r12, r10, r11, r12      @ V += 112 * R

    mov         r10, r9, lsr #8
    and         r10, r10, #0x000000FF   @ G
    mov         r11, #COEF_U_G
    mls         r7, r10, r11, r7        @ U -=  74 * G
    mov         r11, #COEF_V_G
    mls         r12, r10, r11, r12      @ V -=  94 * G

    mov         r7, r7, lsr #8
    strb        r7, [r1], #1            @ U
    mov         r12, r12, lsr #8
    strb        r12, [r1], #1           @ V

    CSC_LUMA_PAIR r5, r4                @ second row

    subs        r8, r8, #2
    bgt         .Lcsc_scalar2

    @ ---- advance to the next pair of rows ----------------------------------
.Lcsc_row_pair_done:
    @ Each pointer has walked one row; skip the row its partner just did.
    add         r0, r0, r3              @ pDstY    += nWidth
    add         r2, r2, r3, lsl #2      @ pSrcRGB  += nWidth * 4
    add         r4, r4, r3              @ pDstY2   += nWidth
    add         r5, r5, r3, lsl #2      @ pSrcRGB2 += nWidth * 4

    ldmfd       sp!, {r12}              @ recover the row countdown
    subs        r12, r12, #2
    bgt         .Lcsc_row_pair

.Lcsc_done:
    vpop        {d8-d15}
    ldmfd       sp!, {r4-r12, pc}       @ restore registers and return

    .ltorg                              @ literal pool for the ldr =const loads
    .fnend
    .size   csc_ARGB8888_to_YUV420SP_NEON, .-csc_ARGB8888_to_YUV420SP_NEON