7zCrcOpt.asm revision baa3858d3f5d128a5c8466b700098109edcad5f2
15f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles); 7zCrcOpt.asm -- CRC32 calculation : optimized version
25f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles); 2009-12-12 : Igor Pavlov : Public domain
35f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
45f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)include 7zAsm.asm
55f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
65f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_ASM_START
75f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; Register and argument aliases shared by every macro and procedure below.
; The r0..r7 / x0..x7 names are not defined in this file; they presumably come
; from the included 7zAsm.asm -- confirm there which machine registers they are.
;
; rD: data pointer (CRC1b reads a byte at [rD] and then increments rD).
; rN: byte count (loaded from num_VAR in MY_PROLOG, decremented by CRC1b).
; Inside the unrolled main loops the pair is re-purposed: rN is a fixed base
; address and rD a negative offset that counts up to zero (see MY_PROLOG).
85f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)rD   equ  r2
95f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)rN   equ  r7
105f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; num_VAR / table_VAR: location of the byte-count and table-base arguments.
; With x64 defined they are the registers r8 and r9; otherwise they are memory
; operands at [r4 + REG_SIZE * 5] and [r4 + REG_SIZE * 6].
; NOTE(review): the 5-slot offset presumably skips the return address plus the
; four registers saved by MY_PUSH_4_REGS, with r4 as the stack pointer --
; confirm against 7zAsm.asm.
115f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)ifdef x64
125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    num_VAR     equ r8
135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    table_VAR   equ r9
145f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)else
155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    data_size   equ (REG_SIZE * 5)
165f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    crc_table   equ (REG_SIZE + data_size)
175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    num_VAR     equ [r4 + data_size]
185f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    table_VAR   equ [r4 + crc_table]
195f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endif
205f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; SRCDAT is a deliberately unfinished expression: the user appends a dword
; index, so [SRCDAT k] reads the dword at rN + rD + 4 * k.
215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)SRCDAT  equ  rN + rD + 4 *
225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; CRC op, dest, src, t
; Emits "op dest, <table entry>", where the entry is the dword at index src of
; sub-table t.
;   op   - instruction mnemonic to emit (used below with xor and mov)
;   dest - destination register
;   src  - register holding the entry index; it is scaled by 4 (dword entries)
;   t    - sub-table number; sub-tables are 0400h bytes (256 dwords) apart
; r5 must hold the table base (MY_PROLOG loads it from table_VAR).
235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)CRC macro op:req, dest:req, src:req, t:req
245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    op      dest, DWORD PTR [r5 + src * 4 + 0400h * t]
255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endm
265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; CRC_XOR dest, src, t
; XORs dest with entry src of sub-table t (the CRC macro with op = xor).
275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)CRC_XOR macro dest:req, src:req, t:req
285f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC xor, dest, src, t
295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endm
305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; CRC_MOV dest, src, t
; Loads dest with entry src of sub-table t (the CRC macro with op = mov).
315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)CRC_MOV macro dest:req, src:req, t:req
325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC mov, dest, src, t
335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endm
345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; CRC1b
; Folds one input byte into the running CRC held in x0, using sub-table 0.
;   in:  x0 = CRC, rD = pointer to the byte, rN = byte count, r5 = table base
;   out: x0 updated, rD advanced by 1, rN decreased by 1
;   overwrites x3 and x6
; The final "dec rN" is deliberately last: callers branch on its zero flag
; (MY_PROLOG follows CRC1b directly with jnz).
; NOTE(review): assumes x0_L is the low 8 bits of x0 and that x6 / r6 are two
; views of the same register -- both names are defined outside this file.
355f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)CRC1b macro
    ; x6 = next input byte, zero-extended
365f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x6, BYTE PTR [rD]
375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    inc     rD
    ; table index = input byte XOR x0_L
385f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x0_L
395f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor     x6, x3
    ; x0 = (x0 >> 8) XOR table0[index]
405f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    shr     x0, 8
415f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC     xor, x0, r6, 0
    ; sets ZF when the last byte has been consumed
425f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    dec     rN
435f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endm
445f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; MY_PROLOG crc_end
; Shared entry sequence for the CrcUpdateT* procedures.
;   crc_end - label (defined by the matching MY_EPILOG) that is jumped to when
;             the unrolled main loop must be skipped.
; State on fall-through (main-loop path):
;   x0      = running CRC, already XORed with the dword at [SRCDAT 0]
;   rN      = (end of data - 8) rounded down to a multiple of 8
;   rD      = negative byte offset from rN, so rN + rD is the current position
;   num_VAR = end-of-data address (this overwrites the byte-count argument)
;   r5      = table base
; State on a jump to crc_end:
;   x0 = running CRC, rD = data pointer, rN = bytes left, r5 = table base
455f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_PROLOG macro crc_end:req
    ; MY_PUSH_4_REGS is defined outside this file; presumably it saves the four
    ; registers that MY_POP_4_REGS restores in MY_EPILOG -- confirm in 7zAsm.asm.
465f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    MY_PUSH_4_REGS
475f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    
    ; x1 carries the incoming CRC value (presumably the first argument
    ; register -- confirm the calling convention); x0 is the working copy.
485f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     x0, x1
495f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     rN, num_VAR
505f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     r5, table_VAR
    ; nothing to do for an empty buffer
515f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    test    rN, rN
525f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jz      crc_end
    ; Consume single bytes until rD is a multiple of 8. The jnz tests the flags
    ; of the "dec rN" that ends CRC1b, so this loop also stops when the count
    ; reaches zero.
535f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  @@:
545f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    test    rD, 7
555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jz      @F
565f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC1b
575f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jnz     @B
585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  @@:
    ; Fewer than 16 bytes left (unsigned compare): leave them to the byte loop
    ; at crc_end.
595f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    cmp     rN, 16
605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jb      crc_end
    ; rN = end-of-data address; keep it in num_VAR for MY_EPILOG
615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    add     rN, rD
625f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     num_VAR, rN
    ; rN = (end - 8) rounded down to 8: the address where the main loop stops
635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    sub     rN, 8
645f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    and     rN, NOT 7
    ; rD = current position - rN, a negative offset; the main loops add 8 per
    ; iteration and exit when it reaches zero
655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    sub     rD, rN
    ; pre-XOR the first dword of the block into the CRC
665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor     x0, [SRCDAT 0]
675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endm
685f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; MY_EPILOG crc_end
; Shared exit sequence for the CrcUpdateT* procedures.
;   crc_end - name of the label this macro defines for the byte-wise tail loop;
;             MY_PROLOG jumps to it when the main loop is skipped.
; Entered by fall-through from a main loop (rD = 0, rN = stop address,
; num_VAR = end-of-data address) or by a jump to crc_end (rD = data pointer,
; rN = bytes left). Leaves the final CRC in x0.
695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_EPILOG macro crc_end:req
    ; Each main-loop iteration leaves x0 XORed with the dword that follows the
    ; bytes it consumed (its [SRCDAT 2] load). After the final iteration that
    ; dword is at [rN] and has not been processed, so XOR it out again.
705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor     x0, [SRCDAT 0]
    ; back to pointer/count form: rD = position, rN = end - position
715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     rD, rN
725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     rN, num_VAR
735f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    sub     rN, rD
    ; tail loop: one byte per pass until rN is zero
745f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  crc_end:
755f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    test    rN, rN
765f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jz      @F
775f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC1b
785f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jmp     crc_end
795f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  @@:
    ; MY_POP_4_REGS is defined outside this file; presumably it restores what
    ; MY_PUSH_4_REGS saved in MY_PROLOG -- confirm in 7zAsm.asm.
805f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    MY_POP_4_REGS
815f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)endm
825f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; CrcUpdateT8
; CRC update that consumes 8 bytes per main-loop iteration, using eight
; sub-tables (numbers 0..7 in the CRC_XOR / CRC_MOV lines) based at r5.
; Inputs, as read by MY_PROLOG: x1 = starting CRC, rD = data pointer,
; num_VAR = byte count, table_VAR = table base. The result is left in x0.
; Leading unaligned bytes and trailing bytes are handled one at a time by
; MY_PROLOG / MY_EPILOG.
; NOTE(review): the second MY_PROC operand (4) is presumably the argument
; count and MY_ENDP presumably emits the return; xN_L / xN_H are assumed to be
; bits 0-7 / 8-15 of xN. All of these are defined outside this file.
835f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_PROC CrcUpdateT8, 4
845f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    MY_PROLOG crc_end_8
    ; Loop invariant at main_loop_8:
    ;   x0 = CRC XOR dword [SRCDAT 0],  x1 = dword [SRCDAT 1]
855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     x1, [SRCDAT 1]
865f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    align 16
875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  main_loop_8:
    ; x6 accumulates the new CRC; it starts as the first dword of the NEXT
    ; 8-byte group, which keeps the invariant for the following iteration
885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     x6, [SRCDAT 2]
    ; the four bytes of x1, lowest first, index sub-tables 3, 2, 1, 0
895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x1_L
905f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r3, 3
915f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x1_H
925f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r3, 2
935f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    shr     x1, 16
945f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x1_L
955f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x1, x1_H
965f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r3, 1
    ; x3 is loaded here for the sub-table 7 lookup further down; the sub-table 0
    ; lookup below uses r1, not r3
975f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x0_L
985f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r1, 0
995f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
    ; x1 is free now: reload it with the second dword of the next group
1005f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    mov     x1, [SRCDAT 3]
    ; the four bytes of x0, lowest first, index sub-tables 7, 6, 5, 4
1015f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r3, 7
1025f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x0_H
1035f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    shr     x0, 16
1045f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r3, 6
1055f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x0_L
1065f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x6, r3, 5
1075f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x0_H
    ; the last lookup overwrites x0; merging x6 completes the new value
1085f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_MOV x0, r3, 4
1095f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor     x0, x6
    ; rD is a negative offset from rN; the loop ends when it reaches zero
1105f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    add     rD, 8
1115f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jnz     main_loop_8
1125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    MY_EPILOG crc_end_8
1145f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_ENDP
1155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
; CrcUpdateT4
; CRC update that uses four sub-tables (numbers 0..3) based at r5. Each
; main-loop iteration consumes 8 bytes as two 4-byte steps, with x0 and x1
; swapping roles between the steps.
; Inputs, as read by MY_PROLOG: x1 = starting CRC, rD = data pointer,
; num_VAR = byte count, table_VAR = table base. The result is left in x0.
; Leading unaligned bytes and trailing bytes are handled one at a time by
; MY_PROLOG / MY_EPILOG.
; NOTE(review): the second MY_PROC operand (4) is presumably the argument
; count and MY_ENDP presumably emits the return; xN_L / xN_H are assumed to be
; bits 0-7 / 8-15 of xN. All of these are defined outside this file.
1165f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_PROC CrcUpdateT4, 4
1175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    MY_PROLOG crc_end_4
1185f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    align 16
    ; Loop invariant at main_loop_4: x0 = CRC XOR dword [SRCDAT 0]
1195f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  main_loop_4:
    ; Step 1: split x0 into bytes: x1 = byte 0, x3 = byte 1, x0 = byte 2,
    ; x6 = byte 3
1205f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x1, x0_L
1215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x0_H
1225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    shr     x0, 16
1235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x6, x0_H
1245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    and     x0, 0FFh
    ; x1 = T3[byte 0] ^ T2[byte 1] ^ T1[byte 2] ^ T0[byte 3] ^ dword [SRCDAT 1]
    ; (the dword is the input of step 2, XORed in ahead of time)
1255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_MOV x1, r1, 3
1265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor     x1, [SRCDAT 1]
1275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x1, r3, 2
1285f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x1, r6, 0
1295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x1, r0, 1
1305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) 
    ; Step 2: same pattern with the roles swapped: split x1 into x0 = byte 0,
    ; x3 = byte 1, x1 = byte 2, x6 = byte 3
1315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x0, x1_L
1325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x3, x1_H
1335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    shr     x1, 16
1345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    movzx   x6, x1_H
1355f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    and     x1, 0FFh
    ; x0 = T3[byte 0] ^ T2[byte 1] ^ T1[byte 2] ^ T0[byte 3] ^ dword [SRCDAT 2]
    ; (the dword is the first one of the next 8-byte group, which restores the
    ; loop invariant)
1365f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_MOV x0, r0, 3
1375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    xor     x0, [SRCDAT 2]
1385f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x0, r3, 2
1395f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x0, r6, 0
1405f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    CRC_XOR x0, r1, 1
    ; rD is a negative offset from rN; the loop ends when it reaches zero
1415f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    add     rD, 8
1425f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    jnz     main_loop_4
1435f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1445f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    MY_EPILOG crc_end_4
1455f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)MY_ENDP
1465f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1475f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)end
1485f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)