; 7zCrcOpt.asm -- CRC32 calculation : optimized version
; 2009-12-12 : Igor Pavlov : Public domain

; Register aliases (r0..r7, x0..x7, xN_L, xN_H), REG_SIZE and the MY_* helper
; macros are not defined in this file; presumably they come from 7zAsm.asm.
include 7zAsm.asm

MY_ASM_START

; rD : address of the next input byte. Inside the main loops it is reused as
;      a negative byte offset relative to rN (see MY_PROLOG).
; rN : number of bytes left. Inside the main loops it is reused as the
;      address at which the main loop stops.
rD   equ  r2
rN   equ  r7

; num_VAR   : byte count on entry; MY_PROLOG later overwrites it with the
;             end-of-data address.
; table_VAR : base address of the lookup tables (copied into r5 by MY_PROLOG).
; x64 build : both live in registers (r8, r9).
; otherwise : both are stack slots addressed through r4.
; NOTE(review): the REG_SIZE * 5 displacement presumably skips the four
; registers saved by MY_PUSH_4_REGS plus the return address - confirm
; against 7zAsm.asm.
ifdef x64
    num_VAR     equ r8
    table_VAR   equ r9
else
    data_size   equ (REG_SIZE * 5)
    crc_table   equ (REG_SIZE + data_size)
    num_VAR     equ [r4 + data_size]
    table_VAR   equ [r4 + crc_table]
endif

; SRCDAT k  expands to  rN + rD + 4 * k : the address of dword number k
; counted from the current position (rN + rD). The definition deliberately
; ends with '*', so every use must be followed by the dword index.
SRCDAT  equ  rN + rD + 4 *

; CRC op, dest, src, t
;   Emits a single instruction:
;       op dest, DWORD PTR [r5 + src * 4 + 0400h * t]
;   That is, op is applied to dest and 32-bit entry number src of lookup
;   table number t. Consecutive tables are 0400h bytes (256 dwords) apart
;   and start at the address held in r5 (loaded from table_VAR by MY_PROLOG).
;   src must be a register that can be used as a scaled index.
CRC macro op:req, dest:req, src:req, t:req
    op      dest, DWORD PTR [r5 + src * 4 + 0400h * t]
endm

; CRC_XOR dest, src, t
;   Shorthand for the CRC macro with op = xor:
;   xors entry number src of lookup table number t into dest.
CRC_XOR macro dest:req, src:req, t:req
    CRC xor, dest, src, t
endm

; CRC_MOV dest, src, t
;   Shorthand for the CRC macro with op = mov:
;   loads entry number src of lookup table number t into dest.
CRC_MOV macro dest:req, src:req, t:req
    CRC mov, dest, src, t
endm

; CRC1b -- consume exactly one input byte (byte-at-a-time CRC step).
;   In  : x0 = running CRC, rD = address of the next byte,
;         rN = bytes left, r5 = lookup-table base.
;   Out : x0 = (x0 shr 8) xor table0[x0_L xor input byte]
;         rD = rD + 1, rN = rN - 1
;   Clobbers x3 and x6.
;   The final instruction is "dec rN", so ZF on exit is set exactly when rN
;   reached zero; MY_PROLOG branches on that flag right after this macro.
;   NOTE(review): assumes x0_L is the low byte of x0 and that r6 is the
;   full-width register behind x6 (aliases defined outside this file).
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shr     x0, 8
    CRC     xor, x0, r6, 0
    dec     rN
endm

; MY_PROLOG crc_end
;   Common entry code for the CrcUpdateT* procedures.
;   crc_end : label of the byte-at-a-time tail loop (emitted by MY_EPILOG).
;
;   1. Saves registers with MY_PUSH_4_REGS (defined outside this file).
;   2. x0 = x1 (running CRC), rN = num_VAR (byte count), r5 = table_VAR.
;   3. Consumes single bytes until rD is a multiple of 8 or rN reaches 0.
;   4. If fewer than 16 bytes remain, jumps to crc_end.
;   5. Otherwise prepares the 8-bytes-per-iteration main loop:
;        num_VAR = end-of-data address (rD + rN)
;        rN      = (end - 8) rounded down to a multiple of 8
;        rD      = rD - rN, a negative offset that becomes 0 when the
;                  current position (rN + rD) reaches rN
;      and xors the first data dword into x0.
;   The 8 bytes held back in step 5 leave room for the main loops, which read
;   dwords from the block following the one being processed.
MY_PROLOG macro crc_end:req
    MY_PUSH_4_REGS
    
    mov     x0, x1
    mov     rN, num_VAR
    mov     r5, table_VAR
    test    rN, rN
    jz      crc_end
  @@:
    ; alignment loop: one byte at a time while the low 3 address bits are set
    test    rD, 7
    jz      @F
    CRC1b
    ; ZF comes from the "dec rN" that ends CRC1b: keep looping while bytes remain
    jnz     @B
  @@:
    cmp     rN, 16
    jb      crc_end
    add     rN, rD
    mov     num_VAR, rN
    sub     rN, 8
    and     rN, NOT 7
    sub     rD, rN
    xor     x0, [SRCDAT 0]
endm

; MY_EPILOG crc_end
;   Common exit code, placed directly after a main loop (which falls through
;   here once "add rD, 8" has brought rD to 0).
;   crc_end : name given to the tail-loop label defined here; MY_PROLOG jumps
;             to it when the main loop has to be skipped.
;
;   - Removes from x0 the data dword that the main loop had already xored in
;     for a following iteration that will not run.
;   - Converts rD / rN back to "address of next byte" / "bytes left":
;     rD = rN, rN = num_VAR (end-of-data address stored by MY_PROLOG) - rD.
;   - Processes the remaining bytes one at a time with CRC1b.
;   - Restores registers with MY_POP_4_REGS (defined outside this file).
MY_EPILOG macro crc_end:req
    xor     x0, [SRCDAT 0]
    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD
  crc_end:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end
  @@:
    MY_POP_4_REGS
endm

; CrcUpdateT8 -- CRC update that handles 8 input bytes per main-loop
;                iteration using lookup tables 0..7 (8 tables of 0400h bytes
;                each, starting at table_VAR).
;   Inputs, as consumed by MY_PROLOG:
;     x1 = starting CRC value, rD = data address,
;     num_VAR = byte count, table_VAR = lookup-table base.
;   The updated CRC is in x0 when MY_EPILOG finishes.
;   NOTE(review): the second MY_PROC operand (4) is presumably the argument
;   count; MY_PROC / MY_ENDP are defined outside this file - confirm there.
;   NOTE(review): xN_L / xN_H are presumably bits 0-7 / 8-15 of xN.
;
;   Invariant at main_loop_8:
;     x0 = CRC so far xor dword 0 of the current 8-byte block
;     x1 = dword 1 of the current 8-byte block
;     rD = negative byte offset of the current block from rN
MY_PROC CrcUpdateT8, 4
    MY_PROLOG crc_end_8
    mov     x1, [SRCDAT 1]
    align 16
  main_loop_8:
    ; x6 collects the next value of x0; it starts as dword 0 of the next block
    mov     x6, [SRCDAT 2]
    ; the four bytes of x1 (low to high) select entries in tables 3, 2, 1, 0
    movzx   x3, x1_L
    CRC_XOR x6, r3, 3
    movzx   x3, x1_H
    CRC_XOR x6, r3, 2
    shr     x1, 16
    movzx   x3, x1_L
    movzx   x1, x1_H
    CRC_XOR x6, r3, 1
    ; low byte of x0 is fetched early; it is used for table 7 below
    movzx   x3, x0_L
    CRC_XOR x6, r1, 0

    ; x1 is free again: reload it with dword 1 of the next block
    mov     x1, [SRCDAT 3]
    ; the four bytes of x0 (low to high) select entries in tables 7, 6, 5, 4
    CRC_XOR x6, r3, 7
    movzx   x3, x0_H
    shr     x0, 16
    CRC_XOR x6, r3, 6
    movzx   x3, x0_L
    CRC_XOR x6, r3, 5
    movzx   x3, x0_H
    CRC_MOV x0, r3, 4
    xor     x0, x6
    ; advance one block; ZF is set when the offset rD reaches 0
    add     rD, 8
    jnz     main_loop_8

    MY_EPILOG crc_end_8
MY_ENDP

; CrcUpdateT4 -- CRC update that handles 8 input bytes per main-loop
;                iteration as two 4-byte steps, using lookup tables 0..3
;                (4 tables of 0400h bytes each, starting at table_VAR).
;   Inputs, as consumed by MY_PROLOG:
;     x1 = starting CRC value, rD = data address,
;     num_VAR = byte count, table_VAR = lookup-table base.
;   The updated CRC is in x0 when MY_EPILOG finishes.
;   NOTE(review): the second MY_PROC operand (4) is presumably the argument
;   count; MY_PROC / MY_ENDP are defined outside this file - confirm there.
;   NOTE(review): xN_L / xN_H are presumably bits 0-7 / 8-15 of xN.
;
;   Invariant at main_loop_4:
;     x0 = CRC so far xor dword 0 of the current 8-byte block
;     rD = negative byte offset of the current block from rN
MY_PROC CrcUpdateT4, 4
    MY_PROLOG crc_end_4
    align 16
  main_loop_4:
    ; step 1: split x0 into bytes: x1 = byte 0, x3 = byte 1, x0 = byte 2, x6 = byte 3
    movzx   x1, x0_L
    movzx   x3, x0_H
    shr     x0, 16
    movzx   x6, x0_H
    and     x0, 0FFh
    ; x1 = table3[byte 0] xor dword 1 xor table2[byte 1]
    ;      xor table0[byte 3] xor table1[byte 2]
    CRC_MOV x1, r1, 3
    xor     x1, [SRCDAT 1]
    CRC_XOR x1, r3, 2
    CRC_XOR x1, r6, 0
    CRC_XOR x1, r0, 1
 
    ; step 2: same computation with the roles of x0 and x1 swapped; the data
    ; dword xored in is dword 0 of the next block
    movzx   x0, x1_L
    movzx   x3, x1_H
    shr     x1, 16
    movzx   x6, x1_H
    and     x1, 0FFh
    CRC_MOV x0, r0, 3
    xor     x0, [SRCDAT 2]
    CRC_XOR x0, r3, 2
    CRC_XOR x0, r6, 0
    CRC_XOR x0, r1, 1
    ; advance one block; ZF is set when the offset rD reaches 0
    add     rD, 8
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP

; End of the assembly source.
end