; 7zCrcOpt.asm -- CRC32 calculation : optimized version
; 2009-12-12 : Igor Pavlov : Public domain
;
; Provenance: recovered from a per-line annotated listing of revision
;   baa3858d3f5d128a5c8466b700098109edcad5f2. The line-number / commit-id /
;   author prefixes (5f1c94371a64b3196d4be9466099bb892df9b88e,
;   Torne (Richard Coles)) that were fused into every line have been removed;
;   the instruction stream itself is unchanged.
;
; Syntax: MASM (macro/endm, @@ / @F / @B anonymous labels).
;
; Exports two procedures, CrcUpdateT8 and CrcUpdateT4. Both consume the input
; buffer 8 bytes per main-loop iteration; T8 uses eight 256-entry lookup
; tables (0400h bytes apart), T4 uses four.
;
; Everything named r0..r7, x0..x7, x0_L / x0_H / x1_L / x1_H, REG_SIZE,
; MY_ASM_START, MY_PROC, MY_ENDP, MY_PUSH_4_REGS and MY_POP_4_REGS is defined
; in 7zAsm.asm, not here.
; NOTE(review): the comments below assume xN is the 32-bit view of rN, and
;   xN_L / xN_H are its lowest and second-lowest bytes -- confirm in 7zAsm.asm.
;
; Register roles as used by the code in this file:
;   x1       in : initial CRC value (copied to x0 by MY_PROLOG); scratch later
;   rD (r2)  in : pointer to the data bytes; inside the main loops it holds a
;                 (negative) byte offset relative to rN instead
;   num_VAR  in : number of bytes to process; reused to hold the end address
;   table_VAR in: base address of the lookup tables (loaded into r5)
;   rN (r7)     : bytes remaining; inside the main loops, the 8-byte-aligned
;                 address at which the main loop stops
;   x0          : running CRC value (final value is left here)
;   x3, x6      : scratch
; NOTE(review): x0 is presumably the ABI return register -- confirm.

include 7zAsm.asm

MY_ASM_START

rD   equ  r2
rN   equ  r7

; Where the "size" and "table" arguments live.
;   x64    : in registers r8 / r9.
;   others : in memory relative to r4.
; NOTE(review): r4 is presumably the stack pointer, and REG_SIZE * 5
;   presumably skips the four registers saved by MY_PUSH_4_REGS plus the
;   return address -- confirm against 7zAsm.asm.
ifdef x64
    num_VAR     equ r8
    table_VAR   equ r9
else
    data_size   equ (REG_SIZE * 5)
    crc_table   equ (REG_SIZE + data_size)
    num_VAR     equ [r4 + data_size]
    table_VAR   equ [r4 + crc_table]
endif

; Text fragment: "[SRCDAT k]" expands to [rN + rD + 4 * k], i.e. the k-th
; dword counted from the current position (rN + rD) in the main loops.
SRCDAT  equ  rN + rD + 4 *

; CRC op, dest, src, t
;   Applies "op" to dest with the dword table entry at
;   r5 + src * 4 + 0400h * t  (entry "src" of table number "t").
CRC macro op:req, dest:req, src:req, t:req
    op      dest, DWORD PTR [r5 + src * 4 + 0400h * t]
endm

; CRC_XOR dest, src, t  ->  dest ^= table[t][src]
CRC_XOR macro dest:req, src:req, t:req
    CRC xor, dest, src, t
endm

; CRC_MOV dest, src, t  ->  dest = table[t][src]
CRC_MOV macro dest:req, src:req, t:req
    CRC mov, dest, src, t
endm

; CRC1b: feed one byte at [rD] into the CRC held in x0.
;   x0 = table[0][(x0 low byte) xor byte] xor (x0 >> 8)
;   Advances rD by 1 and decrements rN. The final "dec rN" leaves ZF set
;   exactly when rN reached zero; MY_PROLOG relies on that.
;   Clobbers x3, x6 and flags.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shr     x0, 8
    CRC     xor, x0, r6, 0
    dec     rN
endm

; MY_PROLOG crc_end
;   Common entry sequence. "crc_end" is the label of the byte-wise tail loop
;   that the matching MY_EPILOG defines.
;   1. Saves registers, loads x0 / rN / r5 from the arguments.
;   2. Size 0 -> jump to crc_end.
;   3. Processes single bytes until rD is a multiple of 8 (or rN hits 0).
;   4. Fewer than 16 bytes left -> jump to crc_end (tail loop does them all).
;   5. Otherwise sets up the main loop:
;        num_VAR = end address (rD + rN)
;        rN      = (end - 8) rounded down to a multiple of 8  (loop limit)
;        rD      = current address - rN                      (negative offset)
;      and pre-XORs the first data dword into x0.
MY_PROLOG macro crc_end:req
    MY_PUSH_4_REGS

    mov     x0, x1
    mov     rN, num_VAR
    mov     r5, table_VAR
    test    rN, rN
    jz      crc_end
  @@:
    test    rD, 7
    jz      @F
    CRC1b
    ;; ZF here comes from "dec rN" at the end of CRC1b
    jnz     @B
  @@:
    cmp     rN, 16
    jb      crc_end
    add     rN, rD
    mov     num_VAR, rN
    sub     rN, 8
    and     rN, NOT 7
    sub     rD, rN
    xor     x0, [SRCDAT 0]
endm

; MY_EPILOG crc_end
;   Common exit sequence; defines the label "crc_end".
;   Reached by fall-through from a main loop with rD == 0: the main loops
;   leave the next (not yet consumed) data dword XORed into x0, so that XOR
;   is undone first. Then rD = current address, rN = end address - rD
;   (bytes still to do). From crc_end on, the remaining rN bytes are
;   processed one at a time, registers are restored.
MY_EPILOG macro crc_end:req
    xor     x0, [SRCDAT 0]
    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD
  crc_end:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end
  @@:
    MY_POP_4_REGS
endm

; CrcUpdateT8: CRC update using 8 lookup tables, 8 data bytes per iteration.
;   Inputs/outputs: see "Register roles" at the top of the file.
;   Loop invariant at main_loop_8:
;     x0 = CRC xor dword 0 of the current 8-byte block
;     x1 = dword 1 of the current 8-byte block
;   Each iteration also reads dwords 2 and 3 (the next block) ahead of time.
; NOTE(review): the "4" is a MY_PROC argument, presumably the parameter
;   count -- confirm in 7zAsm.asm.
MY_PROC CrcUpdateT8, 4
    MY_PROLOG crc_end_8
    mov     x1, [SRCDAT 1]
    align 16
  main_loop_8:
    ; x6 accumulates the new CRC, starting from dword 2 (next block, dword 0)
    mov     x6, [SRCDAT 2]
    ; bytes of x1 (data dword 1): byte 0 -> table 3 ... byte 3 -> table 0
    movzx   x3, x1_L
    CRC_XOR x6, r3, 3
    movzx   x3, x1_H
    CRC_XOR x6, r3, 2
    shr     x1, 16
    movzx   x3, x1_L
    movzx   x1, x1_H
    CRC_XOR x6, r3, 1
    ; x3 is reloaded with byte 0 of x0 before r1 (byte 3 of old x1) is used
    movzx   x3, x0_L
    CRC_XOR x6, r1, 0

    ; x1 is free now: preload dword 3 (next block, dword 1)
    mov     x1, [SRCDAT 3]
    ; bytes of x0 (CRC xor data dword 0): byte 0 -> table 7 ... byte 3 -> table 4
    CRC_XOR x6, r3, 7
    movzx   x3, x0_H
    shr     x0, 16
    CRC_XOR x6, r3, 6
    movzx   x3, x0_L
    CRC_XOR x6, r3, 5
    movzx   x3, x0_H
    CRC_MOV x0, r3, 4
    xor     x0, x6
    ; rD is a negative offset that reaches 0 at the loop limit rN
    add     rD, 8
    jnz     main_loop_8

    MY_EPILOG crc_end_8
MY_ENDP

; CrcUpdateT4: CRC update using 4 lookup tables. Each iteration performs two
;   4-byte steps (8 data bytes), alternating the CRC between x0 and x1.
;   Inputs/outputs: see "Register roles" at the top of the file.
;   Loop invariant at main_loop_4:
;     x0 = CRC xor dword 0 of the current 8-byte block
; NOTE(review): the "4" is a MY_PROC argument, presumably the parameter
;   count -- confirm in 7zAsm.asm.
MY_PROC CrcUpdateT4, 4
    MY_PROLOG crc_end_4
    align 16
  main_loop_4:
    ; step 1: split x0 into its four bytes -> x1 (b0), x3 (b1), x0 (b2), x6 (b3)
    movzx   x1, x0_L
    movzx   x3, x0_H
    shr     x0, 16
    movzx   x6, x0_H
    and     x0, 0FFh
    ; x1 = T3[b0] xor dword 1 xor T2[b1] xor T0[b3] xor T1[b2]
    CRC_MOV x1, r1, 3
    xor     x1, [SRCDAT 1]
    CRC_XOR x1, r3, 2
    CRC_XOR x1, r6, 0
    CRC_XOR x1, r0, 1

    ; step 2: same computation with x0 / x1 swapped, folding in dword 2
    ; (dword 0 of the next block), which restores the loop invariant
    movzx   x0, x1_L
    movzx   x3, x1_H
    shr     x1, 16
    movzx   x6, x1_H
    and     x1, 0FFh
    CRC_MOV x0, r0, 3
    xor     x0, [SRCDAT 2]
    CRC_XOR x0, r3, 2
    CRC_XOR x0, r6, 0
    CRC_XOR x0, r1, 1
    ; rD is a negative offset that reaches 0 at the loop limit rN
    add     rD, 8
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP

end