# Corner-case exerciser for the x86 "rep stosb" string-store instruction.
#
# When trying (and failing) to instrument at the basic block level
# I thought up a lot of corner-cases in the rep code.  This tries
# to catch some of them.
#
# Syntax: AT&T (GNU as), 32-bit x86, run through the C preprocessor
# (see the VGO_darwin conditional near the end).
#
# Expected instruction count (the sequence below is deliberately left
# untouched so that this arithmetic stays valid):
#
#   Performance counters give us 8207 insns
#   setup and the first two tests ......   11
#   loop body, 8 insns * 1024 passes ... 8192
#   exit sequence ......................    3
#                                total = 8206
#
# Register roles:
#   %eax  value to store (stosb writes only its low byte, %al)
#   %ecx  repeat count consumed by the rep prefix
#   %edi  destination pointer, advanced by each store
#   %ebx  outer loop counter, later the exit status on the non-Darwin path

	.globl _start
_start:
	cld				# clear DF so the stores walk forward

	mov	$0xfeb1378,%eax		# value to store

	#----------------------------------------------------------
	# Test 1: back-to-back rep stosb instructions.
	# The first one runs %ecx down to zero, so the two that
	# follow must execute but store nothing.
	#----------------------------------------------------------

	mov	$1024,%ecx
	mov	$buffer1, %edi		# set destination
	rep	stosb			# store 1024 times
	rep	stosb			# should store 0 times
	rep	stosb			# should store 0 times

	#----------------------------------------------------------
	# Test 2: rep stosb entered with a count that is already 0.
	#----------------------------------------------------------

	xor	%ecx,%ecx
	mov	$buffer1, %edi		# set destination
	rep	stosb			# should not store at all

	#----------------------------------------------------------
	# Test 3: rep stosb inside an ordinary loop.
	# Each pass fills the first 1024 bytes of buffer1 twice;
	# %ebx counts the 1024 passes.
	#----------------------------------------------------------

	mov	$1024, %ebx
rep_loop:

	mov	$1024,%ecx
	mov	$buffer1, %edi		# set destination
	rep	stosb

	mov	$1024,%ecx
	mov	$buffer1, %edi		# set destination
	rep	stosb

	dec	%ebx
	jnz	rep_loop		# repeat until %ebx reaches zero

	#================================
	# Exit
	#================================
	# System call number 1 goes in %eax.  On the non-Darwin path the
	# status (0) is passed in %ebx; on Darwin it is pushed on the stack.
	# NOTE(review): the BSD-style int $0x80 convention is usually
	# described as expecting a dummy return-address slot below the
	# arguments; with a single push the kernel may read a different
	# word as the status.  Confirm before relying on the exit code.
	# An extra push would change the instruction count noted above.
exit:
	mov	$1,%eax
#ifdef VGO_darwin
	pushl	$0
#else
	xor	%ebx,%ebx		# we return 0
#endif
	int	$0x80			# and exit


	# No explicit .bss directive is needed: .lcomm reserves the
	# 16384-byte scratch buffer in the bss section by itself.
#.bss

.lcomm	buffer1,	16384