# rep-prefix corner cases (x86-64 Linux, GNU as, AT&T syntax).
#
# When trying (and failing) to instrument at the basic block level
# I thought up a lot of corner-cases in the rep code.  This tries
# to catch some of them.
#
# Static instruction count, counting every rep stosb as one instruction:
#   before the loop 11, loop body 8 per pass over 1024 passes, exit 3,
#   giving 11 + 8*1024 + 3 = 8206.
# Performance counters give us 8207 insns.
#
# Register roles:
#   rax  fill value (stosb stores AL only)
#   rcx  rep count, consumed by every rep stosb
#   rdi  destination pointer, advanced by every stored byte
#   rbx  outer loop counter

        .equ    FILL_BYTES,   1024      # rep count for each filling rep stosb
        .equ    LOOP_PASSES,  1024      # number of passes through rep_loop
        .equ    SYS_EXIT,     60        # Linux x86-64 exit syscall number
        .equ    BUFFER_BYTES, 16384     # size of buffer1 in bytes

        .text
        .globl  _start
_start:
        cld                             # we want these to happen forward

        # Value to store.  Explicit 64-bit immediate form because the
        # constant does not fit in 32 bits; stosb writes only AL (0x78).
        movabs  $0xfeb131978, %rax

        #================================
        # Back-to-back rep stosb
        #================================
        # The first rep consumes the whole count, so the two that
        # follow start with rcx == 0 and must store nothing.

        mov     $FILL_BYTES, %rcx
        lea     buffer1(%rip), %rdi     # set destination (RIP-relative, PIE safe)
        rep stosb                       # store 1024 times
        rep stosb                       # should store 0 times
        rep stosb                       # should store 0 times

        #================================
        # rep stosb where rcx is 0 on entry
        #================================

        xor     %rcx, %rcx
        lea     buffer1(%rip), %rdi     # set destination
        rep stosb                       # should not store at all

        #================================
        # rep inside of a loop
        #================================
        # Two full fills of the same region per pass; rcx and rdi are
        # reloaded before each one because rep stosb modifies both.

        mov     $LOOP_PASSES, %rbx
rep_loop:

        mov     $FILL_BYTES, %rcx
        lea     buffer1(%rip), %rdi     # set destination
        rep stosb

        mov     $FILL_BYTES, %rcx
        lea     buffer1(%rip), %rdi     # set destination
        rep stosb

        dec     %rbx
        jnz     rep_loop                # repeat until rbx reaches 0

        #================================
        # Exit
        #================================
exit:
        mov     $SYS_EXIT, %rax
        xor     %rdi, %rdi              # we return 0
        syscall                         # and exit


.bss

.lcomm  buffer1, BUFFER_BYTES