1de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 2de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj/* 3de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjThis is a regression test for the following problem, noticed by 4de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjGreg Parker: 5de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 6de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjvex ppc64 generates bad code for instruction sequences like this: 7de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 8de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj li r0, 2 9de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj stdx r3, r1, r0 10de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 11de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjgcc emits code like this when manipulating packed structures 12de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjwith 8-byte fields on 2-byte boundaries. 13de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 14de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjFirst, vex's optimizer substitutes a constant 0x2 for r0: 15de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 16de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj ------ IMark(0x100000F34, 4) ------ 17de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj PUT(1024) = 0x100000F34:I64 18de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj t3 = GET:I64(24) 19de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj t14 = GET:I64(8) 20de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj t13 = Add64(t14,0x2:I64) 21de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj STbe(t13) = t3 22de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 23de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjThen instruction selection chooses `std` with an index not divisible by 4: 24de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 25de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj -- STbe(Add64(GET:I64(8),0x2:I64)) = GET:I64(24) 26de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj ldz %vR22,8(%r31) 27de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj ldz %vR23,24(%r31) 28de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj std %vR23,2(%vR22) 29de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 30de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjFinally, the assembler silently strips the index&3 part, 31de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjbecause `std` can't encode that: 32de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 33de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj std %r6,2(%r5) 34de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj F8 C5 00 00 35de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 36de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj...but 0xF8C50000 is `std r6, 0(r5)`, which writes to the wrong address. 37de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj*/ 38de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 39de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj#include <stdio.h> 40de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj#include <stdlib.h> 41de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj#include <assert.h> 42de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 43de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjtypedef 44de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjstruct __attribute__ ((__packed__)) { 45de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj char before[2]; 46de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj unsigned long long int w64; 47de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj char after[6]; 48de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj} 49de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjT; 50de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 51de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjvoid foo (T* t, unsigned long long int w) 52de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj{ 53de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj __asm__ __volatile__( 54de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj "stdx %0,%1,%2" 55de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj : : "b"(w), "b"(t), "b"(2) : "memory" 56de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj ); 57de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj} 58de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj 59de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjint main ( void ) 60de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj{ 61de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj T* t; 62de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj unsigned char* p; 63de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj int i; 64de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj assert(sizeof(T) == 16); 65de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj t = calloc(sizeof(T),1); 66de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj assert(t); 67de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj /* check t is 8-aligned. This causes the write done by 'foo' to be 68de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj misaligned by 2 as desired, triggering the bug. */ 69de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj assert(0 == (((unsigned long)t) & 7)); 70de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj foo(t, 0x1122334455667788); 71de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj p = (unsigned char*)t; 72de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj for (i = 0; i < 16; i++) 73de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj if (p[i] == 0) 74de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj printf(".."); 75de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj else 76de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj printf("%02x", (int)p[i]); 77de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj printf("\n"); 78de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj return 0; 79de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj} 80