1de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj
/*
This is a regression test for the following problem, noticed by
Greg Parker:

vex ppc64 generates bad code for instruction sequences like this:

    li    r0, 2
    stdx  r3, r1, r0

gcc emits code like this when manipulating packed structures
with 8-byte fields on 2-byte boundaries.

First, vex's optimizer substitutes a constant 0x2 for r0:

    ------ IMark(0x100000F34, 4) ------
    PUT(1024) = 0x100000F34:I64
    t3 = GET:I64(24)
    t14 = GET:I64(8)
    t13 = Add64(t14,0x2:I64)
    STbe(t13) = t3

Then instruction selection chooses `std` with an index not divisible by 4:

    -- STbe(Add64(GET:I64(8),0x2:I64)) = GET:I64(24)
    ldz %vR22,8(%r31)
    ldz %vR23,24(%r31)
    std %vR23,2(%vR22)

Finally, the assembler silently strips the index&3 part,
because `std` can't encode that:

    std %r6,2(%r5)
    F8 C5 00 00

...but 0xF8C50000 is `std r6, 0(r5)`, which writes to the wrong address.
*/
38de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj
39de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj#include <stdio.h>
40de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj#include <stdlib.h>
41de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj#include <assert.h>
42de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj
/* Packed 16-byte record.  The two-byte 'before' array pushes the
   64-bit field 'w64' to byte offset 2, so that field is not 8-aligned
   when the record itself is.  'after' pads the record out to 16 bytes. */
typedef struct {
  char before[2];          /* bytes 0..1  */
  unsigned long long w64;  /* bytes 2..9  */
  char after[6];           /* bytes 10..15 */
} __attribute__ ((__packed__)) T;
50de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj
51de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjvoid foo (T* t, unsigned long long int w)
52de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj{
53de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  __asm__ __volatile__(
54de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj     "stdx %0,%1,%2"
55de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj     : : "b"(w), "b"(t), "b"(2) : "memory"
56de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  );
57de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj}
58de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj
59de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardjint main ( void )
60de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj{
61de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  T* t;
62de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  unsigned char* p;
63de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  int i;
64de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  assert(sizeof(T) == 16);
65de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  t = calloc(sizeof(T),1);
66de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  assert(t);
67de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  /* check t is 8-aligned.  This causes the write done by 'foo' to be
68de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj     misaligned by 2 as desired, triggering the bug. */
69de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  assert(0 == (((unsigned long)t) & 7));
70de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  foo(t, 0x1122334455667788);
71de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  p = (unsigned char*)t;
72de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  for (i = 0; i < 16; i++)
73de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj    if (p[i] == 0)
74de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj      printf("..");
75de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj    else
76de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj      printf("%02x", (int)p[i]);
77de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  printf("\n");
78de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj  return 0;
79de21b954afb4d9902726331a3f8a7d3cf8bd45e4sewardj}
80