1/* K=7 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec instructions 2 * Feb 2004, Phil Karn, KA9Q 3 */ 4#include <stdio.h> 5#include <memory.h> 6#include <stdlib.h> 7#include "fec.h" 8 9typedef union { long long p; unsigned char c[64]; vector bool char v[4]; } decision_t; 10typedef union { long long p; unsigned char c[64]; vector unsigned char v[4]; } metric_t; 11 12static union branchtab27 { unsigned char c[32]; vector unsigned char v[2];} Branchtab27[2]; 13static int Init = 0; 14 15/* State info for instance of Viterbi decoder 16 * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 17 */ 18struct v27 { 19 metric_t metrics1; /* path metric buffer 1 */ 20 metric_t metrics2; /* path metric buffer 2 */ 21 decision_t *dp; /* Pointer to current decision */ 22 metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 23 decision_t *decisions; /* Beginning of decisions for block */ 24}; 25 26/* Initialize Viterbi decoder for start of new frame */ 27int init_viterbi27_av(void *p,int starting_state){ 28 struct v27 *vp = p; 29 int i; 30 31 if(p == NULL) 32 return -1; 33 for(i=0;i<4;i++) 34 vp->metrics1.v[i] = (vector unsigned char)(63); 35 vp->old_metrics = &vp->metrics1; 36 vp->new_metrics = &vp->metrics2; 37 vp->dp = vp->decisions; 38 vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ 39 return 0; 40} 41 42void set_viterbi27_polynomial_av(int polys[2]){ 43 int state; 44 45 for(state=0;state < 32;state++){ 46 Branchtab27[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 47 Branchtab27[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; 48 } 49 Init++; 50} 51 52/* Create a new instance of a Viterbi decoder */ 53void *create_viterbi27_av(int len){ 54 struct v27 *vp; 55 56 if(!Init){ 57 int polys[2] = { V27POLYA,V27POLYB }; 58 set_viterbi27_polynomial_av(polys); 59 } 60 if((vp = (struct v27 *)malloc(sizeof(struct v27))) == NULL) 61 return NULL; 62 if((vp->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t))) == NULL){ 63 free(vp); 64 return NULL; 65 } 66 init_viterbi27_av(vp,0); 67 return vp; 68} 69 70/* Viterbi chainback */ 71int chainback_viterbi27_av( 72 void *p, 73 unsigned char *data, /* Decoded output data */ 74 unsigned int nbits, /* Number of data bits */ 75 unsigned int endstate){ /* Terminal encoder state */ 76 struct v27 *vp = p; 77 decision_t *d = (decision_t *)vp->decisions; 78 79 if(p == NULL) 80 return -1; 81 82 /* Make room beyond the end of the encoder register so we can 83 * accumulate a full byte of decoded data 84 */ 85 endstate %= 64; 86 endstate <<= 2; 87 88 /* The store into data[] only needs to be done every 8 bits. 89 * But this avoids a conditional branch, and the writes will 90 * combine in the cache anyway 91 */ 92 d += 6; /* Look past tail */ 93 while(nbits-- != 0){ 94 int k; 95 96 k = d[nbits].c[endstate>>2] & 1; 97 data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 98 } 99 return 0; 100} 101 102/* Delete instance of a Viterbi decoder */ 103void delete_viterbi27_av(void *p){ 104 struct v27 *vp = p; 105 106 if(vp != NULL){ 107 free(vp->decisions); 108 free(vp); 109 } 110} 111 112/* Process received symbols */ 113int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits){ 114 struct v27 *vp = p; 115 decision_t *d; 116 117 if(p == NULL) 118 return -1; 119 d = (decision_t *)vp->dp; 120 while(nbits--){ 121 vector unsigned char survivor0,survivor1,sym0v,sym1v; 122 vector bool char decision0,decision1; 123 vector unsigned char metric,m_metric,m0,m1,m2,m3; 124 void *tmp; 125 126 /* sym0v.0 = syms[0]; sym0v.1 = syms[1] */ 127 sym0v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms)); 128 129 sym1v = vec_splat(sym0v,1); /* Splat syms[1] across sym1v */ 130 sym0v = vec_splat(sym0v,0); /* Splat syms[0] across sym0v */ 131 syms += 2; 132 133 /* Do the 32 butterflies as two interleaved groups of 16 each to keep the pipes full */ 134 135 /* Form first set of 16 branch metrics */ 136 metric = vec_avg(vec_xor(Branchtab27[0].v[0],sym0v),vec_xor(Branchtab27[1].v[0],sym1v)); 137 metric = vec_sr(metric,(vector unsigned char)(3)); 138 m_metric = vec_sub((vector unsigned char)(31),metric); 139 140 /* Form first set of path metrics */ 141 m0 = vec_adds(vp->old_metrics->v[0],metric); 142 m3 = vec_adds(vp->old_metrics->v[2],metric); 143 m1 = vec_adds(vp->old_metrics->v[2],m_metric); 144 m2 = vec_adds(vp->old_metrics->v[0],m_metric); 145 146 /* Form second set of 16 branch metrics */ 147 metric = vec_avg(vec_xor(Branchtab27[0].v[1],sym0v),vec_xor(Branchtab27[1].v[1],sym1v)); 148 metric = vec_sr(metric,(vector unsigned char)(3)); 149 m_metric = vec_sub((vector unsigned char)(31),metric); 150 151 /* Compare and select first set */ 152 decision0 = vec_cmpgt(m0,m1); 153 decision1 = vec_cmpgt(m2,m3); 154 survivor0 = vec_min(m0,m1); 155 survivor1 = vec_min(m2,m3); 156 157 /* Compute second set of path metrics */ 158 m0 = vec_adds(vp->old_metrics->v[1],metric); 159 m3 = vec_adds(vp->old_metrics->v[3],metric); 160 m1 = vec_adds(vp->old_metrics->v[3],m_metric); 161 m2 = vec_adds(vp->old_metrics->v[1],m_metric); 162 163 /* Interleave and store first decisions and survivors */ 164 d->v[0] = vec_mergeh(decision0,decision1); 165 d->v[1] = vec_mergel(decision0,decision1); 166 vp->new_metrics->v[0] = vec_mergeh(survivor0,survivor1); 167 vp->new_metrics->v[1] = vec_mergel(survivor0,survivor1); 168 169 /* Compare and select second set */ 170 decision0 = vec_cmpgt(m0,m1); 171 decision1 = vec_cmpgt(m2,m3); 172 survivor0 = vec_min(m0,m1); 173 survivor1 = vec_min(m2,m3); 174 175 /* Interleave and store second set of decisions and survivors */ 176 d->v[2] = vec_mergeh(decision0,decision1); 177 d->v[3] = vec_mergel(decision0,decision1); 178 vp->new_metrics->v[2] = vec_mergeh(survivor0,survivor1); 179 vp->new_metrics->v[3] = vec_mergel(survivor0,survivor1); 180 181 /* renormalize if necessary */ 182 if(vp->new_metrics->c[0] >= 105){ 183 vector unsigned char scale0,scale1; 184 185 /* Find smallest metric and splat */ 186 scale0 = vec_min(vp->new_metrics->v[0],vp->new_metrics->v[1]); 187 scale1 = vec_min(vp->new_metrics->v[2],vp->new_metrics->v[3]); 188 scale0 = vec_min(scale0,scale1); 189 scale0 = vec_min(scale0,vec_sld(scale0,scale0,8)); 190 scale0 = vec_min(scale0,vec_sld(scale0,scale0,4)); 191 scale0 = vec_min(scale0,vec_sld(scale0,scale0,2)); 192 scale0 = vec_min(scale0,vec_sld(scale0,scale0,1)); 193 194 /* Now subtract from all metrics */ 195 vp->new_metrics->v[0] = vec_subs(vp->new_metrics->v[0],scale0); 196 vp->new_metrics->v[1] = vec_subs(vp->new_metrics->v[1],scale0); 197 vp->new_metrics->v[2] = vec_subs(vp->new_metrics->v[2],scale0); 198 vp->new_metrics->v[3] = vec_subs(vp->new_metrics->v[3],scale0); 199 } 200 d++; 201 /* Swap pointers to old and new metrics */ 202 tmp = vp->old_metrics; 203 vp->old_metrics = vp->new_metrics; 204 vp->new_metrics = tmp; 205 } 206 vp->dp = d; 207 208 return 0; 209} 210 211