14e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* K=7 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec instructions 24e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi * Feb 2004, Phil Karn, KA9Q 34e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi */ 44e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <stdio.h> 54e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <memory.h> 64e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <stdlib.h> 74e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include "fec.h" 84e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 94e213d510f437769f8a28578dd4f786fb7d16c4Bill Yitypedef union { long long p; unsigned char c[64]; vector bool char v[4]; } decision_t; 104e213d510f437769f8a28578dd4f786fb7d16c4Bill Yitypedef union { long long p; unsigned char c[64]; vector unsigned char v[4]; } metric_t; 114e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 124e213d510f437769f8a28578dd4f786fb7d16c4Bill Yistatic union branchtab27 { unsigned char c[32]; vector unsigned char v[2];} Branchtab27[2]; 134e213d510f437769f8a28578dd4f786fb7d16c4Bill Yistatic int Init = 0; 144e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 154e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* State info for instance of Viterbi decoder 164e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 174e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi */ 184e213d510f437769f8a28578dd4f786fb7d16c4Bill Yistruct v27 { 194e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric_t metrics1; /* path metric buffer 1 */ 204e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric_t metrics2; /* path metric buffer 2 */ 214e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision_t *dp; /* Pointer to current decision */ 224e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ 234e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision_t *decisions; /* Beginning of decisions for block */ 244e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}; 254e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 264e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Initialize Viterbi decoder for start of new frame */ 274e213d510f437769f8a28578dd4f786fb7d16c4Bill Yiint init_viterbi27_av(void *p,int starting_state){ 284e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi struct v27 *vp = p; 294e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi int i; 304e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 314e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if(p == NULL) 324e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return -1; 334e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi for(i=0;i<4;i++) 344e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->metrics1.v[i] = (vector unsigned char)(63); 354e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->old_metrics = &vp->metrics1; 364e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics = &vp->metrics2; 374e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->dp = vp->decisions; 384e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ 394e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return 0; 404e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi} 414e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 424e213d510f437769f8a28578dd4f786fb7d16c4Bill Yivoid set_viterbi27_polynomial_av(int polys[2]){ 434e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi int state; 444e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 454e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi for(state=0;state < 32;state++){ 464e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi Branchtab27[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; 474e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi Branchtab27[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; 484e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 494e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi Init++; 504e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi} 514e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 524e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Create a new instance of a Viterbi decoder */ 534e213d510f437769f8a28578dd4f786fb7d16c4Bill Yivoid *create_viterbi27_av(int len){ 544e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi struct v27 *vp; 554e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 564e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if(!Init){ 574e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi int polys[2] = { V27POLYA,V27POLYB }; 584e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi set_viterbi27_polynomial_av(polys); 594e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 604e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if((vp = (struct v27 *)malloc(sizeof(struct v27))) == NULL) 614e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return NULL; 624e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if((vp->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t))) == NULL){ 634e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi free(vp); 644e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return NULL; 654e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 664e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi init_viterbi27_av(vp,0); 674e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return vp; 684e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi} 694e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 704e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Viterbi chainback */ 714e213d510f437769f8a28578dd4f786fb7d16c4Bill Yiint chainback_viterbi27_av( 724e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi void *p, 734e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi unsigned char *data, /* Decoded output data */ 744e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi unsigned int nbits, /* Number of data bits */ 754e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi unsigned int endstate){ /* Terminal encoder state */ 764e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi struct v27 *vp = p; 774e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision_t *d = (decision_t *)vp->decisions; 784e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 794e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if(p == NULL) 804e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return -1; 814e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 824e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Make room beyond the end of the encoder register so we can 834e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi * accumulate a full byte of decoded data 844e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi */ 854e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi endstate %= 64; 864e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi endstate <<= 2; 874e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 884e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* The store into data[] only needs to be done every 8 bits. 894e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi * But this avoids a conditional branch, and the writes will 904e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi * combine in the cache anyway 914e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi */ 924e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d += 6; /* Look past tail */ 934e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi while(nbits-- != 0){ 944e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi int k; 954e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 964e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi k = d[nbits].c[endstate>>2] & 1; 974e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); 984e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 994e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return 0; 1004e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi} 1014e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1024e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Delete instance of a Viterbi decoder */ 1034e213d510f437769f8a28578dd4f786fb7d16c4Bill Yivoid delete_viterbi27_av(void *p){ 1044e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi struct v27 *vp = p; 1054e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1064e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if(vp != NULL){ 1074e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi free(vp->decisions); 1084e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi free(vp); 1094e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 1104e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi} 1114e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1124e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Process received symbols */ 1134e213d510f437769f8a28578dd4f786fb7d16c4Bill Yiint update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits){ 1144e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi struct v27 *vp = p; 1154e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision_t *d; 1164e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1174e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if(p == NULL) 1184e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return -1; 1194e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d = (decision_t *)vp->dp; 1204e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi while(nbits--){ 1214e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vector unsigned char survivor0,survivor1,sym0v,sym1v; 1224e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vector bool char decision0,decision1; 1234e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vector unsigned char metric,m_metric,m0,m1,m2,m3; 1244e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi void *tmp; 1254e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1264e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* sym0v.0 = syms[0]; sym0v.1 = syms[1] */ 1274e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi sym0v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms)); 1284e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1294e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi sym1v = vec_splat(sym0v,1); /* Splat syms[1] across sym1v */ 1304e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi sym0v = vec_splat(sym0v,0); /* Splat syms[0] across sym0v */ 1314e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi syms += 2; 1324e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1334e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Do the 32 butterflies as two interleaved groups of 16 each to keep the pipes full */ 1344e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1354e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Form first set of 16 branch metrics */ 1364e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric = vec_avg(vec_xor(Branchtab27[0].v[0],sym0v),vec_xor(Branchtab27[1].v[0],sym1v)); 1374e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric = vec_sr(metric,(vector unsigned char)(3)); 1384e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m_metric = vec_sub((vector unsigned char)(31),metric); 1394e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1404e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Form first set of path metrics */ 1414e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m0 = vec_adds(vp->old_metrics->v[0],metric); 1424e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m3 = vec_adds(vp->old_metrics->v[2],metric); 1434e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m1 = vec_adds(vp->old_metrics->v[2],m_metric); 1444e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m2 = vec_adds(vp->old_metrics->v[0],m_metric); 1454e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1464e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Form second set of 16 branch metrics */ 1474e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric = vec_avg(vec_xor(Branchtab27[0].v[1],sym0v),vec_xor(Branchtab27[1].v[1],sym1v)); 1484e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi metric = vec_sr(metric,(vector unsigned char)(3)); 1494e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m_metric = vec_sub((vector unsigned char)(31),metric); 1504e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1514e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Compare and select first set */ 1524e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision0 = vec_cmpgt(m0,m1); 1534e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision1 = vec_cmpgt(m2,m3); 1544e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi survivor0 = vec_min(m0,m1); 1554e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi survivor1 = vec_min(m2,m3); 1564e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1574e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Compute second set of path metrics */ 1584e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m0 = vec_adds(vp->old_metrics->v[1],metric); 1594e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m3 = vec_adds(vp->old_metrics->v[3],metric); 1604e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m1 = vec_adds(vp->old_metrics->v[3],m_metric); 1614e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi m2 = vec_adds(vp->old_metrics->v[1],m_metric); 1624e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1634e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Interleave and store first decisions and survivors */ 1644e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d->v[0] = vec_mergeh(decision0,decision1); 1654e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d->v[1] = vec_mergel(decision0,decision1); 1664e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[0] = vec_mergeh(survivor0,survivor1); 1674e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[1] = vec_mergel(survivor0,survivor1); 1684e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1694e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Compare and select second set */ 1704e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision0 = vec_cmpgt(m0,m1); 1714e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi decision1 = vec_cmpgt(m2,m3); 1724e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi survivor0 = vec_min(m0,m1); 1734e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi survivor1 = vec_min(m2,m3); 1744e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1754e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Interleave and store second set of decisions and survivors */ 1764e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d->v[2] = vec_mergeh(decision0,decision1); 1774e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d->v[3] = vec_mergel(decision0,decision1); 1784e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[2] = vec_mergeh(survivor0,survivor1); 1794e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[3] = vec_mergel(survivor0,survivor1); 1804e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1814e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* renormalize if necessary */ 1824e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi if(vp->new_metrics->c[0] >= 105){ 1834e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vector unsigned char scale0,scale1; 1844e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1854e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Find smallest metric and splat */ 1864e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale0 = vec_min(vp->new_metrics->v[0],vp->new_metrics->v[1]); 1874e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale1 = vec_min(vp->new_metrics->v[2],vp->new_metrics->v[3]); 1884e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale0 = vec_min(scale0,scale1); 1894e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale0 = vec_min(scale0,vec_sld(scale0,scale0,8)); 1904e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale0 = vec_min(scale0,vec_sld(scale0,scale0,4)); 1914e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale0 = vec_min(scale0,vec_sld(scale0,scale0,2)); 1924e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi scale0 = vec_min(scale0,vec_sld(scale0,scale0,1)); 1934e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 1944e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Now subtract from all metrics */ 1954e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[0] = vec_subs(vp->new_metrics->v[0],scale0); 1964e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[1] = vec_subs(vp->new_metrics->v[1],scale0); 1974e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[2] = vec_subs(vp->new_metrics->v[2],scale0); 1984e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics->v[3] = vec_subs(vp->new_metrics->v[3],scale0); 1994e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 2004e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi d++; 2014e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi /* Swap pointers to old and new metrics */ 2024e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi tmp = vp->old_metrics; 2034e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->old_metrics = vp->new_metrics; 2044e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->new_metrics = tmp; 2054e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi } 2064e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi vp->dp = d; 2074e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 2084e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi return 0; 2094e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi} 2104e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi 211