/* K=9 r=1/3 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions
 * 8-bit offset-binary soft decision samples
 * Copyright Aug 2006, Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */
64e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <stdio.h>
74e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <stdlib.h>
84e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <memory.h>
94e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include <limits.h>
104e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#include "fec.h"
114e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
124e213d510f437769f8a28578dd4f786fb7d16c4Bill Yitypedef union { unsigned char c[2][16]; vector unsigned char v[2]; } decision_t;
134e213d510f437769f8a28578dd4f786fb7d16c4Bill Yitypedef union { unsigned short s[256]; vector unsigned short v[32]; } metric_t;
144e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
154e213d510f437769f8a28578dd4f786fb7d16c4Bill Yistatic union branchtab39 { unsigned short s[128]; vector unsigned short v[16];} Branchtab39[3];
164e213d510f437769f8a28578dd4f786fb7d16c4Bill Yistatic int Init = 0;
174e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
184e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* State info for instance of Viterbi decoder */
194e213d510f437769f8a28578dd4f786fb7d16c4Bill Yistruct v39 {
204e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  metric_t metrics1; /* path metric buffer 1 */
214e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  metric_t metrics2; /* path metric buffer 2 */
224e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  void *dp;          /* Pointer to current decision */
234e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
244e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  void *decisions;   /* Beginning of decisions for block */
254e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi};
264e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
274e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Initialize Viterbi decoder for start of new frame */
284e213d510f437769f8a28578dd4f786fb7d16c4Bill Yiint init_viterbi39_av(void *p,int starting_state){
294e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  struct v39 *vp = p;
304e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  int i;
314e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
324e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  for(i=0;i<32;i++)
334e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    vp->metrics1.v[i] = (vector unsigned short)(1000);
344e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
354e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp->old_metrics = &vp->metrics1;
364e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp->new_metrics = &vp->metrics2;
374e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp->dp = vp->decisions;
384e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp->old_metrics->s[starting_state & 255] = 0; /* Bias known start state */
394e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  return 0;
404e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
414e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
424e213d510f437769f8a28578dd4f786fb7d16c4Bill Yivoid set_viterbi39_polynomial_av(int polys[3]){
434e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  int state;
444e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
454e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  for(state=0;state < 128;state++){
464e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    Branchtab39[0].s[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0;
474e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    Branchtab39[1].s[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0;
484e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    Branchtab39[2].s[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 255 : 0;
494e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
504e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  Init++;
514e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
524e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
534e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Create a new instance of a Viterbi decoder */
544e213d510f437769f8a28578dd4f786fb7d16c4Bill Yivoid *create_viterbi39_av(int len){
554e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  struct v39 *vp;
564e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
574e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  if(!Init){
584e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    int polys[3] = { V39POLYA, V39POLYB, V39POLYC };
594e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
604e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    set_viterbi39_polynomial_av(polys);
614e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
624e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp = (struct v39 *)malloc(sizeof(struct v39));
634e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp->decisions = malloc(sizeof(decision_t)*(len+8));
644e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  init_viterbi39_av(vp,0);
654e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  return vp;
664e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
674e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
684e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Viterbi chainback */
694e213d510f437769f8a28578dd4f786fb7d16c4Bill Yiint chainback_viterbi39_av(
704e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      void *p,
714e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      unsigned char *data, /* Decoded output data */
724e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      unsigned int nbits, /* Number of data bits */
734e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      unsigned int endstate){ /* Terminal encoder state */
744e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  struct v39 *vp = p;
754e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  decision_t *d = (decision_t *)vp->decisions;
764e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  int path_metric;
774e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
784e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  /* Make room beyond the end of the encoder register so we can
794e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi   * accumulate a full byte of decoded data
804e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi   */
814e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  endstate %= 256;
824e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
834e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  path_metric = vp->old_metrics->s[endstate];
844e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
854e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  /* The store into data[] only needs to be done every 8 bits.
864e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi   * But this avoids a conditional branch, and the writes will
874e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi   * combine in the cache anyway
884e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi   */
894e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  d += 8; /* Look past tail */
904e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  while(nbits-- != 0){
914e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    int k;
924e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
934e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 1 : 0;
944e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    endstate = (k << 7) | (endstate >> 1);
954e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    data[nbits>>3] = endstate;
964e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
974e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  return path_metric;
984e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
994e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1004e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi/* Delete instance of a Viterbi decoder */
1014e213d510f437769f8a28578dd4f786fb7d16c4Bill Yivoid delete_viterbi39_av(void *p){
1024e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  struct v39 *vp = p;
1034e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1044e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  if(vp != NULL){
1054e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    free(vp->decisions);
1064e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    free(vp);
1074e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
1084e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
1094e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1104e213d510f437769f8a28578dd4f786fb7d16c4Bill Yiint update_viterbi39_blk_av(void *p,unsigned char *syms,int nbits){
1114e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  struct v39 *vp = p;
1124e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  decision_t *d = (decision_t *)vp->dp;
1134e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  int path_metric = 0;
1144e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vector unsigned char decisions = (vector unsigned char)(0);
1154e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1164e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  while(nbits--){
1174e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    vector unsigned short symv,sym0v,sym1v,sym2v;
1184e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    vector unsigned char s;
1194e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    void *tmp;
1204e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    int i;
1214e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1224e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
1234e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms));
1244e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1254e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s);    /* Unsigned byte->word unpack */
1264e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    sym0v = vec_splat(symv,0);
1274e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    sym1v = vec_splat(symv,1);
1284e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    sym2v = vec_splat(symv,2);
1294e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    syms += 3;
1304e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1314e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    for(i=0;i<16;i++){
1324e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vector bool short decision0,decision1;
1334e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
1344e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1354e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Form branch metrics
1364e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
1374e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * the XOR operations constitute conditional negation.
1384e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * the metrics are in the range 0-765
1394e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       */
1404e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m0 = vec_add(vec_xor(Branchtab39[0].v[i],sym0v),vec_xor(Branchtab39[1].v[i],sym1v));
1414e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m1 = vec_xor(Branchtab39[2].v[i],sym2v);
1424e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      metric = vec_add(m0,m1);
1434e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m_metric = vec_sub((vector unsigned short)(765),metric);
1444e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1454e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Add branch metrics to path metrics */
1464e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m0 = vec_adds(vp->old_metrics->v[i],metric);
1474e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m3 = vec_adds(vp->old_metrics->v[16+i],metric);
1484e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m1 = vec_adds(vp->old_metrics->v[16+i],m_metric);
1494e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      m2 = vec_adds(vp->old_metrics->v[i],m_metric);
1504e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1514e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Compare and select */
1524e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      decision0 = vec_cmpgt(m0,m1);
1534e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      decision1 = vec_cmpgt(m2,m3);
1544e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      survivor0 = vec_min(m0,m1);
1554e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      survivor1 = vec_min(m2,m3);
1564e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1574e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Store decisions and survivors.
1584e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in
1594e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * a funny interleaved fashion that we undo in the chainback function.
1604e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       */
1614e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */
1624e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1634e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting
1644e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * 0xff is equivalent to adding 1, which sets the lsb.
1654e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       */
1664e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1)));
1674e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1684e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1);
1694e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1);
1704e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1714e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      if((i % 8) == 7){
1724e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	/* We've accumulated a total of 128 decisions, stash and start again */
1734e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */
1744e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      }
1754e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    }
1764e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#if 0
1774e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    /* Experimentally determine metric spread
1784e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * The results are fixed for a given code and input symbol size
1794e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     */
1804e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    {
1814e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      int i;
1824e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vector unsigned short min_metric;
1834e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vector unsigned short max_metric;
1844e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      union { vector unsigned short v; unsigned short s[8];} t;
1854e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      int minimum,maximum;
1864e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      static int max_spread = 0;
1874e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
1884e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      min_metric = max_metric = vp->new_metrics->v[0];
1894e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      for(i=1;i<32;i++){
1904e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	min_metric = vec_min(min_metric,vp->new_metrics->v[i]);
1914e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	max_metric = vec_max(max_metric,vp->new_metrics->v[i]);
1924e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      }
1934e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8));
1944e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8));
1954e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4));
1964e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4));
1974e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2));
1984e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2));
1994e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2004e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      t.v = min_metric;
2014e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      minimum = t.s[0];
2024e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      t.v = max_metric;
2034e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      maximum = t.s[0];
2044e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      if(maximum-minimum > max_spread){
2054e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	max_spread = maximum-minimum;
2064e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	printf("metric spread = %d\n",max_spread);
2074e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      }
2084e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    }
2094e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi#endif
2104e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2114e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    /* Renormalize if necessary. This deserves some explanation.
2124e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * The maximum possible spread, found by experiment, for 8 bit symbols is about 3825
2134e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * So by looking at one arbitrary metric we can tell if any of them have possibly saturated.
2144e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * However, this is very conservative. Large spreads occur only at very high Eb/No, where
2154e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor.
2164e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2174e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric
2184e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * by not not normalizing when we should are extremely low. So either way, the risk to performance is small.
2194e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2204e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     * All this is borne out by experiment.
2214e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi     */
2224e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    if(vp->new_metrics->s[0] >= USHRT_MAX-5000){
2234e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      vector unsigned short scale;
2244e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      union { vector unsigned short v; unsigned short s[8];} t;
2254e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2264e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Find smallest metric and splat */
2274e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      scale = vp->new_metrics->v[0];
2284e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      for(i=1;i<32;i++)
2294e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	scale = vec_min(scale,vp->new_metrics->v[i]);
2304e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2314e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      scale = vec_min(scale,vec_sld(scale,scale,8));
2324e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      scale = vec_min(scale,vec_sld(scale,scale,4));
2334e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      scale = vec_min(scale,vec_sld(scale,scale,2));
2344e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
2354e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      /* Subtract it from all metrics
2364e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       * Work backwards to try to improve the cache hit ratio, assuming LRU
2374e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi       */
2384e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      for(i=31;i>=0;i--)
2394e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi	vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale);
2404e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      t.v = scale;
2414e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      path_metric += t.s[0];
2424e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    }
2434e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    d++;
2444e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    /* Swap pointers to old and new metrics */
2454e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    tmp = vp->old_metrics;
2464e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    vp->old_metrics = vp->new_metrics;
2474e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    vp->new_metrics = tmp;
2484e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
2494e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vp->dp = d;
2504e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  return path_metric;
2514e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
252