/* Return the largest absolute value of a vector of signed shorts

 * This is the Altivec SIMD version.

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */

#include <stdint.h>

#include "fec.h"

114e213d510f437769f8a28578dd4f786fb7d16c4Bill Yisigned short peakval_av(signed short *in,int cnt){
124e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vector signed short x;
134e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  int pad;
144e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  union { vector signed char cv; vector signed short hv; signed short s[8]; signed char c[16];} s;
154e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  vector signed short smallest,largest;
164e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
174e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  smallest = (vector signed short)(0);
184e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  largest = (vector signed short)(0);
194e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  if((pad = (int)in & 15)!=0){
204e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    /* Load unaligned leading word */
214e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in));
224e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */
234e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      s.c[15] = (8-cnt)<<4;
244e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi      x = vec_sro(x,s.cv);
254e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    }
264e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    smallest = vec_min(smallest,x);
274e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    largest = vec_max(largest,x);
284e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    in += 8-pad/2;
294e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    cnt -= 8-pad/2;
304e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
314e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  /* Everything is now aligned, rip through most of the block */
324e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  while(cnt >= 8){
334e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    x = vec_ld(0,in);
344e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    smallest = vec_min(smallest,x);
354e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    largest = vec_max(largest,x);
364e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    in += 8;
374e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    cnt -= 8;
384e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
394e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  /* Handle trailing fragment, if any */
404e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  if(cnt > 0){
414e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    x = vec_ld(0,in);
424e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    s.c[15] = (8-cnt)<<4;
434e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    x = vec_sro(x,s.cv);
444e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    smallest = vec_min(smallest,x);
454e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi    largest = vec_max(largest,x);
464e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  }
474e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  /* Combine and extract result */
484e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  largest = vec_max(largest,vec_abs(smallest));
494e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
504e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  s.c[15] = 64; /* Shift right four 16-bit words */
514e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  largest = vec_max(largest,vec_sro(largest,s.cv));
524e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
534e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  s.c[15] = 32; /* Shift right two 16-bit words */
544e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  largest = vec_max(largest,vec_sro(largest,s.cv));
554e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
564e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  s.c[15] = 16; /* Shift right one 16-bit word */
574e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  largest = vec_max(largest,vec_sro(largest,s.cv));
584e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi
594e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  s.hv = largest;
604e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi  return s.s[7];
614e213d510f437769f8a28578dd4f786fb7d16c4Bill Yi}
62