1c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <typeinfo> 2c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <iostream> 3c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include <Eigen/Core> 4c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#include "BenchTimer.h" 5c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathusing namespace Eigen; 6c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathusing namespace std; 7c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 8c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 92b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) 10c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 11c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return v.norm(); 12c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 13c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 14c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) 162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return v.stableNorm(); 182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangtemplate<typename T> 212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) 22c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 23c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return v.hypotNorm(); 24c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 25c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 26c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangEIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) 28c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 29c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return v.blueNorm(); 30c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 31c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 32c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 33c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) 34c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 35c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename T::Scalar Scalar; 36c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int n = v.size(); 37c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar scale = 0; 38c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar ssq = 1; 39c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0;i<n;++i) 40c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Scalar ax = std::abs(v.coeff(i)); 42c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (scale >= ax) 43c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ssq += numext::abs2(ax/scale); 45c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 46c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 47c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ssq = Scalar(1) + ssq * numext::abs2(scale/ax); 49c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath scale = ax; 50c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 51c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return scale * std::sqrt(ssq); 53c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 54c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 55c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 56c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) 57c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 58c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename T::Scalar Scalar; 592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Scalar s = v.array().abs().maxCoeff(); 60c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return s*(v/s).norm(); 61c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 62c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 63c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 64c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) 65c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 66c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return v.stableNorm(); 67c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 68c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 69c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 70c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) 71c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 72c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int n =v.size() / 2; 73c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0;i<n;++i) 74c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1); 75c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath n = n/2; 76c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath while (n>0) 77c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 78c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0;i<n;++i) 79c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath v(i) = v(2*i) + v(2*i+1); 80c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath n = n/2; 81c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return std::sqrt(v(0)); 83c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 84c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangnamespace Eigen { 862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangnamespace internal { 87c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#ifdef EIGEN_VECTORIZE 882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangPacket4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); } 892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangPacket2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); } 90c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangPacket4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); } 922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao WangPacket2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); } 93c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#endif 942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 96c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 97c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathtemplate<typename T> 98c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan KamathEIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) 99c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 100c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #ifndef EIGEN_VECTORIZE 101c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return v.blueNorm(); 102c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #else 103c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename T::Scalar Scalar; 104c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 105c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static int nmax = 0; 106c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; 107c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int n; 108c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 109c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(nmax <= 0) 110c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 111c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int nbig, ibeta, it, iemin, iemax, iexp; 112c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar abig, eps; 113c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 114c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath nbig = std::numeric_limits<int>::max(); // largest integer 115c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath ibeta = std::numeric_limits<Scalar>::radix; //NumTraits<Scalar>::Base; // base for floating-point numbers 116c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath it = std::numeric_limits<Scalar>::digits; //NumTraits<Scalar>::Mantissa; // number of base-beta digits in mantissa 117c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath iemin = std::numeric_limits<Scalar>::min_exponent; // minimum exponent 118c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath iemax = std::numeric_limits<Scalar>::max_exponent; // maximum exponent 119c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath rbig = std::numeric_limits<Scalar>::max(); // largest floating-point number 120c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 121c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath // Check the basic machine-dependent constants. 122c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) 123c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath || (it<=4 && ibeta <= 3 ) || it<2) 124c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 125c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(false && "the algorithm cannot be guaranteed on this computer"); 126c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 127c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath iexp = -((1-iemin)/2); 128c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b1 = std::pow(ibeta, iexp); // lower boundary of midrange 129c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath iexp = (iemax + 1 - it)/2; 130c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath b2 = std::pow(ibeta,iexp); // upper boundary of midrange 131c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 132c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath iexp = (2-iemin)/2; 133c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s1m = std::pow(ibeta,iexp); // scaling factor for lower range 134c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath iexp = - ((iemax+it)/2); 135c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath s2m = std::pow(ibeta,iexp); // scaling factor for upper range 136c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 137c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath overfl = rbig*s2m; // overfow boundary for abig 138c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eps = std::pow(ibeta, 1-it); 1392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang relerr = std::sqrt(eps); // tolerance for neglecting asml 140c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath abig = 1.0/eps - 1.0; 141c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (Scalar(nbig)>abig) nmax = abig; // largest safe n 142c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else nmax = nbig; 143c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 144c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 145c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath typedef typename internal::packet_traits<Scalar>::type Packet; 146c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath const int ps = internal::packet_traits<Scalar>::size; 1472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet pasml = internal::pset1<Packet>(Scalar(0)); 1482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet pamed = internal::pset1<Packet>(Scalar(0)); 1492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet pabig = internal::pset1<Packet>(Scalar(0)); 1502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet ps2m = internal::pset1<Packet>(s2m); 1512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet ps1m = internal::pset1<Packet>(s1m); 1522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet pb2 = internal::pset1<Packet>(b2); 1532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang Packet pb1 = internal::pset1<Packet>(b1); 154c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for(int j=0; j<v.size(); j+=ps) 155c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 156c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ax = internal::pabs(v.template packet<Aligned>(j)); 157c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ax_s2m = internal::pmul(ax,ps2m); 158c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet ax_s1m = internal::pmul(ax,ps1m); 159c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet maskBig = internal::plt(pb2,ax); 160c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Packet maskSml = internal::plt(ax,pb1); 161c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 162c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Packet maskMed = internal::pand(maskSml,maskBig); 163c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// Packet scale = internal::pset1(Scalar(0)); 164c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// scale = internal::por(scale, internal::pand(maskBig,ps2m)); 165c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// scale = internal::por(scale, internal::pand(maskSml,ps1m)); 166c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed)); 167c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// ax = internal::pmul(ax,scale); 168c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// ax = internal::pmul(ax,ax); 169c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// pabig = internal::padd(pabig, internal::pand(maskBig, ax)); 170c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// pasml = internal::padd(pasml, internal::pand(maskSml, ax)); 171c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed)); 172c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 173c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 174c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m))); 175c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m))); 176c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig))); 177c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 178c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar abig = internal::predux(pabig); 179c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar asml = internal::predux(pasml); 180c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Scalar amed = internal::predux(pamed); 181c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(abig > Scalar(0)) 182c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 1832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang abig = std::sqrt(abig); 184c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(abig > overfl) 185c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 186c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath eigen_assert(false && "overflow"); 187c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return rbig; 188c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 189c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(amed > Scalar(0)) 190c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 191c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath abig = abig/s2m; 1922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang amed = std::sqrt(amed); 193c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 194c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 195c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 196c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return abig/s2m; 197c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 198c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 199c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 200c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else if(asml > Scalar(0)) 201c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 202c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if (amed > Scalar(0)) 203c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang abig = std::sqrt(amed); 2052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang amed = std::sqrt(asml) / s1m; 206c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 207c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 208c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return std::sqrt(asml)/s1m; 210c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 211c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 212c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 213c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return std::sqrt(amed); 215c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 216c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath asml = std::min(abig, amed); 217c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath abig = std::max(abig, amed); 218c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath if(asml <= abig*relerr) 219c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath return abig; 220c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath else 2212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig)); 222c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath #endif 223c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 224c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 225c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath#define BENCH_PERF(NRM) { \ 2262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float af = 0; double ad = 0; std::complex<float> ac = 0; \ 227c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\ 228c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<tries; ++k) { \ 229c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath tf.start(); \ 2302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (int i=0; i<iters; ++i) { af += NRM(vf); } \ 231c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath tf.stop(); \ 232c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 233c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<tries; ++k) { \ 234c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath td.start(); \ 2352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (int i=0; i<iters; ++i) { ad += NRM(vd); } \ 236c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath td.stop(); \ 237c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } \ 2382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang /*for (int k=0; k<std::max(1,tries/3); ++k) { \ 239c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath tcf.start(); \ 2402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \ 241c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath tcf.stop(); \ 2422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } */\ 243c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \ 244c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 245c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 246c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_accuracy(double basef, double based, int s) 247c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 2482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang double yf = basef * std::abs(internal::random<double>()); 2492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang double yd = based * std::abs(internal::random<double>()); 250c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXf vf = VectorXf::Ones(s) * yf; 251c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXd vd = VectorXd::Ones(s) * yd; 252c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 2532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n"; 254c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; 255c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; 256c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n"; 257c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n"; 258c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n"; 259c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n"; 260c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n"; 261c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 262c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 263c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathvoid check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) 264c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 265c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXf vf(s); 266c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXd vd(s); 267c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int i=0; i<s; ++i) 268c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 2692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1)); 2702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1)); 271c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 272c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 273c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; 274c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n"; 275c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n"; 276c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n"; 277c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n"; 278c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n"; 279c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n"; 280c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n"; 281c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 282c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 283c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamathint main(int argc, char** argv) 284c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath{ 285c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int tries = 10; 286c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int iters = 100000; 287c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double y = 1.1345743233455785456788e12 * internal::random<double>(); 288c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXf v = VectorXf::Ones(1024) * y; 289c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 290c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath// return 0; 291c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int s = 10000; 292c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double basef_ok = 1.1345743233455785456788e15; 293c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double based_ok = 1.1345743233455785456788e95; 294c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 295c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double basef_under = 1.1345743233455785456788e-27; 296c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double based_under = 1.1345743233455785456788e-303; 297c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 298c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double basef_over = 1.1345743233455785456788e+27; 299c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath double based_over = 1.1345743233455785456788e+302; 300c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 301c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout.precision(20); 302c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 303c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cerr << "\nNo under/overflow:\n"; 304c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_accuracy(basef_ok, based_ok, s); 305c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 306c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cerr << "\nUnderflow:\n"; 307c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_accuracy(basef_under, based_under, s); 308c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 309c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cerr << "\nOverflow:\n"; 310c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_accuracy(basef_over, based_over, s); 311c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 312c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cerr << "\nVarying (over):\n"; 313c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<1; ++k) 314c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 315c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_accuracy_var(20,27,190,302,s); 316c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "\n"; 317c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 318c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 319c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cerr << "\nVarying (under):\n"; 320c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath for (int k=0; k<1; ++k) 321c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 322c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath check_accuracy_var(-27,20,-302,-190,s); 323c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout << "\n"; 324c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 325c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang y = 1; 327c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath std::cout.precision(4); 3282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang int s1 = 1024*1024*32; 3292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cerr << "Performance (out of cache, " << s1 << "):\n"; 330c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 331c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int iters = 1; 3322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang VectorXf vf = VectorXf::Random(s1) * y; 3332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang VectorXd vd = VectorXd::Random(s1) * y; 3342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang VectorXcf vcf = VectorXcf::Random(s1) * y; 335c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BENCH_PERF(sqsumNorm); 3362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(stableNorm); 337c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BENCH_PERF(blueNorm); 3382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(pblueNorm); 3392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(lapackNorm); 3402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(hypotNorm); 3412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(twopassNorm); 342c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BENCH_PERF(bl2passNorm); 343c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 344c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath 3452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang std::cerr << "\nPerformance (in cache, " << 512 << "):\n"; 346c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath { 347c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath int iters = 100000; 348c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXf vf = VectorXf::Random(512) * y; 349c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXd vd = VectorXd::Random(512) * y; 350c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath VectorXcf vcf = VectorXcf::Random(512) * y; 351c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BENCH_PERF(sqsumNorm); 3522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(stableNorm); 353c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BENCH_PERF(blueNorm); 3542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(pblueNorm); 3552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(lapackNorm); 3562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(hypotNorm); 3572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang BENCH_PERF(twopassNorm); 358c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath BENCH_PERF(bl2passNorm); 359c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath } 360c981c48f5bc9aefeffc0bcb0cc3934c2fae179ddNarayan Kamath} 361