1307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh#include <arm_neon.h> 2307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh 3307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsiehstruct Matrix43 { 4307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh float32x4_t row0; 5307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh float32x4_t row1; 6307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh float32x4_t row2; 7307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh float32x4_t row3; 8307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh}; 9307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh 10307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh__attribute__((always_inline)) inline Matrix43 operator*(const Matrix43& m1, const Matrix43& m2) { 11307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh Matrix43 rr; 12307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row0 = vmulq_n_f32( m2.row0, vgetq_lane_f32(m1.row0, 0)); 13307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row0 = vmlaq_n_f32(rr.row0, m2.row1, vgetq_lane_f32(m1.row0, 1)); 14307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row0 = vmlaq_n_f32(rr.row0, m2.row2, vgetq_lane_f32(m1.row0, 2)); 15307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh 16307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row1 = vmulq_n_f32( m2.row0, vgetq_lane_f32(m1.row1, 0)); 17307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row1 = vmlaq_n_f32(rr.row1, m2.row1, vgetq_lane_f32(m1.row1, 1)); 18307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row1 = vmlaq_n_f32(rr.row1, m2.row2, vgetq_lane_f32(m1.row1, 2)); 19307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh 20307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row2 = vmulq_n_f32( m2.row0, vgetq_lane_f32(m1.row2, 0)); 21307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row2 = vmlaq_n_f32(rr.row2, m2.row1, vgetq_lane_f32(m1.row2, 1)); 22307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row2 = vmlaq_n_f32(rr.row2, m2.row2, vgetq_lane_f32(m1.row2, 2)); 23307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh 24307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row3 = vmlaq_n_f32(m2.row3, m2.row0, vgetq_lane_f32(m1.row3, 0)); 25307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row3 = vmlaq_n_f32(rr.row3, m2.row1, vgetq_lane_f32(m1.row3, 1)); 26307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh rr.row3 = vmlaq_n_f32(rr.row3, m2.row2, vgetq_lane_f32(m1.row3, 2)); 27307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh return rr; 28307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh} 29307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh 30307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsiehvoid _f_with_internal_compiler_error(const Matrix43& m, const void* a1, const void* a2) { 31307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh m * m * m; 32307d839f4084d7ae5c56dff5487956ad0cad2bcbAndrew Hsieh} 33