1//******************************************************************************
2//*
3//* Copyright (C) 2015 The Android Open Source Project
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************
18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19//*/
20
21//******************************************************************************
22//*
23//*
24//* @brief
//*  This file contains definitions of routines for variance calculation
26//*
27//* @author
28//*  Ittiam
29//*
30//* @par List of Functions:
31//*  - icv_variance_8x4_av8()
32//*
33//* @remarks
34//*  None
35//*
36//*******************************************************************************
37
38
//******************************************************************************
//*
//*  @brief Computes the variance of an 8x4 block of 8-bit samples
//*
//*
//*  @par   Description
//*   This function loads four rows of eight bytes, accumulates Sum(x) and
//*   Sum(x * x) over the 32 samples and returns
//*       (32 * Sum(x * x) - Sum(x) * Sum(x)) >> 10
//*   i.e. the variance of the block truncated to an integer.
//*
//* @param[in] pu1_src   (x0)
//*  UWORD8 pointer to the source
//*
//* @param[in] src_strd  (w1)
//*  integer source stride: number of bytes between the starts of two
//*  consecutive rows
//*
//* @param[in] wd        (w2)
//*  Width (assumed to be 8); not read by this routine
//*
//* @param[in] ht        (w3)
//*  Height (assumed to be 4); not read by this routine
//*
//* @returns
//*     variance value in x0
//*
//* @remarks
//*  Leaf routine, does not touch the stack.
//*  Modifies x0, x1, v0-v4, v6, v20, v22, v24 and v26 (all caller-saved
//*  under AAPCS64).
//*
//******************************************************************************

    .global icv_variance_8x4_av8

icv_variance_8x4_av8:

    // AAPCS64 leaves the upper 32 bits of a register holding a 32-bit
    // argument unspecified, so widen the stride before using it as a 64-bit
    // post-index offset.
    // NOTE(review): assumes src_strd is declared as a 32-bit signed integer
    // in the C prototype - confirm against the header.
    sxtw    x1,     w1

    // Load 8x4 source, one row per register
    ld1     {v0.8b},    [x0],     x1
    ld1     {v1.8b},    [x0],     x1
    ld1     {v2.8b},    [x0],     x1
    ld1     {v3.8b},    [x0]

    // Sum(values): widen to 16 bit while adding the rows column-wise
    // (each lane <= 4 * 255), then reduce across the eight lanes.
    // The total is at most 32 * 255 = 8160, so 16 bits are sufficient.
    uaddl   v4.8h,  v0.8b,  v1.8b
    uaddl   v6.8h,  v2.8b,  v3.8b
    add     v4.8h,  v4.8h,  v6.8h
    addv    h4,     v4.8h

    // SumOfSquares: 8 bit x 8 bit -> 16 bit products per row
    umull   v20.8h, v0.8b,  v0.8b
    umull   v22.8h, v1.8b,  v1.8b
    umull   v24.8h, v2.8b,  v2.8b
    umull   v26.8h, v3.8b,  v3.8b

    // Pairwise-widen the products to 32 bit and accumulate all four rows,
    // then reduce across the four lanes.
    // The total is at most 32 * 255 * 255 = 2080800, so 32 bits are sufficient.
    uaddlp  v20.4s,     v20.8h
    uadalp  v20.4s,     v22.8h
    uadalp  v20.4s,     v24.8h
    uadalp  v20.4s,     v26.8h
    addv    s20,        v20.4s

    // Sum(values); writing w0 clears the upper half of x0
    umov    w0,     v4.h[0]

    // SumOfSquares; writing w1 clears the upper half of x1
    mov     w1,     v20.s[0]

    // SumOfSquares * 8 * 4
    lsl     x1,     x1,     #5

    // SumOfSquares * 8 * 4 - SquareOfSums
    msub    x0,     x0,     x0,     x1

    // Divide by 32 * 32
    asr     x0,     x0,     #10
    ret
119