1964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * Use of this source code is governed by a BSD-style license that can be
3964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * found in the LICENSE file.
4964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang */
5964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
6964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Copyright (C) 2011 Google Inc. All rights reserved.
7964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * Use of this source code is governed by a BSD-style license that can be
8964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * found in the LICENSE.WEBKIT file.
9964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang */
10964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
11964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang#include <stdio.h>
12964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang#include <stdlib.h>
13964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang#include <string.h>
14964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
15964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang#include "drc_math.h"
16964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang#include "drc_kernel.h"
17964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
/* Capacity, in frames, of each per-channel pre-delay buffer allocated by
 * dk_init(). dk_init() asserts at compile time that it is a power of two. */
#define MAX_PRE_DELAY_FRAMES 1024
/* MAX_PRE_DELAY_FRAMES - 1: all low bits set, since the size is a power of
 * two. Presumably used to wrap buffer indices -- the users are not in this
 * part of the file. */
#define MAX_PRE_DELAY_FRAMES_MASK (MAX_PRE_DELAY_FRAMES - 1)
/* Pre-delay length, in frames, that dk_init() starts with. */
#define DEFAULT_PRE_DELAY_FRAMES 256
/* Frames per division. set_pre_delay_time() rounds the pre-delay length down
 * to a multiple of this and never lets it drop below one division. */
#define DIVISION_FRAMES 32
/* DIVISION_FRAMES - 1: the low bits cleared by set_pre_delay_time(). */
#define DIVISION_FRAMES_MASK (DIVISION_FRAMES - 1)

/* Compile-time assertion: when e is false the array type gets size -1, which
 * the compiler rejects. The expression is never evaluated at run time. */
#define assert_on_compile(e) ((void)sizeof(char[1 - 2 * !(e)]))
/* Compile-time assertion that n is a non-zero power of two. */
#define assert_on_compile_is_power_of_2(n) \
	assert_on_compile((n) != 0 && (((n) & ((n) - 1)) == 0))

/* Placeholder that dk_init() stores in the static-curve fields (ratio, slope,
 * thresholds, knee, K) before any parameters have been set. */
const float uninitialized_value = -1;
/* Non-zero once dk_init() has called drc_math_init(). */
static int drc_math_initialized;
30964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
31964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changvoid dk_init(struct drc_kernel *dk, float sample_rate)
32964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
33964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int i;
34964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
35964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (!drc_math_initialized) {
36964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		drc_math_initialized = 1;
37964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		drc_math_init();
38964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
39964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
40964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->sample_rate = sample_rate;
41964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->detector_average = 0;
42964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->compressor_gain = 1;
43964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->enabled = 0;
44964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->processed = 0;
45964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->last_pre_delay_frames = DEFAULT_PRE_DELAY_FRAMES;
46964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->pre_delay_read_index = 0;
47964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->pre_delay_write_index = DEFAULT_PRE_DELAY_FRAMES;
48964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->max_attack_compression_diff_db = -INFINITY;
49964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->ratio = uninitialized_value;
50964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->slope = uninitialized_value;
51964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->linear_threshold = uninitialized_value;
52964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->db_threshold = uninitialized_value;
53964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->db_knee = uninitialized_value;
54964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->knee_threshold = uninitialized_value;
55964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->ratio_base = uninitialized_value;
56964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->K = uninitialized_value;
57964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
58d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	assert_on_compile_is_power_of_2(DIVISION_FRAMES);
59d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	assert_on_compile(DIVISION_FRAMES % 4 == 0);
60964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Allocate predelay buffers */
61964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	assert_on_compile_is_power_of_2(MAX_PRE_DELAY_FRAMES);
62964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	for (i = 0; i < DRC_NUM_CHANNELS; i++) {
63964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		size_t size = sizeof(float) * MAX_PRE_DELAY_FRAMES;
64964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->pre_delay_buffers[i] = (float *)calloc(1, size);
65964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
66964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
67964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
68964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changvoid dk_free(struct drc_kernel *dk)
69964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
70964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int i;
71964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	for (i = 0; i < DRC_NUM_CHANNELS; ++i)
72964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		free(dk->pre_delay_buffers[i]);
73964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
74964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
75964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Sets the pre-delay (lookahead) buffer size */
76964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic void set_pre_delay_time(struct drc_kernel *dk, float pre_delay_time)
77964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
78964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int i;
79964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Re-configure look-ahead section pre-delay if delay time has
80964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * changed. */
81964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	unsigned pre_delay_frames = pre_delay_time * dk->sample_rate;
827c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	pre_delay_frames = min(pre_delay_frames, MAX_PRE_DELAY_FRAMES - 1);
837c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang
847c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	/* Make pre_delay_frames multiplies of DIVISION_FRAMES. This way we
857c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	 * won't split a division of samples into two blocks of memory, so it is
867c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	 * easier to process. This may make the actual delay time slightly less
877c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	 * than the specified value, but the difference is less than 1ms. */
887c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	pre_delay_frames &= ~DIVISION_FRAMES_MASK;
897c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang
907c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	/* We need at least one division buffer, so the incoming data won't
917c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	 * overwrite the output data */
927c9b3e27c33588ad3519294377783d3a87fda842Chih-Chung Chang	pre_delay_frames = max(pre_delay_frames, DIVISION_FRAMES);
93964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
94964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (dk->last_pre_delay_frames != pre_delay_frames) {
95964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->last_pre_delay_frames = pre_delay_frames;
96964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		for (i = 0; i < DRC_NUM_CHANNELS; ++i) {
97964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			size_t size = sizeof(float) * MAX_PRE_DELAY_FRAMES;
98964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			memset(dk->pre_delay_buffers[i], 0, size);
99964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		}
100964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
101964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->pre_delay_read_index = 0;
102964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->pre_delay_write_index = pre_delay_frames;
103964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
104964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
105964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
106964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Exponential curve for the knee.  It is 1st derivative matched at
107964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * dk->linear_threshold and asymptotically approaches the value
108964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * dk->linear_threshold + 1 / k.
109964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang *
110964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * This is used only when calculating the static curve, not used when actually
111964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * compress the input data (knee_curveK below is used instead).
112964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang */
113964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic float knee_curve(struct drc_kernel *dk, float x, float k)
114964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
115964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Linear up to threshold. */
116964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (x < dk->linear_threshold)
117964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		return x;
118964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
119964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	return dk->linear_threshold +
120964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		(1 - knee_expf(-k * (x - dk->linear_threshold))) / k;
121964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
122964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
123964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Approximate 1st derivative with input and output expressed in dB.  This slope
124964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * is equal to the inverse of the compression "ratio".  In other words, a
125964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * compression ratio of 20 would be a slope of 1/20.
126964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang */
127964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic float slope_at(struct drc_kernel *dk, float x, float k)
128964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
129964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (x < dk->linear_threshold)
130964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		return 1;
131964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
132964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float x2 = x * 1.001;
133964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
134964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float x_db = linear_to_decibels(x);
135964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float x2Db = linear_to_decibels(x2);
136964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
137964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y_db = linear_to_decibels(knee_curve(dk, x, k));
138964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y2Db = linear_to_decibels(knee_curve(dk, x2, k));
139964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
140964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float m = (y2Db - y_db) / (x2Db - x_db);
141964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
142964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	return m;
143964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
144964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
145964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic float k_at_slope(struct drc_kernel *dk, float desired_slope)
146964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
147964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float x_db = dk->db_threshold + dk->db_knee;
148964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float x = decibels_to_linear(x_db);
149964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
150964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Approximate k given initial values. */
151964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float minK = 0.1;
152964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float maxK = 10000;
153964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float k = 5;
154964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int i;
155964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
156964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	for (i = 0; i < 15; ++i) {
157964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* A high value for k will more quickly asymptotically approach
158964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * a slope of 0. */
159964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float slope = slope_at(dk, x, k);
160964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
161964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		if (slope < desired_slope) {
162964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			/* k is too high. */
163964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			maxK = k;
164964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		} else {
165964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			/* k is too low. */
166964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			minK = k;
167964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		}
168964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
169964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Re-calculate based on geometric mean. */
170964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		k = sqrtf(minK * maxK);
171964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
172964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
173964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	return k;
174964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
175964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
176964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic void update_static_curve_parameters(struct drc_kernel *dk,
177964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang					   float db_threshold,
178964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang					   float db_knee, float ratio)
179964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
180964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (db_threshold != dk->db_threshold || db_knee != dk->db_knee ||
181964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	    ratio != dk->ratio) {
182964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Threshold and knee. */
183964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->db_threshold = db_threshold;
184964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->linear_threshold = decibels_to_linear(db_threshold);
185964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->db_knee = db_knee;
186964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
187964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Compute knee parameters. */
188964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->ratio = ratio;
189964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->slope = 1 / dk->ratio;
190964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
191964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float k = k_at_slope(dk, 1 / dk->ratio);
192964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->K = k;
193964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* See knee_curveK() for details */
194964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->knee_alpha = dk->linear_threshold + 1 / k;
195964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->knee_beta = -expf(k * dk->linear_threshold) / k;
196964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
197964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->knee_threshold = decibels_to_linear(db_threshold + db_knee);
198964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* See volume_gain() for details */
199964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float y0 = knee_curve(dk, dk->knee_threshold, k);
200964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->ratio_base = y0 * powf(dk->knee_threshold, -dk->slope);
201964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
202964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
203964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
204964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* This is the knee part of the compression curve. Returns the output level
205964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * given the input level x. */
206964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic float knee_curveK(struct drc_kernel *dk, float x)
207964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
208964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* The formula in knee_curveK is dk->linear_threshold +
209964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * (1 - expf(-k * (x - dk->linear_threshold))) / k
210964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * which simplifies to (alpha + beta * expf(gamma))
211964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * where alpha = dk->linear_threshold + 1 / k
212964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 *	 beta = -expf(k * dk->linear_threshold) / k
213964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 *	 gamma = -k * x
214964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 */
215964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	return dk->knee_alpha + dk->knee_beta * knee_expf(-dk->K * x);
216964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
217964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
218964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Full compression curve with constant ratio after knee. Returns the ratio of
219964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * output and input signal. */
220964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic float volume_gain(struct drc_kernel *dk, float x)
221964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
222964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y;
223964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
224964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (x < dk->knee_threshold) {
225964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		if (x < dk->linear_threshold)
226964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			return 1;
227964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		y = knee_curveK(dk, x) / x;
228964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	} else {
229964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Constant ratio after knee.
230964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * log(y/y0) = s * log(x/x0)
231964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * => y = y0 * (x/x0)^s
232964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * => y = [y0 * (1/x0)^s] * x^s
233964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * => y = dk->ratio_base * x^s
234964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * => y/x = dk->ratio_base * x^(s - 1)
2356e9cc2bc1cef31a17a0f9e9fce230509c7202f9fChih-Chung Chang		 * => y/x = dk->ratio_base * e^(log(x) * (s - 1))
236964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 */
2376e9cc2bc1cef31a17a0f9e9fce230509c7202f9fChih-Chung Chang		y = dk->ratio_base * knee_expf(logf(x) * (dk->slope - 1));
238964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
239964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
240964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	return y;
241964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
242964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
243964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changvoid dk_set_parameters(struct drc_kernel *dk,
244964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float db_threshold,
245964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float db_knee,
246964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float ratio,
247964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float attack_time,
248964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float release_time,
249964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float pre_delay_time,
250964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float db_post_gain,
251964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float releaseZone1,
252964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float releaseZone2,
253964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float releaseZone3,
254964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		       float releaseZone4)
255964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
256964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float sample_rate = dk->sample_rate;
257964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
258964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	update_static_curve_parameters(dk, db_threshold, db_knee, ratio);
259964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
260964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Makeup gain. */
261964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float full_range_gain = volume_gain(dk, 1);
262964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float full_range_makeup_gain = 1 / full_range_gain;
263964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
264964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Empirical/perceptual tuning. */
265964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	full_range_makeup_gain = powf(full_range_makeup_gain, 0.6f);
266964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
267964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->master_linear_gain = decibels_to_linear(db_post_gain) *
268964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		full_range_makeup_gain;
269964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
270964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Attack parameters. */
271964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	attack_time = max(0.001f, attack_time);
272964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->attack_frames = attack_time * sample_rate;
273964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
274964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Release parameters. */
275964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float release_frames = sample_rate * release_time;
276964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
277964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Detector release time. */
278964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float sat_release_time = 0.0025f;
279964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float sat_release_frames = sat_release_time * sample_rate;
280964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->sat_release_frames_inv_neg = -1 / sat_release_frames;
2818578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang	dk->sat_release_rate_at_neg_two_db =
2828578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang		decibels_to_linear(-2 * dk->sat_release_frames_inv_neg) - 1;
283964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
284964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Create a smooth function which passes through four points.
285964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * Polynomial of the form y = a + b*x + c*x^2 + d*x^3 + e*x^4
286964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 */
287964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y1 = release_frames * releaseZone1;
288964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y2 = release_frames * releaseZone2;
289964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y3 = release_frames * releaseZone3;
290964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float y4 = release_frames * releaseZone4;
291964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
292964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* All of these coefficients were derived for 4th order polynomial curve
293964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * fitting where the y values match the evenly spaced x values as
294964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * follows: (y1 : x == 0, y2 : x == 1, y3 : x == 2, y4 : x == 3)
295964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 */
296964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->kA = 0.9999999999999998f*y1 + 1.8432219684323923e-16f*y2
297964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		- 1.9373394351676423e-16f*y3 + 8.824516011816245e-18f*y4;
298964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->kB = -1.5788320352845888f*y1 + 2.3305837032074286f*y2
299964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		- 0.9141194204840429f*y3 + 0.1623677525612032f*y4;
300964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->kC = 0.5334142869106424f*y1 - 1.272736789213631f*y2
301964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		+ 0.9258856042207512f*y3 - 0.18656310191776226f*y4;
302964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->kD = 0.08783463138207234f*y1 - 0.1694162967925622f*y2
303964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		+ 0.08588057951595272f*y3 - 0.00429891410546283f*y4;
304964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->kE = -0.042416883008123074f*y1 + 0.1115693827987602f*y2
305964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		- 0.09764676325265872f*y3 + 0.028494263462021576f*y4;
306964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
307964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* x ranges from 0 -> 3	      0	   1	2   3
308964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 *			     -15  -10  -5   0db
309964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 *
310964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * y calculates adaptive release frames depending on the amount of
311964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * compression.
312964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 */
313964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	set_pre_delay_time(dk, pre_delay_time);
314964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
315964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
/* Records the caller's enable flag in dk->enabled.
 *
 * Only the flag is stored here; no other kernel state is touched.
 *
 * Args:
 *    dk - The kernel to update.
 *    enabled - Value stored as-is (dk_init() starts it at 0).
 */
void dk_set_enabled(struct drc_kernel *dk, int enabled)
{
	dk->enabled = enabled;
}
320964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
321964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Updates the envelope_rate used for the next division */
322cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Changstatic void dk_update_envelope(struct drc_kernel *dk)
323964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
324964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float kA = dk->kA;
325964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float kB = dk->kB;
326964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float kC = dk->kC;
327964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float kD = dk->kD;
328964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float kE = dk->kE;
329964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float attack_frames = dk->attack_frames;
330964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
331964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Calculate desired gain */
332964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float desired_gain = dk->detector_average;
333964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
334964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Pre-warp so we get desired_gain after sin() warp below. */
335964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float scaled_desired_gain = warp_asinf(desired_gain);
336964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
337964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* Deal with envelopes */
338964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
339964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* envelope_rate is the rate we slew from current compressor level to
340964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * the desired level.  The exact rate depends on if we're attacking or
341964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * releasing and by how much.
342964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 */
343964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float envelope_rate;
344964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
345964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int is_releasing = scaled_desired_gain > dk->compressor_gain;
346964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
347964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	/* compression_diff_db is the difference between current compression
348964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	 * level and the desired level. */
349964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float compression_diff_db = linear_to_decibels(
350964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->compressor_gain / scaled_desired_gain);
351964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
352964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (is_releasing) {
353964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Release mode - compression_diff_db should be negative dB */
354964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->max_attack_compression_diff_db = -INFINITY;
355964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
356964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Fix gremlins. */
357964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		if (isbadf(compression_diff_db))
358964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			compression_diff_db = -1;
359964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
360964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Adaptive release - higher compression (lower
361964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * compression_diff_db) releases faster. Contain within range:
362964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * -12 -> 0 then scale to go from 0 -> 3
363964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 */
364964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float x = compression_diff_db;
365964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		x = max(-12.0f, x);
366964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		x = min(0.0f, x);
367964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		x = 0.25f * (x + 12);
368964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
369964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Compute adaptive release curve using 4th order polynomial.
370964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * Normal values for the polynomial coefficients would create a
371964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * monotonically increasing function.
372964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 */
373964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float x2 = x * x;
374964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float x3 = x2 * x;
375964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float x4 = x2 * x2;
376964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float release_frames = kA + kB * x + kC * x2 + kD * x3 +
377964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			kE * x4;
378964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
379964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang#define kSpacingDb 5
380964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float db_per_frame = kSpacingDb / release_frames;
381964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		envelope_rate = decibels_to_linear(db_per_frame);
382964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	} else {
383964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Attack mode - compression_diff_db should be positive dB */
384964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
385964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Fix gremlins. */
386964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		if (isbadf(compression_diff_db))
387964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			compression_diff_db = 1;
388964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
389964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* As long as we're still in attack mode, use a rate based off
390964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * the largest compression_diff_db we've encountered so far.
391964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 */
392964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk->max_attack_compression_diff_db = max(
393964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			dk->max_attack_compression_diff_db,
394964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			compression_diff_db);
395964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
396964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float eff_atten_diff_db =
397964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			max(0.5f, dk->max_attack_compression_diff_db);
398964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
399964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float x = 0.25f / eff_atten_diff_db;
400964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		envelope_rate = 1 - powf(x, 1 / attack_frames);
401964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
402964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
403964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->envelope_rate = envelope_rate;
404964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->scaled_desired_gain = scaled_desired_gain;
405964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
406964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
407790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang/* For a division of frames, take the absolute values of left channel and right
408790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang * channel, store the maximum of them in output. */
40997d77ef88059a56945264dda07fa7be28ccd2953Frank Barchard#if defined(__aarch64__)
/* AArch64 Advanced SIMD version of max_abs_division().
 *
 * For every i in [0, DIVISION_FRAMES) stores the larger of |data0[i]| and
 * |data1[i]| in output[i].  Four floats are handled per loop iteration.
 *
 * output - receives DIVISION_FRAMES floats.
 * data0, data1 - the two input channels, DIVISION_FRAMES floats each.
 *
 * The loop body runs once before the counter is tested, so DIVISION_FRAMES
 * has to be a non-zero multiple of 4.
 */
static inline void max_abs_division(float *output,
				    const float *data0, const float *data1)
{
	/* Number of 4-float vectors in one division. */
	int count = DIVISION_FRAMES / 4;

	__asm__ __volatile__(
		"1:                                     \n"
		/* Load four floats from each channel; the post-index form
		 * advances each pointer by 16 bytes. */
		"ld1 {v0.4s}, [%[data0]], #16           \n"
		"ld1 {v1.4s}, [%[data1]], #16           \n"
		/* Per-lane absolute value of both channels. */
		"fabs v0.4s, v0.4s                      \n"
		"fabs v1.4s, v1.4s                      \n"
		/* Per-lane maximum of the two channels. */
		"fmax v0.4s, v0.4s, v1.4s               \n"
		/* Store the four results and advance output by 16 bytes. */
		"st1 {v0.4s}, [%[output]], #16          \n"
		/* One vector done; loop until the counter reaches zero. */
		"subs %w[count], %w[count], #1          \n"
		"b.ne 1b                                \n"
		: /* output: all read-write, the asm advances the pointers and
		   * decrements the counter.  These are local copies, so the
		   * caller's pointers are not affected. */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [output]"+r"(output),
		  [count]"+r"(count)
		: /* input */
		: /* clobber: v0/v1 are used as scratch, "memory" because the
		   * asm reads and writes through the pointers, "cc" because
		   * subs updates the condition flags. */
		  "v0", "v1", "memory", "cc"
		);
}
43597d77ef88059a56945264dda07fa7be28ccd2953Frank Barchard#elif defined(__ARM_NEON__)
/* 32-bit ARM NEON version of max_abs_division().
 *
 * For every i in [0, DIVISION_FRAMES) stores the larger of |data0[i]| and
 * |data1[i]| in output[i].  Four floats are handled per loop iteration.
 *
 * output - receives DIVISION_FRAMES floats.
 * data0, data1 - the two input channels, DIVISION_FRAMES floats each.
 *
 * The loop body runs once before the counter is tested, so DIVISION_FRAMES
 * has to be a non-zero multiple of 4.
 */
static inline void max_abs_division(float *output,
				    const float *data0, const float *data1)
{
	/* Number of 4-float vectors in one division. */
	int count = DIVISION_FRAMES / 4;

	__asm__ __volatile__(
		"1:                                     \n"
		/* Load four floats from each channel; "!" writes the
		 * advanced address back to the pointer register. */
		"vld1.32 {q0}, [%[data0]]!              \n"
		"vld1.32 {q1}, [%[data1]]!              \n"
		/* Per-lane absolute value of both channels. */
		"vabs.f32 q0, q0                        \n"
		"vabs.f32 q1, q1                        \n"
		/* q0 = per-lane maximum of q0 and q1. */
		"vmax.f32 q0, q1                        \n"
		/* Store the four results and advance output. */
		"vst1.32 {q0}, [%[output]]!             \n"
		/* One vector done; loop until the counter reaches zero. */
		"subs %[count], #1                      \n"
		"bne 1b                                 \n"
		: /* output: all read-write, the asm advances the pointers and
		   * decrements the counter.  These are local copies, so the
		   * caller's pointers are not affected. */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [output]"+r"(output),
		  [count]"+r"(count)
		: /* input */
		: /* clobber: q0/q1 are used as scratch, "memory" because the
		   * asm reads and writes through the pointers, "cc" because
		   * subs updates the condition flags. */
		  "q0", "q1", "memory", "cc"
		);
}
461790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang#elif defined(__SSE3__)
462790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang#include <emmintrin.h>
/* x86 SSE version of max_abs_division() (AT&T syntax: source operand first,
 * destination last).
 *
 * For every i in [0, DIVISION_FRAMES) stores the larger of |data0[i]| and
 * |data1[i]| in output[i].  Four floats are handled per loop iteration, using
 * unaligned loads and stores.
 *
 * output - receives DIVISION_FRAMES floats.
 * data0, data1 - the two input channels, DIVISION_FRAMES floats each.
 *
 * The loop body runs once before the counter is tested, so DIVISION_FRAMES
 * has to be a non-zero multiple of 4.
 */
static inline void max_abs_division(float *output,
				    const float *data0, const float *data1)
{
	/* Scratch vector registers, allocated by the compiler. */
	__m128 x, y;
	/* Number of 4-float vectors in one division. */
	int count = DIVISION_FRAMES / 4;

	__asm__ __volatile__(
		"1:                                     \n"
		/* Unaligned 16-byte loads from each channel. */
		"lddqu (%[data0]), %[x]                 \n"
		"lddqu (%[data1]), %[y]                 \n"
		/* Clear the sign bit of every lane, i.e. take the absolute
		 * value. */
		"andps %[mask], %[x]                    \n"
		"andps %[mask], %[y]                    \n"
		/* x = per-lane maximum of x and y. */
		"maxps %[y], %[x]                       \n"
		/* Unaligned 16-byte store of the four results. */
		"movdqu %[x], (%[output])               \n"
		/* Advance all three pointers by four floats. */
		"add $16, %[data0]                      \n"
		"add $16, %[data1]                      \n"
		"add $16, %[output]                     \n"
		/* One vector done; loop until the counter reaches zero. */
		"sub $1, %[count]                       \n"
		"jnz 1b                                 \n"
		: /* output: pointers and counter are read-write because the
		   * asm updates them (local copies only).  x and y are
		   * early-clobber so they never share a register with the
		   * mask input. */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [output]"+r"(output),
		  [count]"+r"(count),
		  [x]"=&x"(x),
		  [y]"=&x"(y)
		: /* input: 0x7fffffff in every 32-bit lane keeps everything
		   * but the float sign bit. */
		  [mask]"x"(_mm_set1_epi32(0x7fffffff))
		: /* clobber: "memory" because the asm reads and writes
		   * through the pointers, "cc" because add/sub update the
		   * flags. */
		  "memory", "cc"
		);
}
495790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang#else
496f766e15817cb4104c7a69cf2a40554798f40202eFrank Barchardstatic inline void max_abs_division(float *output,
497f766e15817cb4104c7a69cf2a40554798f40202eFrank Barchard				    const float *data0, const float *data1)
498790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang{
499790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang	int i;
500790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang	for (i = 0; i < DIVISION_FRAMES; i++)
501790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang		output[i] = fmaxf(fabsf(data0[i]), fabsf(data1[i]));
502790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang}
503790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang#endif
504790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang
505cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang/* Update detector_average from the last input division. */
506cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Changstatic void dk_update_detector_average(struct drc_kernel *dk)
507964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
508790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang	float abs_input_array[DIVISION_FRAMES];
509964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	const float sat_release_frames_inv_neg = dk->sat_release_frames_inv_neg;
5108578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang	const float sat_release_rate_at_neg_two_db =
5118578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang		dk->sat_release_rate_at_neg_two_db;
512964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	float detector_average = dk->detector_average;
513cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	int div_start, i;
514cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
515cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	/* Calculate the start index of the last input division */
516cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	if (dk->pre_delay_write_index == 0) {
517cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		div_start = MAX_PRE_DELAY_FRAMES - DIVISION_FRAMES;
518cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	} else {
519cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		div_start = dk->pre_delay_write_index - DIVISION_FRAMES;
520cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	}
521964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
522790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang	/* The max abs value across all channels for this frame */
523790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang	max_abs_division(abs_input_array,
524790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang			 &dk->pre_delay_buffers[0][div_start],
525790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang			 &dk->pre_delay_buffers[1][div_start]);
526790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang
527cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	for (i = 0; i < DIVISION_FRAMES; i++) {
528790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang		/* Compute compression amount from un-delayed signal */
529790c92f332fe510b20b55c94531bd0574c16ae46Chih-Chung Chang		float abs_input = abs_input_array[i];
530964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
531964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		/* Calculate shaped power on undelayed input.  Put through
532964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * shaping curve. This is linear up to the threshold, then
533964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * enters a "knee" portion followed by the "ratio" portion. The
534964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * transition from the threshold to the knee is smooth (1st
535964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * derivative matched). The transition from the knee to the
536964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 * ratio portion is smooth (1st derivative matched).
537964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		 */
538964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		float gain = volume_gain(dk, abs_input);
539964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		int is_release = (gain > detector_average);
540964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		if (is_release) {
5418578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang			if (gain > NEG_TWO_DB) {
5428578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang				detector_average += (gain - detector_average) *
5438578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang					sat_release_rate_at_neg_two_db;
5448578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang			} else {
5458578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang				float gain_db = linear_to_decibels(gain);
5468578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang				float db_per_frame = gain_db *
5478578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang					sat_release_frames_inv_neg;
5488578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang				float sat_release_rate =
5498578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang					decibels_to_linear(db_per_frame) - 1;
5508578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang				detector_average += (gain - detector_average) *
5518578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang					sat_release_rate;
5528578fcbd8bc76e3fe4a9edca45a3aeb227e3b956Chih-Chung Chang			}
553964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		} else {
554964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang			detector_average = gain;
555964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		}
556964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
557cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		/* Fix gremlins. */
558cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		if (isbadf(detector_average))
559cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang			detector_average = 1.0f;
560cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		else
561cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang			detector_average = min(detector_average, 1.0f);
562cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	}
563cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
564cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	dk->detector_average = detector_average;
565cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang}
566cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
567cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang/* Calculate compress_gain from the envelope and apply total_gain to compress
568cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang * the next output division. */
569f766e15817cb4104c7a69cf2a40554798f40202eFrank Barchard/* TODO(fbarchard): Port to aarch64 */
570e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang#if defined(__ARM_NEON__)
571e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang#include <arm_neon.h>
/* 32-bit ARM NEON implementation of dk_compress_output().
 *
 * Scales, in place, the DIVISION_FRAMES frames of both pre-delay buffers that
 * start at dk->pre_delay_read_index, four frames per loop iteration.  For
 * each frame the compressor gain is advanced one step according to
 * dk->envelope_rate, passed through the warp_sin() polynomial
 * A1*x + A3*x^3 + A5*x^5 + A7*x^7 and multiplied by dk->master_linear_gain;
 * the product is applied to the left and the right sample of that frame.
 *
 * On return dk->compressor_gain holds the (pre-warp) gain computed for the
 * last frame of the division.
 *
 * Both loops run their body once before testing the counter, so
 * DIVISION_FRAMES has to be a non-zero multiple of 4.
 *
 * Operand modifiers used in the asm templates: %q prints the 128-bit Q
 * register of an operand, %e and %f print its low and high 64-bit D halves.
 */
static void dk_compress_output(struct drc_kernel *dk)
{
	const float master_linear_gain = dk->master_linear_gain;
	const float envelope_rate = dk->envelope_rate;
	const float scaled_desired_gain = dk->scaled_desired_gain;
	const float compressor_gain = dk->compressor_gain;
	const int div_start = dk->pre_delay_read_index;
	float *ptr_left = &dk->pre_delay_buffers[0][div_start];
	float *ptr_right = &dk->pre_delay_buffers[1][div_start];
	/* Number of 4-frame vectors in one division. */
	int count = DIVISION_FRAMES / 4;

	/* See warp_sinf() for the details for the constants. */
	const float32x4_t A7 = vdupq_n_f32(-4.3330336920917034149169921875e-3f);
	const float32x4_t A5 = vdupq_n_f32(7.9434238374233245849609375e-2f);
	const float32x4_t A3 = vdupq_n_f32(-0.645892798900604248046875f);
	const float32x4_t A1 = vdupq_n_f32(1.5707910060882568359375f);

	/* Exponential approach to desired gain. */
	if (envelope_rate < 1) {
		/* With c the distance from the desired gain and
		 * r = 1 - envelope_rate, the gain of the k-th frame
		 * (k = 1, 2, ...) is scaled_desired_gain + c * r^k.  x0 holds
		 * c * r^k for four consecutive frames; multiplying it by r^4
		 * moves on to the next four frames.
		 */
		float c = compressor_gain - scaled_desired_gain;
		float r = 1 - envelope_rate;
		float32x4_t x0 = {c*r, c*r*r, c*r*r*r, c*r*r*r*r};
		float32x4_t x, x2, x4, left, right, tmp1, tmp2;

		__asm__ __volatile(
			/* The first iteration uses x0 as initialised above,
			 * so skip the multiply at label 1. */
			"b 2f                                               \n"
			"1:                                                 \n"
			/* x0 *= r^4: advance to the next four frames. */
			"vmul.f32 %q[x0], %q[r4]                            \n"
			"2:                                                 \n"
			/* Load four left and four right samples (no pointer
			 * update yet). */
			"vld1.32 {%e[left],%f[left]}, [%[ptr_left]]         \n"
			"vld1.32 {%e[right],%f[right]}, [%[ptr_right]]      \n"
			/* x = x0 + scaled_desired_gain. */
			"vadd.f32 %q[x], %q[x0], %q[base]                   \n"
			/* Calculate warp_sin() for four values in x. */
			"vmul.f32 %q[x2], %q[x], %q[x]                      \n"
			"vmov.f32 %q[tmp1], %q[A5]                          \n"
			"vmov.f32 %q[tmp2], %q[A1]                          \n"
			"vmul.f32 %q[x4], %q[x2], %q[x2]                    \n"
			/* tmp1 = A5 + A7 * x^2 */
			"vmla.f32 %q[tmp1], %q[A7], %q[x2]                  \n"
			/* tmp2 = A1 + A3 * x^2 */
			"vmla.f32 %q[tmp2], %q[A3], %q[x2]                  \n"
			/* tmp2 += tmp1 * x^4 */
			"vmla.f32 %q[tmp2], %q[tmp1], %q[x4]                \n"
			/* tmp2 *= x */
			"vmul.f32 %q[tmp2], %q[tmp2], %q[x]                 \n"
			/* Now tmp2 contains the result of warp_sin(). */
			/* Fold in the master gain, then scale both channels. */
			"vmul.f32 %q[tmp2], %q[tmp2], %q[g]                 \n"
			"vmul.f32 %q[left], %q[tmp2]                        \n"
			"vmul.f32 %q[right], %q[tmp2]                       \n"
			/* Store back to where the samples came from; "!"
			 * advances the pointers to the next four frames. */
			"vst1.32 {%e[left],%f[left]}, [%[ptr_left]]!        \n"
			"vst1.32 {%e[right],%f[right]}, [%[ptr_right]]!     \n"
			/* One vector done; loop until the counter is zero. */
			"subs %[count], #1                                  \n"
			"bne 1b                                             \n"
			: /* output: operands 0-3 are tied to the inputs of the
			   * same variables below, making them read-write.  The
			   * remaining ones are early-clobber temporaries, so
			   * they never share a register with an input. */
			  "=r"(count),
			  "=r"(ptr_left),
			  "=r"(ptr_right),
			  "=w"(x0),
			  [x]"=&w"(x),
			  [x2]"=&w"(x2),
			  [x4]"=&w"(x4),
			  [left]"=&w"(left),
			  [right]"=&w"(right),
			  [tmp1]"=&w"(tmp1),
			  [tmp2]"=&w"(tmp2)
			: /* input: "0".."3" place each value in the register
			   * of the output operand with that number. */
			  [count]"0"(count),
			  [ptr_left]"1"(ptr_left),
			  [ptr_right]"2"(ptr_right),
			  [x0]"3"(x0),
			  [A1]"w"(A1),
			  [A3]"w"(A3),
			  [A5]"w"(A5),
			  [A7]"w"(A7),
			  [base]"w"(vdupq_n_f32(scaled_desired_gain)),
			  [r4]"w"(vdupq_n_f32(r*r*r*r)),
			  [g]"w"(vdupq_n_f32(master_linear_gain))
			: /* clobber: "memory" because the asm reads and writes
			   * the sample buffers, "cc" because subs updates the
			   * condition flags. */
			  "memory", "cc"
			);
		/* Lane 3 of x is the gain of the last frame processed. */
		dk->compressor_gain = x[3];
	} else {
		/* Here the gain is multiplied by envelope_rate once per frame.
		 * x holds the gain for four consecutive frames; every loop
		 * iteration after the first multiplies it by r^4, and every
		 * iteration limits it to at most 1.  The limited value is what
		 * gets carried into the next iteration.
		 */
		float c = compressor_gain;
		float r = envelope_rate;
		float32x4_t x = {c*r, c*r*r, c*r*r*r, c*r*r*r*r};
		float32x4_t x2, x4, left, right, tmp1, tmp2;

		__asm__ __volatile(
			/* The first iteration uses x as initialised above,
			 * so skip the multiply at label 1. */
			"b 2f                                               \n"
			"1:                                                 \n"
			/* x *= r^4: advance to the next four frames. */
			"vmul.f32 %q[x], %q[r4]                             \n"
			"2:                                                 \n"
			/* Load four left and four right samples (no pointer
			 * update yet). */
			"vld1.32 {%e[left],%f[left]}, [%[ptr_left]]         \n"
			"vld1.32 {%e[right],%f[right]}, [%[ptr_right]]      \n"
			/* x = min(x, 1) in every lane. */
			"vmin.f32 %q[x], %q[one]                            \n"
			/* Calculate warp_sin() for four values in x. */
			"vmul.f32 %q[x2], %q[x], %q[x]                      \n"
			"vmov.f32 %q[tmp1], %q[A5]                          \n"
			"vmov.f32 %q[tmp2], %q[A1]                          \n"
			"vmul.f32 %q[x4], %q[x2], %q[x2]                    \n"
			/* tmp1 = A5 + A7 * x^2 */
			"vmla.f32 %q[tmp1], %q[A7], %q[x2]                  \n"
			/* tmp2 = A1 + A3 * x^2 */
			"vmla.f32 %q[tmp2], %q[A3], %q[x2]                  \n"
			/* tmp2 += tmp1 * x^4 */
			"vmla.f32 %q[tmp2], %q[tmp1], %q[x4]                \n"
			/* tmp2 *= x */
			"vmul.f32 %q[tmp2], %q[tmp2], %q[x]                 \n"
			/* Now tmp2 contains the result of warp_sin(). */
			/* Fold in the master gain, then scale both channels. */
			"vmul.f32 %q[tmp2], %q[tmp2], %q[g]                 \n"
			"vmul.f32 %q[left], %q[tmp2]                        \n"
			"vmul.f32 %q[right], %q[tmp2]                       \n"
			/* Store back to where the samples came from; "!"
			 * advances the pointers to the next four frames. */
			"vst1.32 {%e[left],%f[left]}, [%[ptr_left]]!        \n"
			"vst1.32 {%e[right],%f[right]}, [%[ptr_right]]!     \n"
			/* One vector done; loop until the counter is zero. */
			"subs %[count], #1                                  \n"
			"bne 1b                                             \n"
			: /* output: operands 0-3 are tied to the inputs of the
			   * same variables below, making them read-write.  The
			   * remaining ones are early-clobber temporaries, so
			   * they never share a register with an input. */
			  "=r"(count),
			  "=r"(ptr_left),
			  "=r"(ptr_right),
			  "=w"(x),
			  [x2]"=&w"(x2),
			  [x4]"=&w"(x4),
			  [left]"=&w"(left),
			  [right]"=&w"(right),
			  [tmp1]"=&w"(tmp1),
			  [tmp2]"=&w"(tmp2)
			: /* input: "0".."3" place each value in the register
			   * of the output operand with that number. */
			  [count]"0"(count),
			  [ptr_left]"1"(ptr_left),
			  [ptr_right]"2"(ptr_right),
			  [x]"3"(x),
			  [A1]"w"(A1),
			  [A3]"w"(A3),
			  [A5]"w"(A5),
			  [A7]"w"(A7),
			  [one]"w"(vdupq_n_f32(1)),
			  [r4]"w"(vdupq_n_f32(r*r*r*r)),
			  [g]"w"(vdupq_n_f32(master_linear_gain))
			: /* clobber: "memory" because the asm reads and writes
			   * the sample buffers, "cc" because subs updates the
			   * condition flags. */
			  "memory", "cc"
			);
		/* Lane 3 of x is the gain of the last frame processed. */
		dk->compressor_gain = x[3];
	}
}
709e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang#elif defined(__SSE3__) && defined(__x86_64__)
710e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang#include <emmintrin.h>
711e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Changstatic void dk_compress_output(struct drc_kernel *dk)
712e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang{
713e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const float master_linear_gain = dk->master_linear_gain;
714e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const float envelope_rate = dk->envelope_rate;
715e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const float scaled_desired_gain = dk->scaled_desired_gain;
716e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const float compressor_gain = dk->compressor_gain;
717e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const int div_start = dk->pre_delay_read_index;
718e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	float *ptr_left = &dk->pre_delay_buffers[0][div_start];
719e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	float *ptr_right = &dk->pre_delay_buffers[1][div_start];
720e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	int count = DIVISION_FRAMES / 4;
721e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang
722e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	/* See warp_sinf() for the details for the constants. */
723e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const __m128 A7 = _mm_set1_ps(-4.3330336920917034149169921875e-3f);
724e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const __m128 A5 = _mm_set1_ps(7.9434238374233245849609375e-2f);
725e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const __m128 A3 = _mm_set1_ps(-0.645892798900604248046875f);
726e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	const __m128 A1 = _mm_set1_ps(1.5707910060882568359375f);
727e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang
728e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	/* Exponential approach to desired gain. */
729e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	if (envelope_rate < 1) {
730e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		float c = compressor_gain - scaled_desired_gain;
731e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		float r = 1 - envelope_rate;
732e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 x0 = {c*r, c*r*r, c*r*r*r, c*r*r*r*r};
733e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 x, x2, x4, left, right, tmp1, tmp2;
734e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang
735e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__asm__ __volatile(
736e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"jmp 2f                                     \n"
737e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"1:                                         \n"
738e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[r4], %[x0]                         \n"
739e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"2:                                         \n"
740e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"lddqu (%[ptr_left]), %[left]               \n"
741e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"lddqu (%[ptr_right]), %[right]             \n"
742e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x0], %[x]                         \n"
743e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[base], %[x]                        \n"
744e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			/* Calculate warp_sin() for four values in x. */
745e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x], %[x2]                         \n"
746e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x], %[x2]                          \n"
747e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x2], %[x4]                        \n"
748e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x2], %[tmp1]                      \n"
749e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x2], %[tmp2]                      \n"
750e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x2], %[x4]                         \n"
751e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[A7], %[tmp1]                       \n"
752e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[A3], %[tmp2]                       \n"
753e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[A5], %[tmp1]                       \n"
754e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[A1], %[tmp2]                       \n"
755e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x4], %[tmp1]                       \n"
756e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[tmp1], %[tmp2]                     \n"
757e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x], %[tmp2]                        \n"
758e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			/* Now tmp2 contains the result of warp_sin(). */
759e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[g], %[tmp2]                        \n"
760e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[tmp2], %[left]                     \n"
761e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[tmp2], %[right]                    \n"
762e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movdqu %[left], (%[ptr_left])              \n"
763e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movdqu %[right], (%[ptr_right])            \n"
764e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"add $16, %[ptr_left]                       \n"
765e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"add $16, %[ptr_right]                      \n"
766e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"sub $1, %[count]                           \n"
767e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"jne 1b                                     \n"
768e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			: /* output */
769e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=r"(count),
770e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=r"(ptr_left),
771e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=r"(ptr_right),
772e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=x"(x0),
773e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x]"=&x"(x),
774e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x2]"=&x"(x2),
775e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x4]"=&x"(x4),
776e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [left]"=&x"(left),
777e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [right]"=&x"(right),
778e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [tmp1]"=&x"(tmp1),
779e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [tmp2]"=&x"(tmp2)
780e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			: /* input */
781e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [count]"0"(count),
782e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [ptr_left]"1"(ptr_left),
783e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [ptr_right]"2"(ptr_right),
784e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x0]"3"(x0),
785e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A1]"x"(A1),
786e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A3]"x"(A3),
787e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A5]"x"(A5),
788e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A7]"x"(A7),
789e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [base]"x"(_mm_set1_ps(scaled_desired_gain)),
790e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [r4]"x"(_mm_set1_ps(r*r*r*r)),
791e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [g]"x"(_mm_set1_ps(master_linear_gain))
792e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			: /* clobber */
793e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "memory", "cc"
794e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			);
795e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		dk->compressor_gain = x[3];
796e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	} else {
797e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		/* See warp_sinf() for the details for the constants. */
798e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 A7 = _mm_set1_ps(-4.3330336920917034149169921875e-3f);
799e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 A5 = _mm_set1_ps(7.9434238374233245849609375e-2f);
800e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 A3 = _mm_set1_ps(-0.645892798900604248046875f);
801e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 A1 = _mm_set1_ps(1.5707910060882568359375f);
802e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang
803e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		float c = compressor_gain;
804e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		float r = envelope_rate;
805e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 x = {c*r, c*r*r, c*r*r*r, c*r*r*r*r};
806e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__m128 x2, x4, left, right, tmp1, tmp2;
807e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang
808e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		__asm__ __volatile(
809e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"jmp 2f                                     \n"
810e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"1:                                         \n"
811e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[r4], %[x]                          \n"
812e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"2:                                         \n"
813e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"lddqu (%[ptr_left]), %[left]               \n"
814e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"lddqu (%[ptr_right]), %[right]             \n"
815e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"minps %[one], %[x]                         \n"
816e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			/* Calculate warp_sin() for four values in x. */
817e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x], %[x2]                         \n"
818e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x], %[x2]                          \n"
819e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x2], %[x4]                        \n"
820e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x2], %[tmp1]                      \n"
821e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movaps %[x2], %[tmp2]                      \n"
822e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x2], %[x4]                         \n"
823e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[A7], %[tmp1]                       \n"
824e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[A3], %[tmp2]                       \n"
825e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[A5], %[tmp1]                       \n"
826e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[A1], %[tmp2]                       \n"
827e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x4], %[tmp1]                       \n"
828e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"addps %[tmp1], %[tmp2]                     \n"
829e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[x], %[tmp2]                        \n"
830e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			/* Now tmp2 contains the result of warp_sin(). */
831e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[g], %[tmp2]                        \n"
832e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[tmp2], %[left]                     \n"
833e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"mulps %[tmp2], %[right]                    \n"
834e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movdqu %[left], (%[ptr_left])              \n"
835e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"movdqu %[right], (%[ptr_right])            \n"
836e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"add $16, %[ptr_left]                       \n"
837e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"add $16, %[ptr_right]                      \n"
838e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"sub $1, %[count]                           \n"
839e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			"jne 1b                                     \n"
840e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			: /* output */
841e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=r"(count),
842e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=r"(ptr_left),
843e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=r"(ptr_right),
844e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "=x"(x),
845e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x2]"=&x"(x2),
846e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x4]"=&x"(x4),
847e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [left]"=&x"(left),
848e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [right]"=&x"(right),
849e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [tmp1]"=&x"(tmp1),
850e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [tmp2]"=&x"(tmp2)
851e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			: /* input */
852e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [count]"0"(count),
853e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [ptr_left]"1"(ptr_left),
854e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [ptr_right]"2"(ptr_right),
855e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [x]"3"(x),
856e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A1]"x"(A1),
857e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A3]"x"(A3),
858e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A5]"x"(A5),
859e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [A7]"x"(A7),
860e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [one]"x"(_mm_set1_ps(1)),
861e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [r4]"x"(_mm_set1_ps(r*r*r*r)),
862e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  [g]"x"(_mm_set1_ps(master_linear_gain))
863e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			: /* clobber */
864e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			  "memory", "cc"
865e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang			);
866e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang		dk->compressor_gain = x[3];
867e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang	}
868e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang}
869e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang#else
870cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Changstatic void dk_compress_output(struct drc_kernel *dk)
871cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang{
872cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	const float master_linear_gain = dk->master_linear_gain;
873cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	const float envelope_rate = dk->envelope_rate;
874cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	const float scaled_desired_gain = dk->scaled_desired_gain;
875d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	const float compressor_gain = dk->compressor_gain;
876d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	const int div_start = dk->pre_delay_read_index;
877d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	float *ptr_left = &dk->pre_delay_buffers[0][div_start];
878d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	float *ptr_right = &dk->pre_delay_buffers[1][div_start];
879d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	int count = DIVISION_FRAMES / 4;
880d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
881cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	int i, j;
882cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
883d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	/* Exponential approach to desired gain. */
884d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	if (envelope_rate < 1) {
885d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		/* Attack - reduce gain to desired. */
886d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float c = compressor_gain - scaled_desired_gain;
887d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float base = scaled_desired_gain;
888d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float r = 1 - envelope_rate;
889d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float x[4] = {c*r, c*r*r, c*r*r*r, c*r*r*r*r};
890d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float r4 = r*r*r*r;
891d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
892d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		i = 0;
893d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		while (1) {
894d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			for (j = 0; j < 4; j++) {
895d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				/* Warp pre-compression gain to smooth out sharp
896d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				 * exponential transition points.
897d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				 */
898d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				float post_warp_compressor_gain =
899d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang					warp_sinf(x[j] + base);
900d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
901d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				/* Calculate total gain using master gain. */
902d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				float total_gain = master_linear_gain *
903d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang					post_warp_compressor_gain;
904d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
905d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				/* Apply final gain. */
906d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				*ptr_left++ *= total_gain;
907d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				*ptr_right++ *= total_gain;
908d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			}
909d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
910d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			if (++i == count)
911d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				break;
912d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
913d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			for (j = 0; j < 4; j++)
914d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				x[j] = x[j] * r4;
915964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		}
916964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
917d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		dk->compressor_gain = x[3] + base;
918d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang	} else {
919d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		/* Release - exponentially increase gain to 1.0 */
920d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float c = compressor_gain;
921d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float r = envelope_rate;
922d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float x[4] = {c*r, c*r*r, c*r*r*r, c*r*r*r*r};
923d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		float r4 = r*r*r*r;
924d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
925d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		i = 0;
926d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		while (1) {
927d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			for (j = 0; j < 4; j++) {
928d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				/* Warp pre-compression gain to smooth out sharp
929d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				 * exponential transition points.
930d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				 */
931d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				float post_warp_compressor_gain =
932d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang					warp_sinf(x[j]);
933d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
934d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				/* Calculate total gain using master gain. */
935d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				float total_gain = master_linear_gain *
936d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang					post_warp_compressor_gain;
937d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
938d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				/* Apply final gain. */
939d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				*ptr_left++ *= total_gain;
940d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				*ptr_right++ *= total_gain;
941d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			}
942d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
943d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			if (++i == count)
944d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				break;
945d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang
946d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang			for (j = 0; j < 4; j++)
947d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang				x[j] = min(1.0f, x[j] * r4);
948d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		}
949964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
950d6478504f2aaf2f98d1b901975e18f7751097cd9Chih-Chung Chang		dk->compressor_gain = x[3];
951964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
952964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
953e54edd37424ba218e56a911a93a492425f214c4fChih-Chung Chang#endif
954964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
/* Run once per completed division, i.e. after a full division of samples has
 * been received (and a full division of samples has been output). The steps
 * are, in order:
 *   1. compute the shaped power average (detector_average) of the input
 *      division,
 *   2. update the envelope parameters from detector_average,
 *   3. prepare the next output division by applying the envelope to compress
 *      its samples.
 */
static void dk_process_one_division(struct drc_kernel *dk)
{
	dk_update_detector_average(dk);
	dk_update_envelope(dk);
	dk_compress_output(dk);
}
967cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
968cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang/* Copy the input data to the pre-delay buffer, and copy the output data back to
969cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang * the input buffer */
970cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Changstatic void dk_copy_fragment(struct drc_kernel *dk, float *data_channels[],
971cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang			     unsigned frame_index, int frames_to_process)
972cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang{
973cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	int write_index = dk->pre_delay_write_index;
974cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	int read_index = dk->pre_delay_read_index;
975cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	int j;
976cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
977cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	for (j = 0; j < DRC_NUM_CHANNELS; ++j) {
978cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		memcpy(&dk->pre_delay_buffers[j][write_index],
979cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		       &data_channels[j][frame_index],
980cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		       frames_to_process * sizeof(float));
981cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		memcpy(&data_channels[j][frame_index],
982cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		       &dk->pre_delay_buffers[j][read_index],
983cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		       frames_to_process * sizeof(float));
984cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	}
985cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
986cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	dk->pre_delay_write_index = (write_index + frames_to_process) &
987cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		MAX_PRE_DELAY_FRAMES_MASK;
988cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	dk->pre_delay_read_index = (read_index + frames_to_process) &
989cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		MAX_PRE_DELAY_FRAMES_MASK;
990cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang}
991cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang
992964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang/* Delay the input sample only and don't do other processing. This is used when
993964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * the kernel is disabled. We want to do this to match the processing delay in
994964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang * kernels of other bands.
995964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang */
996964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changstatic void dk_process_delay_only(struct drc_kernel *dk, float *data_channels[],
997964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang				  unsigned count)
998964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
999964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int read_index = dk->pre_delay_read_index;
1000964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int write_index = dk->pre_delay_write_index;
100118df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang	int i = 0;
1002964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
100318df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang	while (i < count) {
100418df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		int j;
100518df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		int small = min(read_index, write_index);
100618df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		int large = max(read_index, write_index);
100718df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		/* chunk is the minimum of readable samples in contiguous
100818df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		 * buffer, writable samples in contiguous buffer, and the
100918df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		 * available input samples. */
101018df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		int chunk = min(large - small, MAX_PRE_DELAY_FRAMES - large);
101118df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		chunk = min(chunk, count - i);
101218df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		for (j = 0; j < DRC_NUM_CHANNELS; ++j) {
101318df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang			memcpy(&dk->pre_delay_buffers[j][write_index],
101418df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang			       &data_channels[j][i],
101518df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang			       chunk * sizeof(float));
101618df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang			memcpy(&data_channels[j][i],
101718df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang			       &dk->pre_delay_buffers[j][read_index],
101818df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang			       chunk * sizeof(float));
1019964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		}
102018df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		read_index = (read_index + chunk) & MAX_PRE_DELAY_FRAMES_MASK;
102118df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		write_index = (write_index + chunk) & MAX_PRE_DELAY_FRAMES_MASK;
102218df86b4776c38fd41cbc190e377c2e63c75bd3eChih-Chung Chang		i += chunk;
1023964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
1024964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
1025964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->pre_delay_read_index = read_index;
1026964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	dk->pre_delay_write_index = write_index;
1027964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
1028964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
1029964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Changvoid dk_process(struct drc_kernel *dk, float *data_channels[], unsigned count)
1030964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang{
1031964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int i = 0;
1032964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	int fragment;
1033964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
1034964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	if (!dk->enabled) {
1035964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		dk_process_delay_only(dk, data_channels, count);
1036964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		return;
1037964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	}
1038964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
1039cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	if (!dk->processed) {
1040cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		dk_update_envelope(dk);
1041cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		dk_compress_output(dk);
1042cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		dk->processed = 1;
1043cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	}
1044964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
1045cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	int offset = dk->pre_delay_write_index & DIVISION_FRAMES_MASK;
1046964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang	while (i < count) {
1047964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		fragment = min(DIVISION_FRAMES - offset, count - i);
1048cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		dk_copy_fragment(dk, data_channels, i, fragment);
1049964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang		i += fragment;
1050cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		offset = (offset + fragment) & DIVISION_FRAMES_MASK;
1051964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang
1052cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		/* Process the input division (32 frames). */
1053cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang		if (offset == 0)
1054cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang			dk_process_one_division(dk);
1055cff48112fb1c2a7072362f0303e77fd3aa3fdfbeChih-Chung Chang	}
1056964a3cb49811b2d00c48ee10474c2a8454b95a70Chih-Chung Chang}
1057