1/* vim: set ts=8 sw=8 noexpandtab: */
2//  qcms
3//  Copyright (C) 2009 Mozilla Corporation
4//  Copyright (C) 1998-2007 Marti Maria
5//
6// Permission is hereby granted, free of charge, to any person obtaining
7// a copy of this software and associated documentation files (the "Software"),
8// to deal in the Software without restriction, including without limitation
9// the rights to use, copy, modify, merge, publish, distribute, sublicense,
10// and/or sell copies of the Software, and to permit persons to whom the Software
11// is furnished to do so, subject to the following conditions:
12//
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
18// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24#include <stdlib.h>
25#include <math.h>
26#include <assert.h>
27#include <string.h> //memcpy
28#include "qcmsint.h"
29#include "transform_util.h"
30#include "matrix.h"
31
32static struct matrix build_lut_matrix(struct lutType *lut)
33{
34	struct matrix result;
35	if (lut) {
36		result.m[0][0] = s15Fixed16Number_to_float(lut->e00);
37		result.m[0][1] = s15Fixed16Number_to_float(lut->e01);
38		result.m[0][2] = s15Fixed16Number_to_float(lut->e02);
39		result.m[1][0] = s15Fixed16Number_to_float(lut->e10);
40		result.m[1][1] = s15Fixed16Number_to_float(lut->e11);
41		result.m[1][2] = s15Fixed16Number_to_float(lut->e12);
42		result.m[2][0] = s15Fixed16Number_to_float(lut->e20);
43		result.m[2][1] = s15Fixed16Number_to_float(lut->e21);
44		result.m[2][2] = s15Fixed16Number_to_float(lut->e22);
45		result.invalid = false;
46	} else {
47		memset(&result, 0, sizeof(struct matrix));
48		result.invalid = true;
49	}
50	return result;
51}
52
53static struct matrix build_mAB_matrix(struct lutmABType *lut)
54{
55	struct matrix result;
56	if (lut) {
57		result.m[0][0] = s15Fixed16Number_to_float(lut->e00);
58		result.m[0][1] = s15Fixed16Number_to_float(lut->e01);
59		result.m[0][2] = s15Fixed16Number_to_float(lut->e02);
60		result.m[1][0] = s15Fixed16Number_to_float(lut->e10);
61		result.m[1][1] = s15Fixed16Number_to_float(lut->e11);
62		result.m[1][2] = s15Fixed16Number_to_float(lut->e12);
63		result.m[2][0] = s15Fixed16Number_to_float(lut->e20);
64		result.m[2][1] = s15Fixed16Number_to_float(lut->e21);
65		result.m[2][2] = s15Fixed16Number_to_float(lut->e22);
66		result.invalid = false;
67	} else {
68		memset(&result, 0, sizeof(struct matrix));
69		result.invalid = true;
70	}
71	return result;
72}
73
74//Based on lcms cmsLab2XYZ
75#define f(t) (t <= (24.0f/116.0f)*(24.0f/116.0f)*(24.0f/116.0f)) ? ((841.0/108.0) * t + (16.0/116.0)) : pow(t,1.0/3.0)
76#define f_1(t) (t <= (24.0f/116.0f)) ? ((108.0/841.0) * (t - (16.0/116.0))) : (t * t * t)
77static void qcms_transform_module_LAB_to_XYZ(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
78{
79	size_t i;
80	// lcms: D50 XYZ values
81	float WhitePointX = 0.9642f;
82	float WhitePointY = 1.0f;
83	float WhitePointZ = 0.8249f;
84	for (i = 0; i < length; i++) {
85		float device_L = *src++ * 100.0f;
86		float device_a = *src++ * 255.0f - 128.0f;
87		float device_b = *src++ * 255.0f - 128.0f;
88		float y = (device_L + 16.0f) / 116.0f;
89
90		float X = f_1((y + 0.002f * device_a)) * WhitePointX;
91		float Y = f_1(y) * WhitePointY;
92		float Z = f_1((y - 0.005f * device_b)) * WhitePointZ;
93		*dest++ = X / (1.0 + 32767.0/32768.0);
94		*dest++ = Y / (1.0 + 32767.0/32768.0);
95		*dest++ = Z / (1.0 + 32767.0/32768.0);
96	}
97}
98
99//Based on lcms cmsXYZ2Lab
100static void qcms_transform_module_XYZ_to_LAB(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
101{
102	size_t i;
103        // lcms: D50 XYZ values
104        float WhitePointX = 0.9642f;
105        float WhitePointY = 1.0f;
106        float WhitePointZ = 0.8249f;
107        for (i = 0; i < length; i++) {
108                float device_x = *src++ * (1.0 + 32767.0/32768.0) / WhitePointX;
109                float device_y = *src++ * (1.0 + 32767.0/32768.0) / WhitePointY;
110                float device_z = *src++ * (1.0 + 32767.0/32768.0) / WhitePointZ;
111
112		float fx = f(device_x);
113		float fy = f(device_y);
114		float fz = f(device_z);
115
116                float L = 116.0f*fy - 16.0f;
117                float a = 500.0f*(fx - fy);
118                float b = 200.0f*(fy - fz);
119                *dest++ = L / 100.0f;
120                *dest++ = (a+128.0f) / 255.0f;
121                *dest++ = (b+128.0f) / 255.0f;
122        }
123
124}
125
126static void qcms_transform_module_clut_only(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
127{
128	size_t i;
129	int xy_len = 1;
130	int x_len = transform->grid_size;
131	int len = x_len * x_len;
132	float* r_table = transform->r_clut;
133	float* g_table = transform->g_clut;
134	float* b_table = transform->b_clut;
135
136	for (i = 0; i < length; i++) {
137		float linear_r = *src++;
138		float linear_g = *src++;
139		float linear_b = *src++;
140
141		int x = floor(linear_r * (transform->grid_size-1));
142		int y = floor(linear_g * (transform->grid_size-1));
143		int z = floor(linear_b * (transform->grid_size-1));
144		int x_n = ceil(linear_r * (transform->grid_size-1));
145		int y_n = ceil(linear_g * (transform->grid_size-1));
146		int z_n = ceil(linear_b * (transform->grid_size-1));
147		float x_d = linear_r * (transform->grid_size-1) - x;
148		float y_d = linear_g * (transform->grid_size-1) - y;
149		float z_d = linear_b * (transform->grid_size-1) - z;
150
151		float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
152		float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
153		float r_y1 = lerp(r_x1, r_x2, y_d);
154		float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
155		float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
156		float r_y2 = lerp(r_x3, r_x4, y_d);
157		float clut_r = lerp(r_y1, r_y2, z_d);
158
159		float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
160		float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
161		float g_y1 = lerp(g_x1, g_x2, y_d);
162		float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
163		float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
164		float g_y2 = lerp(g_x3, g_x4, y_d);
165		float clut_g = lerp(g_y1, g_y2, z_d);
166
167		float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
168		float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
169		float b_y1 = lerp(b_x1, b_x2, y_d);
170		float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
171		float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
172		float b_y2 = lerp(b_x3, b_x4, y_d);
173		float clut_b = lerp(b_y1, b_y2, z_d);
174
175		*dest++ = clamp_float(clut_r);
176		*dest++ = clamp_float(clut_g);
177		*dest++ = clamp_float(clut_b);
178	}
179}
180
181static void qcms_transform_module_clut(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
182{
183	size_t i;
184	int xy_len = 1;
185	int x_len = transform->grid_size;
186	int len = x_len * x_len;
187	float* r_table = transform->r_clut;
188	float* g_table = transform->g_clut;
189	float* b_table = transform->b_clut;
190	for (i = 0; i < length; i++) {
191		float device_r = *src++;
192		float device_g = *src++;
193		float device_b = *src++;
194		float linear_r = lut_interp_linear_float(device_r,
195				transform->input_clut_table_r, transform->input_clut_table_length);
196		float linear_g = lut_interp_linear_float(device_g,
197				transform->input_clut_table_g, transform->input_clut_table_length);
198		float linear_b = lut_interp_linear_float(device_b,
199				transform->input_clut_table_b, transform->input_clut_table_length);
200
201		int x = floor(linear_r * (transform->grid_size-1));
202		int y = floor(linear_g * (transform->grid_size-1));
203		int z = floor(linear_b * (transform->grid_size-1));
204		int x_n = ceil(linear_r * (transform->grid_size-1));
205		int y_n = ceil(linear_g * (transform->grid_size-1));
206		int z_n = ceil(linear_b * (transform->grid_size-1));
207		float x_d = linear_r * (transform->grid_size-1) - x;
208		float y_d = linear_g * (transform->grid_size-1) - y;
209		float z_d = linear_b * (transform->grid_size-1) - z;
210
211		float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
212		float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
213		float r_y1 = lerp(r_x1, r_x2, y_d);
214		float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
215		float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
216		float r_y2 = lerp(r_x3, r_x4, y_d);
217		float clut_r = lerp(r_y1, r_y2, z_d);
218
219		float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
220		float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
221		float g_y1 = lerp(g_x1, g_x2, y_d);
222		float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
223		float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
224		float g_y2 = lerp(g_x3, g_x4, y_d);
225		float clut_g = lerp(g_y1, g_y2, z_d);
226
227		float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
228		float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
229		float b_y1 = lerp(b_x1, b_x2, y_d);
230		float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
231		float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
232		float b_y2 = lerp(b_x3, b_x4, y_d);
233		float clut_b = lerp(b_y1, b_y2, z_d);
234
235		float pcs_r = lut_interp_linear_float(clut_r,
236				transform->output_clut_table_r, transform->output_clut_table_length);
237		float pcs_g = lut_interp_linear_float(clut_g,
238				transform->output_clut_table_g, transform->output_clut_table_length);
239		float pcs_b = lut_interp_linear_float(clut_b,
240				transform->output_clut_table_b, transform->output_clut_table_length);
241
242		*dest++ = clamp_float(pcs_r);
243		*dest++ = clamp_float(pcs_g);
244		*dest++ = clamp_float(pcs_b);
245	}
246}
247
248/* NOT USED
249static void qcms_transform_module_tetra_clut(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
250{
251	size_t i;
252	int xy_len = 1;
253	int x_len = transform->grid_size;
254	int len = x_len * x_len;
255	float* r_table = transform->r_clut;
256	float* g_table = transform->g_clut;
257	float* b_table = transform->b_clut;
258	float c0_r, c1_r, c2_r, c3_r;
259	float c0_g, c1_g, c2_g, c3_g;
260	float c0_b, c1_b, c2_b, c3_b;
261	float clut_r, clut_g, clut_b;
262	float pcs_r, pcs_g, pcs_b;
263	for (i = 0; i < length; i++) {
264		float device_r = *src++;
265		float device_g = *src++;
266		float device_b = *src++;
267		float linear_r = lut_interp_linear_float(device_r,
268				transform->input_clut_table_r, transform->input_clut_table_length);
269		float linear_g = lut_interp_linear_float(device_g,
270				transform->input_clut_table_g, transform->input_clut_table_length);
271		float linear_b = lut_interp_linear_float(device_b,
272				transform->input_clut_table_b, transform->input_clut_table_length);
273
274		int x = floor(linear_r * (transform->grid_size-1));
275		int y = floor(linear_g * (transform->grid_size-1));
276		int z = floor(linear_b * (transform->grid_size-1));
277		int x_n = ceil(linear_r * (transform->grid_size-1));
278		int y_n = ceil(linear_g * (transform->grid_size-1));
279		int z_n = ceil(linear_b * (transform->grid_size-1));
280		float rx = linear_r * (transform->grid_size-1) - x;
281		float ry = linear_g * (transform->grid_size-1) - y;
282		float rz = linear_b * (transform->grid_size-1) - z;
283
284		c0_r = CLU(r_table, x, y, z);
285		c0_g = CLU(g_table, x, y, z);
286		c0_b = CLU(b_table, x, y, z);
287		if( rx >= ry ) {
288			if (ry >= rz) { //rx >= ry && ry >= rz
289				c1_r = CLU(r_table, x_n, y, z) - c0_r;
290				c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
291				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
292				c1_g = CLU(g_table, x_n, y, z) - c0_g;
293				c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
294				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
295				c1_b = CLU(b_table, x_n, y, z) - c0_b;
296				c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
297				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
298			} else {
299				if (rx >= rz) { //rx >= rz && rz >= ry
300					c1_r = CLU(r_table, x_n, y, z) - c0_r;
301					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
302					c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
303					c1_g = CLU(g_table, x_n, y, z) - c0_g;
304					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
305					c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
306					c1_b = CLU(b_table, x_n, y, z) - c0_b;
307					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
308					c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
309				} else { //rz > rx && rx >= ry
310					c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
311					c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
312					c3_r = CLU(r_table, x, y, z_n) - c0_r;
313					c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
314					c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
315					c3_g = CLU(g_table, x, y, z_n) - c0_g;
316					c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
317					c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
318					c3_b = CLU(b_table, x, y, z_n) - c0_b;
319				}
320			}
321		} else {
322			if (rx >= rz) { //ry > rx && rx >= rz
323				c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
324				c2_r = CLU(r_table, x_n, y_n, z) - c0_r;
325				c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
326				c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
327				c2_g = CLU(g_table, x_n, y_n, z) - c0_g;
328				c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
329				c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
330				c2_b = CLU(b_table, x_n, y_n, z) - c0_b;
331				c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
332			} else {
333				if (ry >= rz) { //ry >= rz && rz > rx
334					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
335					c2_r = CLU(r_table, x, y_n, z) - c0_r;
336					c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
337					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
338					c2_g = CLU(g_table, x, y_n, z) - c0_g;
339					c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
340					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
341					c2_b = CLU(b_table, x, y_n, z) - c0_b;
342					c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
343				} else { //rz > ry && ry > rx
344					c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
345					c2_r = CLU(r_table, x, y_n, z) - c0_r;
346					c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
347					c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
348					c2_g = CLU(g_table, x, y_n, z) - c0_g;
349					c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
350					c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
351					c2_b = CLU(b_table, x, y_n, z) - c0_b;
352					c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
353				}
354			}
355		}
356
357		clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
358		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
359		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
360
361		pcs_r = lut_interp_linear_float(clut_r,
362				transform->output_clut_table_r, transform->output_clut_table_length);
363		pcs_g = lut_interp_linear_float(clut_g,
364				transform->output_clut_table_g, transform->output_clut_table_length);
365		pcs_b = lut_interp_linear_float(clut_b,
366				transform->output_clut_table_b, transform->output_clut_table_length);
367		*dest++ = clamp_float(pcs_r);
368		*dest++ = clamp_float(pcs_g);
369		*dest++ = clamp_float(pcs_b);
370	}
371}
372*/
373
374static void qcms_transform_module_gamma_table(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
375{
376	size_t i;
377	float out_r, out_g, out_b;
378	for (i = 0; i < length; i++) {
379		float in_r = *src++;
380		float in_g = *src++;
381		float in_b = *src++;
382
383		out_r = lut_interp_linear_float(in_r, transform->input_clut_table_r, 256);
384		out_g = lut_interp_linear_float(in_g, transform->input_clut_table_g, 256);
385		out_b = lut_interp_linear_float(in_b, transform->input_clut_table_b, 256);
386
387		*dest++ = clamp_float(out_r);
388		*dest++ = clamp_float(out_g);
389		*dest++ = clamp_float(out_b);
390	}
391}
392
393static void qcms_transform_module_gamma_lut(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
394{
395	size_t i;
396	float out_r, out_g, out_b;
397	for (i = 0; i < length; i++) {
398		float in_r = *src++;
399		float in_g = *src++;
400		float in_b = *src++;
401
402		out_r = lut_interp_linear(in_r,
403				transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
404		out_g = lut_interp_linear(in_g,
405				transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
406		out_b = lut_interp_linear(in_b,
407				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
408
409		*dest++ = clamp_float(out_r);
410		*dest++ = clamp_float(out_g);
411		*dest++ = clamp_float(out_b);
412	}
413}
414
415static void qcms_transform_module_matrix_translate(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
416{
417	size_t i;
418	struct matrix mat;
419
420	/* store the results in column major mode
421	 * this makes doing the multiplication with sse easier */
422	mat.m[0][0] = transform->matrix.m[0][0];
423	mat.m[1][0] = transform->matrix.m[0][1];
424	mat.m[2][0] = transform->matrix.m[0][2];
425	mat.m[0][1] = transform->matrix.m[1][0];
426	mat.m[1][1] = transform->matrix.m[1][1];
427	mat.m[2][1] = transform->matrix.m[1][2];
428	mat.m[0][2] = transform->matrix.m[2][0];
429	mat.m[1][2] = transform->matrix.m[2][1];
430	mat.m[2][2] = transform->matrix.m[2][2];
431
432	for (i = 0; i < length; i++) {
433		float in_r = *src++;
434		float in_g = *src++;
435		float in_b = *src++;
436
437		float out_r = mat.m[0][0]*in_r + mat.m[1][0]*in_g + mat.m[2][0]*in_b + transform->tx;
438		float out_g = mat.m[0][1]*in_r + mat.m[1][1]*in_g + mat.m[2][1]*in_b + transform->ty;
439		float out_b = mat.m[0][2]*in_r + mat.m[1][2]*in_g + mat.m[2][2]*in_b + transform->tz;
440
441		*dest++ = clamp_float(out_r);
442		*dest++ = clamp_float(out_g);
443		*dest++ = clamp_float(out_b);
444	}
445}
446
447static void qcms_transform_module_matrix(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
448{
449	size_t i;
450	struct matrix mat;
451
452	/* store the results in column major mode
453	 * this makes doing the multiplication with sse easier */
454	mat.m[0][0] = transform->matrix.m[0][0];
455	mat.m[1][0] = transform->matrix.m[0][1];
456	mat.m[2][0] = transform->matrix.m[0][2];
457	mat.m[0][1] = transform->matrix.m[1][0];
458	mat.m[1][1] = transform->matrix.m[1][1];
459	mat.m[2][1] = transform->matrix.m[1][2];
460	mat.m[0][2] = transform->matrix.m[2][0];
461	mat.m[1][2] = transform->matrix.m[2][1];
462	mat.m[2][2] = transform->matrix.m[2][2];
463
464	for (i = 0; i < length; i++) {
465		float in_r = *src++;
466		float in_g = *src++;
467		float in_b = *src++;
468
469		float out_r = mat.m[0][0]*in_r + mat.m[1][0]*in_g + mat.m[2][0]*in_b;
470		float out_g = mat.m[0][1]*in_r + mat.m[1][1]*in_g + mat.m[2][1]*in_b;
471		float out_b = mat.m[0][2]*in_r + mat.m[1][2]*in_g + mat.m[2][2]*in_b;
472
473		*dest++ = clamp_float(out_r);
474		*dest++ = clamp_float(out_g);
475		*dest++ = clamp_float(out_b);
476	}
477}
478
479static struct qcms_modular_transform* qcms_modular_transform_alloc() {
480	return calloc(1, sizeof(struct qcms_modular_transform));
481}
482
483static void qcms_modular_transform_release(struct qcms_modular_transform *transform)
484{
485	struct qcms_modular_transform *next_transform;
486	while (transform != NULL) {
487		next_transform = transform->next_transform;
488		// clut may use a single block of memory.
489		// Perhaps we should remove this to simply the code.
490		if (transform->input_clut_table_r + transform->input_clut_table_length == transform->input_clut_table_g && transform->input_clut_table_g + transform->input_clut_table_length == transform->input_clut_table_b) {
491			if (transform->input_clut_table_r) free(transform->input_clut_table_r);
492		} else {
493			if (transform->input_clut_table_r) free(transform->input_clut_table_r);
494			if (transform->input_clut_table_g) free(transform->input_clut_table_g);
495			if (transform->input_clut_table_b) free(transform->input_clut_table_b);
496		}
497		if (transform->r_clut + 1 == transform->g_clut && transform->g_clut + 1 == transform->b_clut) {
498			if (transform->r_clut) free(transform->r_clut);
499		} else {
500			if (transform->r_clut) free(transform->r_clut);
501			if (transform->g_clut) free(transform->g_clut);
502			if (transform->b_clut) free(transform->b_clut);
503		}
504		if (transform->output_clut_table_r + transform->output_clut_table_length == transform->output_clut_table_g && transform->output_clut_table_g+ transform->output_clut_table_length == transform->output_clut_table_b) {
505			if (transform->output_clut_table_r) free(transform->output_clut_table_r);
506		} else {
507			if (transform->output_clut_table_r) free(transform->output_clut_table_r);
508			if (transform->output_clut_table_g) free(transform->output_clut_table_g);
509			if (transform->output_clut_table_b) free(transform->output_clut_table_b);
510		}
511		if (transform->output_gamma_lut_r) free(transform->output_gamma_lut_r);
512		if (transform->output_gamma_lut_g) free(transform->output_gamma_lut_g);
513		if (transform->output_gamma_lut_b) free(transform->output_gamma_lut_b);
514		free(transform);
515		transform = next_transform;
516	}
517}
518
519/* Set transform to be the next element in the linked list. */
520static void append_transform(struct qcms_modular_transform *transform, struct qcms_modular_transform ***next_transform)
521{
522	**next_transform = transform;
523	while (transform) {
524		*next_transform = &(transform->next_transform);
525		transform = transform->next_transform;
526	}
527}
528
529/* reverse the transformation list (used by mBA) */
530static struct qcms_modular_transform* reverse_transform(struct qcms_modular_transform *transform)
531{
532	struct qcms_modular_transform *prev_transform = NULL;
533	while (transform != NULL) {
534		struct qcms_modular_transform *next_transform = transform->next_transform;
535		transform->next_transform = prev_transform;
536		prev_transform = transform;
537		transform = next_transform;
538	}
539
540	return prev_transform;
541}
542
543#define EMPTY_TRANSFORM_LIST NULL
544static struct qcms_modular_transform* qcms_modular_transform_create_mAB(struct lutmABType *lut)
545{
546	struct qcms_modular_transform *first_transform = NULL;
547	struct qcms_modular_transform **next_transform = &first_transform;
548	struct qcms_modular_transform *transform = NULL;
549
550	if (lut->a_curves[0] != NULL) {
551		size_t clut_length;
552		float *clut;
553
554		// If the A curve is present this also implies the
555		// presence of a CLUT.
556		if (!lut->clut_table)
557			goto fail;
558
559		// Prepare A curve.
560		transform = qcms_modular_transform_alloc();
561		if (!transform)
562			goto fail;
563		append_transform(transform, &next_transform);
564		transform->input_clut_table_r = build_input_gamma_table(lut->a_curves[0]);
565		transform->input_clut_table_g = build_input_gamma_table(lut->a_curves[1]);
566		transform->input_clut_table_b = build_input_gamma_table(lut->a_curves[2]);
567		transform->transform_module_fn = qcms_transform_module_gamma_table;
568		if (lut->num_grid_points[0] != lut->num_grid_points[1] ||
569			lut->num_grid_points[1] != lut->num_grid_points[2] ) {
570			//XXX: We don't currently support clut that are not squared!
571			goto fail;
572		}
573
574		// Prepare CLUT
575		transform = qcms_modular_transform_alloc();
576		if (!transform)
577			goto fail;
578		append_transform(transform, &next_transform);
579		clut_length = sizeof(float)*pow(lut->num_grid_points[0], 3)*3;
580		clut = malloc(clut_length);
581		if (!clut)
582			goto fail;
583		memcpy(clut, lut->clut_table, clut_length);
584		transform->r_clut = clut + 0;
585		transform->g_clut = clut + 1;
586		transform->b_clut = clut + 2;
587		transform->grid_size = lut->num_grid_points[0];
588		transform->transform_module_fn = qcms_transform_module_clut_only;
589	}
590	if (lut->m_curves[0] != NULL) {
591		// M curve imples the presence of a Matrix
592
593		// Prepare M curve
594		transform = qcms_modular_transform_alloc();
595		if (!transform)
596			goto fail;
597		append_transform(transform, &next_transform);
598		transform->input_clut_table_r = build_input_gamma_table(lut->m_curves[0]);
599		transform->input_clut_table_g = build_input_gamma_table(lut->m_curves[1]);
600		transform->input_clut_table_b = build_input_gamma_table(lut->m_curves[2]);
601		transform->transform_module_fn = qcms_transform_module_gamma_table;
602
603		// Prepare Matrix
604		transform = qcms_modular_transform_alloc();
605		if (!transform)
606			goto fail;
607		append_transform(transform, &next_transform);
608		transform->matrix = build_mAB_matrix(lut);
609		if (transform->matrix.invalid)
610			goto fail;
611		transform->tx = s15Fixed16Number_to_float(lut->e03);
612		transform->ty = s15Fixed16Number_to_float(lut->e13);
613		transform->tz = s15Fixed16Number_to_float(lut->e23);
614		transform->transform_module_fn = qcms_transform_module_matrix_translate;
615	}
616	if (lut->b_curves[0] != NULL) {
617		// Prepare B curve
618		transform = qcms_modular_transform_alloc();
619		if (!transform)
620			goto fail;
621		append_transform(transform, &next_transform);
622		transform->input_clut_table_r = build_input_gamma_table(lut->b_curves[0]);
623		transform->input_clut_table_g = build_input_gamma_table(lut->b_curves[1]);
624		transform->input_clut_table_b = build_input_gamma_table(lut->b_curves[2]);
625		transform->transform_module_fn = qcms_transform_module_gamma_table;
626	} else {
627		// B curve is mandatory
628		goto fail;
629	}
630
631	if (lut->reversed) {
632		// mBA are identical to mAB except that the transformation order
633		// is reversed
634		first_transform = reverse_transform(first_transform);
635	}
636
637	return first_transform;
638fail:
639	qcms_modular_transform_release(first_transform);
640	return NULL;
641}
642
643static struct qcms_modular_transform* qcms_modular_transform_create_lut(struct lutType *lut)
644{
645	struct qcms_modular_transform *first_transform = NULL;
646	struct qcms_modular_transform **next_transform = &first_transform;
647	struct qcms_modular_transform *transform = NULL;
648
649	size_t in_curve_len, clut_length, out_curve_len;
650	float *in_curves, *clut, *out_curves;
651
652	// Prepare Matrix
653	transform = qcms_modular_transform_alloc();
654	if (!transform)
655		goto fail;
656	append_transform(transform, &next_transform);
657	transform->matrix = build_lut_matrix(lut);
658	if (transform->matrix.invalid)
659		goto fail;
660	transform->transform_module_fn = qcms_transform_module_matrix;
661
662	// Prepare input curves
663	transform = qcms_modular_transform_alloc();
664	if (!transform)
665		goto fail;
666	append_transform(transform, &next_transform);
667	in_curve_len = sizeof(float)*lut->num_input_table_entries * 3;
668	in_curves = malloc(in_curve_len);
669	if (!in_curves)
670		goto fail;
671	memcpy(in_curves, lut->input_table, in_curve_len);
672	transform->input_clut_table_r = in_curves + lut->num_input_table_entries * 0;
673	transform->input_clut_table_g = in_curves + lut->num_input_table_entries * 1;
674	transform->input_clut_table_b = in_curves + lut->num_input_table_entries * 2;
675	transform->input_clut_table_length = lut->num_input_table_entries;
676
677	// Prepare table
678	clut_length = sizeof(float)*pow(lut->num_clut_grid_points, 3)*3;
679	clut = malloc(clut_length);
680	if (!clut)
681		goto fail;
682	memcpy(clut, lut->clut_table, clut_length);
683	transform->r_clut = clut + 0;
684	transform->g_clut = clut + 1;
685	transform->b_clut = clut + 2;
686	transform->grid_size = lut->num_clut_grid_points;
687
688	// Prepare output curves
689	out_curve_len = sizeof(float) * lut->num_output_table_entries * 3;
690	out_curves = malloc(out_curve_len);
691	if (!out_curves)
692		goto fail;
693	memcpy(out_curves, lut->output_table, out_curve_len);
694	transform->output_clut_table_r = out_curves + lut->num_output_table_entries * 0;
695	transform->output_clut_table_g = out_curves + lut->num_output_table_entries * 1;
696	transform->output_clut_table_b = out_curves + lut->num_output_table_entries * 2;
697	transform->output_clut_table_length = lut->num_output_table_entries;
698	transform->transform_module_fn = qcms_transform_module_clut;
699
700	return first_transform;
701fail:
702	qcms_modular_transform_release(first_transform);
703	return NULL;
704}
705
706struct qcms_modular_transform* qcms_modular_transform_create_input(qcms_profile *in)
707{
708	struct qcms_modular_transform *first_transform = NULL;
709	struct qcms_modular_transform **next_transform = &first_transform;
710
711	if (in->A2B0) {
712		struct qcms_modular_transform *lut_transform;
713		lut_transform = qcms_modular_transform_create_lut(in->A2B0);
714		if (!lut_transform)
715			goto fail;
716		append_transform(lut_transform, &next_transform);
717	} else if (in->mAB && in->mAB->num_in_channels == 3 && in->mAB->num_out_channels == 3) {
718		struct qcms_modular_transform *mAB_transform;
719		mAB_transform = qcms_modular_transform_create_mAB(in->mAB);
720		if (!mAB_transform)
721			goto fail;
722		append_transform(mAB_transform, &next_transform);
723
724	} else {
725		struct qcms_modular_transform *transform;
726
727		transform = qcms_modular_transform_alloc();
728		if (!transform)
729			goto fail;
730		append_transform(transform, &next_transform);
731		transform->input_clut_table_r = build_input_gamma_table(in->redTRC);
732		transform->input_clut_table_g = build_input_gamma_table(in->greenTRC);
733		transform->input_clut_table_b = build_input_gamma_table(in->blueTRC);
734		transform->transform_module_fn = qcms_transform_module_gamma_table;
735		if (!transform->input_clut_table_r || !transform->input_clut_table_g ||
736				!transform->input_clut_table_b) {
737			goto fail;
738		}
739
740		transform = qcms_modular_transform_alloc();
741		if (!transform)
742			goto fail;
743		append_transform(transform, &next_transform);
744		transform->matrix.m[0][0] = 1/1.999969482421875f;
745		transform->matrix.m[0][1] = 0.f;
746		transform->matrix.m[0][2] = 0.f;
747		transform->matrix.m[1][0] = 0.f;
748		transform->matrix.m[1][1] = 1/1.999969482421875f;
749		transform->matrix.m[1][2] = 0.f;
750		transform->matrix.m[2][0] = 0.f;
751		transform->matrix.m[2][1] = 0.f;
752		transform->matrix.m[2][2] = 1/1.999969482421875f;
753		transform->matrix.invalid = false;
754		transform->transform_module_fn = qcms_transform_module_matrix;
755
756		transform = qcms_modular_transform_alloc();
757		if (!transform)
758			goto fail;
759		append_transform(transform, &next_transform);
760		transform->matrix = build_colorant_matrix(in);
761		transform->transform_module_fn = qcms_transform_module_matrix;
762	}
763
764	return first_transform;
765fail:
766	qcms_modular_transform_release(first_transform);
767	return EMPTY_TRANSFORM_LIST;
768}
769static struct qcms_modular_transform* qcms_modular_transform_create_output(qcms_profile *out)
770{
771	struct qcms_modular_transform *first_transform = NULL;
772	struct qcms_modular_transform **next_transform = &first_transform;
773
774	if (out->B2A0) {
775		struct qcms_modular_transform *lut_transform;
776		lut_transform = qcms_modular_transform_create_lut(out->B2A0);
777		if (!lut_transform)
778			goto fail;
779		append_transform(lut_transform, &next_transform);
780	} else if (out->mBA && out->mBA->num_in_channels == 3 && out->mBA->num_out_channels == 3) {
781		struct qcms_modular_transform *lut_transform;
782		lut_transform = qcms_modular_transform_create_mAB(out->mBA);
783		if (!lut_transform)
784			goto fail;
785		append_transform(lut_transform, &next_transform);
786	} else if (out->redTRC && out->greenTRC && out->blueTRC) {
787		struct qcms_modular_transform *transform;
788
789		transform = qcms_modular_transform_alloc();
790		if (!transform)
791			goto fail;
792		append_transform(transform, &next_transform);
793		transform->matrix = matrix_invert(build_colorant_matrix(out));
794		transform->transform_module_fn = qcms_transform_module_matrix;
795
796		transform = qcms_modular_transform_alloc();
797		if (!transform)
798			goto fail;
799		append_transform(transform, &next_transform);
800		transform->matrix.m[0][0] = 1.999969482421875f;
801		transform->matrix.m[0][1] = 0.f;
802		transform->matrix.m[0][2] = 0.f;
803		transform->matrix.m[1][0] = 0.f;
804		transform->matrix.m[1][1] = 1.999969482421875f;
805		transform->matrix.m[1][2] = 0.f;
806		transform->matrix.m[2][0] = 0.f;
807		transform->matrix.m[2][1] = 0.f;
808		transform->matrix.m[2][2] = 1.999969482421875f;
809		transform->matrix.invalid = false;
810		transform->transform_module_fn = qcms_transform_module_matrix;
811
812		transform = qcms_modular_transform_alloc();
813		if (!transform)
814			goto fail;
815		append_transform(transform, &next_transform);
816		build_output_lut(out->redTRC, &transform->output_gamma_lut_r,
817			&transform->output_gamma_lut_r_length);
818		build_output_lut(out->greenTRC, &transform->output_gamma_lut_g,
819			&transform->output_gamma_lut_g_length);
820		build_output_lut(out->blueTRC, &transform->output_gamma_lut_b,
821			&transform->output_gamma_lut_b_length);
822		transform->transform_module_fn = qcms_transform_module_gamma_lut;
823
824		if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g ||
825				!transform->output_gamma_lut_b) {
826			goto fail;
827		}
828	} else {
829		assert(0 && "Unsupported output profile workflow.");
830		return NULL;
831	}
832
833	return first_transform;
834fail:
835	qcms_modular_transform_release(first_transform);
836	return EMPTY_TRANSFORM_LIST;
837}
838
839/* Not Completed
840// Simplify the transformation chain to an equivalent transformation chain
841static struct qcms_modular_transform* qcms_modular_transform_reduce(struct qcms_modular_transform *transform)
842{
843	struct qcms_modular_transform *first_transform = NULL;
844	struct qcms_modular_transform *curr_trans = transform;
845	struct qcms_modular_transform *prev_trans = NULL;
846	while (curr_trans) {
847		struct qcms_modular_transform *next_trans = curr_trans->next_transform;
848		if (curr_trans->transform_module_fn == qcms_transform_module_matrix) {
849			if (next_trans && next_trans->transform_module_fn == qcms_transform_module_matrix) {
850				curr_trans->matrix = matrix_multiply(curr_trans->matrix, next_trans->matrix);
851				goto remove_next;
852			}
853		}
854		if (curr_trans->transform_module_fn == qcms_transform_module_gamma_table) {
855			bool isLinear = true;
856			uint16_t i;
857			for (i = 0; isLinear && i < 256; i++) {
858				isLinear &= (int)(curr_trans->input_clut_table_r[i] * 255) == i;
859				isLinear &= (int)(curr_trans->input_clut_table_g[i] * 255) == i;
860				isLinear &= (int)(curr_trans->input_clut_table_b[i] * 255) == i;
861			}
862			goto remove_current;
863		}
864
865next_transform:
866		if (!next_trans) break;
867		prev_trans = curr_trans;
868		curr_trans = next_trans;
869		continue;
870remove_current:
871		if (curr_trans == transform) {
872			//Update head
873			transform = next_trans;
874		} else {
875			prev_trans->next_transform = next_trans;
876		}
877		curr_trans->next_transform = NULL;
878		qcms_modular_transform_release(curr_trans);
879		//return transform;
880		return qcms_modular_transform_reduce(transform);
881remove_next:
882		curr_trans->next_transform = next_trans->next_transform;
883		next_trans->next_transform = NULL;
884		qcms_modular_transform_release(next_trans);
885		continue;
886	}
887	return transform;
888}
889*/
890
891static struct qcms_modular_transform* qcms_modular_transform_create(qcms_profile *in, qcms_profile *out)
892{
893	struct qcms_modular_transform *first_transform = NULL;
894	struct qcms_modular_transform **next_transform = &first_transform;
895
896	if (in->color_space == RGB_SIGNATURE) {
897		struct qcms_modular_transform* rgb_to_pcs;
898		rgb_to_pcs = qcms_modular_transform_create_input(in);
899		if (!rgb_to_pcs)
900			goto fail;
901		append_transform(rgb_to_pcs, &next_transform);
902	} else {
903		assert(0 && "input color space not supported");
904		goto fail;
905	}
906
907	if (in->pcs == LAB_SIGNATURE && out->pcs == XYZ_SIGNATURE) {
908		struct qcms_modular_transform* lab_to_pcs;
909		lab_to_pcs = qcms_modular_transform_alloc();
910		if (!lab_to_pcs)
911			goto fail;
912		append_transform(lab_to_pcs, &next_transform);
913		lab_to_pcs->transform_module_fn = qcms_transform_module_LAB_to_XYZ;
914	}
915
916	// This does not improve accuracy in practice, something is wrong here.
917	//if (in->chromaticAdaption.invalid == false) {
918	//	struct qcms_modular_transform* chromaticAdaption;
919	//	chromaticAdaption = qcms_modular_transform_alloc();
920	//	if (!chromaticAdaption)
921	//		goto fail;
922	//	append_transform(chromaticAdaption, &next_transform);
923	//	chromaticAdaption->matrix = matrix_invert(in->chromaticAdaption);
924	//	chromaticAdaption->transform_module_fn = qcms_transform_module_matrix;
925	//}
926
927        if (in->pcs == XYZ_SIGNATURE && out->pcs == LAB_SIGNATURE) {
928		struct qcms_modular_transform* pcs_to_lab;
929		pcs_to_lab = qcms_modular_transform_alloc();
930		if (!pcs_to_lab)
931			goto fail;
932		append_transform(pcs_to_lab, &next_transform);
933		pcs_to_lab->transform_module_fn = qcms_transform_module_XYZ_to_LAB;
934	}
935
936	if (out->color_space == RGB_SIGNATURE) {
937		struct qcms_modular_transform* pcs_to_rgb;
938		pcs_to_rgb = qcms_modular_transform_create_output(out);
939		if (!pcs_to_rgb)
940			goto fail;
941		append_transform(pcs_to_rgb, &next_transform);
942	} else {
943		assert(0 && "output color space not supported");
944		goto fail;
945	}
946	// Not Completed
947	//return qcms_modular_transform_reduce(first_transform);
948	return first_transform;
949fail:
950	qcms_modular_transform_release(first_transform);
951	return EMPTY_TRANSFORM_LIST;
952}
953
954static float* qcms_modular_transform_data(struct qcms_modular_transform *transform, float *src, float *dest, size_t len)
955{
956        while (transform != NULL) {
957                // Keep swaping src/dest when performing a transform to use less memory.
958                float *new_src = dest;
959		const transform_module_fn_t transform_fn = transform->transform_module_fn;
960		if (transform_fn != qcms_transform_module_gamma_table &&
961		    transform_fn != qcms_transform_module_gamma_lut &&
962		    transform_fn != qcms_transform_module_clut &&
963		    transform_fn != qcms_transform_module_clut_only &&
964		    transform_fn != qcms_transform_module_matrix &&
965		    transform_fn != qcms_transform_module_matrix_translate &&
966		    transform_fn != qcms_transform_module_LAB_to_XYZ &&
967		    transform_fn != qcms_transform_module_XYZ_to_LAB) {
968			assert(0 && "Unsupported transform module");
969			return NULL;
970		}
971                transform->transform_module_fn(transform,src,dest,len);
972                dest = src;
973                src = new_src;
974                transform = transform->next_transform;
975        }
976        // The results end up in the src buffer because of the switching
977        return src;
978}
979
980float* qcms_chain_transform(qcms_profile *in, qcms_profile *out, float *src, float *dest, size_t lutSize)
981{
982	struct qcms_modular_transform *transform_list = qcms_modular_transform_create(in, out);
983	if (transform_list != NULL) {
984		float *lut = qcms_modular_transform_data(transform_list, src, dest, lutSize/3);
985		qcms_modular_transform_release(transform_list);
986		return lut;
987	}
988	return NULL;
989}
990