/* vim: set ts=8 sw=8 noexpandtab: */
//  qcms
//  Copyright (C) 2009 Mozilla Foundation
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
23#include "qcms.h"
24#include "qcmstypes.h"
25
/* Lookup table used for the output transformation.
 * Tables are reference counted so that only one is needed per output
 * profile, instead of duplicating them for every transform. */
struct precache_output
{
	/* Reference count for this table. */
	int ref_count;
	/* We previously used a count of 65536 here but that seems like more
	 * precision than we actually need.  By reducing the size we can
	 * improve startup performance and reduce memory usage. ColorSync on
	 * 10.5 uses 4097 which is perhaps because they use a fixed point
	 * representation where 1. is represented by 0x1000. */
#define PRECACHE_OUTPUT_SIZE 8192
	/* Largest valid index into data[]. */
#define PRECACHE_OUTPUT_MAX (PRECACHE_OUTPUT_SIZE-1)
	/* The table itself: one 8-bit value per entry. */
	uint8_t data[PRECACHE_OUTPUT_SIZE];
};
41
/* ALIGN: declaration attribute requesting 16-byte alignment.
 * Expands to the MSVC __declspec form when _MSC_VER is defined and to the
 * GCC-style __attribute__ form otherwise. */
#ifdef _MSC_VER
#define ALIGN __declspec(align(16))
#else
#define ALIGN __attribute__(( aligned (16) ))
#endif
47
/* Output format descriptor passed by value to the transform functions.
 * NOTE(review): r and b are presumably the positions of the red and blue
 * components inside an output pixel - confirm against the transform code. */
typedef struct _qcms_format_type {
	int r;
	int b;
} qcms_format_type;
52
53struct _qcms_transform {
54	float ALIGN matrix[3][4];
55	float *input_gamma_table_r;
56	float *input_gamma_table_g;
57	float *input_gamma_table_b;
58
59	float *input_clut_table_r;
60	float *input_clut_table_g;
61	float *input_clut_table_b;
62	uint16_t input_clut_table_length;
63	float *r_clut;
64	float *g_clut;
65	float *b_clut;
66	uint16_t grid_size;
67	float *output_clut_table_r;
68	float *output_clut_table_g;
69	float *output_clut_table_b;
70	uint16_t output_clut_table_length;
71
72	float *input_gamma_table_gray;
73
74	float out_gamma_r;
75	float out_gamma_g;
76	float out_gamma_b;
77
78	float out_gamma_gray;
79
80	uint16_t *output_gamma_lut_r;
81	uint16_t *output_gamma_lut_g;
82	uint16_t *output_gamma_lut_b;
83
84	uint16_t *output_gamma_lut_gray;
85
86	size_t output_gamma_lut_r_length;
87	size_t output_gamma_lut_g_length;
88	size_t output_gamma_lut_b_length;
89
90	size_t output_gamma_lut_gray_length;
91
92	struct precache_output *output_table_r;
93	struct precache_output *output_table_g;
94	struct precache_output *output_table_b;
95
96	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
97};
98
/* A 3x3 float matrix with a validity flag. */
struct matrix {
	float m[3][3];
	/* Set when the matrix does not hold a usable result.
	 * NOTE(review): presumably raised by operations such as a failed
	 * inversion - confirm against the code that produces matrices. */
	bool invalid;
};
103
/* Forward declaration so the callback type below can refer to the stage. */
struct qcms_modular_transform;

/* Signature of one stage of a modular transform: the stage description, a
 * source and a destination float buffer, and a length.
 * NOTE(review): the unit of length (floats vs. pixels) is not visible here. */
typedef void (*transform_module_fn_t)(struct qcms_modular_transform *transform, float *src, float *dest, size_t length);
107
108struct qcms_modular_transform {
109	struct matrix matrix;
110	float tx, ty, tz;
111
112	float *input_clut_table_r;
113	float *input_clut_table_g;
114	float *input_clut_table_b;
115	uint16_t input_clut_table_length;
116	float *r_clut;
117	float *g_clut;
118	float *b_clut;
119	uint16_t grid_size;
120	float *output_clut_table_r;
121	float *output_clut_table_g;
122	float *output_clut_table_b;
123	uint16_t output_clut_table_length;
124
125	uint16_t *output_gamma_lut_r;
126	uint16_t *output_gamma_lut_g;
127	uint16_t *output_gamma_lut_b;
128
129	size_t output_gamma_lut_r_length;
130	size_t output_gamma_lut_g_length;
131	size_t output_gamma_lut_b_length;
132
133	transform_module_fn_t transform_module_fn;
134	struct qcms_modular_transform *next_transform;
135};
136
/* Scalar types named after their ICC counterparts. */
/* Signed 32-bit fixed point; the conversion helpers in this header scale it
 * by 65536, i.e. 16 fractional bits. */
typedef int32_t s15Fixed16Number;
/* Unsigned 16-bit integer. */
typedef uint16_t uInt16Number;
/* Unsigned 8-bit integer. */
typedef uint8_t uInt8Number;
140
141struct XYZNumber {
142	s15Fixed16Number X;
143	s15Fixed16Number Y;
144	s15Fixed16Number Z;
145};
146
147struct curveType {
148	uint32_t type;
149	uint32_t count;
150	float parameter[7];
151	uInt16Number data[];
152};
153
154struct lutmABType {
155	uint8_t num_in_channels;
156	uint8_t num_out_channels;
157	// 16 is the upperbound, actual is 0..num_in_channels.
158	uint8_t num_grid_points[16];
159
160	s15Fixed16Number e00;
161	s15Fixed16Number e01;
162	s15Fixed16Number e02;
163	s15Fixed16Number e03;
164	s15Fixed16Number e10;
165	s15Fixed16Number e11;
166	s15Fixed16Number e12;
167	s15Fixed16Number e13;
168	s15Fixed16Number e20;
169	s15Fixed16Number e21;
170	s15Fixed16Number e22;
171	s15Fixed16Number e23;
172
173	// reversed elements (for mBA)
174	bool reversed;
175
176	float *clut_table;
177	struct curveType *a_curves[10];
178	struct curveType *b_curves[10];
179	struct curveType *m_curves[10];
180	float clut_table_data[];
181};
182
183/* should lut8Type and lut16Type be different types? */
184struct lutType { // used by lut8Type/lut16Type (mft2) only
185	uint8_t num_input_channels;
186	uint8_t num_output_channels;
187	uint8_t num_clut_grid_points;
188
189	s15Fixed16Number e00;
190	s15Fixed16Number e01;
191	s15Fixed16Number e02;
192	s15Fixed16Number e10;
193	s15Fixed16Number e11;
194	s15Fixed16Number e12;
195	s15Fixed16Number e20;
196	s15Fixed16Number e21;
197	s15Fixed16Number e22;
198
199	uint16_t num_input_table_entries;
200	uint16_t num_output_table_entries;
201
202	float *input_table;
203	float *clut_table;
204	float *output_table;
205
206	float table_data[];
207};
#if 0
/* this is from an initial idea of having the struct correspond to the data in
 * the file. I decided that it wasn't a good idea.
 */
struct tag_value {
	uint32_t type;
	union {
		struct {
			uint32_t reserved;
			struct {
				s15Fixed16Number X;
				s15Fixed16Number Y;
				s15Fixed16Number Z;
			} XYZNumber;
		} XYZType;
	};
}; // I guess we need to pack this?
#endif
226
/* Four-character signatures packed big-endian into 32 bits; each value is
 * the ASCII encoding of the text shown next to it. */
#define RGB_SIGNATURE  0x52474220 /* 'RGB ' */
#define GRAY_SIGNATURE 0x47524159 /* 'GRAY' */
#define XYZ_SIGNATURE  0x58595A20 /* 'XYZ ' */
#define LAB_SIGNATURE  0x4C616220 /* 'Lab ' */
231
232struct _qcms_profile {
233	char description[64];
234	uint32_t class;
235	uint32_t color_space;
236	uint32_t pcs;
237	qcms_intent rendering_intent;
238	struct XYZNumber redColorant;
239	struct XYZNumber blueColorant;
240	struct XYZNumber greenColorant;
241	struct curveType *redTRC;
242	struct curveType *blueTRC;
243	struct curveType *greenTRC;
244	struct curveType *grayTRC;
245	struct lutType *A2B0;
246	struct lutType *B2A0;
247	struct lutmABType *mAB;
248	struct lutmABType *mBA;
249	struct matrix chromaticAdaption;
250
251	struct precache_output *output_table_r;
252	struct precache_output *output_table_g;
253	struct precache_output *output_table_b;
254};
255
/* When building with MSVC, map 'inline' onto the compiler's _inline
 * spelling so the static inline helpers below are accepted. */
#ifdef _MSC_VER
#define inline _inline
#endif
259
260/* produces the nearest float to 'a' with a maximum error
261 * of 1/1024 which happens for large values like 0x40000040 */
262static inline float s15Fixed16Number_to_float(s15Fixed16Number a)
263{
264	return ((int32_t)a)/65536.f;
265}
266
267static inline s15Fixed16Number double_to_s15Fixed16Number(double v)
268{
269	return (int32_t)(v*65536);
270}
271
272static inline float uInt8Number_to_float(uInt8Number a)
273{
274	return ((int32_t)a)/255.f;
275}
276
277static inline float uInt16Number_to_float(uInt16Number a)
278{
279	return ((int32_t)a)/65535.f;
280}
281
282
283void precache_release(struct precache_output *p);
284qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries);
285
286void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
287                                          unsigned char *src,
288                                          unsigned char *dest,
289                                          size_t length,
290                                          qcms_format_type output_format);
291void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
292                                          unsigned char *src,
293                                          unsigned char *dest,
294                                          size_t length,
295                                          qcms_format_type output_format);
296void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
297                                          unsigned char *src,
298                                          unsigned char *dest,
299                                          size_t length,
300                                          qcms_format_type output_format);
301void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
302                                          unsigned char *src,
303                                          unsigned char *dest,
304                                          size_t length,
305                                          qcms_format_type output_format);
306
307extern qcms_bool qcms_supports_iccv4;
308
309
/* Atomic increment/decrement of an integer lvalue 'x'.
 * Both macros evaluate to the value of 'x' after the update.
 * The argument is parenthesised before its address is taken so that a
 * non-lvalue argument such as 'a + b' is rejected by the compiler instead
 * of being parsed as '&a + b'. */
#ifdef _MSC_VER

long __cdecl _InterlockedIncrement(long volatile *);
long __cdecl _InterlockedDecrement(long volatile *);
#pragma intrinsic(_InterlockedIncrement)
#pragma intrinsic(_InterlockedDecrement)

#define qcms_atomic_increment(x) _InterlockedIncrement((long volatile *)&(x))
#define qcms_atomic_decrement(x) _InterlockedDecrement((long volatile *)&(x))

#else

#define qcms_atomic_increment(x) __sync_add_and_fetch(&(x), 1)
#define qcms_atomic_decrement(x) __sync_sub_and_fetch(&(x), 1)

#endif
326