1// =================================================================================================
2// ADOBE SYSTEMS INCORPORATED
3// Copyright 2006 Adobe Systems Incorporated
4// All Rights Reserved
5//
6// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7// of the Adobe license agreement accompanying it.
8// =================================================================================================
9
10package com.adobe.xmp.impl;
11
12
13import com.adobe.xmp.XMPConst;
14
15
16/**
17 * Utility functions for the XMPToolkit implementation.
18 *
19 * @since 06.06.2006
20 */
21public class Utils implements XMPConst
22{
	/**
	 * Number of '-' delimiters expected in a textual UUID;
	 * {@link #checkUUIDFormat(String)} compares its hyphen count against this value.
	 */
	public static final int UUID_SEGMENT_COUNT = 4;
	/**
	 * Expected length of a textual UUID: 32 characters plus the
	 * {@link #UUID_SEGMENT_COUNT} delimiters, i.e. 36.
	 */
	public static final int UUID_LENGTH = 32 + UUID_SEGMENT_COUNT;
	/**
	 * Lookup table of the XML name start chars, indexed by the char value (<= 0xFF).
	 * Allocated and filled by {@link #initCharTables()}.
	 */
	private  static boolean[] xmlNameStartChars;
	/**
	 * Lookup table of the XML name chars, indexed by the char value (<= 0xFF).
	 * Allocated and filled by {@link #initCharTables()}.
	 */
	private static boolean[] xmlNameChars;
	/** Fills both char tables once when the class is initialized. */
	static
	{
		initCharTables();
	}
36
37
38	/**
39	 * Private constructor
40	 */
41	private Utils()
42	{
43		// EMPTY
44	}
45
46
47	/**
48	 * Normalize an xml:lang value so that comparisons are effectively case
49	 * insensitive as required by RFC 3066 (which superceeds RFC 1766). The
50	 * normalization rules:
51	 * <ul>
52	 * <li> The primary subtag is lower case, the suggested practice of ISO 639.
53	 * <li> All 2 letter secondary subtags are upper case, the suggested
54	 * practice of ISO 3166.
55	 * <li> All other subtags are lower case.
56	 * </ul>
57	 *
58	 * @param value
59	 *            raw value
60	 * @return Returns the normalized value.
61	 */
62	public static String normalizeLangValue(String value)
63	{
64		// don't normalize x-default
65		if (XMPConst.X_DEFAULT.equals(value))
66		{
67			return value;
68		}
69
70		int subTag = 1;
71		StringBuffer buffer = new StringBuffer();
72
73		for (int i = 0; i < value.length(); i++)
74		{
75			switch (value.charAt(i))
76			{
77			case '-':
78			case '_':
79				// move to next subtag and convert underscore to hyphen
80				buffer.append('-');
81				subTag++;
82				break;
83			case ' ':
84				// remove spaces
85				break;
86			default:
87				// convert second subtag to uppercase, all other to lowercase
88				if (subTag != 2)
89				{
90					buffer.append(Character.toLowerCase(value.charAt(i)));
91				}
92				else
93				{
94					buffer.append(Character.toUpperCase(value.charAt(i)));
95				}
96			}
97
98		}
99		return buffer.toString();
100	}
101
102
103	/**
104	 * Split the name and value parts for field and qualifier selectors:
105	 * <ul>
106	 * <li>[qualName="value"] - An element in an array of structs, chosen by a
107	 * field value.
108	 * <li>[?qualName="value"] - An element in an array, chosen by a qualifier
109	 * value.
110	 * </ul>
111	 * The value portion is a string quoted by ''' or '"'. The value may contain
112	 * any character including a doubled quoting character. The value may be
113	 * empty. <em>Note:</em> It is assumed that the expression is formal
114	 * correct
115	 *
116	 * @param selector
117	 *            the selector
118	 * @return Returns an array where the first entry contains the name and the
119	 *         second the value.
120	 */
121	static String[] splitNameAndValue(String selector)
122	{
123		// get the name
124		int eq = selector.indexOf('=');
125		int pos = 1;
126		if (selector.charAt(pos) == '?')
127		{
128			pos++;
129		}
130		String name = selector.substring(pos, eq);
131
132		// get the value
133		pos = eq + 1;
134		char quote = selector.charAt(pos);
135		pos++;
136		int end = selector.length() - 2; // quote and ]
137		StringBuffer value = new StringBuffer(end - eq);
138		while (pos < end)
139		{
140			value.append(selector.charAt(pos));
141			pos++;
142			if (selector.charAt(pos) == quote)
143			{
144				// skip one quote in value
145				pos++;
146			}
147		}
148		return new String[] { name, value.toString() };
149	}
150
151
152	/**
153	 *
154	 * @param schema
155	 *            a schema namespace
156	 * @param prop
157	 *            an XMP Property
158	 * @return Returns true if the property is defined as &quot;Internal
159	 *         Property&quot;, see XMP Specification.
160	 */
161	static boolean isInternalProperty(String schema, String prop)
162	{
163		boolean isInternal = false;
164
165		if (NS_DC.equals(schema))
166		{
167			if ("dc:format".equals(prop) || "dc:language".equals(prop))
168			{
169				isInternal = true;
170			}
171		}
172		else if (NS_XMP.equals(schema))
173		{
174			if ("xmp:BaseURL".equals(prop) || "xmp:CreatorTool".equals(prop)
175					|| "xmp:Format".equals(prop) || "xmp:Locale".equals(prop)
176					|| "xmp:MetadataDate".equals(prop) || "xmp:ModifyDate".equals(prop))
177			{
178				isInternal = true;
179			}
180		}
181		else if (NS_PDF.equals(schema))
182		{
183			if ("pdf:BaseURL".equals(prop) || "pdf:Creator".equals(prop)
184					|| "pdf:ModDate".equals(prop) || "pdf:PDFVersion".equals(prop)
185					|| "pdf:Producer".equals(prop))
186			{
187				isInternal = true;
188			}
189		}
190		else if (NS_TIFF.equals(schema))
191		{
192			isInternal = true;
193			if ("tiff:ImageDescription".equals(prop) || "tiff:Artist".equals(prop)
194					|| "tiff:Copyright".equals(prop))
195			{
196				isInternal = false;
197			}
198		}
199		else if (NS_EXIF.equals(schema))
200		{
201			isInternal = true;
202			if ("exif:UserComment".equals(prop))
203			{
204				isInternal = false;
205			}
206		}
207		else if (NS_EXIF_AUX.equals(schema))
208		{
209			isInternal = true;
210		}
211		else if (NS_PHOTOSHOP.equals(schema))
212		{
213			if ("photoshop:ICCProfile".equals(prop))
214			{
215				isInternal = true;
216			}
217		}
218		else if (NS_CAMERARAW.equals(schema))
219		{
220			if ("crs:Version".equals(prop) || "crs:RawFileName".equals(prop)
221					|| "crs:ToneCurveName".equals(prop))
222			{
223				isInternal = true;
224			}
225		}
226		else if (NS_ADOBESTOCKPHOTO.equals(schema))
227		{
228			isInternal = true;
229		}
230		else if (NS_XMP_MM.equals(schema))
231		{
232			isInternal = true;
233		}
234		else if (TYPE_TEXT.equals(schema))
235		{
236			isInternal = true;
237		}
238		else if (TYPE_PAGEDFILE.equals(schema))
239		{
240			isInternal = true;
241		}
242		else if (TYPE_GRAPHICS.equals(schema))
243		{
244			isInternal = true;
245		}
246		else if (TYPE_IMAGE.equals(schema))
247		{
248			isInternal = true;
249		}
250		else if (TYPE_FONT.equals(schema))
251		{
252			isInternal = true;
253		}
254
255		return isInternal;
256	}
257
258
259	/**
260	 * Check some requirements for an UUID:
261	 * <ul>
262	 * <li>Length of the UUID is 32</li>
263	 * <li>The Delimiter count is 4 and all the 4 delimiter are on their right
264	 * position (8,13,18,23)</li>
265	 * </ul>
266	 *
267	 *
268	 * @param uuid uuid to test
269	 * @return true - this is a well formed UUID, false - UUID has not the expected format
270	 */
271
272	static boolean checkUUIDFormat(String uuid)
273	{
274		boolean result = true;
275		int delimCnt = 0;
276		int delimPos = 0;
277
278		if (uuid == null)
279		{
280			return false;
281		}
282
283		for (delimPos = 0; delimPos < uuid.length(); delimPos++)
284		{
285			if (uuid.charAt(delimPos) == '-')
286			{
287				delimCnt++;
288				result = result  &&
289					(delimPos == 8 || delimPos == 13 || delimPos == 18 || delimPos == 23);
290			}
291		}
292
293		return result && UUID_SEGMENT_COUNT == delimCnt && UUID_LENGTH == delimPos;
294	}
295
296
297	/**
298	 * Simple check for valid XMLNames. Within ASCII range<br>
299	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
300	 * are accepted, above all characters (which is not entirely
301	 * correct according to the XML Spec.
302	 *
303	 * @param name an XML Name
304	 * @return Return <code>true</code> if the name is correct.
305	 */
306	public static boolean isXMLName(String name)
307	{
308		if (name.length() > 0  &&  !isNameStartChar(name.charAt(0)))
309		{
310			return false;
311		}
312		for (int i = 1; i < name.length(); i++)
313		{
314			if (!isNameChar(name.charAt(i)))
315			{
316				return false;
317			}
318		}
319		return true;
320	}
321
322
323	/**
324	 * Checks if the value is a legal "unqualified" XML name, as
325	 * defined in the XML Namespaces proposed recommendation.
326	 * These are XML names, except that they must not contain a colon.
327	 * @param name the value to check
328	 * @return Returns true if the name is a valid "unqualified" XML name.
329	 */
330	public static boolean isXMLNameNS(String name)
331	{
332		if (name.length() > 0  &&  (!isNameStartChar(name.charAt(0))  ||  name.charAt(0) == ':'))
333		{
334			return false;
335		}
336		for (int i = 1; i < name.length(); i++)
337		{
338			if (!isNameChar(name.charAt(i))  ||  name.charAt(i) == ':')
339			{
340				return false;
341			}
342		}
343		return true;
344	}
345
346
347	/**
348	 * @param c  a char
349	 * @return Returns true if the char is an ASCII control char.
350	 */
351	static boolean isControlChar(char c)
352	{
353		return (c <= 0x1F  ||  c == 0x7F)  &&
354				c != 0x09  &&  c != 0x0A  &&  c != 0x0D;
355	}
356
357
358	/**
359	 * Serializes the node value in XML encoding. Its used for tag bodies and
360	 * attributes.<br>
361	 * <em>Note:</em> The attribute is always limited by quotes,
362	 * thats why <code>&amp;apos;</code> is never serialized.<br>
363	 * <em>Note:</em> Control chars are written unescaped, but if the user uses others than tab, LF
364	 * and CR the resulting XML will become invalid.
365	 * @param value a string
366	 * @param forAttribute flag if string is attribute value (need to additional escape quotes)
367	 * @param escapeWhitespaces Decides if LF, CR and TAB are escaped.
368	 * @return Returns the value ready for XML output.
369	 */
370	public static String escapeXML(String value, boolean forAttribute, boolean escapeWhitespaces)
371	{
372		// quick check if character are contained that need special treatment
373		boolean needsEscaping = false;
374		for (int i = 0; i < value.length (); i++)
375        {
376            char c = value.charAt (i);
377			if (
378				 c == '<'  ||  c == '>'  ||  c == '&'  ||							    // XML chars
379				(escapeWhitespaces  &&  (c == '\t'  ||  c == '\n'  ||  c == '\r'))  ||
380				(forAttribute  &&  c == '"'))
381			{
382				needsEscaping = true;
383				break;
384			}
385        }
386
387		if (!needsEscaping)
388		{
389			// fast path
390			return value;
391		}
392		else
393		{
394			// slow path with escaping
395			StringBuffer buffer = new StringBuffer(value.length() * 4 / 3);
396	        for (int i = 0; i < value.length (); i++)
397	        {
398	            char c = value.charAt (i);
399	            if (!(escapeWhitespaces  &&  (c == '\t'  ||  c == '\n'  ||  c == '\r')))
400	            {
401	            	switch (c)
402		            {
403	            		// we do what "Canonical XML" expects
404	            		// AUDIT: &apos; not serialized as only outer qoutes are used
405		              	case '<':	buffer.append("&lt;"); continue;
406		              	case '>':	buffer.append("&gt;"); continue;
407		              	case '&':	buffer.append("&amp;"); continue;
408		              	case '"': 	buffer.append(forAttribute ? "&quot;" : "\""); continue;
409		              	default:	buffer.append(c); continue;
410		            }
411		        }
412	            else
413	            {
414	            	// write control chars escaped,
415	            	// if there are others than tab, LF and CR the xml will become invalid.
416	            	buffer.append("&#x");
417	            	buffer.append(Integer.toHexString(c).toUpperCase());
418	            	buffer.append(';');
419	            }
420	        }
421	        return buffer.toString();
422		}
423	}
424
425
426	/**
427	 * Replaces the ASCII control chars with a space.
428	 *
429	 * @param value
430	 *            a node value
431	 * @return Returns the cleaned up value
432	 */
433	static String removeControlChars(String value)
434	{
435		StringBuffer buffer = new StringBuffer(value);
436		for (int i = 0; i < buffer.length(); i++)
437		{
438			if (isControlChar(buffer.charAt(i)))
439			{
440				buffer.setCharAt(i, ' ');
441			}
442		}
443		return buffer.toString();
444	}
445
446
447	/**
448	 * Simple check if a character is a valid XML start name char.
449	 * Within ASCII range<br>
450	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
451	 * are accepted, above all characters (which is not entirely
452	 * correct according to the XML Spec)
453	 *
454	 * @param ch a character
455	 * @return Returns true if the character is a valid first char of an XML name.
456	 */
457	private static boolean isNameStartChar(char ch)
458	{
459		return ch > 0xFF  ||  xmlNameStartChars[ch];
460	}
461
462
463	/**
464	 * Simple check if a character is a valid XML name char
465	 * (every char except the first one).
466	 * Within ASCII range<br>
467	 * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6]<br>
468	 * are accepted, above all characters (which is not entirely
469	 * correct according to the XML Spec)
470	 *
471	 * @param ch a character
472	 * @return Returns true if the character is a valid char of an XML name.
473	 */
474	private static boolean isNameChar(char ch)
475	{
476		return ch > 0xFF  ||  xmlNameChars[ch];
477	}
478
479
480	/**
481	 * Initializes the char tables for later use.
482	 */
483	private static void initCharTables()
484	{
485		xmlNameChars = new boolean[0x0100];
486		xmlNameStartChars = new boolean[0x0100];
487
488		for (char ch = 0; ch < xmlNameChars.length; ch++)
489		{
490			xmlNameStartChars[ch] =
491				('a' <= ch  &&  ch <= 'z')  ||
492				('A' <= ch  &&  ch <= 'Z')  ||
493				ch == ':'  ||
494				ch == '_'  ||
495				(0xC0 <= ch  &&  ch <= 0xD6)  ||
496				(0xD8 <= ch  &&  ch <= 0xF6);
497
498			xmlNameChars[ch] =
499				('a' <= ch  &&  ch <= 'z')  ||
500				('A' <= ch  &&  ch <= 'Z')  ||
501				('0' <= ch  &&  ch <= '9')  ||
502				ch == ':'  ||
503				ch == '_'  ||
504				ch == '-'  ||
505				ch == '.'  ||
506				ch == 0xB7  ||
507				(0xC0 <= ch  &&  ch <= 0xD6)  ||
508				(0xD8 <= ch  &&  ch <= 0xF6);
509		}
510	}
511}