1// =================================================================================================
2// ADOBE SYSTEMS INCORPORATED
3// Copyright 2006 Adobe Systems Incorporated
4// All Rights Reserved
5//
6// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7// of the Adobe license agreement accompanying it.
8// =================================================================================================
9
10package com.adobe.xmp.impl;
11
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import com.adobe.xmp.XMPConst;
import com.adobe.xmp.XMPDateTime;
import com.adobe.xmp.XMPError;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.adobe.xmp.XMPUtils;
import com.adobe.xmp.impl.xpath.XMPPath;
import com.adobe.xmp.impl.xpath.XMPPathParser;
import com.adobe.xmp.options.ParseOptions;
import com.adobe.xmp.options.PropertyOptions;
import com.adobe.xmp.properties.XMPAliasInfo;
29
30/**
31 * @since   Aug 18, 2006
32 */
33public class XMPNormalizer
34{
	/**
	 * Caches the expected array form of the known Dublin Core array properties,
	 * keyed by the qualified property name (e.g. "dc:creator"); the values are
	 * <code>PropertyOptions</code> instances.
	 */
	private static Map dcArrayForms;
	/** Fills the DC array form cache once when the class is initialized. */
	static
	{
		initDCArrays();
	}
42
43
	/**
	 * Hidden constructor; the class only offers static methods and is never instantiated.
	 */
	private XMPNormalizer()
	{
		// EMPTY
	}
51
52
53	/**
54	 * Normalizes a raw parsed XMPMeta-Object
55	 * @param xmp the raw metadata object
56	 * @param options the parsing options
57	 * @return Returns the normalized metadata object
58	 * @throws XMPException Collects all severe processing errors.
59	 */
60	static XMPMeta process(XMPMetaImpl xmp, ParseOptions options) throws XMPException
61	{
62		XMPNode tree = xmp.getRoot();
63
64		touchUpDataModel(xmp);
65		moveExplicitAliases(tree, options);
66
67		tweakOldXMP(tree);
68
69		deleteEmptySchemas(tree);
70
71		return xmp;
72	}
73
74
75	/**
76	 * Tweak old XMP: Move an instance ID from rdf:about to the
77	 * <em>xmpMM:InstanceID</em> property. An old instance ID usually looks
78	 * like &quot;uuid:bac965c4-9d87-11d9-9a30-000d936b79c4&quot;, plus InDesign
79	 * 3.0 wrote them like &quot;bac965c4-9d87-11d9-9a30-000d936b79c4&quot;. If
80	 * the name looks like a UUID simply move it to <em>xmpMM:InstanceID</em>,
81	 * don't worry about any existing <em>xmpMM:InstanceID</em>. Both will
82	 * only be present when a newer file with the <em>xmpMM:InstanceID</em>
83	 * property is updated by an old app that uses <em>rdf:about</em>.
84	 *
85	 * @param tree the root of the metadata tree
86	 * @throws XMPException Thrown if tweaking fails.
87	 */
88	private static void tweakOldXMP(XMPNode tree) throws XMPException
89	{
90		if (tree.getName() != null  &&  tree.getName().length() >= Utils.UUID_LENGTH)
91		{
92			String nameStr = tree.getName().toLowerCase();
93			if (nameStr.startsWith("uuid:"))
94			{
95				nameStr = nameStr.substring(5);
96			}
97
98			if (Utils.checkUUIDFormat(nameStr))
99			{
100				// move UUID to xmpMM:InstanceID and remove it from the root node
101				XMPPath path = XMPPathParser.expandXPath(XMPConst.NS_XMP_MM, "InstanceID");
102				XMPNode idNode = XMPNodeUtils.findNode (tree, path, true, null);
103				if (idNode != null)
104				{
105					idNode.setOptions(null);	// Clobber any existing xmpMM:InstanceID.
106					idNode.setValue("uuid:" + nameStr);
107					idNode.removeChildren();
108					idNode.removeQualifiers();
109					tree.setName(null);
110				}
111				else
112				{
113					throw new XMPException("Failure creating xmpMM:InstanceID",
114							XMPError.INTERNALFAILURE);
115				}
116			}
117		}
118	}
119
120
121	/**
122	 * Visit all schemas to do general fixes and handle special cases.
123	 *
124	 * @param xmp the metadata object implementation
125	 * @throws XMPException Thrown if the normalisation fails.
126	 */
127	private static void touchUpDataModel(XMPMetaImpl xmp) throws XMPException
128	{
129		// make sure the DC schema is existing, because it might be needed within the normalization
130		// if not touched it will be removed by removeEmptySchemas
131		XMPNodeUtils.findSchemaNode(xmp.getRoot(), XMPConst.NS_DC, true);
132
133		// Do the special case fixes within each schema.
134		for (Iterator it = xmp.getRoot().iterateChildren(); it.hasNext();)
135		{
136			XMPNode currSchema = (XMPNode) it.next();
137			if (XMPConst.NS_DC.equals(currSchema.getName()))
138			{
139				normalizeDCArrays(currSchema);
140			}
141			else if (XMPConst.NS_EXIF.equals(currSchema.getName()))
142			{
143				// Do a special case fix for exif:GPSTimeStamp.
144				fixGPSTimeStamp(currSchema);
145				XMPNode arrayNode = XMPNodeUtils.findChildNode(currSchema, "exif:UserComment",
146						false);
147				if (arrayNode != null)
148				{
149					repairAltText(arrayNode);
150				}
151			}
152			else if (XMPConst.NS_DM.equals(currSchema.getName()))
153			{
154				// Do a special case migration of xmpDM:copyright to
155				// dc:rights['x-default'].
156				XMPNode dmCopyright = XMPNodeUtils.findChildNode(currSchema, "xmpDM:copyright",
157						false);
158				if (dmCopyright != null)
159				{
160					migrateAudioCopyright(xmp, dmCopyright);
161				}
162			}
163			else if (XMPConst.NS_XMP_RIGHTS.equals(currSchema.getName()))
164			{
165				XMPNode arrayNode = XMPNodeUtils.findChildNode(currSchema, "xmpRights:UsageTerms",
166						false);
167				if (arrayNode != null)
168				{
169					repairAltText(arrayNode);
170				}
171			}
172		}
173	}
174
175
176	/**
177	 * Undo the denormalization performed by the XMP used in Acrobat 5.<br>
178	 * If a Dublin Core array had only one item, it was serialized as a simple
179	 * property. <br>
180	 * The <code>xml:lang</code> attribute was dropped from an
181	 * <code>alt-text</code> item if the language was <code>x-default</code>.
182	 *
183	 * @param dcSchema the DC schema node
184	 * @throws XMPException Thrown if normalization fails
185	 */
186	private static void normalizeDCArrays(XMPNode dcSchema) throws XMPException
187	{
188		for (int i = 1; i <= dcSchema.getChildrenLength(); i++)
189		{
190			XMPNode currProp = dcSchema.getChild(i);
191
192			PropertyOptions arrayForm = (PropertyOptions) dcArrayForms.get(currProp.getName());
193			if (arrayForm == null)
194			{
195				continue;
196			}
197			else if (currProp.getOptions().isSimple())
198			{
199				// create a new array and add the current property as child,
200				// if it was formerly simple
201				XMPNode newArray = new XMPNode(currProp.getName(), arrayForm);
202				currProp.setName(XMPConst.ARRAY_ITEM_NAME);
203				newArray.addChild(currProp);
204				dcSchema.replaceChild(i, newArray);
205
206				// fix language alternatives
207				if (arrayForm.isArrayAltText()  &&  !currProp.getOptions().getHasLanguage())
208				{
209					XMPNode newLang = new XMPNode(XMPConst.XML_LANG, XMPConst.X_DEFAULT, null);
210					currProp.addQualifier(newLang);
211				}
212			}
213			else
214			{
215				// clear array options and add corrected array form if it has been an array before
216				currProp.getOptions().setOption(
217					PropertyOptions.ARRAY  |
218					PropertyOptions.ARRAY_ORDERED  |
219					PropertyOptions.ARRAY_ALTERNATE  |
220					PropertyOptions.ARRAY_ALT_TEXT,
221					false);
222				currProp.getOptions().mergeWith(arrayForm);
223
224				if (arrayForm.isArrayAltText())
225				{
226					// applying for "dc:description", "dc:rights", "dc:title"
227					repairAltText(currProp);
228				}
229			}
230
231		}
232	}
233
234
235	/**
236	 * Make sure that the array is well-formed AltText. Each item must be simple
237	 * and have an "xml:lang" qualifier. If repairs are needed, keep simple
238	 * non-empty items by adding the "xml:lang" with value "x-repair".
239	 * @param arrayNode the property node of the array to repair.
240	 * @throws XMPException Forwards unexpected exceptions.
241	 */
242	private static void repairAltText(XMPNode arrayNode) throws XMPException
243	{
244		if (arrayNode == null  ||
245			!arrayNode.getOptions().isArray())
246		{
247			// Already OK or not even an array.
248			return;
249		}
250
251		// fix options
252		arrayNode.getOptions().setArrayOrdered(true).setArrayAlternate(true).setArrayAltText(true);
253
254		for (Iterator it = arrayNode.iterateChildren(); it.hasNext();)
255		{
256			XMPNode currChild = (XMPNode) it.next();
257			if (currChild.getOptions().isCompositeProperty())
258			{
259				// Delete non-simple children.
260				it.remove();
261			}
262			else if (!currChild.getOptions().getHasLanguage())
263			{
264				String childValue = currChild.getValue();
265				if (childValue == null  ||  childValue.length() == 0)
266				{
267					// Delete empty valued children that have no xml:lang.
268					it.remove();
269				}
270				else
271				{
272					// Add an xml:lang qualifier with the value "x-repair".
273					XMPNode repairLang = new XMPNode(XMPConst.XML_LANG, "x-repair", null);
274					currChild.addQualifier(repairLang);
275				}
276			}
277		}
278	}
279
280
	/**
	 * Visit all of the top level nodes looking for aliases. If there is
	 * no base, transplant the alias subtree. If there is a base and strict
	 * aliasing is on, make sure the alias and base subtrees match.
	 * <p>
	 * Whenever the alias is known to the schema registry, the alias property is
	 * removed from its original schema (it is either moved to the base or dropped
	 * in favour of the existing base). An alias that is unknown to the registry
	 * only loses its alias flag and stays where it is.
	 *
	 * @param tree the root of the metadata tree
	 * @param options the parsing options
	 * @throws XMPException Forwards XMP errors, e.g. a mismatch between alias and
	 *         base when strict aliasing is requested.
	 */
	private static void moveExplicitAliases(XMPNode tree, ParseOptions options)
			throws XMPException
	{
		if (!tree.getHasAliases())
		{
			// no alias has been flagged in this tree, nothing to do
			return;
		}
		tree.setHasAliases(false);

		boolean strictAliasing = options.getStrictAliasing();

		// NOTE(review): the schemas are iterated through getUnmodifiableChildren(),
		// presumably because the base schema lookup below may add new schema nodes
		// to the root while this loop is running -- confirm against XMPNode.
		for (Iterator schemaIt = tree.getUnmodifiableChildren().iterator(); schemaIt.hasNext();)
		{
			XMPNode currSchema = (XMPNode) schemaIt.next();
			if (!currSchema.getHasAliases())
			{
				continue;
			}

			for (Iterator propertyIt = currSchema.iterateChildren(); propertyIt.hasNext();)
			{
				XMPNode currProp = (XMPNode) propertyIt.next();

				if (!currProp.isAlias())
				{
					continue;
				}

				// the flag is cleared in any case, even if the alias is not registered
				currProp.setAlias(false);

				// Find the base path, look for the base schema and root node.
				XMPAliasInfo info = XMPMetaFactory.getSchemaRegistry()
						.findAlias(currProp.getName());
				if (info != null)
				{
					// find or create schema
					XMPNode baseSchema = XMPNodeUtils.findSchemaNode(tree, info
							.getNamespace(), null, true);
					baseSchema.setImplicit(false);

					XMPNode baseNode = XMPNodeUtils
							.findChildNode(baseSchema,
								info.getPrefix() + info.getPropName(), false);
					if (baseNode == null)
					{
						// Case 1: there is no base property yet, the alias becomes the base.
						if (info.getAliasForm().isSimple())
						{
							// A top-to-top alias, transplant the property.
							// change the alias property name to the base name
							String qname = info.getPrefix() + info.getPropName();
							currProp.setName(qname);
							baseSchema.addChild(currProp);
							// remove the alias property
							propertyIt.remove();
						}
						else
						{
							// An alias to an array item,
							// create the array and transplant the property.
							baseNode = new XMPNode(info.getPrefix() + info.getPropName(), info
									.getAliasForm().toPropertyOptions());
							baseSchema.addChild(baseNode);
							transplantArrayItemAlias (propertyIt, currProp, baseNode);
						}

					}
					else if (info.getAliasForm().isSimple())
					{
						// Case 2: The base node does exist and this is a top-to-top alias.
						// Check for conflicts if strict aliasing is on.
						// Remove and delete the alias subtree.
						if (strictAliasing)
						{
							compareAliasedSubtrees (currProp, baseNode, true);
						}

						propertyIt.remove();
					}
					else
					{
						// Case 3: This is an alias to an array item and the array exists.
						// Look for the aliased item.
						// Then transplant or check & delete as appropriate.

						XMPNode  itemNode = null;
						if (info.getAliasForm().isArrayAltText())
						{
							// for an alt-text array the alias stands for the x-default item
							int xdIndex = XMPNodeUtils.lookupLanguageItem(baseNode,
									XMPConst.X_DEFAULT);
							if (xdIndex != -1)
							{
								itemNode = baseNode.getChild(xdIndex);
							}
						}
						else if (baseNode.hasChildren())
						{
							// for all other arrays the alias stands for the first item
							itemNode = baseNode.getChild(1);
						}

						if (itemNode == null)
						{
							// the aliased item is missing, the alias becomes that item
							transplantArrayItemAlias (propertyIt, currProp, baseNode);
						}
						else
						{
							// the aliased item exists, it wins over the alias
							if (strictAliasing)
							{
								compareAliasedSubtrees (currProp, itemNode, true);
							}

							propertyIt.remove();
						}
					}
				}
			}
			// all aliases of this schema have been processed
			currSchema.setHasAliases(false);
		}
	}
408
409
410	/**
411	 * Moves an alias node of array form to another schema into an array
412	 * @param propertyIt the property iterator of the old schema (used to delete the property)
413	 * @param childNode the node to be moved
414	 * @param baseArray the base array for the array item
415	 * @throws XMPException Forwards XMP errors
416	 */
417	private static void transplantArrayItemAlias(Iterator propertyIt, XMPNode childNode,
418			XMPNode baseArray) throws XMPException
419	{
420		if (baseArray.getOptions().isArrayAltText())
421		{
422			if (childNode.getOptions().getHasLanguage())
423			{
424				throw new XMPException("Alias to x-default already has a language qualifier",
425						XMPError.BADXMP);
426			}
427
428			XMPNode langQual = new XMPNode(XMPConst.XML_LANG, XMPConst.X_DEFAULT, null);
429			childNode.addQualifier(langQual);
430		}
431
432		propertyIt.remove();
433		childNode.setName(XMPConst.ARRAY_ITEM_NAME);
434		baseArray.addChild(childNode);
435	}
436
437
438	/**
439	 * Fixes the GPS Timestamp in EXIF.
440	 * @param exifSchema the EXIF schema node
441	 * @throws XMPException Thrown if the date conversion fails.
442	 */
443	private static void fixGPSTimeStamp(XMPNode exifSchema)
444			throws XMPException
445	{
446		// Note: if dates are not found the convert-methods throws an exceptions,
447		// 		 and this methods returns.
448		XMPNode gpsDateTime = XMPNodeUtils.findChildNode(exifSchema, "exif:GPSTimeStamp", false);
449		if (gpsDateTime == null)
450		{
451			return;
452		}
453
454		try
455		{
456			XMPDateTime binGPSStamp;
457			XMPDateTime binOtherDate;
458
459			binGPSStamp = XMPUtils.convertToDate(gpsDateTime.getValue());
460			if (binGPSStamp.getYear() != 0  ||
461				binGPSStamp.getMonth() != 0  ||
462				binGPSStamp.getDay() != 0)
463			{
464				return;
465			}
466
467			XMPNode otherDate = XMPNodeUtils.findChildNode(exifSchema, "exif:DateTimeOriginal",
468					false);
469			if (otherDate == null)
470			{
471				otherDate = XMPNodeUtils.findChildNode(exifSchema, "exif:DateTimeDigitized", false);
472			}
473
474			binOtherDate = XMPUtils.convertToDate(otherDate.getValue());
475			Calendar cal = binGPSStamp.getCalendar();
476			cal.set(Calendar.YEAR, binOtherDate.getYear());
477			cal.set(Calendar.MONTH, binOtherDate.getMonth());
478			cal.set(Calendar.DAY_OF_MONTH, binOtherDate.getDay());
479			binGPSStamp = new XMPDateTimeImpl(cal);
480			gpsDateTime.setValue(XMPUtils.convertFromDate (binGPSStamp));
481		}
482		catch (XMPException e)
483		{
484			// Don't let a missing or bad date stop other things.
485			return;
486		}
487	}
488
489
490
491	/**
492	 * Remove all empty schemas from the metadata tree that were generated during the rdf parsing.
493	 * @param tree the root of the metadata tree
494	 */
495	private static void deleteEmptySchemas(XMPNode tree)
496	{
497		// Delete empty schema nodes. Do this last, other cleanup can make empty
498		// schema.
499
500		for (Iterator it = tree.iterateChildren(); it.hasNext();)
501		{
502			XMPNode schema = (XMPNode) it.next();
503			if (!schema.hasChildren())
504			{
505				it.remove();
506			}
507		}
508	}
509
510
511	/**
512	 * The outermost call is special. The names almost certainly differ. The
513	 * qualifiers (and hence options) will differ for an alias to the x-default
514	 * item of a langAlt array.
515	 *
516	 * @param aliasNode the alias node
517	 * @param baseNode the base node of the alias
518	 * @param outerCall marks the outer call of the recursion
519	 * @throws XMPException Forwards XMP errors
520	 */
521	private static void compareAliasedSubtrees(XMPNode aliasNode, XMPNode baseNode,
522			boolean outerCall) throws XMPException
523	{
524		if (!aliasNode.getValue().equals(baseNode.getValue())  ||
525			aliasNode.getChildrenLength() != baseNode.getChildrenLength())
526		{
527			throw new XMPException("Mismatch between alias and base nodes", XMPError.BADXMP);
528		}
529
530		if (
531				!outerCall  &&
532				(!aliasNode.getName().equals(baseNode.getName())  ||
533				 !aliasNode.getOptions().equals(baseNode.getOptions())  ||
534				 aliasNode.getQualifierLength() != baseNode.getQualifierLength())
535		   )
536	    {
537			throw new XMPException("Mismatch between alias and base nodes",
538				XMPError.BADXMP);
539		}
540
541		for (Iterator an = aliasNode.iterateChildren(),
542					  bn = baseNode.iterateChildren();
543			 an.hasNext() && bn.hasNext();)
544		{
545			XMPNode aliasChild = (XMPNode) an.next();
546			XMPNode baseChild =  (XMPNode) bn.next();
547			compareAliasedSubtrees (aliasChild, baseChild, false);
548		}
549
550
551		for (Iterator an = aliasNode.iterateQualifier(),
552					  bn = baseNode.iterateQualifier();
553			 an.hasNext() && bn.hasNext();)
554		{
555			XMPNode aliasQual = (XMPNode) an.next();
556			XMPNode baseQual =  (XMPNode) bn.next();
557			compareAliasedSubtrees (aliasQual, baseQual, false);
558		}
559	}
560
561
562	/**
563	 * The initial support for WAV files mapped a legacy ID3 audio copyright
564	 * into a new xmpDM:copyright property. This is special case code to migrate
565	 * that into dc:rights['x-default']. The rules:
566	 *
567	 * <pre>
568	 * 1. If there is no dc:rights array, or an empty array -
569	 *    Create one with dc:rights['x-default'] set from double linefeed and xmpDM:copyright.
570	 *
571	 * 2. If there is a dc:rights array but it has no x-default item -
572	 *    Create an x-default item as a copy of the first item then apply rule #3.
573	 *
574	 * 3. If there is a dc:rights array with an x-default item,
575	 *    Look for a double linefeed in the value.
576	 *     A. If no double linefeed, compare the x-default value to the xmpDM:copyright value.
577	 *         A1. If they match then leave the x-default value alone.
578	 *         A2. Otherwise, append a double linefeed and
579	 *             the xmpDM:copyright value to the x-default value.
580	 *     B. If there is a double linefeed, compare the trailing text to the xmpDM:copyright value.
581	 *         B1. If they match then leave the x-default value alone.
582	 *         B2. Otherwise, replace the trailing x-default text with the xmpDM:copyright value.
583	 *
584	 * 4. In all cases, delete the xmpDM:copyright property.
585	 * </pre>
586	 *
587	 * @param xmp the metadata object
588	 * @param dmCopyright the "dm:copyright"-property
589	 */
590	private static void	migrateAudioCopyright (XMPMeta xmp, XMPNode dmCopyright)
591	{
592		try
593		{
594			XMPNode dcSchema = XMPNodeUtils.findSchemaNode(
595				((XMPMetaImpl) xmp).getRoot(), XMPConst.NS_DC, true);
596
597			String dmValue = dmCopyright.getValue();
598			String doubleLF = "\n\n";
599
600			XMPNode dcRightsArray = XMPNodeUtils.findChildNode (dcSchema, "dc:rights", false);
601
602			if (dcRightsArray == null  ||  !dcRightsArray.hasChildren())
603			{
604				// 1. No dc:rights array, create from double linefeed and xmpDM:copyright.
605				dmValue = doubleLF + dmValue;
606				xmp.setLocalizedText(XMPConst.NS_DC, "rights", "", XMPConst.X_DEFAULT, dmValue,
607						null);
608			}
609			else
610			{
611				int xdIndex = XMPNodeUtils.lookupLanguageItem(dcRightsArray, XMPConst.X_DEFAULT);
612
613				if (xdIndex < 0)
614				{
615					// 2. No x-default item, create from the first item.
616					String firstValue = dcRightsArray.getChild(1).getValue();
617					xmp.setLocalizedText (XMPConst.NS_DC, "rights", "", XMPConst.X_DEFAULT,
618						firstValue, null);
619					xdIndex = XMPNodeUtils.lookupLanguageItem(dcRightsArray, XMPConst.X_DEFAULT);
620				}
621
622				// 3. Look for a double linefeed in the x-default value.
623				XMPNode defaultNode = dcRightsArray.getChild(xdIndex);
624				String defaultValue = defaultNode.getValue();
625				int lfPos = defaultValue.indexOf(doubleLF);
626
627				if (lfPos < 0)
628				{
629					// 3A. No double LF, compare whole values.
630					if (!dmValue.equals(defaultValue))
631					{
632						// 3A2. Append the xmpDM:copyright to the x-default
633						// item.
634						defaultNode.setValue(defaultValue + doubleLF + dmValue);
635					}
636				}
637				else
638				{
639					// 3B. Has double LF, compare the tail.
640					if (!defaultValue.substring(lfPos + 2).equals(dmValue))
641					{
642						// 3B2. Replace the x-default tail.
643						defaultNode.setValue(defaultValue.substring(0, lfPos + 2) + dmValue);
644					}
645				}
646
647			}
648
649			// 4. Get rid of the xmpDM:copyright.
650			dmCopyright.getParent().removeChild(dmCopyright);
651		}
652		catch (XMPException e)
653		{
654			// Don't let failures (like a bad dc:rights form) stop other
655			// cleanup.
656		}
657	}
658
659
660	/**
661	 * Initializes the map that contains the known arrays, that are fixed by
662	 * {@link XMPNormalizer#normalizeDCArrays(XMPNode)}.
663	 */
664	private static void initDCArrays()
665	{
666		dcArrayForms = new HashMap();
667
668		// Properties supposed to be a "Bag".
669		PropertyOptions bagForm = new PropertyOptions();
670		bagForm.setArray(true);
671		dcArrayForms.put("dc:contributor", bagForm);
672		dcArrayForms.put("dc:language", bagForm);
673		dcArrayForms.put("dc:publisher", bagForm);
674		dcArrayForms.put("dc:relation", bagForm);
675		dcArrayForms.put("dc:subject", bagForm);
676		dcArrayForms.put("dc:type", bagForm);
677
678		// Properties supposed to be a "Seq".
679		PropertyOptions seqForm = new PropertyOptions();
680		seqForm.setArray(true);
681		seqForm.setArrayOrdered(true);
682		dcArrayForms.put("dc:creator", seqForm);
683		dcArrayForms.put("dc:date", seqForm);
684
685		// Properties supposed to be an "Alt" in alternative-text form.
686		PropertyOptions altTextForm = new PropertyOptions();
687		altTextForm.setArray(true);
688		altTextForm.setArrayOrdered(true);
689		altTextForm.setArrayAlternate(true);
690		altTextForm.setArrayAltText(true);
691		dcArrayForms.put("dc:description", altTextForm);
692		dcArrayForms.put("dc:rights", altTextForm);
693		dcArrayForms.put("dc:title", altTextForm);
694	}
695}
696