1// =================================================================================================
2// ADOBE SYSTEMS INCORPORATED
3// Copyright 2006 Adobe Systems Incorporated
4// All Rights Reserved
5//
6// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
7// of the Adobe license agreement accompanying it.
8// =================================================================================================
9
10package com.adobe.xmp.impl;
11
12import java.util.List;
13import java.util.ArrayList;
14import java.util.Iterator;
15
16import org.w3c.dom.Attr;
17import org.w3c.dom.NamedNodeMap;
18import org.w3c.dom.Node;
19
20import com.adobe.xmp.XMPConst;
21import com.adobe.xmp.XMPError;
22import com.adobe.xmp.XMPException;
23import com.adobe.xmp.XMPMetaFactory;
24import com.adobe.xmp.XMPSchemaRegistry;
25import com.adobe.xmp.options.PropertyOptions;
26
27
28/**
29 * Parser for "normal" XML serialisation of RDF.
30 *
31 * @since   14.07.2006
32 */
33public class ParseRDF implements XMPError, XMPConst
34{
35	/** */
36	public static final int RDFTERM_OTHER = 0;
37	/** Start of coreSyntaxTerms. */
38	public static final int RDFTERM_RDF = 1;
39	/** */
40	public static final int RDFTERM_ID = 2;
41	/** */
42	public static final int RDFTERM_ABOUT = 3;
43	/** */
44	public static final int RDFTERM_PARSE_TYPE = 4;
45	/** */
46	public static final int RDFTERM_RESOURCE = 5;
47	/** */
48	public static final int RDFTERM_NODE_ID = 6;
49	/** End of coreSyntaxTerms */
50	public static final int RDFTERM_DATATYPE = 7;
51	/** Start of additions for syntax Terms. */
52	public static final int RDFTERM_DESCRIPTION = 8;
53	/** End of of additions for syntaxTerms. */
54	public static final int RDFTERM_LI = 9;
55	/** Start of oldTerms. */
56	public static final int RDFTERM_ABOUT_EACH = 10;
57	/** */
58	public static final int RDFTERM_ABOUT_EACH_PREFIX = 11;
59	/** End of oldTerms. */
60	public static final int RDFTERM_BAG_ID = 12;
61	/** */
62	public static final int RDFTERM_FIRST_CORE = RDFTERM_RDF;
63	/** */
64	public static final int RDFTERM_LAST_CORE = RDFTERM_DATATYPE;
65	/** ! Yes, the syntax terms include the core terms. */
66	public static final int RDFTERM_FIRST_SYNTAX = RDFTERM_FIRST_CORE;
67	/** */
68	public static final int RDFTERM_LAST_SYNTAX = RDFTERM_LI;
69	/** */
70	public static final int RDFTERM_FIRST_OLD = RDFTERM_ABOUT_EACH;
71	/** */
72	public static final int RDFTERM_LAST_OLD = RDFTERM_BAG_ID;
73
74	/** this prefix is used for default namespaces */
75	public static final String DEFAULT_PREFIX = "_dflt";
76
77
78
79	/**
80	 * The main parsing method. The XML tree is walked through from the root node and and XMP tree
81	 * is created. This is a raw parse, the normalisation of the XMP tree happens outside.
82	 *
83	 * @param xmlRoot the XML root node
84	 * @return Returns an XMP metadata object (not normalized)
85	 * @throws XMPException Occurs if the parsing fails for any reason.
86	 */
87	static XMPMetaImpl parse(Node xmlRoot) throws XMPException
88	{
89		XMPMetaImpl xmp = new XMPMetaImpl();
90		rdf_RDF(xmp, xmlRoot);
91		return xmp;
92	}
93
94
95	/**
96	 * Each of these parsing methods is responsible for recognizing an RDF
97	 * syntax production and adding the appropriate structure to the XMP tree.
98	 * They simply return for success, failures will throw an exception.
99	 *
100	 * @param xmp the xmp metadata object that is generated
101	 * @param rdfRdfNode the top-level xml node
102	 * @throws XMPException thown on parsing errors
103	 */
104	static void rdf_RDF(XMPMetaImpl xmp, Node rdfRdfNode) throws XMPException
105	{
106		if (rdfRdfNode.hasAttributes())
107		{
108			rdf_NodeElementList (xmp, xmp.getRoot(), rdfRdfNode);
109		}
110		else
111		{
112			throw new XMPException("Invalid attributes of rdf:RDF element", BADRDF);
113		}
114	}
115
116
117	/**
118	 * 7.2.10 nodeElementList<br>
119	 * ws* ( nodeElement ws* )*
120	 *
121	 * Note: this method is only called from the rdf:RDF-node (top level)
122	 * @param xmp the xmp metadata object that is generated
123	 * @param xmpParent the parent xmp node
124	 * @param rdfRdfNode the top-level xml node
125	 * @throws XMPException thown on parsing errors
126	 */
127	private static void rdf_NodeElementList(XMPMetaImpl xmp, XMPNode xmpParent, Node rdfRdfNode)
128		throws XMPException
129	{
130		for (int i = 0; i < rdfRdfNode.getChildNodes().getLength(); i++)
131		{
132			Node child = rdfRdfNode.getChildNodes().item(i);
133			// filter whitespaces (and all text nodes)
134			if (!isWhitespaceNode(child))
135			{
136				rdf_NodeElement  (xmp, xmpParent, child, true);
137			}
138		}
139	}
140
141
142	/**
143 	 * 7.2.5 nodeElementURIs
144	 * 		anyURI - ( coreSyntaxTerms | rdf:li | oldTerms )
145	 *
146 	 * 7.2.11 nodeElement
147	 * 		start-element ( URI == nodeElementURIs,
148	 * 		attributes == set ( ( idAttr | nodeIdAttr | aboutAttr )?, propertyAttr* ) )
149	 * 		propertyEltList
150	 * 		end-element()
151	 *
152	 * A node element URI is rdf:Description or anything else that is not an RDF
153	 * term.
154	 *
155	 * @param xmp the xmp metadata object that is generated
156	 * @param xmpParent the parent xmp node
157	 * @param xmlNode the currently processed XML node
158	 * @param isTopLevel Flag if the node is a top-level node
159	 * @throws XMPException thown on parsing errors
160	 */
161	private static void rdf_NodeElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
162			boolean isTopLevel) throws XMPException
163	{
164		int nodeTerm = getRDFTermKind (xmlNode);
165		if (nodeTerm != RDFTERM_DESCRIPTION  &&  nodeTerm != RDFTERM_OTHER)
166		{
167			throw new XMPException("Node element must be rdf:Description or typed node",
168				BADRDF);
169		}
170		else if (isTopLevel  &&  nodeTerm == RDFTERM_OTHER)
171		{
172			throw new XMPException("Top level typed node not allowed", BADXMP);
173		}
174		else
175		{
176			rdf_NodeElementAttrs (xmp, xmpParent, xmlNode, isTopLevel);
177			rdf_PropertyElementList (xmp, xmpParent, xmlNode, isTopLevel);
178		}
179
180	}
181
182
183	/**
184	 *
185	 * 7.2.7 propertyAttributeURIs
186	 * 		anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms )
187	 *
188	 * 7.2.11 nodeElement
189	 * start-element ( URI == nodeElementURIs,
190	 * 					attributes == set ( ( idAttr | nodeIdAttr | aboutAttr )?, propertyAttr* ) )
191	 * 					propertyEltList
192	 * 					end-element()
193	 *
194	 * Process the attribute list for an RDF node element. A property attribute URI is
195	 * anything other than an RDF term. The rdf:ID and rdf:nodeID attributes are simply ignored,
196	 * as are rdf:about attributes on inner nodes.
197	 *
198	 * @param xmp the xmp metadata object that is generated
199	 * @param xmpParent the parent xmp node
200	 * @param xmlNode the currently processed XML node
201	 * @param isTopLevel Flag if the node is a top-level node
202	 * @throws XMPException thown on parsing errors
203	 */
204	private static void rdf_NodeElementAttrs(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
205			boolean isTopLevel) throws XMPException
206	{
207		// Used to detect attributes that are mutually exclusive.
208		int exclusiveAttrs = 0;
209
210		for (int i = 0; i < xmlNode.getAttributes().getLength(); i++)
211		{
212			Node attribute = xmlNode.getAttributes().item(i);
213
214			// quick hack, ns declarations do not appear in C++
215			// ignore "ID" without namespace
216			if ("xmlns".equals(attribute.getPrefix())  ||
217				(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
218			{
219				continue;
220			}
221
222			int attrTerm = getRDFTermKind(attribute);
223
224			switch (attrTerm)
225			{
226				case RDFTERM_ID:
227				case RDFTERM_NODE_ID:
228				case RDFTERM_ABOUT:
229					if (exclusiveAttrs > 0)
230					{
231						throw new XMPException("Mutally exclusive about, ID, nodeID attributes",
232								BADRDF);
233					}
234
235					exclusiveAttrs++;
236
237					if (isTopLevel && (attrTerm == RDFTERM_ABOUT))
238					{
239						// This is the rdf:about attribute on a top level node. Set
240						// the XMP tree name if
241						// it doesn't have a name yet. Make sure this name matches
242						// the XMP tree name.
243						if (xmpParent.getName() != null && xmpParent.getName().length() > 0)
244						{
245							if (!xmpParent.getName().equals(attribute.getNodeValue()))
246							{
247								throw new XMPException("Mismatched top level rdf:about values",
248										BADXMP);
249							}
250						}
251						else
252						{
253							xmpParent.setName(attribute.getNodeValue());
254						}
255					}
256					break;
257
258				case RDFTERM_OTHER:
259					addChildNode(xmp, xmpParent, attribute, attribute.getNodeValue(), isTopLevel);
260					break;
261
262				default:
263					throw new XMPException("Invalid nodeElement attribute", BADRDF);
264			}
265
266		}
267	}
268
269
270	/**
271	 * 7.2.13 propertyEltList
272	 * ws* ( propertyElt ws* )*
273	 *
274	 * @param xmp the xmp metadata object that is generated
275	 * @param xmpParent the parent xmp node
276	 * @param xmlParent the currently processed XML node
277	 * @param isTopLevel Flag if the node is a top-level node
278	 * @throws XMPException thown on parsing errors
279	 */
280	private static void rdf_PropertyElementList(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlParent,
281			boolean isTopLevel) throws XMPException
282	{
283		for (int i = 0; i < xmlParent.getChildNodes().getLength(); i++)
284		{
285			Node currChild = xmlParent.getChildNodes().item(i);
286			if (isWhitespaceNode(currChild))
287			{
288				continue;
289			}
290			else if (currChild.getNodeType() != Node.ELEMENT_NODE)
291			{
292				throw new XMPException("Expected property element node not found", BADRDF);
293			}
294			else
295			{
296				rdf_PropertyElement(xmp, xmpParent, currChild, isTopLevel);
297			}
298		}
299	}
300
301
302	/**
303	 * 7.2.14 propertyElt
304	 *
305	 *		resourcePropertyElt | literalPropertyElt | parseTypeLiteralPropertyElt |
306	 *		parseTypeResourcePropertyElt | parseTypeCollectionPropertyElt |
307	 *		parseTypeOtherPropertyElt | emptyPropertyElt
308	 *
309	 * 7.2.15 resourcePropertyElt
310	 *		start-element ( URI == propertyElementURIs, attributes == set ( idAttr? ) )
311	 *		ws* nodeElement ws*
312	 *		end-element()
313	 *
314	 * 7.2.16 literalPropertyElt
315	 *		start-element (
316	 *			URI == propertyElementURIs, attributes == set ( idAttr?, datatypeAttr?) )
317	 *		text()
318	 *		end-element()
319	 *
320	 * 7.2.17 parseTypeLiteralPropertyElt
321	 *		start-element (
322	 *			URI == propertyElementURIs, attributes == set ( idAttr?, parseLiteral ) )
323	 *		literal
324	 *		end-element()
325	 *
326	 * 7.2.18 parseTypeResourcePropertyElt
327	 *		start-element (
328	 *			 URI == propertyElementURIs, attributes == set ( idAttr?, parseResource ) )
329	 *		propertyEltList
330	 *		end-element()
331	 *
332	 * 7.2.19 parseTypeCollectionPropertyElt
333	 *		start-element (
334	 *			URI == propertyElementURIs, attributes == set ( idAttr?, parseCollection ) )
335	 *		nodeElementList
336	 *		end-element()
337	 *
338	 * 7.2.20 parseTypeOtherPropertyElt
339	 *		start-element ( URI == propertyElementURIs, attributes == set ( idAttr?, parseOther ) )
340	 *		propertyEltList
341	 *		end-element()
342	 *
343	 * 7.2.21 emptyPropertyElt
344	 *		start-element ( URI == propertyElementURIs,
345	 *			attributes == set ( idAttr?, ( resourceAttr | nodeIdAttr )?, propertyAttr* ) )
346	 *		end-element()
347	 *
348	 * The various property element forms are not distinguished by the XML element name,
349	 * but by their attributes for the most part. The exceptions are resourcePropertyElt and
350	 * literalPropertyElt. They are distinguished by their XML element content.
351	 *
352	 * NOTE: The RDF syntax does not explicitly include the xml:lang attribute although it can
353	 * appear in many of these. We have to allow for it in the attibute counts below.
354	 *
355	 * @param xmp the xmp metadata object that is generated
356	 * @param xmpParent the parent xmp node
357	 * @param xmlNode the currently processed XML node
358	 * @param isTopLevel Flag if the node is a top-level node
359	 * @throws XMPException thown on parsing errors
360	 */
361	private static void rdf_PropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
362			boolean isTopLevel) throws XMPException
363	{
364		int nodeTerm = getRDFTermKind (xmlNode);
365		if (!isPropertyElementName(nodeTerm))
366		{
367			throw new XMPException("Invalid property element name", BADRDF);
368		}
369
370		// remove the namespace-definitions from the list
371		NamedNodeMap attributes = xmlNode.getAttributes();
372		List nsAttrs = null;
373		for (int i = 0; i < attributes.getLength(); i++)
374		{
375			Node attribute = attributes.item(i);
376			if ("xmlns".equals(attribute.getPrefix())  ||
377				(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
378			{
379				if (nsAttrs == null)
380				{
381					nsAttrs = new ArrayList();
382				}
383				nsAttrs.add(attribute.getNodeName());
384			}
385		}
386		if (nsAttrs != null)
387		{
388			for (Iterator it = nsAttrs.iterator(); it.hasNext();)
389			{
390				String ns = (String) it.next();
391				attributes.removeNamedItem(ns);
392			}
393		}
394
395
396		if (attributes.getLength() > 3)
397		{
398			// Only an emptyPropertyElt can have more than 3 attributes.
399			rdf_EmptyPropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
400		}
401		else
402		{
403			// Look through the attributes for one that isn't rdf:ID or xml:lang,
404			// it will usually tell what we should be dealing with.
405			// The called routines must verify their specific syntax!
406
407			for (int i = 0; i < attributes.getLength(); i++)
408			{
409				Node attribute = attributes.item(i);
410				String attrLocal = attribute.getLocalName();
411				String attrNS = attribute.getNamespaceURI();
412				String attrValue = attribute.getNodeValue();
413				if (!(XML_LANG.equals(attribute.getNodeName())  &&
414					!("ID".equals(attrLocal)  &&  NS_RDF.equals(attrNS))))
415				{
416					if ("datatype".equals(attrLocal)  &&  NS_RDF.equals(attrNS))
417					{
418						rdf_LiteralPropertyElement (xmp, xmpParent, xmlNode, isTopLevel);
419					}
420					else if (!("parseType".equals(attrLocal)  &&  NS_RDF.equals(attrNS)))
421					{
422						rdf_EmptyPropertyElement (xmp, xmpParent, xmlNode, isTopLevel);
423					}
424					else if ("Literal".equals(attrValue))
425					{
426						rdf_ParseTypeLiteralPropertyElement();
427					}
428					else if ("Resource".equals(attrValue))
429					{
430						rdf_ParseTypeResourcePropertyElement(xmp, xmpParent, xmlNode, isTopLevel);
431					}
432					else if ("Collection".equals(attrValue))
433					{
434						rdf_ParseTypeCollectionPropertyElement();
435					}
436					else
437					{
438						rdf_ParseTypeOtherPropertyElement();
439					}
440
441					return;
442				}
443			}
444
445			// Only rdf:ID and xml:lang, could be a resourcePropertyElt, a literalPropertyElt,
446			// or an emptyPropertyElt. Look at the child XML nodes to decide which.
447
448			if (xmlNode.hasChildNodes())
449			{
450				for (int i = 0; i < xmlNode.getChildNodes().getLength(); i++)
451				{
452					Node currChild = xmlNode.getChildNodes().item(i);
453					if (currChild.getNodeType() != Node.TEXT_NODE)
454					{
455						rdf_ResourcePropertyElement (xmp, xmpParent, xmlNode, isTopLevel);
456						return;
457					}
458				}
459
460				rdf_LiteralPropertyElement (xmp, xmpParent, xmlNode, isTopLevel);
461			}
462			else
463			{
464				rdf_EmptyPropertyElement (xmp, xmpParent, xmlNode, isTopLevel);
465			}
466		}
467	}
468
469
470	/**
471	 * 7.2.15 resourcePropertyElt
472	 *		start-element ( URI == propertyElementURIs, attributes == set ( idAttr? ) )
473	 *		ws* nodeElement ws*
474	 *		end-element()
475	 *
476	 * This handles structs using an rdf:Description node,
477	 * arrays using rdf:Bag/Seq/Alt, and typedNodes. It also catches and cleans up qualified
478	 * properties written with rdf:Description and rdf:value.
479	 *
480	 * @param xmp the xmp metadata object that is generated
481	 * @param xmpParent the parent xmp node
482	 * @param xmlNode the currently processed XML node
483	 * @param isTopLevel Flag if the node is a top-level node
484	 * @throws XMPException thown on parsing errors
485	 */
486	private static void rdf_ResourcePropertyElement(XMPMetaImpl xmp, XMPNode xmpParent,
487			Node xmlNode, boolean isTopLevel) throws XMPException
488	{
489		if (isTopLevel  &&  "iX:changes".equals(xmlNode.getNodeName()))
490		{
491			// Strip old "punchcard" chaff which has on the prefix "iX:".
492			return;
493		}
494
495		XMPNode newCompound = addChildNode(xmp, xmpParent, xmlNode, "", isTopLevel);
496
497		// walk through the attributes
498		for (int i = 0; i < xmlNode.getAttributes().getLength(); i++)
499		{
500			Node attribute = xmlNode.getAttributes().item(i);
501			if ("xmlns".equals(attribute.getPrefix())  ||
502					(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
503			{
504				continue;
505			}
506
507			String attrLocal = attribute.getLocalName();
508			String attrNS = attribute.getNamespaceURI();
509			if (XML_LANG.equals(attribute.getNodeName()))
510			{
511				addQualifierNode (newCompound, XML_LANG, attribute.getNodeValue());
512			}
513			else if ("ID".equals(attrLocal)  &&  NS_RDF.equals(attrNS))
514			{
515				continue;	// Ignore all rdf:ID attributes.
516			}
517			else
518			{
519				throw new XMPException(
520					"Invalid attribute for resource property element", BADRDF);
521			}
522		}
523
524		// walk through the children
525
526		Node currChild = null;
527		boolean found = false;
528		int i;
529		for (i = 0; i < xmlNode.getChildNodes().getLength(); i++)
530		{
531			currChild = xmlNode.getChildNodes().item(i);
532			if (!isWhitespaceNode(currChild))
533			{
534				if (currChild.getNodeType() == Node.ELEMENT_NODE  &&  !found)
535				{
536					boolean isRDF = NS_RDF.equals(currChild.getNamespaceURI());
537					String childLocal = currChild.getLocalName();
538
539					if (isRDF  &&  "Bag".equals(childLocal))
540					{
541						newCompound.getOptions().setArray(true);
542					}
543					else if (isRDF  &&  "Seq".equals(childLocal))
544					{
545						newCompound.getOptions().setArray(true).setArrayOrdered(true);
546					}
547					else if (isRDF  &&  "Alt".equals(childLocal))
548					{
549						newCompound.getOptions().setArray(true).setArrayOrdered(true)
550								.setArrayAlternate(true);
551					}
552					else
553					{
554						newCompound.getOptions().setStruct(true);
555						if (!isRDF  &&  !"Description".equals(childLocal))
556						{
557							String typeName = currChild.getNamespaceURI();
558							if (typeName == null)
559							{
560								throw new XMPException(
561										"All XML elements must be in a namespace", BADXMP);
562							}
563							typeName += ':' + childLocal;
564							addQualifierNode (newCompound, "rdf:type", typeName);
565						}
566					}
567
568					rdf_NodeElement (xmp, newCompound, currChild, false);
569
570					if (newCompound.getHasValueChild())
571					{
572						fixupQualifiedNode (newCompound);
573					}
574					else if (newCompound.getOptions().isArrayAlternate())
575					{
576						XMPNodeUtils.detectAltText(newCompound);
577					}
578
579					found = true;
580				}
581				else if (found)
582				{
583					// found second child element
584					throw new XMPException(
585						"Invalid child of resource property element", BADRDF);
586				}
587				else
588				{
589					throw new XMPException(
590						"Children of resource property element must be XML elements", BADRDF);
591				}
592			}
593		}
594
595		if (!found)
596		{
597			// didn't found any child elements
598			throw new XMPException("Missing child of resource property element", BADRDF);
599		}
600	}
601
602
603	/**
604	 * 7.2.16 literalPropertyElt
605	 *		start-element ( URI == propertyElementURIs,
606	 *				attributes == set ( idAttr?, datatypeAttr?) )
607	 *		text()
608	 *		end-element()
609	 *
610	 * Add a leaf node with the text value and qualifiers for the attributes.
611	 * @param xmp the xmp metadata object that is generated
612	 * @param xmpParent the parent xmp node
613	 * @param xmlNode the currently processed XML node
614	 * @param isTopLevel Flag if the node is a top-level node
615	 * @throws XMPException thown on parsing errors
616	 */
617	private static void rdf_LiteralPropertyElement(XMPMetaImpl xmp, XMPNode xmpParent,
618			Node xmlNode, boolean isTopLevel) throws XMPException
619	{
620		XMPNode newChild = addChildNode (xmp, xmpParent, xmlNode, null, isTopLevel);
621
622		for (int i = 0; i < xmlNode.getAttributes().getLength(); i++)
623		{
624			Node attribute = xmlNode.getAttributes().item(i);
625			if ("xmlns".equals(attribute.getPrefix())  ||
626					(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
627			{
628				continue;
629			}
630
631			String attrNS = attribute.getNamespaceURI();
632			String attrLocal = attribute.getLocalName();
633			if (XML_LANG.equals(attribute.getNodeName()))
634			{
635				addQualifierNode(newChild, XML_LANG, attribute.getNodeValue());
636			}
637			else if (NS_RDF.equals(attrNS)  &&
638					 ("ID".equals(attrLocal)  ||  "datatype".equals(attrLocal)))
639			{
640				continue;	// Ignore all rdf:ID and rdf:datatype attributes.
641			}
642			else
643			{
644				throw new XMPException(
645					"Invalid attribute for literal property element", BADRDF);
646			}
647		}
648		String textValue = "";
649		for (int i = 0; i < xmlNode.getChildNodes().getLength(); i++)
650		{
651			Node child = xmlNode.getChildNodes().item(i);
652			if (child.getNodeType() == Node.TEXT_NODE)
653			{
654				textValue += child.getNodeValue();
655			}
656			else
657			{
658				throw new XMPException("Invalid child of literal property element", BADRDF);
659			}
660		}
661		newChild.setValue(textValue);
662	}
663
664
665	/**
666	 * 7.2.17 parseTypeLiteralPropertyElt
667	 *		start-element ( URI == propertyElementURIs,
668	 *			attributes == set ( idAttr?, parseLiteral ) )
669	 *		literal
670	 *		end-element()
671	 *
672	 * @throws XMPException thown on parsing errors
673	 */
674	private static void rdf_ParseTypeLiteralPropertyElement() throws XMPException
675	{
676		throw new XMPException("ParseTypeLiteral property element not allowed", BADXMP);
677	}
678
679
680	/**
681	 * 7.2.18 parseTypeResourcePropertyElt
682	 *		start-element ( URI == propertyElementURIs,
683	 *			attributes == set ( idAttr?, parseResource ) )
684	 *		propertyEltList
685	 *		end-element()
686	 *
687	 * Add a new struct node with a qualifier for the possible rdf:ID attribute.
688	 * Then process the XML child nodes to get the struct fields.
689	 *
690	 * @param xmp the xmp metadata object that is generated
691	 * @param xmpParent the parent xmp node
692	 * @param xmlNode the currently processed XML node
693	 * @param isTopLevel Flag if the node is a top-level node
694	 * @throws XMPException thown on parsing errors
695	 */
696	private static void rdf_ParseTypeResourcePropertyElement(XMPMetaImpl xmp, XMPNode xmpParent,
697			Node xmlNode, boolean isTopLevel) throws XMPException
698	{
699		XMPNode newStruct = addChildNode (xmp, xmpParent, xmlNode, "", isTopLevel);
700
701		newStruct.getOptions().setStruct(true);
702
703		for (int i = 0; i < xmlNode.getAttributes().getLength(); i++)
704		{
705			Node attribute = xmlNode.getAttributes().item(i);
706			if ("xmlns".equals(attribute.getPrefix())  ||
707					(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
708			{
709				continue;
710			}
711
712			String attrLocal = attribute.getLocalName();
713			String attrNS = attribute.getNamespaceURI();
714			if (XML_LANG.equals(attribute.getNodeName()))
715			{
716				addQualifierNode (newStruct, XML_LANG, attribute.getNodeValue());
717			}
718			else if (NS_RDF.equals(attrNS)  &&
719					 ("ID".equals(attrLocal)  ||  "parseType".equals(attrLocal)))
720			{
721				continue;	// The caller ensured the value is "Resource".
722							// Ignore all rdf:ID attributes.
723			}
724			else
725			{
726				throw new XMPException("Invalid attribute for ParseTypeResource property element",
727						BADRDF);
728			}
729		}
730
731		rdf_PropertyElementList (xmp, newStruct, xmlNode, false);
732
733		if (newStruct.getHasValueChild())
734		{
735			fixupQualifiedNode (newStruct);
736		}
737	}
738
739
740	/**
741	 * 7.2.19 parseTypeCollectionPropertyElt
742	 *		start-element ( URI == propertyElementURIs,
743	 *			attributes == set ( idAttr?, parseCollection ) )
744	 *		nodeElementList
745	 *		end-element()
746	 *
747	 * @throws XMPException thown on parsing errors
748	 */
749	private static void rdf_ParseTypeCollectionPropertyElement() throws XMPException
750	{
751		throw new XMPException("ParseTypeCollection property element not allowed", BADXMP);
752	}
753
754
755	/**
756	 * 7.2.20 parseTypeOtherPropertyElt
757	 *		start-element ( URI == propertyElementURIs, attributes == set ( idAttr?, parseOther ) )
758	 *		propertyEltList
759	 *		end-element()
760	 *
761	 * @throws XMPException thown on parsing errors
762	 */
763	private static void rdf_ParseTypeOtherPropertyElement() throws XMPException
764	{
765		throw new XMPException("ParseTypeOther property element not allowed", BADXMP);
766	}
767
768
769	/**
770	 * 7.2.21 emptyPropertyElt
771	 *		start-element ( URI == propertyElementURIs,
772	 *						attributes == set (
773	 *							idAttr?, ( resourceAttr | nodeIdAttr )?, propertyAttr* ) )
774	 *		end-element()
775	 *
776	 *	<ns:Prop1/>  <!-- a simple property with an empty value -->
777	 *	<ns:Prop2 rdf:resource="http: *www.adobe.com/"/> <!-- a URI value -->
778	 *	<ns:Prop3 rdf:value="..." ns:Qual="..."/> <!-- a simple qualified property -->
779	 *	<ns:Prop4 ns:Field1="..." ns:Field2="..."/> <!-- a struct with simple fields -->
780	 *
781	 * An emptyPropertyElt is an element with no contained content, just a possibly empty set of
782	 * attributes. An emptyPropertyElt can represent three special cases of simple XMP properties: a
783	 * simple property with an empty value (ns:Prop1), a simple property whose value is a URI
784	 * (ns:Prop2), or a simple property with simple qualifiers (ns:Prop3).
785	 * An emptyPropertyElt can also represent an XMP struct whose fields are all simple and
786	 * unqualified (ns:Prop4).
787	 *
788	 * It is an error to use both rdf:value and rdf:resource - that can lead to invalid  RDF in the
789	 * verbose form written using a literalPropertyElt.
790	 *
791	 * The XMP mapping for an emptyPropertyElt is a bit different from generic RDF, partly for
792	 * design reasons and partly for historical reasons. The XMP mapping rules are:
793	 * <ol>
794	 *		<li> If there is an rdf:value attribute then this is a simple property
795	 *				 with a text value.
796	 *		All other attributes are qualifiers.
797	 *		<li> If there is an rdf:resource attribute then this is a simple property
798	 *			with a URI value.
799	 *		All other attributes are qualifiers.
800	 *		<li> If there are no attributes other than xml:lang, rdf:ID, or rdf:nodeID
801	 *				then this is a simple
802	 *		property with an empty value.
803	 *		<li> Otherwise this is a struct, the attributes other than xml:lang, rdf:ID,
804	 *				or rdf:nodeID are fields.
805	 * </ol>
806	 *
807	 * @param xmp the xmp metadata object that is generated
808	 * @param xmpParent the parent xmp node
809	 * @param xmlNode the currently processed XML node
810	 * @param isTopLevel Flag if the node is a top-level node
811	 * @throws XMPException thown on parsing errors
812	 */
813	private static void rdf_EmptyPropertyElement(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
814			boolean isTopLevel) throws XMPException
815	{
816		boolean hasPropertyAttrs = false;
817		boolean hasResourceAttr = false;
818		boolean hasNodeIDAttr = false;
819		boolean hasValueAttr = false;
820
821		Node valueNode = null;	// ! Can come from rdf:value or rdf:resource.
822
823		if (xmlNode.hasChildNodes())
824		{
825			throw new XMPException(
826					"Nested content not allowed with rdf:resource or property attributes",
827					BADRDF);
828		}
829
830		// First figure out what XMP this maps to and remember the XML node for a simple value.
831		for (int i = 0; i < xmlNode.getAttributes().getLength(); i++)
832		{
833			Node attribute = xmlNode.getAttributes().item(i);
834			if ("xmlns".equals(attribute.getPrefix())  ||
835					(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
836			{
837				continue;
838			}
839
840			int attrTerm = getRDFTermKind (attribute);
841
842			switch (attrTerm)
843			{
844				case RDFTERM_ID :
845					// Nothing to do.
846					break;
847
848				case RDFTERM_RESOURCE :
849					if (hasNodeIDAttr)
850					{
851						throw new XMPException(
852							"Empty property element can't have both rdf:resource and rdf:nodeID",
853							BADRDF);
854					}
855					else if (hasValueAttr)
856					{
857						throw new XMPException(
858								"Empty property element can't have both rdf:value and rdf:resource",
859								BADXMP);
860					}
861
862					hasResourceAttr = true;
863					if (!hasValueAttr)
864					{
865						valueNode = attribute;
866					}
867					break;
868
869				case RDFTERM_NODE_ID:
870				if (hasResourceAttr)
871				{
872					throw new XMPException(
873							"Empty property element can't have both rdf:resource and rdf:nodeID",
874							BADRDF);
875				}
876				hasNodeIDAttr = true;
877				break;
878
879			case RDFTERM_OTHER:
880				if ("value".equals(attribute.getLocalName())
881						&& NS_RDF.equals(attribute.getNamespaceURI()))
882				{
883					if (hasResourceAttr)
884					{
885						throw new XMPException(
886								"Empty property element can't have both rdf:value and rdf:resource",
887								BADXMP);
888					}
889					hasValueAttr = true;
890					valueNode = attribute;
891				}
892				else if (!XML_LANG.equals(attribute.getNodeName()))
893				{
894					hasPropertyAttrs = true;
895				}
896				break;
897
898			default:
899				throw new XMPException("Unrecognized attribute of empty property element",
900						BADRDF);
901			}
902		}
903
904		// Create the right kind of child node and visit the attributes again
905		// to add the fields or qualifiers.
906		// ! Because of implementation vagaries,
907		//   the xmpParent is the tree root for top level properties.
908		// ! The schema is found, created if necessary, by addChildNode.
909
910		XMPNode childNode = addChildNode(xmp, xmpParent, xmlNode, "", isTopLevel);
911		boolean childIsStruct = false;
912
913		if (hasValueAttr || hasResourceAttr)
914		{
915			childNode.setValue(valueNode != null ? valueNode.getNodeValue() : "");
916			if (!hasValueAttr)
917			{
918				// ! Might have both rdf:value and rdf:resource.
919				childNode.getOptions().setURI(true);
920			}
921		}
922		else if (hasPropertyAttrs)
923		{
924			childNode.getOptions().setStruct(true);
925			childIsStruct = true;
926		}
927
928		for (int i = 0; i < xmlNode.getAttributes().getLength(); i++)
929		{
930			Node attribute = xmlNode.getAttributes().item(i);
931			if (attribute == valueNode  ||
932				"xmlns".equals(attribute.getPrefix())  ||
933				(attribute.getPrefix() == null  &&  "xmlns".equals(attribute.getNodeName())))
934			{
935				continue;	// Skip the rdf:value or rdf:resource attribute holding the value.
936			}
937
938			int attrTerm = getRDFTermKind (attribute);
939
940			switch (attrTerm)
941			{
942				case RDFTERM_ID :
943				case RDFTERM_NODE_ID :
944					break;	// Ignore all rdf:ID and rdf:nodeID attributes.
945
946				case RDFTERM_RESOURCE :
947					addQualifierNode(childNode, "rdf:resource", attribute.getNodeValue());
948					break;
949
950				case RDFTERM_OTHER :
951					if (!childIsStruct)
952					{
953						addQualifierNode(
954							childNode, attribute.getNodeName(), attribute.getNodeValue());
955					}
956					else if (XML_LANG.equals(attribute.getNodeName()))
957					{
958						addQualifierNode (childNode, XML_LANG, attribute.getNodeValue());
959					}
960					else
961					{
962						addChildNode (xmp, childNode, attribute, attribute.getNodeValue(), false);
963					}
964					break;
965
966				default :
967					throw new XMPException("Unrecognized attribute of empty property element",
968						BADRDF);
969			}
970
971		}
972	}
973
974
975	/**
976	 * Adds a child node.
977	 *
978	 * @param xmp the xmp metadata object that is generated
979	 * @param xmpParent the parent xmp node
980	 * @param xmlNode the currently processed XML node
981	 * @param value Node value
982	 * @param isTopLevel Flag if the node is a top-level node
983	 * @return Returns the newly created child node.
984	 * @throws XMPException thown on parsing errors
985	 */
986	private static XMPNode addChildNode(XMPMetaImpl xmp, XMPNode xmpParent, Node xmlNode,
987			String value, boolean isTopLevel) throws XMPException
988	{
989		XMPSchemaRegistry registry = XMPMetaFactory.getSchemaRegistry();
990		String namespace = xmlNode.getNamespaceURI();
991		String childName;
992		if (namespace != null)
993		{
994			if (NS_DC_DEPRECATED.equals(namespace))
995			{
996				// Fix a legacy DC namespace
997				namespace = NS_DC;
998			}
999
1000			String prefix = registry.getNamespacePrefix(namespace);
1001			if (prefix == null)
1002			{
1003				prefix = xmlNode.getPrefix() != null ? xmlNode.getPrefix() : DEFAULT_PREFIX;
1004				prefix = registry.registerNamespace(namespace, prefix);
1005			}
1006			childName = prefix + xmlNode.getLocalName();
1007		}
1008		else
1009		{
1010			throw new XMPException(
1011				"XML namespace required for all elements and attributes", BADRDF);
1012		}
1013
1014
1015		// create schema node if not already there
1016		PropertyOptions childOptions = new PropertyOptions();
1017		boolean isAlias = false;
1018		if (isTopLevel)
1019		{
1020			// Lookup the schema node, adjust the XMP parent pointer.
1021			// Incoming parent must be the tree root.
1022			XMPNode schemaNode = XMPNodeUtils.findSchemaNode(xmp.getRoot(), namespace,
1023				DEFAULT_PREFIX, true);
1024			schemaNode.setImplicit(false);	// Clear the implicit node bit.
1025			// need runtime check for proper 32 bit code.
1026			xmpParent = schemaNode;
1027
1028			// If this is an alias set the alias flag in the node
1029			// and the hasAliases flag in the tree.
1030			if (registry.findAlias(childName) != null)
1031			{
1032				isAlias = true;
1033				xmp.getRoot().setHasAliases(true);
1034				schemaNode.setHasAliases(true);
1035			}
1036		}
1037
1038
1039		// Make sure that this is not a duplicate of a named node.
1040		boolean isArrayItem  = "rdf:li".equals(childName);
1041		boolean isValueNode  = "rdf:value".equals(childName);
1042
1043		// Create XMP node and so some checks
1044		XMPNode newChild = new XMPNode(
1045			childName, value, childOptions);
1046		newChild.setAlias(isAlias);
1047
1048		// Add the new child to the XMP parent node, a value node first.
1049		if (!isValueNode)
1050		{
1051			xmpParent.addChild(newChild);
1052		}
1053		else
1054		{
1055			xmpParent.addChild(1, newChild);
1056		}
1057
1058
1059		if (isValueNode)
1060		{
1061			if (isTopLevel  ||  !xmpParent.getOptions().isStruct())
1062			{
1063				throw new XMPException("Misplaced rdf:value element", BADRDF);
1064			}
1065			xmpParent.setHasValueChild(true);
1066		}
1067
1068		if (isArrayItem)
1069		{
1070			if (!xmpParent.getOptions().isArray())
1071			{
1072				throw new XMPException("Misplaced rdf:li element", BADRDF);
1073			}
1074			newChild.setName(ARRAY_ITEM_NAME);
1075		}
1076
1077		return newChild;
1078	}
1079
1080
1081	/**
1082	 * Adds a qualifier node.
1083	 *
1084	 * @param xmpParent the parent xmp node
1085	 * @param name the name of the qualifier which has to be
1086	 * 		QName including the <b>default prefix</b>
1087	 * @param value the value of the qualifier
1088	 * @return Returns the newly created child node.
1089	 * @throws XMPException thown on parsing errors
1090	 */
1091	private static XMPNode addQualifierNode(XMPNode xmpParent, String name, String value)
1092			throws XMPException
1093	{
1094		boolean isLang = XML_LANG.equals(name);
1095
1096		XMPNode newQual = null;
1097
1098		// normalize value of language qualifiers
1099		newQual = new XMPNode(name, isLang ? Utils.normalizeLangValue(value) : value, null);
1100		xmpParent.addQualifier(newQual);
1101
1102		return newQual;
1103	}
1104
1105
1106	/**
1107	 * The parent is an RDF pseudo-struct containing an rdf:value field. Fix the
1108	 * XMP data model. The rdf:value node must be the first child, the other
1109	 * children are qualifiers. The form, value, and children of the rdf:value
1110	 * node are the real ones. The rdf:value node's qualifiers must be added to
1111	 * the others.
1112	 *
1113	 * @param xmpParent the parent xmp node
1114	 * @throws XMPException thown on parsing errors
1115	 */
1116	private static void fixupQualifiedNode(XMPNode xmpParent) throws XMPException
1117	{
1118		assert xmpParent.getOptions().isStruct()  &&  xmpParent.hasChildren();
1119
1120		XMPNode valueNode = xmpParent.getChild(1);
1121		assert "rdf:value".equals(valueNode.getName());
1122
1123		// Move the qualifiers on the value node to the parent.
1124		// Make sure an xml:lang qualifier stays at the front.
1125		// Check for duplicate names between the value node's qualifiers and the parent's children.
1126		// The parent's children are about to become qualifiers. Check here, between the groups.
1127		// Intra-group duplicates are caught by XMPNode#addChild(...).
1128		if (valueNode.getOptions().getHasLanguage())
1129		{
1130			if (xmpParent.getOptions().getHasLanguage())
1131			{
1132				throw new XMPException("Redundant xml:lang for rdf:value element",
1133					BADXMP);
1134			}
1135			XMPNode langQual = valueNode.getQualifier(1);
1136			valueNode.removeQualifier(langQual);
1137			xmpParent.addQualifier(langQual);
1138		}
1139
1140		// Start the remaining copy after the xml:lang qualifier.
1141		for (int i = 1; i <= valueNode.getQualifierLength(); i++)
1142		{
1143			XMPNode qualifier = valueNode.getQualifier(i);
1144			xmpParent.addQualifier(qualifier);
1145		}
1146
1147
1148		// Change the parent's other children into qualifiers.
1149		// This loop starts at 1, child 0 is the rdf:value node.
1150		for (int i = 2; i <= xmpParent.getChildrenLength(); i++)
1151		{
1152			XMPNode qualifier = xmpParent.getChild(i);
1153			xmpParent.addQualifier(qualifier);
1154		}
1155
1156		// Move the options and value last, other checks need the parent's original options.
1157		// Move the value node's children to be the parent's children.
1158		assert xmpParent.getOptions().isStruct()  ||  xmpParent.getHasValueChild();
1159
1160		xmpParent.setHasValueChild(false);
1161		xmpParent.getOptions().setStruct(false);
1162		xmpParent.getOptions().mergeWith(valueNode.getOptions());
1163		xmpParent.setValue(valueNode.getValue());
1164
1165		xmpParent.removeChildren();
1166		for (Iterator it = valueNode.iterateChildren(); it.hasNext();)
1167		{
1168			XMPNode child = (XMPNode) it.next();
1169			xmpParent.addChild(child);
1170		}
1171	}
1172
1173
1174	/**
1175	 * Checks if the node is a white space.
1176	 * @param node an XML-node
1177	 * @return Returns whether the node is a whitespace node,
1178	 * 		i.e. a text node that contains only whitespaces.
1179	 */
1180	private static boolean isWhitespaceNode(Node node)
1181	{
1182		if (node.getNodeType() != Node.TEXT_NODE)
1183		{
1184			return false;
1185		}
1186
1187		String value = node.getNodeValue();
1188		for (int i = 0; i < value.length(); i++)
1189		{
1190			if (!Character.isWhitespace(value.charAt(i)))
1191			{
1192				return false;
1193			}
1194		}
1195
1196		return true;
1197	}
1198
1199
1200	/**
1201	 * 7.2.6 propertyElementURIs
1202	 *			anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms )
1203	 *
1204	 * @param term the term id
1205	 * @return Return true if the term is a property element name.
1206	 */
1207	private static boolean isPropertyElementName(int term)
1208	{
1209		if (term == RDFTERM_DESCRIPTION  ||  isOldTerm(term))
1210		{
1211			return false;
1212		}
1213		else
1214		{
1215			return (!isCoreSyntaxTerm(term));
1216		}
1217	}
1218
1219
1220	/**
1221	 * 7.2.4 oldTerms<br>
1222	 * rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID
1223	 *
1224	 * @param term the term id
1225	 * @return Returns true if the term is an old term.
1226	 */
1227	private static boolean isOldTerm(int term)
1228	{
1229		return  RDFTERM_FIRST_OLD <= term  &&  term <= RDFTERM_LAST_OLD;
1230	}
1231
1232
1233	/**
1234	 * 7.2.2 coreSyntaxTerms<br>
1235	 * rdf:RDF | rdf:ID | rdf:about | rdf:parseType | rdf:resource | rdf:nodeID |
1236	 * rdf:datatype
1237	 *
1238	 * @param term the term id
1239	 * @return Return true if the term is a core syntax term
1240	 */
1241	private static boolean isCoreSyntaxTerm(int term)
1242	{
1243		return  RDFTERM_FIRST_CORE <= term  &&  term <= RDFTERM_LAST_CORE;
1244	}
1245
1246
1247	/**
1248	 * Determines the ID for a certain RDF Term.
1249	 * Arranged to hopefully minimize the parse time for large XMP.
1250	 *
1251	 * @param node an XML node
1252	 * @return Returns the term ID.
1253	 */
1254	private static int getRDFTermKind(Node node)
1255	{
1256		String localName = node.getLocalName();
1257		String namespace = node.getNamespaceURI();
1258
1259		if (
1260				namespace == null  &&
1261				("about".equals(localName) || "ID".equals(localName))  &&
1262				(node instanceof Attr)  &&
1263				NS_RDF.equals(((Attr) node).getOwnerElement().getNamespaceURI())
1264		   )
1265		{
1266			namespace = NS_RDF;
1267		}
1268
1269		if (NS_RDF.equals(namespace))
1270		{
1271			if ("li".equals(localName))
1272			{
1273				return RDFTERM_LI;
1274			}
1275			else if ("parseType".equals(localName))
1276			{
1277				return RDFTERM_PARSE_TYPE;
1278			}
1279			else if ("Description".equals(localName))
1280			{
1281				return RDFTERM_DESCRIPTION;
1282			}
1283			else if ("about".equals(localName))
1284			{
1285				return RDFTERM_ABOUT;
1286			}
1287			else if ("resource".equals(localName))
1288			{
1289				return RDFTERM_RESOURCE;
1290			}
1291			else if ("RDF".equals(localName))
1292			{
1293				return RDFTERM_RDF;
1294			}
1295			else if ("ID".equals(localName))
1296			{
1297				return RDFTERM_ID;
1298			}
1299			else if ("nodeID".equals(localName))
1300			{
1301				return RDFTERM_NODE_ID;
1302			}
1303			else if ("datatype".equals(localName))
1304			{
1305				return RDFTERM_DATATYPE;
1306			}
1307			else if ("aboutEach".equals(localName))
1308			{
1309				return RDFTERM_ABOUT_EACH;
1310			}
1311			else if ("aboutEachPrefix".equals(localName))
1312			{
1313				return RDFTERM_ABOUT_EACH_PREFIX;
1314			}
1315			else if ("bagID".equals(localName))
1316			{
1317				return RDFTERM_BAG_ID;
1318			}
1319		}
1320
1321		return RDFTERM_OTHER;
1322	}
1323}