example.html revision c332dab99ba5fbb78b1a992e07f2f6e014cf01df
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd"> 2<html> 3<head> 4<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"> 5<link rel="SHORTCUT ICON" href="/favicon.ico"> 6<style type="text/css"><!-- 7TD {font-family: Verdana,Arial,Helvetica} 8BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em} 9H1 {font-family: Verdana,Arial,Helvetica} 10H2 {font-family: Verdana,Arial,Helvetica} 11H3 {font-family: Verdana,Arial,Helvetica} 12A:link, A:visited, A:active { text-decoration: underline } 13--></style> 14<title>A real example</title> 15</head> 16<body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000"> 17<table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr> 18<td width="180"> 19<a href="http://www.gnome.org/"><img src="smallfootonly.gif" alt="Gnome Logo"></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo"></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo"></a> 20</td> 21<td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"> 22<h1>The XML C library for Gnome</h1> 23<h2>A real example</h2> 24</td></tr></table></td></tr></table></td> 25</tr></table> 26<table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr> 27<td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td> 28<table width="100%" border="0" cellspacing="1" cellpadding="3"> 29<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Main Menu</b></center></td></tr> 30<tr><td bgcolor="#fffacd"><ul> 31<li><a href="index.html">Home</a></li> 
32<li><a href="intro.html">Introduction</a></li> 33<li><a href="FAQ.html">FAQ</a></li> 34<li><a href="docs.html">Documentation</a></li> 35<li><a href="bugs.html">Reporting bugs and getting help</a></li> 36<li><a href="help.html">How to help</a></li> 37<li><a href="downloads.html">Downloads</a></li> 38<li><a href="news.html">News</a></li> 39<li><a href="XMLinfo.html">XML</a></li> 40<li><a href="XSLT.html">XSLT</a></li> 41<li><a href="python.html">Python and bindings</a></li> 42<li><a href="architecture.html">libxml architecture</a></li> 43<li><a href="tree.html">The tree output</a></li> 44<li><a href="interface.html">The SAX interface</a></li> 45<li><a href="xmldtd.html">Validation & DTDs</a></li> 46<li><a href="xmlmem.html">Memory Management</a></li> 47<li><a href="encoding.html">Encodings support</a></li> 48<li><a href="xmlio.html">I/O Interfaces</a></li> 49<li><a href="catalog.html">Catalog support</a></li> 50<li><a href="library.html">The parser interfaces</a></li> 51<li><a href="entities.html">Entities or no entities</a></li> 52<li><a href="namespaces.html">Namespaces</a></li> 53<li><a href="upgrade.html">Upgrading 1.x code</a></li> 54<li><a href="threads.html">Thread safety</a></li> 55<li><a href="DOM.html">DOM Principles</a></li> 56<li><a href="example.html">A real example</a></li> 57<li><a href="contribs.html">Contributions</a></li> 58<li> 59<a href="xml.html">flat page</a>, <a href="site.xsl">stylesheet</a> 60</li> 61</ul></td></tr> 62</table> 63<table width="100%" border="0" cellspacing="1" cellpadding="3"> 64<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>API Indexes</b></center></td></tr> 65<tr><td bgcolor="#fffacd"><ul> 66<li><a href="APIchunk0.html">Alphabetic</a></li> 67<li><a href="APIconstructors.html">Constructors</a></li> 68<li><a href="APIfunctions.html">Functions/Types</a></li> 69<li><a href="APIfiles.html">Modules</a></li> 70<li><a href="APIsymbols.html">Symbols</a></li> 71</ul></td></tr> 72</table> 73<table width="100%" 
border="0" cellspacing="1" cellpadding="3"> 74<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr> 75<tr><td bgcolor="#fffacd"><ul> 76<li><a href="http://mail.gnome.org/archives/xml/">Mail archive</a></li> 77<li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li> 78<li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li> 79<li><a href="http://www.aleksey.com/xmlsec/">XML-DSig xmlsec</a></li> 80<li><a href="ftp://xmlsoft.org/">FTP</a></li> 81<li><a href="http://www.fh-frankfurt.de/~igor/projects/libxml/">Windows binaries</a></li> 82<li><a href="http://garypennington.net/libxml2/">Solaris binaries</a></li> 83<li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml&product=libxml2">Bug Tracker</a></li> 84</ul></td></tr> 85</table> 86</td></tr></table></td> 87<td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"> 88<p>Here is a real size example, where the actual content of the application 89data is not kept in the DOM tree but uses internal structures. It is based on 90a proposal to keep a database of jobs related to Gnome, with an XML based 91storage structure. 
Here is an <a href="gjobs.xml">XML encoded jobs 92base</a>:</p> 93<pre><?xml version="1.0"?> 94<gjob:Helping xmlns:gjob="http://www.gnome.org/some-location"> 95 <gjob:Jobs> 96 97 <gjob:Job> 98 <gjob:Project ID="3"/> 99 <gjob:Application>GBackup</gjob:Application> 100 <gjob:Category>Development</gjob:Category> 101 102 <gjob:Update> 103 <gjob:Status>Open</gjob:Status> 104 <gjob:Modified>Mon, 07 Jun 1999 20:27:45 -0400 MET DST</gjob:Modified> 105 <gjob:Salary>USD 0.00</gjob:Salary> 106 </gjob:Update> 107 108 <gjob:Developers> 109 <gjob:Developer> 110 </gjob:Developer> 111 </gjob:Developers> 112 113 <gjob:Contact> 114 <gjob:Person>Nathan Clemons</gjob:Person> 115 <gjob:Email>nathan@windsofstorm.net</gjob:Email> 116 <gjob:Company> 117 </gjob:Company> 118 <gjob:Organisation> 119 </gjob:Organisation> 120 <gjob:Webpage> 121 </gjob:Webpage> 122 <gjob:Snailmail> 123 </gjob:Snailmail> 124 <gjob:Phone> 125 </gjob:Phone> 126 </gjob:Contact> 127 128 <gjob:Requirements> 129 The program should be released as free software, under the GPL. 130 </gjob:Requirements> 131 132 <gjob:Skills> 133 </gjob:Skills> 134 135 <gjob:Details> 136 A GNOME based system that will allow a superuser to configure 137 compressed and uncompressed files and/or file systems to be backed 138 up with a supported media in the system. This should be able to 139 perform via find commands generating a list of files that are passed 140 to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine 141 or via operations performed on the filesystem itself. Email 142 notification and GUI status display very important. 143 </gjob:Details> 144 145 </gjob:Job> 146 147 </gjob:Jobs> 148</gjob:Helping></pre> 149<p>While loading the XML file into an internal DOM tree is a matter of 150calling only a couple of functions, browsing the tree to gather the data and 151generate the internal structures is harder, and more error prone.</p> 152<p>The suggested principle is to be tolerant with respect to the input 153structure. 
For example, the ordering of the attributes is not significant, 154the XML specification is clear about it. It's also usually a good idea not to 155depend on the order of the children of a given node, unless it really makes 156things harder. Here is some code to parse the information for a person:</p> 157<pre>/* 158 * A person record 159 */ 160typedef struct person { 161 char *name; 162 char *email; 163 char *company; 164 char *organisation; 165 char *smail; 166 char *webPage; 167 char *phone; 168} person, *personPtr; 169 170/* 171 * And the code needed to parse it 172 */ 173personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) { 174 personPtr ret = NULL; 175 176DEBUG("parsePerson\n"); 177 /* 178 * allocate the struct 179 */ 180 ret = (personPtr) malloc(sizeof(person)); 181 if (ret == NULL) { 182 fprintf(stderr,"out of memory\n"); 183 return(NULL); 184 } 185 memset(ret, 0, sizeof(person)); 186 187 /* We don't care what the top level element name is */ 188 cur = cur->xmlChildrenNode; 189 while (cur != NULL) { 190 if ((!strcmp(cur->name, "Person")) && (cur->ns == ns)) 191 ret->name = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 192 if ((!strcmp(cur->name, "Email")) && (cur->ns == ns)) 193 ret->email = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 194 cur = cur->next; 195 } 196 197 return(ret); 198}</pre> 199<p>Here are a couple of things to notice:</p> 200<ul> 201<li>Usually a recursive parsing style is the more convenient one: XML data 202 is by nature subject to repetitive constructs and usually exhibits highly 203 structured patterns.</li> 204<li>The two arguments of type <em>xmlDocPtr</em> and <em>xmlNsPtr</em>, 205 i.e. the pointer to the global XML document and the namespace reserved to 206 the application. 
Document-wide information is needed, for example, to 207 decode entities and it's a good coding practice to define a namespace for 208 your application set of data and test that the element and attributes 209 you're analyzing actually pertain to your application space. This is 210 done by a simple equality test (cur->ns == ns).</li> 211<li>To retrieve text and attribute values, you can use the function 212 <em>xmlNodeListGetString</em> to gather all the text and entity reference 213 nodes generated by the DOM output and produce a single text string.</li> 214</ul> 215<p>Here is another piece of code used to parse another level of the 216structure:</p> 217<pre>#include <libxml/tree.h> 218/* 219 * a Description for a Job 220 */ 221typedef struct job { 222 char *projectID; 223 char *application; 224 char *category; 225 personPtr contact; 226 int nbDevelopers; 227 personPtr developers[100]; /* using dynamic alloc is left as an exercise */ 228} job, *jobPtr; 229 230/* 231 * And the code needed to parse it 232 */ 233jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) { 234 jobPtr ret = NULL; 235 236DEBUG("parseJob\n"); 237 /* 238 * allocate the struct 239 */ 240 ret = (jobPtr) malloc(sizeof(job)); 241 if (ret == NULL) { 242 fprintf(stderr,"out of memory\n"); 243 return(NULL); 244 } 245 memset(ret, 0, sizeof(job)); 246 247 /* We don't care what the top level element name is */ 248 cur = cur->xmlChildrenNode; 249 while (cur != NULL) { 250 251 if ((!strcmp(cur->name, "Project")) && (cur->ns == ns)) { 252 ret->projectID = xmlGetProp(cur, "ID"); 253 if (ret->projectID == NULL) { 254 fprintf(stderr, "Project has no ID\n"); 255 } 256 } 257 if ((!strcmp(cur->name, "Application")) && (cur->ns == ns)) 258 ret->application = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 259 if ((!strcmp(cur->name, "Category")) && (cur->ns == ns)) 260 ret->category = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 261 if ((!strcmp(cur->name, "Contact")) && (cur->ns == ns)) 262 
ret->contact = parsePerson(doc, ns, cur); 263 cur = cur->next; 264 } 265 266 return(ret); 267}</pre> 268<p>Once you are used to it, writing this kind of code is quite simple, but 269boring. Ultimately, it could be possible to write stubbers taking either C 270data structure definitions, a set of XML examples or an XML DTD and produce 271the code needed to import and export the content between C data and XML 272storage. This is left as an exercise to the reader :-)</p> 273<p>Feel free to use <a href="example/gjobread.c">the code for the full C 274parsing example</a> as a template; it is also available with Makefile in the 275Gnome CVS base under gnome-xml/example</p> 276<p><a href="bugs.html">Daniel Veillard</a></p> 277</td></tr></table></td></tr></table></td></tr></table></td> 278</tr></table></td></tr></table> 279</body> 280</html> 281