example.html revision f85925628ac8b50c0a5339fccd861aedf46c2f00
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd"> 2<html> 3<head> 4<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"> 5<style type="text/css"><!-- 6TD {font-size: 14pt; font-family: Verdana,Arial,Helvetica} 7BODY {font-size: 14pt; font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em} 8H1 {font-size: 20pt; font-family: Verdana,Arial,Helvetica} 9H2 {font-size: 18pt; font-family: Verdana,Arial,Helvetica} 10H3 {font-size: 16pt; font-family: Verdana,Arial,Helvetica} 11A:link, A:visited, A:active { text-decoration: underline } 12--></style> 13<title>A real example</title> 14</head> 15<body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000"> 16<table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr> 17<td width="180"> 18<a href="http://www.gnome.org/"><img src="smallfootonly.gif" alt="Gnome Logo"></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo"></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo"></a> 19</td> 20<td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"> 21<h1>The XML C library for Gnome</h1> 22<h2>A real example</h2> 23</td></tr></table></td></tr></table></td> 24</tr></table> 25<table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr> 26<td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td> 27<table width="100%" border="0" cellspacing="1" cellpadding="3"> 28<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Main Menu</b></center></td></tr> 29<tr><td bgcolor="#fffacd"><ul> 30<li><a 
href="index.html">Home</a></li> 31<li><a href="intro.html">Introduction</a></li> 32<li><a href="FAQ.html">FAQ</a></li> 33<li><a href="docs.html">Documentation</a></li> 34<li><a href="bugs.html">Reporting bugs and getting help</a></li> 35<li><a href="help.html">How to help</a></li> 36<li><a href="downloads.html">Downloads</a></li> 37<li><a href="news.html">News</a></li> 38<li><a href="XMLinfo.html">XML</a></li> 39<li><a href="XSLT.html">XSLT</a></li> 40<li><a href="architecture.html">libxml architecture</a></li> 41<li><a href="tree.html">The tree output</a></li> 42<li><a href="interface.html">The SAX interface</a></li> 43<li><a href="xmldtd.html">Validation & DTDs</a></li> 44<li><a href="xmlmem.html">Memory Management</a></li> 45<li><a href="encoding.html">Encodings support</a></li> 46<li><a href="xmlio.html">I/O Interfaces</a></li> 47<li><a href="catalog.html">Catalog support</a></li> 48<li><a href="library.html">The parser interfaces</a></li> 49<li><a href="entities.html">Entities or no entities</a></li> 50<li><a href="namespaces.html">Namespaces</a></li> 51<li><a href="upgrade.html">Upgrading 1.x code</a></li> 52<li><a href="threads.html">Thread safety</a></li> 53<li><a href="DOM.html">DOM Principles</a></li> 54<li><a href="example.html">A real example</a></li> 55<li><a href="contribs.html">Contributions</a></li> 56<li> 57<a href="xml.html">flat page</a>, <a href="site.xsl">stylesheet</a> 58</li> 59</ul></td></tr> 60</table> 61<table width="100%" border="0" cellspacing="1" cellpadding="3"> 62<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>API Indexes</b></center></td></tr> 63<tr><td bgcolor="#fffacd"><ul> 64<li><a href="APIchunk0.html">Alphabetic</a></li> 65<li><a href="APIconstructors.html">Constructors</a></li> 66<li><a href="APIfunctions.html">Functions/Types</a></li> 67<li><a href="APIfiles.html">Modules</a></li> 68<li><a href="APIsymbols.html">Symbols</a></li> 69</ul></td></tr> 70</table> 71<table width="100%" border="0" cellspacing="1" 
cellpadding="3"> 72<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr> 73<tr><td bgcolor="#fffacd"><ul> 74<li><a href="http://mail.gnome.org/archives/xml/">Mail archive</a></li> 75<li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li> 76<li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li> 77<li><a href="ftp://xmlsoft.org/">FTP</a></li> 78<li><a href="http://www.fh-frankfurt.de/~igor/projects/libxml/">Windows binaries</a></li> 79<li><a href="http://garypennington.net/libxml2/">Solaris binaries</a></li> 80<li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml">Bug Tracker</a></li> 81</ul></td></tr> 82</table> 83</td></tr></table></td> 84<td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"> 85<p>Here is a real size example, where the actual content of the application 86data is not kept in the DOM tree but uses internal structures. It is based on 87a proposal to keep a database of jobs related to Gnome, with an XML based 88storage structure. 
Here is an <a href="gjobs.xml">XML encoded jobs 89base</a>:</p> 90<pre><?xml version="1.0"?> 91<gjob:Helping xmlns:gjob="http://www.gnome.org/some-location"> 92 <gjob:Jobs> 93 94 <gjob:Job> 95 <gjob:Project ID="3"/> 96 <gjob:Application>GBackup</gjob:Application> 97 <gjob:Category>Development</gjob:Category> 98 99 <gjob:Update> 100 <gjob:Status>Open</gjob:Status> 101 <gjob:Modified>Mon, 07 Jun 1999 20:27:45 -0400 MET DST</gjob:Modified> 102 <gjob:Salary>USD 0.00</gjob:Salary> 103 </gjob:Update> 104 105 <gjob:Developers> 106 <gjob:Developer> 107 </gjob:Developer> 108 </gjob:Developers> 109 110 <gjob:Contact> 111 <gjob:Person>Nathan Clemons</gjob:Person> 112 <gjob:Email>nathan@windsofstorm.net</gjob:Email> 113 <gjob:Company> 114 </gjob:Company> 115 <gjob:Organisation> 116 </gjob:Organisation> 117 <gjob:Webpage> 118 </gjob:Webpage> 119 <gjob:Snailmail> 120 </gjob:Snailmail> 121 <gjob:Phone> 122 </gjob:Phone> 123 </gjob:Contact> 124 125 <gjob:Requirements> 126 The program should be released as free software, under the GPL. 127 </gjob:Requirements> 128 129 <gjob:Skills> 130 </gjob:Skills> 131 132 <gjob:Details> 133 A GNOME based system that will allow a superuser to configure 134 compressed and uncompressed files and/or file systems to be backed 135 up with a supported media in the system. This should be able to 136 perform via find commands generating a list of files that are passed 137 to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine 138 or via operations performed on the filesystem itself. Email 139 notification and GUI status display very important. 140 </gjob:Details> 141 142 </gjob:Job> 143 144 </gjob:Jobs> 145</gjob:Helping></pre> 146<p>While loading the XML file into an internal DOM tree is a matter of 147calling only a couple of functions, browsing the tree to gather the data and 148generate the internal structures is harder, and more error prone.</p> 149<p>The suggested principle is to be tolerant with respect to the input 150structure. 
For example, the ordering of the attributes is not significant, 151the XML specification is clear about it. It's also usually a good idea not to 152depend on the order of the children of a given node, unless it really makes 153things harder. Here is some code to parse the information for a person:</p> 154<pre>/* 155 * A person record 156 */ 157typedef struct person { 158 char *name; 159 char *email; 160 char *company; 161 char *organisation; 162 char *smail; 163 char *webPage; 164 char *phone; 165} person, *personPtr; 166 167/* 168 * And the code needed to parse it 169 */ 170personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) { 171 personPtr ret = NULL; 172 173DEBUG("parsePerson\n"); 174 /* 175 * allocate the struct 176 */ 177 ret = (personPtr) malloc(sizeof(person)); 178 if (ret == NULL) { 179 fprintf(stderr,"out of memory\n"); 180 return(NULL); 181 } 182 memset(ret, 0, sizeof(person)); 183 184 /* We don't care what the top level element name is */ 185 cur = cur->xmlChildrenNode; 186 while (cur != NULL) { 187 if ((!strcmp(cur->name, "Person")) && (cur->ns == ns)) 188 ret->name = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 189 if ((!strcmp(cur->name, "Email")) && (cur->ns == ns)) 190 ret->email = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 191 cur = cur->next; 192 } 193 194 return(ret); 195}</pre> 196<p>Here are a couple of things to notice:</p> 197<ul> 198<li>Usually a recursive parsing style is the more convenient one: XML data 199 is by nature subject to repetitive constructs and usually exhibits highly 200 structured patterns.</li> 201<li>The two arguments of type <em>xmlDocPtr</em> and <em>xmlNsPtr</em>, 202 i.e. the pointer to the global XML document and the namespace reserved to 203 the application. 
Document-wide information is needed for example to 204 decode entities and it's a good coding practice to define a namespace for 205 your application's set of data and test that the element and attributes 206 you're analyzing actually pertain to your application space. This is 207 done by a simple equality test (cur->ns == ns).</li> 208<li>To retrieve text and attribute values, you can use the function 209 <em>xmlNodeListGetString</em> to gather all the text and entity reference 210 nodes generated by the DOM output and produce a single text string.</li> 211</ul> 212<p>Here is another piece of code used to parse another level of the 213structure:</p> 214<pre>#include <libxml/tree.h> 215/* 216 * a Description for a Job 217 */ 218typedef struct job { 219 char *projectID; 220 char *application; 221 char *category; 222 personPtr contact; 223 int nbDevelopers; 224 personPtr developers[100]; /* using dynamic alloc is left as an exercise */ 225} job, *jobPtr; 226 227/* 228 * And the code needed to parse it 229 */ 230jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) { 231 jobPtr ret = NULL; 232 233DEBUG("parseJob\n"); 234 /* 235 * allocate the struct 236 */ 237 ret = (jobPtr) malloc(sizeof(job)); 238 if (ret == NULL) { 239 fprintf(stderr,"out of memory\n"); 240 return(NULL); 241 } 242 memset(ret, 0, sizeof(job)); 243 244 /* We don't care what the top level element name is */ 245 cur = cur->xmlChildrenNode; 246 while (cur != NULL) { 247 248 if ((!strcmp(cur->name, "Project")) && (cur->ns == ns)) { 249 ret->projectID = xmlGetProp(cur, "ID"); 250 if (ret->projectID == NULL) { 251 fprintf(stderr, "Project has no ID\n"); 252 } 253 } 254 if ((!strcmp(cur->name, "Application")) && (cur->ns == ns)) 255 ret->application = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 256 if ((!strcmp(cur->name, "Category")) && (cur->ns == ns)) 257 ret->category = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 258 if ((!strcmp(cur->name, "Contact")) && (cur->ns == ns)) 259 
ret->contact = parsePerson(doc, ns, cur); 260 cur = cur->next; 261 } 262 263 return(ret); 264}</pre> 265<p>Once you are used to it, writing this kind of code is quite simple, but 266boring. Ultimately, it could be possible to write stubbers taking either C 267data structure definitions, a set of XML examples or an XML DTD and produce 268the code needed to import and export the content between C data and XML 269storage. This is left as an exercise to the reader :-)</p> 270<p>Feel free to use <a href="example/gjobread.c">the code for the full C 271parsing example</a> as a template, it is also available with Makefile in the 272Gnome CVS base under gnome-xml/example</p> 273<p><a href="mailto:daniel@veillard.com">Daniel Veillard</a></p> 274</td></tr></table></td></tr></table></td></tr></table></td> 275</tr></table></td></tr></table> 276</body> 277</html> 278