example.html revision fc59c09da068bd1757aa66dffb5fc20f58a0426d
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd"> 2<html> 3<head> 4<meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"> 5<link rel="SHORTCUT ICON" href="/favicon.ico"> 6<style type="text/css"><!-- 7TD {font-family: Verdana,Arial,Helvetica} 8BODY {font-family: Verdana,Arial,Helvetica; margin-top: 2em; margin-left: 0em; margin-right: 0em} 9H1 {font-family: Verdana,Arial,Helvetica} 10H2 {font-family: Verdana,Arial,Helvetica} 11H3 {font-family: Verdana,Arial,Helvetica} 12A:link, A:visited, A:active { text-decoration: underline } 13--></style> 14<title>A real example</title> 15</head> 16<body bgcolor="#8b7765" text="#000000" link="#000000" vlink="#000000"> 17<table border="0" width="100%" cellpadding="5" cellspacing="0" align="center"><tr> 18<td width="180"> 19<a href="http://www.gnome.org/"><img src="smallfootonly.gif" alt="Gnome Logo"></a><a href="http://www.w3.org/Status"><img src="w3c.png" alt="W3C Logo"></a><a href="http://www.redhat.com/"><img src="redhat.gif" alt="Red Hat Logo"></a> 20</td> 21<td><table border="0" width="90%" cellpadding="2" cellspacing="0" align="center" bgcolor="#000000"><tr><td><table width="100%" border="0" cellspacing="1" cellpadding="3" bgcolor="#fffacd"><tr><td align="center"> 22<h1>The XML C library for Gnome</h1> 23<h2>A real example</h2> 24</td></tr></table></td></tr></table></td> 25</tr></table> 26<table border="0" cellpadding="4" cellspacing="0" width="100%" align="center"><tr><td bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="2" width="100%"><tr> 27<td valign="top" width="200" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td> 28<table width="100%" border="0" cellspacing="1" cellpadding="3"> 29<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Main Menu</b></center></td></tr> 30<tr><td bgcolor="#fffacd"><ul> 31<li><a href="index.html">Home</a></li> 
32<li><a href="intro.html">Introduction</a></li> 33<li><a href="FAQ.html">FAQ</a></li> 34<li><a href="docs.html">Documentation</a></li> 35<li><a href="bugs.html">Reporting bugs and getting help</a></li> 36<li><a href="help.html">How to help</a></li> 37<li><a href="downloads.html">Downloads</a></li> 38<li><a href="news.html">News</a></li> 39<li><a href="XMLinfo.html">XML</a></li> 40<li><a href="XSLT.html">XSLT</a></li> 41<li><a href="python.html">Python and bindings</a></li> 42<li><a href="architecture.html">libxml architecture</a></li> 43<li><a href="tree.html">The tree output</a></li> 44<li><a href="interface.html">The SAX interface</a></li> 45<li><a href="xmldtd.html">Validation & DTDs</a></li> 46<li><a href="xmlmem.html">Memory Management</a></li> 47<li><a href="encoding.html">Encodings support</a></li> 48<li><a href="xmlio.html">I/O Interfaces</a></li> 49<li><a href="catalog.html">Catalog support</a></li> 50<li><a href="library.html">The parser interfaces</a></li> 51<li><a href="entities.html">Entities or no entities</a></li> 52<li><a href="namespaces.html">Namespaces</a></li> 53<li><a href="upgrade.html">Upgrading 1.x code</a></li> 54<li><a href="threads.html">Thread safety</a></li> 55<li><a href="DOM.html">DOM Principles</a></li> 56<li><a href="example.html">A real example</a></li> 57<li><a href="contribs.html">Contributions</a></li> 58<li><a href="tutorial/index.html">Tutorial</a></li> 59<li> 60<a href="xml.html">flat page</a>, <a href="site.xsl">stylesheet</a> 61</li> 62</ul></td></tr> 63</table> 64<table width="100%" border="0" cellspacing="1" cellpadding="3"> 65<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>API Indexes</b></center></td></tr> 66<tr><td bgcolor="#fffacd"><ul> 67<li><a href="APIchunk0.html">Alphabetic</a></li> 68<li><a href="APIconstructors.html">Constructors</a></li> 69<li><a href="APIfunctions.html">Functions/Types</a></li> 70<li><a href="APIfiles.html">Modules</a></li> 71<li><a href="APIsymbols.html">Symbols</a></li> 
72</ul></td></tr> 73</table> 74<table width="100%" border="0" cellspacing="1" cellpadding="3"> 75<tr><td colspan="1" bgcolor="#eecfa1" align="center"><center><b>Related links</b></center></td></tr> 76<tr><td bgcolor="#fffacd"><ul> 77<li><a href="http://mail.gnome.org/archives/xml/">Mail archive</a></li> 78<li><a href="http://xmlsoft.org/XSLT/">XSLT libxslt</a></li> 79<li><a href="http://phd.cs.unibo.it/gdome2/">DOM gdome2</a></li> 80<li><a href="http://www.aleksey.com/xmlsec/">XML-DSig xmlsec</a></li> 81<li><a href="ftp://xmlsoft.org/">FTP</a></li> 82<li><a href="http://www.fh-frankfurt.de/~igor/projects/libxml/">Windows binaries</a></li> 83<li><a href="http://garypennington.net/libxml2/">Solaris binaries</a></li> 84<li><a href="http://sourceforge.net/projects/libxml2-pas/">Pascal bindings</a></li> 85<li><a href="http://bugzilla.gnome.org/buglist.cgi?product=libxml&product=libxml2">Bug Tracker</a></li> 86</ul></td></tr> 87</table> 88</td></tr></table></td> 89<td valign="top" bgcolor="#8b7765"><table border="0" cellspacing="0" cellpadding="1" width="100%"><tr><td><table border="0" cellspacing="0" cellpadding="1" width="100%" bgcolor="#000000"><tr><td><table border="0" cellpadding="3" cellspacing="1" width="100%"><tr><td bgcolor="#fffacd"> 90<p>Here is a real size example, where the actual content of the application 91data is not kept in the DOM tree but uses internal structures. It is based on 92a proposal to keep a database of jobs related to Gnome, with an XML based 93storage structure. 
Here is an <a href="gjobs.xml">XML encoded jobs 94base</a>:</p> 95<pre><?xml version="1.0"?> 96<gjob:Helping xmlns:gjob="http://www.gnome.org/some-location"> 97 <gjob:Jobs> 98 99 <gjob:Job> 100 <gjob:Project ID="3"/> 101 <gjob:Application>GBackup</gjob:Application> 102 <gjob:Category>Development</gjob:Category> 103 104 <gjob:Update> 105 <gjob:Status>Open</gjob:Status> 106 <gjob:Modified>Mon, 07 Jun 1999 20:27:45 -0400 MET DST</gjob:Modified> 107 <gjob:Salary>USD 0.00</gjob:Salary> 108 </gjob:Update> 109 110 <gjob:Developers> 111 <gjob:Developer> 112 </gjob:Developer> 113 </gjob:Developers> 114 115 <gjob:Contact> 116 <gjob:Person>Nathan Clemons</gjob:Person> 117 <gjob:Email>nathan@windsofstorm.net</gjob:Email> 118 <gjob:Company> 119 </gjob:Company> 120 <gjob:Organisation> 121 </gjob:Organisation> 122 <gjob:Webpage> 123 </gjob:Webpage> 124 <gjob:Snailmail> 125 </gjob:Snailmail> 126 <gjob:Phone> 127 </gjob:Phone> 128 </gjob:Contact> 129 130 <gjob:Requirements> 131 The program should be released as free software, under the GPL. 132 </gjob:Requirements> 133 134 <gjob:Skills> 135 </gjob:Skills> 136 137 <gjob:Details> 138 A GNOME based system that will allow a superuser to configure 139 compressed and uncompressed files and/or file systems to be backed 140 up with a supported media in the system. This should be able to 141 perform via find commands generating a list of files that are passed 142 to tar, dd, cpio, cp, gzip, etc., to be directed to the tape machine 143 or via operations performed on the filesystem itself. Email 144 notification and GUI status display very important. 
145 </gjob:Details> 146 147 </gjob:Job> 148 149 </gjob:Jobs> 150</gjob:Helping></pre> 151<p>While loading the XML file into an internal DOM tree is a matter of 152calling only a couple of functions, browsing the tree to gather the data and 153generate the internal structures is harder and more error-prone.</p> 154<p>The suggested principle is to be tolerant with respect to the input 155structure. For example, the ordering of the attributes is not significant; 156the XML specification is clear about it. It's also usually a good idea not to 157depend on the order of the children of a given node, unless it really makes 158things harder. Here is some code to parse the information for a person:</p> 159<pre>/* 160 * A person record 161 */ 162typedef struct person { 163 char *name; 164 char *email; 165 char *company; 166 char *organisation; 167 char *smail; 168 char *webPage; 169 char *phone; 170} person, *personPtr; 171 172/* 173 * And the code needed to parse it 174 */ 175personPtr parsePerson(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) { 176 personPtr ret = NULL; 177 178DEBUG("parsePerson\n"); 179 /* 180 * allocate the struct 181 */ 182 ret = (personPtr) malloc(sizeof(person)); 183 if (ret == NULL) { 184 fprintf(stderr,"out of memory\n"); 185 return(NULL); 186 } 187 memset(ret, 0, sizeof(person)); 188 189 /* We don't care what the top level element name is */ 190 cur = cur->xmlChildrenNode; 191 while (cur != NULL) { 192 if ((!strcmp(cur->name, "Person")) && (cur->ns == ns)) 193 ret->name = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 194 if ((!strcmp(cur->name, "Email")) && (cur->ns == ns)) 195 ret->email = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 196 cur = cur->next; 197 } 198 199 return(ret); 200}</pre> 201<p>Here are a couple of things to notice:</p> 202<ul> 203<li>Usually a recursive parsing style is the most convenient one: XML data 204 is by nature subject to repetitive constructs and usually exhibits highly 205 structured patterns.</li> 
206<li>The two arguments of type <em>xmlDocPtr</em> and <em>xmlNsPtr</em> are 207 the pointer to the global XML document and the namespace reserved to 208 the application. Document-wide information is needed, for example, to 209 decode entities, and it's good coding practice to define a namespace for 210 your application's set of data and test that the elements and attributes 211 you're analyzing actually pertain to your application space. This is 212 done by a simple equality test (cur->ns == ns).</li> 213<li>To retrieve text and attribute values, you can use the function 214 <em>xmlNodeListGetString</em> to gather all the text and entity reference 215 nodes generated by the DOM output and produce a single text string.</li> 216</ul> 217<p>Here is another piece of code used to parse another level of the 218structure:</p> 219<pre>#include <libxml/tree.h> 220/* 221 * a Description for a Job 222 */ 223typedef struct job { 224 char *projectID; 225 char *application; 226 char *category; 227 personPtr contact; 228 int nbDevelopers; 229 personPtr developers[100]; /* using dynamic alloc is left as an exercise */ 230} job, *jobPtr; 231 232/* 233 * And the code needed to parse it 234 */ 235jobPtr parseJob(xmlDocPtr doc, xmlNsPtr ns, xmlNodePtr cur) { 236 jobPtr ret = NULL; 237 238DEBUG("parseJob\n"); 239 /* 240 * allocate the struct 241 */ 242 ret = (jobPtr) malloc(sizeof(job)); 243 if (ret == NULL) { 244 fprintf(stderr,"out of memory\n"); 245 return(NULL); 246 } 247 memset(ret, 0, sizeof(job)); 248 249 /* We don't care what the top level element name is */ 250 cur = cur->xmlChildrenNode; 251 while (cur != NULL) { 252 253 if ((!strcmp(cur->name, "Project")) && (cur->ns == ns)) { 254 ret->projectID = xmlGetProp(cur, "ID"); 255 if (ret->projectID == NULL) { 256 fprintf(stderr, "Project has no ID\n"); 257 } 258 } 259 if ((!strcmp(cur->name, "Application")) && (cur->ns == ns)) 260 ret->application = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 261 if ((!strcmp(cur->name, 
"Category")) && (cur->ns == ns)) 262 ret->category = xmlNodeListGetString(doc, cur->xmlChildrenNode, 1); 263 if ((!strcmp(cur->name, "Contact")) && (cur->ns == ns)) 264 ret->contact = parsePerson(doc, ns, cur); 265 cur = cur->next; 266 } 267 268 return(ret); 269}</pre> 270<p>Once you are used to it, writing this kind of code is quite simple, but 271boring. Ultimately, it would be possible to write stubbers that take either C 272data structure definitions, a set of XML examples or an XML DTD and produce 273the code needed to import and export the content between C data and XML 274storage. This is left as an exercise to the reader :-)</p> 275<p>Feel free to use <a href="example/gjobread.c">the code for the full C 276parsing example</a> as a template; it is also available with a Makefile in the 277Gnome CVS base under gnome-xml/example.</p> 278<p><a href="bugs.html">Daniel Veillard</a></p> 279</td></tr></table></td></tr></table></td></tr></table></td> 280</tr></table></td></tr></table> 281</body> 282</html> 283