1e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET/***************************************************************************
2e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *                                  _   _ ____  _
3e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *  Project                     ___| | | |  _ \| |
4e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *                             / __| | | | |_) | |
5e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *                            | (__| |_| |  _ <| |___
6e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *                             \___|\___/|_| \_\_____|
7e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
88f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * Copyright (C) 1998 - 2016, Daniel Stenberg, <daniel@haxx.se>, et al.
9e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
10e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * This software is licensed as described in the file COPYING, which
11e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * you should have received as part of this distribution. The terms
128f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * are also available at https://curl.haxx.se/docs/copyright.html.
13e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
14e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * copies of the Software, and permit persons to whom the Software is
16e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * furnished to do so, under the terms of the COPYING file.
17e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
18e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * KIND, either express or implied.
20e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
21e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET ***************************************************************************/
228f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo/* <DESC>
238f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * Stream-parse a document using the streaming Expat parser.
248f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * </DESC>
258f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo */
268f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo/* Written by David Strauss
27e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
28e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * Expat => http://www.libexpat.org/
29e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
30e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * gcc -Wall -I/usr/local/include xmlstream.c -lcurl -lexpat -o xmlstream
31e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET *
32e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET */
33e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
34e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <stdio.h>
35e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <stdlib.h>
36e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <string.h>
37e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <assert.h>
38e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
39e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <expat.h>
40e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <curl/curl.h>
41e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
/* Growable byte buffer used to collect character data.
 *
 * memory: heap block holding the collected bytes, or NULL when nothing has
 *         been collected; the handlers keep it NUL-terminated.
 * size:   number of collected bytes, not counting the terminating NUL.
 */
struct MemoryStruct {
  char *memory;
  size_t size;
};
46e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
47e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstruct ParserStruct {
48e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  int ok;
49e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  size_t tags;
50e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  size_t depth;
51e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct MemoryStruct characters;
52e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET};
53e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
548f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymostatic void startElement(void *userData, const XML_Char *name,
558f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo                         const XML_Char **atts)
56e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{
57e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct ParserStruct *state = (struct ParserStruct *) userData;
58e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  state->tags++;
59e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  state->depth++;
60e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
61e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Get a clean slate for reading in character data. */
62e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  free(state->characters.memory);
63e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  state->characters.memory = NULL;
64e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  state->characters.size = 0;
65e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}
66e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
67e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstatic void characterDataHandler(void *userData, const XML_Char *s, int len)
68e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{
69e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct ParserStruct *state = (struct ParserStruct *) userData;
70e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct MemoryStruct *mem = &state->characters;
71e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
72e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  mem->memory = realloc(mem->memory, mem->size + len + 1);
73e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  if(mem->memory == NULL) {
74e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    /* Out of memory. */
75e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    fprintf(stderr, "Not enough memory (realloc returned NULL).\n");
76e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    state->ok = 0;
77e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    return;
78e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  }
79e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
80e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  memcpy(&(mem->memory[mem->size]), s, len);
81e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  mem->size += len;
82e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  mem->memory[mem->size] = 0;
83e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}
84e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
85e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstatic void endElement(void *userData, const XML_Char *name)
86e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{
87e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct ParserStruct *state = (struct ParserStruct *) userData;
88e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  state->depth--;
89e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
90e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  printf("%5lu   %10lu   %s\n", state->depth, state->characters.size, name);
91e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}
92e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
938f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymostatic size_t parseStreamCallback(void *contents, size_t length, size_t nmemb,
948f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo                                  void *userp)
95e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{
96e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  XML_Parser parser = (XML_Parser) userp;
97e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  size_t real_size = length * nmemb;
98e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct ParserStruct *state = (struct ParserStruct *) XML_GetUserData(parser);
99e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
100e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Only parse if we're not already in a failure state. */
1018f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo  if(state->ok && XML_Parse(parser, contents, real_size, 0) == 0) {
102e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    int error_code = XML_GetErrorCode(parser);
1038f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo    fprintf(stderr, "Parsing response buffer of length %lu failed"
1048f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo            " with error code %d (%s).\n",
105e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET            real_size, error_code, XML_ErrorString(error_code));
106e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    state->ok = 0;
107e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  }
108e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
109e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  return real_size;
110e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}
111e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
112e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETint main(void)
113e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{
114e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  CURL *curl_handle;
115e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  CURLcode res;
116e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  XML_Parser parser;
117e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  struct ParserStruct state;
118e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
119e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Initialize the state structure for parsing. */
120e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  memset(&state, 0, sizeof(struct ParserStruct));
121e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  state.ok = 1;
122e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
123e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Initialize a namespace-aware parser. */
124e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  parser = XML_ParserCreateNS(NULL, '\0');
125e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  XML_SetUserData(parser, &state);
126e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  XML_SetElementHandler(parser, startElement, endElement);
127e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  XML_SetCharacterDataHandler(parser, characterDataHandler);
128e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
129e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Initialize a libcurl handle. */
130e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  curl_global_init(CURL_GLOBAL_ALL ^ CURL_GLOBAL_SSL);
131e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  curl_handle = curl_easy_init();
1328f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo  curl_easy_setopt(curl_handle, CURLOPT_URL,
1338f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo                   "http://www.w3schools.com/xml/simple.xml");
134e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, parseStreamCallback);
135e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)parser);
136e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
137e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  printf("Depth   Characters   Closing Tag\n");
138e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
139e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Perform the request and any follow-up parsing. */
140e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  res = curl_easy_perform(curl_handle);
141e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  if(res != CURLE_OK) {
142e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    fprintf(stderr, "curl_easy_perform() failed: %s\n",
143e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET            curl_easy_strerror(res));
144e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  }
1458f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo  else if(state.ok) {
146e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    /* Expat requires one final call to finalize parsing. */
1478f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo    if(XML_Parse(parser, NULL, 0, 1) == 0) {
148e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET      int error_code = XML_GetErrorCode(parser);
149e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET      fprintf(stderr, "Finalizing parsing failed with error code %d (%s).\n",
150e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET              error_code, XML_ErrorString(error_code));
151e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    }
152e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    else {
153e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET      printf("                     --------------\n");
154e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET      printf("                     %lu tags total\n", state.tags);
155e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET    }
156e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  }
157e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
158e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  /* Clean up. */
159e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  free(state.characters.memory);
160e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  XML_ParserFree(parser);
161e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  curl_easy_cleanup(curl_handle);
162e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  curl_global_cleanup();
163e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET
164e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET  return 0;
165e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}
166