1e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET/*************************************************************************** 2e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * _ _ ____ _ 3e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * Project ___| | | | _ \| | 4e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * / __| | | | |_) | | 5e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * | (__| |_| | _ <| |___ 6e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * \___|\___/|_| \_\_____| 7e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 88f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * Copyright (C) 1998 - 2016, Daniel Stenberg, <daniel@haxx.se>, et al. 9e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 10e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * This software is licensed as described in the file COPYING, which 11e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * you should have received as part of this distribution. The terms 128f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * are also available at https://curl.haxx.se/docs/copyright.html. 13e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 14e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * copies of the Software, and permit persons to whom the Software is 16e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * furnished to do so, under the terms of the COPYING file. 17e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 18e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * KIND, either express or implied. 20e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 21e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET ***************************************************************************/ 228f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo/* <DESC> 238f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * Stream-parse a document using the streaming Expat parser. 248f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo * </DESC> 258f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo */ 268f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo/* Written by David Strauss 27e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 28e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * Expat => http://www.libexpat.org/ 29e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 30e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * gcc -Wall -I/usr/local/include xmlstream.c -lcurl -lexpat -o xmlstream 31e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET * 32e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET */ 33e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 34e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <stdio.h> 35e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <stdlib.h> 36e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <string.h> 37e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <assert.h> 38e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 39e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <expat.h> 40e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET#include <curl/curl.h> 41e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 42e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstruct MemoryStruct { 43e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET char *memory; 44e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET size_t size; 45e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}; 46e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 47e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstruct ParserStruct { 48e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET int ok; 49e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET size_t tags; 50e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET size_t depth; 51e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct MemoryStruct characters; 52e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET}; 53e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 548f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymostatic void startElement(void *userData, const XML_Char *name, 558f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo const XML_Char **atts) 56e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{ 57e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct ParserStruct *state = (struct ParserStruct *) userData; 58e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->tags++; 59e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->depth++; 60e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 61e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Get a clean slate for reading in character data. */ 62e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET free(state->characters.memory); 63e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->characters.memory = NULL; 64e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->characters.size = 0; 65e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET} 66e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 67e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstatic void characterDataHandler(void *userData, const XML_Char *s, int len) 68e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{ 69e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct ParserStruct *state = (struct ParserStruct *) userData; 70e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct MemoryStruct *mem = &state->characters; 71e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 72e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET mem->memory = realloc(mem->memory, mem->size + len + 1); 73e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET if(mem->memory == NULL) { 74e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Out of memory. */ 75e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET fprintf(stderr, "Not enough memory (realloc returned NULL).\n"); 76e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->ok = 0; 77e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET return; 78e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET } 79e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 80e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET memcpy(&(mem->memory[mem->size]), s, len); 81e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET mem->size += len; 82e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET mem->memory[mem->size] = 0; 83e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET} 84e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 85e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETstatic void endElement(void *userData, const XML_Char *name) 86e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{ 87e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct ParserStruct *state = (struct ParserStruct *) userData; 88e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->depth--; 89e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 90e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET printf("%5lu %10lu %s\n", state->depth, state->characters.size, name); 91e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET} 92e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 938f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymostatic size_t parseStreamCallback(void *contents, size_t length, size_t nmemb, 948f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo void *userp) 95e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{ 96e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET XML_Parser parser = (XML_Parser) userp; 97e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET size_t real_size = length * nmemb; 98e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct ParserStruct *state = (struct ParserStruct *) XML_GetUserData(parser); 99e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 100e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Only parse if we're not already in a failure state. */ 1018f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo if(state->ok && XML_Parse(parser, contents, real_size, 0) == 0) { 102e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET int error_code = XML_GetErrorCode(parser); 1038f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo fprintf(stderr, "Parsing response buffer of length %lu failed" 1048f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo " with error code %d (%s).\n", 105e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET real_size, error_code, XML_ErrorString(error_code)); 106e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state->ok = 0; 107e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET } 108e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 109e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET return real_size; 110e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET} 111e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 112e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNETint main(void) 113e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET{ 114e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET CURL *curl_handle; 115e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET CURLcode res; 116e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET XML_Parser parser; 117e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET struct ParserStruct state; 118e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 119e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Initialize the state structure for parsing. */ 120e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET memset(&state, 0, sizeof(struct ParserStruct)); 121e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET state.ok = 1; 122e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 123e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Initialize a namespace-aware parser. */ 124e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET parser = XML_ParserCreateNS(NULL, '\0'); 125e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET XML_SetUserData(parser, &state); 126e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET XML_SetElementHandler(parser, startElement, endElement); 127e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET XML_SetCharacterDataHandler(parser, characterDataHandler); 128e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 129e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Initialize a libcurl handle. */ 130e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_global_init(CURL_GLOBAL_ALL ^ CURL_GLOBAL_SSL); 131e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_handle = curl_easy_init(); 1328f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo curl_easy_setopt(curl_handle, CURLOPT_URL, 1338f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo "http://www.w3schools.com/xml/simple.xml"); 134e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, parseStreamCallback); 135e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)parser); 136e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 137e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET printf("Depth Characters Closing Tag\n"); 138e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 139e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Perform the request and any follow-up parsing. */ 140e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET res = curl_easy_perform(curl_handle); 141e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET if(res != CURLE_OK) { 142e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET fprintf(stderr, "curl_easy_perform() failed: %s\n", 143e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_easy_strerror(res)); 144e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET } 1458f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo else if(state.ok) { 146e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Expat requires one final call to finalize parsing. */ 1478f1a214b8a21b66f33454790dfba97ae2f818289Alex Deymo if(XML_Parse(parser, NULL, 0, 1) == 0) { 148e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET int error_code = XML_GetErrorCode(parser); 149e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET fprintf(stderr, "Finalizing parsing failed with error code %d (%s).\n", 150e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET error_code, XML_ErrorString(error_code)); 151e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET } 152e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET else { 153e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET printf(" --------------\n"); 154e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET printf(" %lu tags total\n", state.tags); 155e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET } 156e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET } 157e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 158e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET /* Clean up. */ 159e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET free(state.characters.memory); 160e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET XML_ParserFree(parser); 161e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_easy_cleanup(curl_handle); 162e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET curl_global_cleanup(); 163e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET 164e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET return 0; 165e6cd738ed3716c02557fb3a47515244e949ade39Bertrand SIMONNET} 166