1/*
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10   Copyright (c) 2000-2017 Expat development team
11   Licensed under the MIT license:
12
13   Permission is  hereby granted,  free of charge,  to any  person obtaining
14   a  copy  of  this  software   and  associated  documentation  files  (the
15   "Software"),  to  deal in  the  Software  without restriction,  including
16   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17   distribute, sublicense, and/or sell copies of the Software, and to permit
18   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19   following conditions:
20
21   The above copyright  notice and this permission notice  shall be included
22   in all copies or substantial portions of the Software.
23
24   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30   USE OR OTHER DEALINGS IN THE SOFTWARE.
31*/
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <stddef.h>
36#include <string.h>
37#include <fcntl.h>
38
39#ifdef _WIN32
40#include "winconfig.h"
41#elif defined(HAVE_EXPAT_CONFIG_H)
42#include <expat_config.h>
43#endif /* ndef _WIN32 */
44
45#include "expat.h"
46#include "internal.h"  /* for UNUSED_P only */
47#include "xmlfile.h"
48#include "xmltchar.h"
49#include "filemap.h"
50
51#if defined(_MSC_VER)
52#include <io.h>
53#endif
54
55#ifdef HAVE_UNISTD_H
56#include <unistd.h>
57#endif
58
/* Make O_BINARY usable everywhere: reuse the underscore-prefixed spelling
   when the platform provides one, otherwise make the flag a no-op (0). */
#if !defined(O_BINARY)
#  if defined(_O_BINARY)
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
66
/* Number of bytes requested per read() in processStream().  Debug builds
   use a tiny value so that many buffer refills happen per document. */
#if defined(_DEBUG)
#  define READ_SIZE 16
#else
#  define READ_SIZE (1024 * 8)
#endif
72
73
74typedef struct {
75  XML_Parser parser;
76  int *retPtr;
77} PROCESS_ARGS;
78
79static int
80processStream(const XML_Char *filename, XML_Parser parser);
81
82static void
83reportError(XML_Parser parser, const XML_Char *filename)
84{
85  enum XML_Error code = XML_GetErrorCode(parser);
86  const XML_Char *message = XML_ErrorString(code);
87  if (message)
88    ftprintf(stdout,
89             T("%s")
90               T(":%") T(XML_FMT_INT_MOD) T("u")
91               T(":%") T(XML_FMT_INT_MOD) T("u")
92               T(": %s\n"),
93             filename,
94             XML_GetErrorLineNumber(parser),
95             XML_GetErrorColumnNumber(parser),
96             message);
97  else
98    ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
99}
100
101/* This implementation will give problems on files larger than INT_MAX. */
102static void
103processFile(const void *data, size_t size,
104            const XML_Char *filename, void *args)
105{
106  XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
107  int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
108  if (XML_Parse(parser, (const char *)data, (int)size, 1) == XML_STATUS_ERROR) {
109    reportError(parser, filename);
110    *retPtr = 0;
111  }
112  else
113    *retPtr = 1;
114}
115
116#if defined(_WIN32)
117
118static int
119isAsciiLetter(XML_Char c)
120{
121  return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z'));
122}
123
124#endif /* _WIN32 */
125
126static const XML_Char *
127resolveSystemId(const XML_Char *base, const XML_Char *systemId,
128                XML_Char **toFree)
129{
130  XML_Char *s;
131  *toFree = 0;
132  if (!base
133      || *systemId == T('/')
134#if defined(_WIN32)
135      || *systemId == T('\\')
136      || (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
137#endif
138     )
139    return systemId;
140  *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)
141                               * sizeof(XML_Char));
142  if (!*toFree)
143    return systemId;
144  tcscpy(*toFree, base);
145  s = *toFree;
146  if (tcsrchr(s, T('/')))
147    s = tcsrchr(s, T('/')) + 1;
148#if defined(_WIN32)
149  if (tcsrchr(s, T('\\')))
150    s = tcsrchr(s, T('\\')) + 1;
151#endif
152  tcscpy(s, systemId);
153  return *toFree;
154}
155
156static int
157externalEntityRefFilemap(XML_Parser parser,
158                         const XML_Char *context,
159                         const XML_Char *base,
160                         const XML_Char *systemId,
161                         const XML_Char *UNUSED_P(publicId))
162{
163  int result;
164  XML_Char *s;
165  const XML_Char *filename;
166  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
167  int filemapRes;
168  PROCESS_ARGS args;
169  args.retPtr = &result;
170  args.parser = entParser;
171  filename = resolveSystemId(base, systemId, &s);
172  XML_SetBase(entParser, filename);
173  filemapRes = filemap(filename, processFile, &args);
174  switch (filemapRes) {
175  case 0:
176    result = 0;
177    break;
178  case 2:
179    ftprintf(stderr, T("%s: file too large for memory-mapping")
180        T(", switching to streaming\n"), filename);
181    result = processStream(filename, entParser);
182    break;
183  }
184  free(s);
185  XML_ParserFree(entParser);
186  return result;
187}
188
189static int
190processStream(const XML_Char *filename, XML_Parser parser)
191{
192  /* passing NULL for filename means read intput from stdin */
193  int fd = 0;   /* 0 is the fileno for stdin */
194
195  if (filename != NULL) {
196    fd = topen(filename, O_BINARY|O_RDONLY);
197    if (fd < 0) {
198      tperror(filename);
199      return 0;
200    }
201  }
202  for (;;) {
203    int nread;
204    char *buf = (char *)XML_GetBuffer(parser, READ_SIZE);
205    if (!buf) {
206      if (filename != NULL)
207        close(fd);
208      ftprintf(stderr, T("%s: out of memory\n"),
209               filename != NULL ? filename : T("xmlwf"));
210      return 0;
211    }
212    nread = read(fd, buf, READ_SIZE);
213    if (nread < 0) {
214      tperror(filename != NULL ? filename : T("STDIN"));
215      if (filename != NULL)
216        close(fd);
217      return 0;
218    }
219    if (XML_ParseBuffer(parser, nread, nread == 0) == XML_STATUS_ERROR) {
220        reportError(parser, filename != NULL ? filename : T("STDIN"));
221      if (filename != NULL)
222        close(fd);
223      return 0;
224    }
225    if (nread == 0) {
226      if (filename != NULL)
227        close(fd);
228      break;;
229    }
230  }
231  return 1;
232}
233
234static int
235externalEntityRefStream(XML_Parser parser,
236                        const XML_Char *context,
237                        const XML_Char *base,
238                        const XML_Char *systemId,
239                        const XML_Char *UNUSED_P(publicId))
240{
241  XML_Char *s;
242  const XML_Char *filename;
243  int ret;
244  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
245  filename = resolveSystemId(base, systemId, &s);
246  XML_SetBase(entParser, filename);
247  ret = processStream(filename, entParser);
248  free(s);
249  XML_ParserFree(entParser);
250  return ret;
251}
252
253int
254XML_ProcessFile(XML_Parser parser,
255                const XML_Char *filename,
256                unsigned flags)
257{
258  int result;
259
260  if (!XML_SetBase(parser, filename)) {
261    ftprintf(stderr, T("%s: out of memory"), filename);
262    exit(1);
263  }
264
265  if (flags & XML_EXTERNAL_ENTITIES)
266      XML_SetExternalEntityRefHandler(parser,
267                                      (flags & XML_MAP_FILE)
268                                      ? externalEntityRefFilemap
269                                      : externalEntityRefStream);
270  if (flags & XML_MAP_FILE) {
271    int filemapRes;
272    PROCESS_ARGS args;
273    args.retPtr = &result;
274    args.parser = parser;
275    filemapRes = filemap(filename, processFile, &args);
276    switch (filemapRes) {
277    case 0:
278      result = 0;
279      break;
280    case 2:
281      ftprintf(stderr, T("%s: file too large for memory-mapping")
282          T(", switching to streaming\n"), filename);
283      result = processStream(filename, parser);
284      break;
285    }
286  }
287  else
288    result = processStream(filename, parser);
289  return result;
290}
291