1#include <string.h>
2#include "xmlmime.h"
3
/*
 * Scan the next token of a header value, starting at *pp.
 *
 * Whitespace (space, CR, tab, LF) and parenthesised comments, which may
 * nest, are skipped before a token begins.  A token is one of:
 *   - an atom, ended by whitespace, '(', ';', '/', '=', '"' or the end of
 *     the input; *pp is left on the character that ended it;
 *   - a quoted string; the returned pointer addresses the opening quote
 *     and *pp is left just past the closing quote;
 *   - a single ';', '/' or '='; *pp is left just past it.
 *
 * A backslash makes the following character be skipped uninterpreted in
 * every state; before any token has begun, such a pair does not start one.
 *
 * Returns a pointer to the first character of the token, or 0 when the
 * input ends without a token (including inside a string or comment), when
 * a backslash is the last character, or when a ')' is met that neither
 * closes a comment nor lies inside a quoted string.
 */
static const char *
getTok(const char **pp)
{
  /* Values above inComment stand for deeper levels of comment nesting. */
  enum { inAtom, inString, init, inComment };
  int mode = init;
  const char *first = 0;

  for (;; ++*pp) {
    const char c = **pp;

    if (c == '\0')
      return mode == inAtom ? first : 0;

    if (c == '\\') {
      /* Step onto the escaped character; the loop increment steps past it. */
      ++*pp;
      if (**pp == '\0')
        return 0;
      continue;
    }

    if (c == ' ' || c == '\r' || c == '\t' || c == '\n') {
      if (mode == inAtom)
        return first;
      continue;
    }

    if (c == '(') {
      if (mode == inAtom)
        return first;
      /* Opens a comment, or nests one level deeper inside a comment. */
      if (mode != inString)
        mode++;
      continue;
    }

    if (c == ')') {
      if (mode > init)
        --mode;
      else if (mode != inString)
        return 0;
      continue;
    }

    if (c == ';' || c == '/' || c == '=') {
      if (mode == inAtom)
        return first;
      /* Outside strings and comments a separator is a token by itself. */
      if (mode == init)
        return (*pp)++;
      continue;
    }

    if (c == '"') {
      if (mode == inString) {
        ++*pp;
        return first;
      }
      if (mode == inAtom)
        return first;
      if (mode == init) {
        first = *pp;
        mode = inString;
      }
      continue;
    }

    /* Any other character starts an atom unless something is in progress. */
    if (mode == init) {
      first = *pp;
      mode = inAtom;
    }
  }
  /* not reached */
}
73
/*
 * Compare the token [start, end) with the NUL-terminated string key,
 * ignoring the case of ASCII letters in the token.
 *
 * key must be lowercase ASCII.
 *
 * Returns 1 when the token is exactly as long as key and every character
 * matches, 0 otherwise.  A null start (no token) never matches.
 */

static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;
  for (; start != end; start++, key++) {
    const char k = *key;

    /* The token is longer than key.  Stopping here guarantees that key is
       never advanced past its terminator. */
    if (k == '\0')
      return 0;
    if (*start == k)
      continue;
    /* Only letters have an uppercase form; applying the 'a' -> 'A' offset
       to any other character would accept an unrelated byte. */
    if (k >= 'a' && k <= 'z' && *start == 'A' + (k - 'a'))
      continue;
    return 0;
  }
  return *key == '\0';
}
86
87void
88getXMLCharset(const char *buf, char *charset)
89{
90  const char *next, *p;
91
92  charset[0] = '\0';
93  next = buf;
94  p = getTok(&next);
95  if (matchkey(p, next, "text"))
96    strcpy(charset, "us-ascii");
97  else if (!matchkey(p, next, "application"))
98    return;
99  p = getTok(&next);
100  if (!p || *p != '/')
101    return;
102  p = getTok(&next);
103#if 0
104  if (!matchkey(p, next, "xml") && charset[0] == '\0')
105    return;
106#endif
107  p = getTok(&next);
108  while (p) {
109    if (*p == ';') {
110      p = getTok(&next);
111      if (matchkey(p, next, "charset")) {
112        p = getTok(&next);
113        if (p && *p == '=') {
114          p = getTok(&next);
115          if (p) {
116            char *s = charset;
117            if (*p == '"') {
118              while (++p != next - 1) {
119                if (*p == '\\')
120                  ++p;
121                if (s == charset + CHARSET_MAX - 1) {
122                  charset[0] = '\0';
123                  break;
124                }
125                *s++ = *p;
126              }
127              *s++ = '\0';
128            }
129            else {
130              if (next - p > CHARSET_MAX - 1)
131                break;
132              while (p != next)
133                *s++ = *p++;
134              *s = 0;
135              break;
136            }
137          }
138        }
139        break;
140      }
141    }
142  else
143    p = getTok(&next);
144  }
145}
146
147#ifdef TEST
148
149#include <stdio.h>
150
151int
152main(int argc, char *argv[])
153{
154  char buf[CHARSET_MAX];
155  if (argc <= 1)
156    return 1;
157  printf("%s\n", argv[1]);
158  getXMLCharset(argv[1], buf);
159  printf("charset=\"%s\"\n", buf);
160  return 0;
161}
162
163#endif /* TEST */
164