1// Amalgamated source file
2#include "upb.h"
3
4
5#include <stdlib.h>
6#include <string.h>
7
/* Length-prefixed, heap-allocated string.  Instances are created by newstr(),
 * which allocates sizeof(str_t) + len bytes so that the data and its
 * terminating NUL fit in the storage that starts at str[]. */
typedef struct {
  size_t len;   /* Number of data bytes, not counting the terminating NUL. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
12
13static str_t *newstr(const char *data, size_t len) {
14  str_t *ret = malloc(sizeof(*ret) + len);
15  if (!ret) return NULL;
16  ret->len = len;
17  memcpy(ret->str, data, len);
18  ret->str[len] = '\0';
19  return ret;
20}
21
22static void freestr(str_t *s) { free(s); }
23
/* Locale-independent range check.  The <ctype.h> classifiers (isalpha() and
 * friends) depend on the current locale, which we don't want.
 * Returns true when low <= c <= high. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) return false;
  return c <= high;
}
28
/* True for 'A'-'Z', 'a'-'z' and the underscore. */
static bool upb_isletter(char c) {
  if (c == '_') return true;
  if (upb_isbetween(c, 'a', 'z')) return true;
  return upb_isbetween(c, 'A', 'Z');
}
32
/* True for anything upb_isletter() accepts, plus the digits '0'-'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isletter(c)) return true;
  return upb_isbetween(c, '0', '9');
}
36
37static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
38  bool start = true;
39  size_t i;
40  for (i = 0; i < len; i++) {
41    char c = str[i];
42    if (c == '.') {
43      if (start || !full) {
44        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
45        return false;
46      }
47      start = true;
48    } else if (start) {
49      if (!upb_isletter(c)) {
50        upb_status_seterrf(
51            s, "invalid name: path components must start with a letter (%s)",
52            str);
53        return false;
54      }
55      start = false;
56    } else {
57      if (!upb_isalphanum(c)) {
58        upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
59                           str);
60        return false;
61      }
62    }
63  }
64  return !start;
65}
66
67
68/* upb_def ********************************************************************/
69
70upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
71
72const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
73
74bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
75  assert(!upb_def_isfrozen(def));
76  if (!upb_isident(fullname, strlen(fullname), true, s)) return false;
77  free((void*)def->fullname);
78  def->fullname = upb_strdup(fullname);
79  return true;
80}
81
82upb_def *upb_def_dup(const upb_def *def, const void *o) {
83  switch (def->type) {
84    case UPB_DEF_MSG:
85      return upb_msgdef_upcast_mutable(
86          upb_msgdef_dup(upb_downcast_msgdef(def), o));
87    case UPB_DEF_FIELD:
88      return upb_fielddef_upcast_mutable(
89          upb_fielddef_dup(upb_downcast_fielddef(def), o));
90    case UPB_DEF_ENUM:
91      return upb_enumdef_upcast_mutable(
92          upb_enumdef_dup(upb_downcast_enumdef(def), o));
93    default: assert(false); return NULL;
94  }
95}
96
97static bool upb_def_init(upb_def *def, upb_deftype_t type,
98                         const struct upb_refcounted_vtbl *vtbl,
99                         const void *owner) {
100  if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false;
101  def->type = type;
102  def->fullname = NULL;
103  def->came_from_user = false;
104  return true;
105}
106
107static void upb_def_uninit(upb_def *def) {
108  free((void*)def->fullname);
109}
110
111static const char *msgdef_name(const upb_msgdef *m) {
112  const char *name = upb_def_fullname(upb_msgdef_upcast(m));
113  return name ? name : "(anonymous)";
114}
115
/* Validates one field of a message that is being frozen.
 *
 * Checks, in order: a name and non-zero number are set; the type has been
 * initialized; "lazy" is only used with descriptor type MESSAGE; a field that
 * has a subdef has it resolved (not symbolic), present, and either already
 * frozen or flagged came_from_user; an enum field's default is consistent
 * with its enumdef; a MapEntry submessage is only referenced from a repeated
 * field.
 *
 * Returns true if the field is valid; otherwise records a message in s and
 * returns false.  For enum fields the effective numeric default is written
 * back into f, so f may be modified on success. */
static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "fielddef must have name and number set");
    return false;
  }

  if (!f->type_is_set_) {
    upb_status_seterrmsg(s, "fielddef type was not initialized");
    return false;
  }

  if (upb_fielddef_lazy(f) &&
      upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
    upb_status_seterrmsg(s,
                         "only length-delimited submessage fields may be lazy");
    return false;
  }

  if (upb_fielddef_hassubdef(f)) {
    const upb_def *subdef;

    /* A symbolic subdef is still only a name; it has to be resolved to an
     * actual def before the field can be frozen. */
    if (f->subdef_is_symbolic) {
      upb_status_seterrf(s, "field '%s.%s' has not been resolved",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    subdef = upb_fielddef_subdef(f);
    if (subdef == NULL) {
      upb_status_seterrf(s, "field %s.%s is missing required subdef",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    /* came_from_user is set by upb_def_freeze() on defs in the batch being
     * frozen, so this rejects subdefs that are neither frozen nor flagged. */
    if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
      upb_status_seterrf(s,
                         "subdef of field %s.%s is not frozen or being frozen",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }
  }

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
    bool has_default_number = upb_fielddef_enumhasdefaultint32(f);

    /* Previously verified by upb_validate_enumdef(). */
    assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);

    /* We've already validated that we have an associated enumdef and that it
     * has at least one member, so at least one of these should be true.
     * Because if the user didn't set anything, we'll pick up the enum's
     * default, but if the user *did* set something we should at least pick up
     * the one they set (int32 or string). */
    assert(has_default_name || has_default_number);

    /* Only a numeric default is available: the number has no name in the
     * enum. */
    if (!has_default_name) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%d) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultint32(f));
      return false;
    }

    /* Only a string default is available: the name has no number in the
     * enum. */
    if (!has_default_number) {
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%s) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultstr(f, NULL));
      return false;
    }

    /* Lift the effective numeric default into the field's default slot, in case
     * we were only getting it "by reference" from the enumdef. */
    upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
  }

  /* Ensure that MapEntry submessages only appear as repeated fields, not
   * optional/required (singular) fields. */
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
      upb_fielddef_msgsubdef(f) != NULL) {
    const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
    if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
      upb_status_seterrf(s,
                         "Field %s refers to mapentry message but is not "
                         "a repeated field",
                         upb_fielddef_name(f) ? upb_fielddef_name(f) :
                         "(unnamed)");
      return false;
    }
  }

  return true;
}
210
211static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
212  if (upb_enumdef_numvals(e) == 0) {
213    upb_status_seterrf(s, "enum %s has no members (must have at least one)",
214                       upb_enumdef_fullname(e));
215    return false;
216  }
217
218  return true;
219}
220
221/* All submessage fields are lower than all other fields.
222 * Secondly, fields are increasing in order. */
223uint32_t field_rank(const upb_fielddef *f) {
224  uint32_t ret = upb_fielddef_number(f);
225  const uint32_t high_bit = 1 << 30;
226  assert(ret < high_bit);
227  if (!upb_fielddef_issubmsg(f))
228    ret |= high_bit;
229  return ret;
230}
231
232int cmp_fields(const void *p1, const void *p2) {
233  const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
234  const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
235  return field_rank(f1) - field_rank(f2);
236}
237
238static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
239  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
240   * lowest indexes, but we do not publicly guarantee this. */
241  upb_msg_field_iter j;
242  int i;
243  uint32_t selector;
244  int n = upb_msgdef_numfields(m);
245  upb_fielddef **fields = malloc(n * sizeof(*fields));
246  if (!fields) return false;
247
248  m->submsg_field_count = 0;
249  for(i = 0, upb_msg_field_begin(&j, m);
250      !upb_msg_field_done(&j);
251      upb_msg_field_next(&j), i++) {
252    upb_fielddef *f = upb_msg_iter_field(&j);
253    assert(f->msg.def == m);
254    if (!upb_validate_field(f, s)) {
255      free(fields);
256      return false;
257    }
258    if (upb_fielddef_issubmsg(f)) {
259      m->submsg_field_count++;
260    }
261    fields[i] = f;
262  }
263
264  qsort(fields, n, sizeof(*fields), cmp_fields);
265
266  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
267  for (i = 0; i < n; i++) {
268    upb_fielddef *f = fields[i];
269    f->index_ = i;
270    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
271    selector += upb_handlers_selectorcount(f);
272  }
273  m->selector_count = selector;
274
275#ifndef NDEBUG
276  {
277    /* Verify that all selectors for the message are distinct. */
278#define TRY(type) \
279    if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
280
281    upb_inttable t;
282    upb_value v;
283    upb_selector_t sel;
284
285    upb_inttable_init(&t, UPB_CTYPE_BOOL);
286    v = upb_value_bool(true);
287    upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
288    upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
289    for(upb_msg_field_begin(&j, m);
290        !upb_msg_field_done(&j);
291        upb_msg_field_next(&j)) {
292      upb_fielddef *f = upb_msg_iter_field(&j);
293      /* These calls will assert-fail in upb_table if the value already
294       * exists. */
295      TRY(UPB_HANDLER_INT32);
296      TRY(UPB_HANDLER_INT64)
297      TRY(UPB_HANDLER_UINT32)
298      TRY(UPB_HANDLER_UINT64)
299      TRY(UPB_HANDLER_FLOAT)
300      TRY(UPB_HANDLER_DOUBLE)
301      TRY(UPB_HANDLER_BOOL)
302      TRY(UPB_HANDLER_STARTSTR)
303      TRY(UPB_HANDLER_STRING)
304      TRY(UPB_HANDLER_ENDSTR)
305      TRY(UPB_HANDLER_STARTSUBMSG)
306      TRY(UPB_HANDLER_ENDSUBMSG)
307      TRY(UPB_HANDLER_STARTSEQ)
308      TRY(UPB_HANDLER_ENDSEQ)
309    }
310    upb_inttable_uninit(&t);
311  }
312#undef TRY
313#endif
314
315  free(fields);
316  return true;
317}
318
319bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
320  int i;
321  int maxdepth;
322  bool ret;
323  upb_status_clear(s);
324
325  /* First perform validation, in two passes so we can check that we have a
326   * transitive closure without needing to search. */
327  for (i = 0; i < n; i++) {
328    upb_def *def = defs[i];
329    if (upb_def_isfrozen(def)) {
330      /* Could relax this requirement if it's annoying. */
331      upb_status_seterrmsg(s, "def is already frozen");
332      goto err;
333    } else if (def->type == UPB_DEF_FIELD) {
334      upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
335      goto err;
336    } else if (def->type == UPB_DEF_ENUM) {
337      if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
338        goto err;
339      }
340    } else {
341      /* Set now to detect transitive closure in the second pass. */
342      def->came_from_user = true;
343    }
344  }
345
346  /* Second pass of validation.  Also assign selector bases and indexes, and
347   * compact tables. */
348  for (i = 0; i < n; i++) {
349    upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
350    upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
351    if (m) {
352      upb_inttable_compact(&m->itof);
353      if (!assign_msg_indices(m, s)) {
354        goto err;
355      }
356    } else if (e) {
357      upb_inttable_compact(&e->iton);
358    }
359  }
360
361  /* Def graph contains FieldDefs between each MessageDef, so double the
362   * limit. */
363  maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
364
365  /* Validation all passed; freeze the defs. */
366  ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
367  assert(!(s && ret != upb_ok(s)));
368  return ret;
369
370err:
371  for (i = 0; i < n; i++) {
372    defs[i]->came_from_user = false;
373  }
374  assert(!(s && upb_ok(s)));
375  return false;
376}
377
378
379/* upb_enumdef ****************************************************************/
380
381static void upb_enumdef_free(upb_refcounted *r) {
382  upb_enumdef *e = (upb_enumdef*)r;
383  upb_inttable_iter i;
384  upb_inttable_begin(&i, &e->iton);
385  for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
386    /* To clean up the upb_strdup() from upb_enumdef_addval(). */
387    free(upb_value_getcstr(upb_inttable_iter_value(&i)));
388  }
389  upb_strtable_uninit(&e->ntoi);
390  upb_inttable_uninit(&e->iton);
391  upb_def_uninit(upb_enumdef_upcast_mutable(e));
392  free(e);
393}
394
395upb_enumdef *upb_enumdef_new(const void *owner) {
396  static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
397  upb_enumdef *e = malloc(sizeof(*e));
398  if (!e) return NULL;
399  if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
400    goto err2;
401  if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
402  if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
403  return e;
404
405err1:
406  upb_strtable_uninit(&e->ntoi);
407err2:
408  free(e);
409  return NULL;
410}
411
412upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
413  upb_enum_iter i;
414  upb_enumdef *new_e = upb_enumdef_new(owner);
415  if (!new_e) return NULL;
416  for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
417    bool success = upb_enumdef_addval(
418        new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
419    if (!success) {
420      upb_enumdef_unref(new_e, owner);
421      return NULL;
422    }
423  }
424  return new_e;
425}
426
427bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
428  upb_def *d = upb_enumdef_upcast_mutable(e);
429  return upb_def_freeze(&d, 1, status);
430}
431
432const char *upb_enumdef_fullname(const upb_enumdef *e) {
433  return upb_def_fullname(upb_enumdef_upcast(e));
434}
435
436bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
437                             upb_status *s) {
438  return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
439}
440
441bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
442                        upb_status *status) {
443  if (!upb_isident(name, strlen(name), false, status)) {
444    return false;
445  }
446  if (upb_enumdef_ntoiz(e, name, NULL)) {
447    upb_status_seterrf(status, "name '%s' is already defined", name);
448    return false;
449  }
450  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
451    upb_status_seterrmsg(status, "out of memory");
452    return false;
453  }
454  if (!upb_inttable_lookup(&e->iton, num, NULL) &&
455      !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) {
456    upb_status_seterrmsg(status, "out of memory");
457    upb_strtable_remove(&e->ntoi, name, NULL);
458    return false;
459  }
460  if (upb_enumdef_numvals(e) == 1) {
461    bool ok = upb_enumdef_setdefault(e, num, NULL);
462    UPB_ASSERT_VAR(ok, ok);
463  }
464  return true;
465}
466
467int32_t upb_enumdef_default(const upb_enumdef *e) {
468  assert(upb_enumdef_iton(e, e->defaultval));
469  return e->defaultval;
470}
471
472bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
473  assert(!upb_enumdef_isfrozen(e));
474  if (!upb_enumdef_iton(e, val)) {
475    upb_status_seterrf(s, "number '%d' is not in the enum.", val);
476    return false;
477  }
478  e->defaultval = val;
479  return true;
480}
481
482int upb_enumdef_numvals(const upb_enumdef *e) {
483  return upb_strtable_count(&e->ntoi);
484}
485
486void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
487  /* We iterate over the ntoi table, to account for duplicate numbers. */
488  upb_strtable_begin(i, &e->ntoi);
489}
490
491void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
492bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
493
494bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
495                      size_t len, int32_t *num) {
496  upb_value v;
497  if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
498    return false;
499  }
500  if (num) *num = upb_value_getint32(v);
501  return true;
502}
503
504const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
505  upb_value v;
506  return upb_inttable_lookup32(&def->iton, num, &v) ?
507      upb_value_getcstr(v) : NULL;
508}
509
510const char *upb_enum_iter_name(upb_enum_iter *iter) {
511  return upb_strtable_iter_key(iter);
512}
513
514int32_t upb_enum_iter_number(upb_enum_iter *iter) {
515  return upb_value_getint32(upb_strtable_iter_value(iter));
516}
517
518
519/* upb_fielddef ***************************************************************/
520
521static void upb_fielddef_init_default(upb_fielddef *f);
522
523static void upb_fielddef_uninit_default(upb_fielddef *f) {
524  if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes)
525    freestr(f->defaultval.bytes);
526}
527
528static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
529                       void *closure) {
530  const upb_fielddef *f = (const upb_fielddef*)r;
531  if (upb_fielddef_containingtype(f)) {
532    visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
533  }
534  if (upb_fielddef_containingoneof(f)) {
535    visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure);
536  }
537  if (upb_fielddef_subdef(f)) {
538    visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
539  }
540}
541
542static void freefield(upb_refcounted *r) {
543  upb_fielddef *f = (upb_fielddef*)r;
544  upb_fielddef_uninit_default(f);
545  if (f->subdef_is_symbolic)
546    free(f->sub.name);
547  upb_def_uninit(upb_fielddef_upcast_mutable(f));
548  free(f);
549}
550
551static const char *enumdefaultstr(const upb_fielddef *f) {
552  const upb_enumdef *e;
553  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
554  e = upb_fielddef_enumsubdef(f);
555  if (f->default_is_string && f->defaultval.bytes) {
556    /* Default was explicitly set as a string. */
557    str_t *s = f->defaultval.bytes;
558    return s->str;
559  } else if (e) {
560    if (!f->default_is_string) {
561      /* Default was explicitly set as an integer; look it up in enumdef. */
562      const char *name = upb_enumdef_iton(e, f->defaultval.sint);
563      if (name) {
564        return name;
565      }
566    } else {
567      /* Default is completely unset; pull enumdef default. */
568      if (upb_enumdef_numvals(e) > 0) {
569        const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
570        assert(name);
571        return name;
572      }
573    }
574  }
575  return NULL;
576}
577
578static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
579  const upb_enumdef *e;
580  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
581  e = upb_fielddef_enumsubdef(f);
582  if (!f->default_is_string) {
583    /* Default was explicitly set as an integer. */
584    *val = f->defaultval.sint;
585    return true;
586  } else if (e) {
587    if (f->defaultval.bytes) {
588      /* Default was explicitly set as a str; try to lookup corresponding int. */
589      str_t *s = f->defaultval.bytes;
590      if (upb_enumdef_ntoiz(e, s->str, val)) {
591        return true;
592      }
593    } else {
594      /* Default is unset; try to pull in enumdef default. */
595      if (upb_enumdef_numvals(e) > 0) {
596        *val = upb_enumdef_default(e);
597        return true;
598      }
599    }
600  }
601  return false;
602}
603
604upb_fielddef *upb_fielddef_new(const void *o) {
605  static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
606  upb_fielddef *f = malloc(sizeof(*f));
607  if (!f) return NULL;
608  if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
609    free(f);
610    return NULL;
611  }
612  f->msg.def = NULL;
613  f->sub.def = NULL;
614  f->oneof = NULL;
615  f->subdef_is_symbolic = false;
616  f->msg_is_symbolic = false;
617  f->label_ = UPB_LABEL_OPTIONAL;
618  f->type_ = UPB_TYPE_INT32;
619  f->number_ = 0;
620  f->type_is_set_ = false;
621  f->tagdelim = false;
622  f->is_extension_ = false;
623  f->lazy_ = false;
624  f->packed_ = true;
625
626  /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
627   * with all integer types and is in some since more "default" since the most
628   * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
629   *
630   * Other options to consider:
631   * - there is no default; users must set this manually (like type).
632   * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
633   *   be an optimal default for signed integers. */
634  f->intfmt = UPB_INTFMT_VARIABLE;
635  return f;
636}
637
638upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
639  const char *srcname;
640  upb_fielddef *newf = upb_fielddef_new(owner);
641  if (!newf) return NULL;
642  upb_fielddef_settype(newf, upb_fielddef_type(f));
643  upb_fielddef_setlabel(newf, upb_fielddef_label(f));
644  upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
645  upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
646  if (f->default_is_string && f->defaultval.bytes) {
647    str_t *s = f->defaultval.bytes;
648    upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
649  } else {
650    newf->default_is_string = f->default_is_string;
651    newf->defaultval = f->defaultval;
652  }
653
654  if (f->subdef_is_symbolic) {
655    srcname = f->sub.name;  /* Might be NULL. */
656  } else {
657    srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
658  }
659  if (srcname) {
660    char *newname = malloc(strlen(f->sub.def->fullname) + 2);
661    if (!newname) {
662      upb_fielddef_unref(newf, owner);
663      return NULL;
664    }
665    strcpy(newname, ".");
666    strcat(newname, f->sub.def->fullname);
667    upb_fielddef_setsubdefname(newf, newname, NULL);
668    free(newname);
669  }
670
671  return newf;
672}
673
674bool upb_fielddef_typeisset(const upb_fielddef *f) {
675  return f->type_is_set_;
676}
677
678upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
679  assert(f->type_is_set_);
680  return f->type_;
681}
682
683uint32_t upb_fielddef_index(const upb_fielddef *f) {
684  return f->index_;
685}
686
687upb_label_t upb_fielddef_label(const upb_fielddef *f) {
688  return f->label_;
689}
690
691upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
692  return f->intfmt;
693}
694
695bool upb_fielddef_istagdelim(const upb_fielddef *f) {
696  return f->tagdelim;
697}
698
699uint32_t upb_fielddef_number(const upb_fielddef *f) {
700  return f->number_;
701}
702
703bool upb_fielddef_isextension(const upb_fielddef *f) {
704  return f->is_extension_;
705}
706
707bool upb_fielddef_lazy(const upb_fielddef *f) {
708  return f->lazy_;
709}
710
711bool upb_fielddef_packed(const upb_fielddef *f) {
712  return f->packed_;
713}
714
715const char *upb_fielddef_name(const upb_fielddef *f) {
716  return upb_def_fullname(upb_fielddef_upcast(f));
717}
718
719const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
720  return f->msg_is_symbolic ? NULL : f->msg.def;
721}
722
723const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
724  return f->oneof;
725}
726
727upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
728  return (upb_msgdef*)upb_fielddef_containingtype(f);
729}
730
731const char *upb_fielddef_containingtypename(upb_fielddef *f) {
732  return f->msg_is_symbolic ? f->msg.name : NULL;
733}
734
735static void release_containingtype(upb_fielddef *f) {
736  if (f->msg_is_symbolic) free(f->msg.name);
737}
738
739bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
740                                        upb_status *s) {
741  assert(!upb_fielddef_isfrozen(f));
742  if (upb_fielddef_containingtype(f)) {
743    upb_status_seterrmsg(s, "field has already been added to a message.");
744    return false;
745  }
746  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
747   * may have a leading "."). */
748  release_containingtype(f);
749  f->msg.name = upb_strdup(name);
750  f->msg_is_symbolic = true;
751  return true;
752}
753
754bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
755  if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
756    upb_status_seterrmsg(s, "Already added to message or oneof");
757    return false;
758  }
759  return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
760}
761
762static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
763  UPB_UNUSED(f);
764  UPB_UNUSED(type);
765  assert(f->type_is_set_ && upb_fielddef_type(f) == type);
766}
767
768int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
769  chkdefaulttype(f, UPB_TYPE_INT64);
770  return f->defaultval.sint;
771}
772
773int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
774  if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
775    int32_t val;
776    bool ok = enumdefaultint32(f, &val);
777    UPB_ASSERT_VAR(ok, ok);
778    return val;
779  } else {
780    chkdefaulttype(f, UPB_TYPE_INT32);
781    return f->defaultval.sint;
782  }
783}
784
785uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
786  chkdefaulttype(f, UPB_TYPE_UINT64);
787  return f->defaultval.uint;
788}
789
790uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
791  chkdefaulttype(f, UPB_TYPE_UINT32);
792  return f->defaultval.uint;
793}
794
795bool upb_fielddef_defaultbool(const upb_fielddef *f) {
796  chkdefaulttype(f, UPB_TYPE_BOOL);
797  return f->defaultval.uint;
798}
799
800float upb_fielddef_defaultfloat(const upb_fielddef *f) {
801  chkdefaulttype(f, UPB_TYPE_FLOAT);
802  return f->defaultval.flt;
803}
804
805double upb_fielddef_defaultdouble(const upb_fielddef *f) {
806  chkdefaulttype(f, UPB_TYPE_DOUBLE);
807  return f->defaultval.dbl;
808}
809
810const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
811  assert(f->type_is_set_);
812  assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
813         upb_fielddef_type(f) == UPB_TYPE_BYTES ||
814         upb_fielddef_type(f) == UPB_TYPE_ENUM);
815
816  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
817    const char *ret = enumdefaultstr(f);
818    assert(ret);
819    /* Enum defaults can't have embedded NULLs. */
820    if (len) *len = strlen(ret);
821    return ret;
822  }
823
824  if (f->default_is_string) {
825    str_t *str = f->defaultval.bytes;
826    if (len) *len = str->len;
827    return str->str;
828  }
829
830  return NULL;
831}
832
833static void upb_fielddef_init_default(upb_fielddef *f) {
834  f->default_is_string = false;
835  switch (upb_fielddef_type(f)) {
836    case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break;
837    case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break;
838    case UPB_TYPE_INT32:
839    case UPB_TYPE_INT64: f->defaultval.sint = 0; break;
840    case UPB_TYPE_UINT64:
841    case UPB_TYPE_UINT32:
842    case UPB_TYPE_BOOL: f->defaultval.uint = 0; break;
843    case UPB_TYPE_STRING:
844    case UPB_TYPE_BYTES:
845      f->defaultval.bytes = newstr("", 0);
846      f->default_is_string = true;
847      break;
848    case UPB_TYPE_MESSAGE: break;
849    case UPB_TYPE_ENUM:
850      /* This is our special sentinel that indicates "not set" for an enum. */
851      f->default_is_string = true;
852      f->defaultval.bytes = NULL;
853      break;
854  }
855}
856
857const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
858  return f->subdef_is_symbolic ? NULL : f->sub.def;
859}
860
861const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
862  const upb_def *def = upb_fielddef_subdef(f);
863  return def ? upb_dyncast_msgdef(def) : NULL;
864}
865
866const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
867  const upb_def *def = upb_fielddef_subdef(f);
868  return def ? upb_dyncast_enumdef(def) : NULL;
869}
870
871upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
872  return (upb_def*)upb_fielddef_subdef(f);
873}
874
875const char *upb_fielddef_subdefname(const upb_fielddef *f) {
876  if (f->subdef_is_symbolic) {
877    return f->sub.name;
878  } else if (f->sub.def) {
879    return upb_def_fullname(f->sub.def);
880  } else {
881    return NULL;
882  }
883}
884
885bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
886  if (upb_fielddef_containingtype(f)) {
887    upb_status_seterrmsg(
888        s, "cannot change field number after adding to a message");
889    return false;
890  }
891  if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
892    upb_status_seterrf(s, "invalid field number (%u)", number);
893    return false;
894  }
895  f->number_ = number;
896  return true;
897}
898
899void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
900  assert(!upb_fielddef_isfrozen(f));
901  assert(upb_fielddef_checktype(type));
902  upb_fielddef_uninit_default(f);
903  f->type_ = type;
904  f->type_is_set_ = true;
905  upb_fielddef_init_default(f);
906}
907
908void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
909  assert(!upb_fielddef_isfrozen(f));
910  switch (type) {
911    case UPB_DESCRIPTOR_TYPE_DOUBLE:
912      upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
913      break;
914    case UPB_DESCRIPTOR_TYPE_FLOAT:
915      upb_fielddef_settype(f, UPB_TYPE_FLOAT);
916      break;
917    case UPB_DESCRIPTOR_TYPE_INT64:
918    case UPB_DESCRIPTOR_TYPE_SFIXED64:
919    case UPB_DESCRIPTOR_TYPE_SINT64:
920      upb_fielddef_settype(f, UPB_TYPE_INT64);
921      break;
922    case UPB_DESCRIPTOR_TYPE_UINT64:
923    case UPB_DESCRIPTOR_TYPE_FIXED64:
924      upb_fielddef_settype(f, UPB_TYPE_UINT64);
925      break;
926    case UPB_DESCRIPTOR_TYPE_INT32:
927    case UPB_DESCRIPTOR_TYPE_SFIXED32:
928    case UPB_DESCRIPTOR_TYPE_SINT32:
929      upb_fielddef_settype(f, UPB_TYPE_INT32);
930      break;
931    case UPB_DESCRIPTOR_TYPE_UINT32:
932    case UPB_DESCRIPTOR_TYPE_FIXED32:
933      upb_fielddef_settype(f, UPB_TYPE_UINT32);
934      break;
935    case UPB_DESCRIPTOR_TYPE_BOOL:
936      upb_fielddef_settype(f, UPB_TYPE_BOOL);
937      break;
938    case UPB_DESCRIPTOR_TYPE_STRING:
939      upb_fielddef_settype(f, UPB_TYPE_STRING);
940      break;
941    case UPB_DESCRIPTOR_TYPE_BYTES:
942      upb_fielddef_settype(f, UPB_TYPE_BYTES);
943      break;
944    case UPB_DESCRIPTOR_TYPE_GROUP:
945    case UPB_DESCRIPTOR_TYPE_MESSAGE:
946      upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
947      break;
948    case UPB_DESCRIPTOR_TYPE_ENUM:
949      upb_fielddef_settype(f, UPB_TYPE_ENUM);
950      break;
951    default: assert(false);
952  }
953
954  if (type == UPB_DESCRIPTOR_TYPE_FIXED64 ||
955      type == UPB_DESCRIPTOR_TYPE_FIXED32 ||
956      type == UPB_DESCRIPTOR_TYPE_SFIXED64 ||
957      type == UPB_DESCRIPTOR_TYPE_SFIXED32) {
958    upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
959  } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 ||
960             type == UPB_DESCRIPTOR_TYPE_SINT32) {
961    upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
962  } else {
963    upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
964  }
965
966  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
967}
968
/* Maps the field's type, integer format and tag-delimited flag back to a
 * descriptor type.
 *
 * Integer types are refined by the integer format (variable/fixed/zigzag).
 * Zigzag combined with an unsigned type has no descriptor type and yields
 * -1.  Message fields map to GROUP when tag-delimited and to MESSAGE
 * otherwise.  Returns 0 if no case produces a result. */
upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_FLOAT:  return UPB_DESCRIPTOR_TYPE_FLOAT;
    case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
    case UPB_TYPE_BOOL:   return UPB_DESCRIPTOR_TYPE_BOOL;
    case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
    case UPB_TYPE_BYTES:  return UPB_DESCRIPTOR_TYPE_BYTES;
    case UPB_TYPE_ENUM:   return UPB_DESCRIPTOR_TYPE_ENUM;
    /* NOTE: the inner switches below have no default and are not followed by
     * a break, so an integer format other than the three listed would fall
     * through into the next outer case. */
    case UPB_TYPE_INT32:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED32;
        case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT32;
      }
    case UPB_TYPE_INT64:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED64;
        case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT64;
      }
    case UPB_TYPE_UINT32:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED32;
        /* No unsigned zigzag descriptor type exists. */
        case UPB_INTFMT_ZIGZAG:   return -1;
      }
    case UPB_TYPE_UINT64:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED64;
        /* No unsigned zigzag descriptor type exists. */
        case UPB_INTFMT_ZIGZAG:   return -1;
      }
    case UPB_TYPE_MESSAGE:
      return upb_fielddef_istagdelim(f) ?
          UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
  }
  return 0;
}
1007
1008void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
1009  assert(!upb_fielddef_isfrozen(f));
1010  f->is_extension_ = is_extension;
1011}
1012
1013void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
1014  assert(!upb_fielddef_isfrozen(f));
1015  f->lazy_ = lazy;
1016}
1017
1018void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
1019  assert(!upb_fielddef_isfrozen(f));
1020  f->packed_ = packed;
1021}
1022
1023void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
1024  assert(!upb_fielddef_isfrozen(f));
1025  assert(upb_fielddef_checklabel(label));
1026  f->label_ = label;
1027}
1028
1029void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
1030  assert(!upb_fielddef_isfrozen(f));
1031  assert(upb_fielddef_checkintfmt(fmt));
1032  f->intfmt = fmt;
1033}
1034
1035void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
1036  assert(!upb_fielddef_isfrozen(f));
1037  f->tagdelim = tag_delim;
1038  f->tagdelim = tag_delim;
1039}
1040
1041static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
1042  if (!f->type_is_set_ || upb_fielddef_isfrozen(f) ||
1043      upb_fielddef_type(f) != type) {
1044    assert(false);
1045    return false;
1046  }
1047  if (f->default_is_string) {
1048    str_t *s = f->defaultval.bytes;
1049    assert(s || type == UPB_TYPE_ENUM);
1050    if (s) freestr(s);
1051  }
1052  f->default_is_string = false;
1053  return true;
1054}
1055
1056void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
1057  if (checksetdefault(f, UPB_TYPE_INT64))
1058    f->defaultval.sint = value;
1059}
1060
1061void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
1062  if ((upb_fielddef_type(f) == UPB_TYPE_ENUM &&
1063       checksetdefault(f, UPB_TYPE_ENUM)) ||
1064      checksetdefault(f, UPB_TYPE_INT32)) {
1065    f->defaultval.sint = value;
1066  }
1067}
1068
1069void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
1070  if (checksetdefault(f, UPB_TYPE_UINT64))
1071    f->defaultval.uint = value;
1072}
1073
1074void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
1075  if (checksetdefault(f, UPB_TYPE_UINT32))
1076    f->defaultval.uint = value;
1077}
1078
1079void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
1080  if (checksetdefault(f, UPB_TYPE_BOOL))
1081    f->defaultval.uint = value;
1082}
1083
1084void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
1085  if (checksetdefault(f, UPB_TYPE_FLOAT))
1086    f->defaultval.flt = value;
1087}
1088
1089void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
1090  if (checksetdefault(f, UPB_TYPE_DOUBLE))
1091    f->defaultval.dbl = value;
1092}
1093
1094bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
1095                                upb_status *s) {
1096  str_t *str2;
1097  assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
1098  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
1099    return false;
1100
1101  if (f->default_is_string) {
1102    str_t *s = f->defaultval.bytes;
1103    assert(s || f->type_ == UPB_TYPE_ENUM);
1104    if (s) freestr(s);
1105  } else {
1106    assert(f->type_ == UPB_TYPE_ENUM);
1107  }
1108
1109  str2 = newstr(str, len);
1110  f->defaultval.bytes = str2;
1111  f->default_is_string = true;
1112  return true;
1113}
1114
1115void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
1116                                 upb_status *s) {
1117  assert(f->type_is_set_);
1118  upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s);
1119}
1120
1121bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
1122  int32_t val;
1123  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1124  return enumdefaultint32(f, &val);
1125}
1126
1127bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1128  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1129  return enumdefaultstr(f) != NULL;
1130}
1131
1132static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
1133                                 upb_status *s) {
1134  if (f->type_ == UPB_TYPE_MESSAGE) {
1135    if (upb_dyncast_msgdef(subdef)) return true;
1136    upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
1137    return false;
1138  } else if (f->type_ == UPB_TYPE_ENUM) {
1139    if (upb_dyncast_enumdef(subdef)) return true;
1140    upb_status_seterrmsg(s, "invalid subdef type for this enum field");
1141    return false;
1142  } else {
1143    upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
1144    return false;
1145  }
1146}
1147
1148static void release_subdef(upb_fielddef *f) {
1149  if (f->subdef_is_symbolic) {
1150    free(f->sub.name);
1151  } else if (f->sub.def) {
1152    upb_unref2(f->sub.def, f);
1153  }
1154}
1155
1156bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
1157                            upb_status *s) {
1158  assert(!upb_fielddef_isfrozen(f));
1159  assert(upb_fielddef_hassubdef(f));
1160  if (subdef && !upb_subdef_typecheck(f, subdef, s)) return false;
1161  release_subdef(f);
1162  f->sub.def = subdef;
1163  f->subdef_is_symbolic = false;
1164  if (f->sub.def) upb_ref2(f->sub.def, f);
1165  return true;
1166}
1167
1168bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
1169                               upb_status *s) {
1170  return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
1171}
1172
1173bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
1174                                upb_status *s) {
1175  return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
1176}
1177
1178bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
1179                                upb_status *s) {
1180  assert(!upb_fielddef_isfrozen(f));
1181  if (!upb_fielddef_hassubdef(f)) {
1182    upb_status_seterrmsg(s, "field type does not accept a subdef");
1183    return false;
1184  }
1185  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
1186   * may have a leading "."). */
1187  release_subdef(f);
1188  f->sub.name = upb_strdup(name);
1189  f->subdef_is_symbolic = true;
1190  return true;
1191}
1192
1193bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1194  return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1195}
1196
1197bool upb_fielddef_isstring(const upb_fielddef *f) {
1198  return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1199         upb_fielddef_type(f) == UPB_TYPE_BYTES;
1200}
1201
1202bool upb_fielddef_isseq(const upb_fielddef *f) {
1203  return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1204}
1205
1206bool upb_fielddef_isprimitive(const upb_fielddef *f) {
1207  return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
1208}
1209
1210bool upb_fielddef_ismap(const upb_fielddef *f) {
1211  return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1212         upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1213}
1214
1215bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1216  return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1217}
1218
/* True when |x| lies in the closed interval [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}
1222
/* True when |label| lies in the accepted range [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}
/* True when |type| lies in the accepted range [1, 11]. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}
/* True when |fmt| lies in the accepted range [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}
1226
/* True when |type| lies in the accepted descriptor-type range [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  const int32_t lowest = 1;
  const int32_t highest = 18;
  return between(type, lowest, highest);
}
1230
1231/* upb_msgdef *****************************************************************/
1232
1233static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
1234                     void *closure) {
1235  upb_msg_oneof_iter o;
1236  const upb_msgdef *m = (const upb_msgdef*)r;
1237  upb_msg_field_iter i;
1238  for(upb_msg_field_begin(&i, m);
1239      !upb_msg_field_done(&i);
1240      upb_msg_field_next(&i)) {
1241    upb_fielddef *f = upb_msg_iter_field(&i);
1242    visit(r, upb_fielddef_upcast2(f), closure);
1243  }
1244  for(upb_msg_oneof_begin(&o, m);
1245      !upb_msg_oneof_done(&o);
1246      upb_msg_oneof_next(&o)) {
1247    upb_oneofdef *f = upb_msg_iter_oneof(&o);
1248    visit(r, upb_oneofdef_upcast2(f), closure);
1249  }
1250}
1251
1252static void freemsg(upb_refcounted *r) {
1253  upb_msgdef *m = (upb_msgdef*)r;
1254  upb_strtable_uninit(&m->ntoo);
1255  upb_strtable_uninit(&m->ntof);
1256  upb_inttable_uninit(&m->itof);
1257  upb_def_uninit(upb_msgdef_upcast_mutable(m));
1258  free(m);
1259}
1260
1261upb_msgdef *upb_msgdef_new(const void *owner) {
1262  static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
1263  upb_msgdef *m = malloc(sizeof(*m));
1264  if (!m) return NULL;
1265  if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
1266    goto err2;
1267  if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
1268  if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
1269  if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
1270  m->map_entry = false;
1271  return m;
1272
1273err1:
1274  upb_strtable_uninit(&m->ntof);
1275err2:
1276  upb_inttable_uninit(&m->itof);
1277err3:
1278  free(m);
1279  return NULL;
1280}
1281
1282upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1283  bool ok;
1284  upb_msg_field_iter i;
1285  upb_msg_oneof_iter o;
1286
1287  upb_msgdef *newm = upb_msgdef_new(owner);
1288  if (!newm) return NULL;
1289  ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1290                           upb_def_fullname(upb_msgdef_upcast(m)),
1291                           NULL);
1292  newm->map_entry = m->map_entry;
1293  UPB_ASSERT_VAR(ok, ok);
1294  for(upb_msg_field_begin(&i, m);
1295      !upb_msg_field_done(&i);
1296      upb_msg_field_next(&i)) {
1297    upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
1298    /* Fields in oneofs are dup'd below. */
1299    if (upb_fielddef_containingoneof(f)) continue;
1300    if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1301      upb_msgdef_unref(newm, owner);
1302      return NULL;
1303    }
1304  }
1305  for(upb_msg_oneof_begin(&o, m);
1306      !upb_msg_oneof_done(&o);
1307      upb_msg_oneof_next(&o)) {
1308    upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1309    if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1310      upb_msgdef_unref(newm, owner);
1311      return NULL;
1312    }
1313  }
1314  return newm;
1315}
1316
1317bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
1318  upb_def *d = upb_msgdef_upcast_mutable(m);
1319  return upb_def_freeze(&d, 1, status);
1320}
1321
1322const char *upb_msgdef_fullname(const upb_msgdef *m) {
1323  return upb_def_fullname(upb_msgdef_upcast(m));
1324}
1325
1326bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
1327                            upb_status *s) {
1328  return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
1329}
1330
1331/* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1332 * on status |s| and return false if not. */
1333static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1334                            upb_status *s) {
1335  if (upb_fielddef_containingtype(f) != NULL) {
1336    upb_status_seterrmsg(s, "fielddef already belongs to a message");
1337    return false;
1338  } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1339    upb_status_seterrmsg(s, "field name or number were not set");
1340    return false;
1341  } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
1342             upb_msgdef_itof(m, upb_fielddef_number(f))) {
1343    upb_status_seterrmsg(s, "duplicate field name or number for field");
1344    return false;
1345  }
1346  return true;
1347}
1348
1349static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
1350  release_containingtype(f);
1351  f->msg.def = m;
1352  f->msg_is_symbolic = false;
1353  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
1354  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1355  upb_ref2(f, m);
1356  upb_ref2(m, f);
1357  if (ref_donor) upb_fielddef_unref(f, ref_donor);
1358}
1359
1360bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
1361                         upb_status *s) {
1362  /* TODO: extensions need to have a separate namespace, because proto2 allows a
1363   * top-level extension (ie. one not in any package) to have the same name as a
1364   * field from the message.
1365   *
1366   * This also implies that there needs to be a separate lookup-by-name method
1367   * for extensions.  It seems desirable for iteration to return both extensions
1368   * and non-extensions though.
1369   *
1370   * We also need to validate that the field number is in an extension range iff
1371   * it is an extension.
1372   *
1373   * This method is idempotent. Check if |f| is already part of this msgdef and
1374   * return immediately if so. */
1375  if (upb_fielddef_containingtype(f) == m) {
1376    return true;
1377  }
1378
1379  /* Check constraints for all fields before performing any action. */
1380  if (!check_field_add(m, f, s)) {
1381    return false;
1382  } else if (upb_fielddef_containingoneof(f) != NULL) {
1383    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
1384    upb_status_seterrmsg(s, "fielddef is part of a oneof");
1385    return false;
1386  }
1387
1388  /* Constraint checks ok, perform the action. */
1389  add_field(m, f, ref_donor);
1390  return true;
1391}
1392
1393bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
1394                         upb_status *s) {
1395  upb_oneof_iter it;
1396
1397  /* Check various conditions that would prevent this oneof from being added. */
1398  if (upb_oneofdef_containingtype(o)) {
1399    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
1400    return false;
1401  } else if (upb_oneofdef_name(o) == NULL) {
1402    upb_status_seterrmsg(s, "oneofdef name was not set");
1403    return false;
1404  } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
1405    upb_status_seterrmsg(s, "duplicate oneof name");
1406    return false;
1407  }
1408
1409  /* Check that all of the oneof's fields do not conflict with names or numbers
1410   * of fields already in the message. */
1411  for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1412    const upb_fielddef *f = upb_oneof_iter_field(&it);
1413    if (!check_field_add(m, f, s)) {
1414      return false;
1415    }
1416  }
1417
1418  /* Everything checks out -- commit now. */
1419
1420  /* Add oneof itself first. */
1421  o->parent = m;
1422  upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
1423  upb_ref2(o, m);
1424  upb_ref2(m, o);
1425
1426  /* Add each field of the oneof directly to the msgdef. */
1427  for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1428    upb_fielddef *f = upb_oneof_iter_field(&it);
1429    add_field(m, f, NULL);
1430  }
1431
1432  if (ref_donor) upb_oneofdef_unref(o, ref_donor);
1433
1434  return true;
1435}
1436
1437const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1438  upb_value val;
1439  return upb_inttable_lookup32(&m->itof, i, &val) ?
1440      upb_value_getptr(val) : NULL;
1441}
1442
1443const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1444                                    size_t len) {
1445  upb_value val;
1446  return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
1447      upb_value_getptr(val) : NULL;
1448}
1449
1450const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1451                                    size_t len) {
1452  upb_value val;
1453  return upb_strtable_lookup2(&m->ntoo, name, len, &val) ?
1454      upb_value_getptr(val) : NULL;
1455}
1456
1457int upb_msgdef_numfields(const upb_msgdef *m) {
1458  return upb_strtable_count(&m->ntof);
1459}
1460
1461int upb_msgdef_numoneofs(const upb_msgdef *m) {
1462  return upb_strtable_count(&m->ntoo);
1463}
1464
1465void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
1466  assert(!upb_msgdef_isfrozen(m));
1467  m->map_entry = map_entry;
1468}
1469
1470bool upb_msgdef_mapentry(const upb_msgdef *m) {
1471  return m->map_entry;
1472}
1473
1474void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
1475  upb_inttable_begin(iter, &m->itof);
1476}
1477
1478void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
1479
1480bool upb_msg_field_done(const upb_msg_field_iter *iter) {
1481  return upb_inttable_done(iter);
1482}
1483
1484upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
1485  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1486}
1487
1488void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
1489  upb_inttable_iter_setdone(iter);
1490}
1491
1492void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
1493  upb_strtable_begin(iter, &m->ntoo);
1494}
1495
1496void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
1497
1498bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
1499  return upb_strtable_done(iter);
1500}
1501
1502upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1503  return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
1504}
1505
1506void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
1507  upb_strtable_iter_setdone(iter);
1508}
1509
1510/* upb_oneofdef ***************************************************************/
1511
1512static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
1513                       void *closure) {
1514  const upb_oneofdef *o = (const upb_oneofdef*)r;
1515  upb_oneof_iter i;
1516  for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1517    const upb_fielddef *f = upb_oneof_iter_field(&i);
1518    visit(r, upb_fielddef_upcast2(f), closure);
1519  }
1520  if (o->parent) {
1521    visit(r, upb_msgdef_upcast2(o->parent), closure);
1522  }
1523}
1524
1525static void freeoneof(upb_refcounted *r) {
1526  upb_oneofdef *o = (upb_oneofdef*)r;
1527  upb_strtable_uninit(&o->ntof);
1528  upb_inttable_uninit(&o->itof);
1529  upb_def_uninit(upb_oneofdef_upcast_mutable(o));
1530  free(o);
1531}
1532
1533upb_oneofdef *upb_oneofdef_new(const void *owner) {
1534  static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
1535  upb_oneofdef *o = malloc(sizeof(*o));
1536  o->parent = NULL;
1537  if (!o) return NULL;
1538  if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
1539                    owner))
1540    goto err2;
1541  if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1542  if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1543  return o;
1544
1545err1:
1546  upb_inttable_uninit(&o->itof);
1547err2:
1548  free(o);
1549  return NULL;
1550}
1551
1552upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
1553  bool ok;
1554  upb_oneof_iter i;
1555  upb_oneofdef *newo = upb_oneofdef_new(owner);
1556  if (!newo) return NULL;
1557  ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
1558                           upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
1559  UPB_ASSERT_VAR(ok, ok);
1560  for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1561    upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1562    if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1563      upb_oneofdef_unref(newo, owner);
1564      return NULL;
1565    }
1566  }
1567  return newo;
1568}
1569
1570const char *upb_oneofdef_name(const upb_oneofdef *o) {
1571  return upb_def_fullname(upb_oneofdef_upcast(o));
1572}
1573
1574bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
1575                             upb_status *s) {
1576  if (upb_oneofdef_containingtype(o)) {
1577    upb_status_seterrmsg(s, "oneof already added to a message");
1578    return false;
1579  }
1580  return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
1581}
1582
1583const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1584  return o->parent;
1585}
1586
1587int upb_oneofdef_numfields(const upb_oneofdef *o) {
1588  return upb_strtable_count(&o->ntof);
1589}
1590
1591bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
1592                           const void *ref_donor,
1593                           upb_status *s) {
1594  assert(!upb_oneofdef_isfrozen(o));
1595  assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
1596
1597  /* This method is idempotent. Check if |f| is already part of this oneofdef
1598   * and return immediately if so. */
1599  if (upb_fielddef_containingoneof(f) == o) {
1600    return true;
1601  }
1602
1603  /* The field must have an OPTIONAL label. */
1604  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
1605    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
1606    return false;
1607  }
1608
1609  /* Check that no field with this name or number exists already in the oneof.
1610   * Also check that the field is not already part of a oneof. */
1611  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1612    upb_status_seterrmsg(s, "field name or number were not set");
1613    return false;
1614  } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
1615             upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
1616    upb_status_seterrmsg(s, "duplicate field name or number");
1617    return false;
1618  } else if (upb_fielddef_containingoneof(f) != NULL) {
1619    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
1620    return false;
1621  }
1622
1623  /* We allow adding a field to the oneof either if the field is not part of a
1624   * msgdef, or if it is and we are also part of the same msgdef. */
1625  if (o->parent == NULL) {
1626    /* If we're not in a msgdef, the field cannot be either. Otherwise we would
1627     * need to magically add this oneof to a msgdef to remain consistent, which
1628     * is surprising behavior. */
1629    if (upb_fielddef_containingtype(f) != NULL) {
1630      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
1631                              "oneof does not");
1632      return false;
1633    }
1634  } else {
1635    /* If we're in a msgdef, the user can add fields that either aren't in any
1636     * msgdef (in which case they're added to our msgdef) or already a part of
1637     * our msgdef. */
1638    if (upb_fielddef_containingtype(f) != NULL &&
1639        upb_fielddef_containingtype(f) != o->parent) {
1640      upb_status_seterrmsg(s, "fielddef belongs to a different message "
1641                              "than oneof");
1642      return false;
1643    }
1644  }
1645
1646  /* Commit phase. First add the field to our parent msgdef, if any, because
1647   * that may fail; then add the field to our own tables. */
1648
1649  if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
1650    if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
1651      return false;
1652    }
1653  }
1654
1655  release_containingtype(f);
1656  f->oneof = o;
1657  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
1658  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1659  upb_ref2(f, o);
1660  upb_ref2(o, f);
1661  if (ref_donor) upb_fielddef_unref(f, ref_donor);
1662
1663  return true;
1664}
1665
1666const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1667                                      const char *name, size_t length) {
1668  upb_value val;
1669  return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1670      upb_value_getptr(val) : NULL;
1671}
1672
1673const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1674  upb_value val;
1675  return upb_inttable_lookup32(&o->itof, num, &val) ?
1676      upb_value_getptr(val) : NULL;
1677}
1678
1679void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
1680  upb_inttable_begin(iter, &o->itof);
1681}
1682
1683void upb_oneof_next(upb_oneof_iter *iter) {
1684  upb_inttable_next(iter);
1685}
1686
1687bool upb_oneof_done(upb_oneof_iter *iter) {
1688  return upb_inttable_done(iter);
1689}
1690
1691upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1692  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1693}
1694
1695void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
1696  upb_inttable_iter_setdone(iter);
1697}
1698
1699
1700#include <stdlib.h>
1701#include <stdio.h>
1702#include <string.h>
1703
1704typedef struct cleanup_ent {
1705  upb_cleanup_func *cleanup;
1706  void *ud;
1707  struct cleanup_ent *next;
1708} cleanup_ent;
1709
1710static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1711
1712/* Default allocator **********************************************************/
1713
1714/* Just use realloc, keeping all allocated blocks in a linked list to destroy at
1715 * the end. */
1716
/* Header placed in front of every block handed out by the default allocator.
 *
 * The list is doubly-linked because, when realloc() moves an existing block,
 * the old pointer has to be replaced in the list efficiently. */
typedef struct mem_block {
  struct mem_block *prev;
  struct mem_block *next;
#ifndef NDEBUG
  size_t size;  /* Doesn't include mem_block structure. */
#endif
} mem_block;
1726
1727typedef struct {
1728  mem_block *head;
1729} default_alloc_ud;
1730
1731static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
1732  default_alloc_ud *ud = _ud;
1733  mem_block *from, *block;
1734  void *ret;
1735  UPB_UNUSED(oldsize);
1736
1737  from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
1738
1739#ifndef NDEBUG
1740  if (from) {
1741    assert(oldsize <= from->size);
1742  }
1743#endif
1744
1745  /* TODO(haberman): we probably need to provide even better alignment here,
1746   * like 16-byte alignment of the returned data pointer. */
1747  block = realloc(from, size + sizeof(mem_block));
1748  if (!block) return NULL;
1749  ret = (char*)block + sizeof(*block);
1750
1751#ifndef NDEBUG
1752  block->size = size;
1753#endif
1754
1755  if (from) {
1756    if (block != from) {
1757      /* The block was moved, so pointers in next and prev blocks must be
1758       * updated to its new location. */
1759      if (block->next) block->next->prev = block;
1760      if (block->prev) block->prev->next = block;
1761      if (ud->head == from) ud->head = block;
1762    }
1763  } else {
1764    /* Insert at head of linked list. */
1765    block->prev = NULL;
1766    block->next = ud->head;
1767    if (block->next) block->next->prev = block;
1768    ud->head = block;
1769  }
1770
1771  return ret;
1772}
1773
1774static void default_alloc_cleanup(void *_ud) {
1775  default_alloc_ud *ud = _ud;
1776  mem_block *block = ud->head;
1777
1778  while (block) {
1779    void *to_free = block;
1780    block = block->next;
1781    free(to_free);
1782  }
1783}
1784
1785
1786/* Standard error functions ***************************************************/
1787
1788static bool default_err(void *ud, const upb_status *status) {
1789  UPB_UNUSED(ud);
1790  UPB_UNUSED(status);
1791  return false;
1792}
1793
1794static bool write_err_to(void *ud, const upb_status *status) {
1795  upb_status *copy_to = ud;
1796  upb_status_copy(copy_to, status);
1797  return false;
1798}
1799
1800
1801/* upb_env ********************************************************************/
1802
1803void upb_env_init(upb_env *e) {
1804  default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
1805  e->ok_ = true;
1806  e->bytes_allocated = 0;
1807  e->cleanup_head = NULL;
1808
1809  ud->head = NULL;
1810
1811  /* Set default functions. */
1812  upb_env_setallocfunc(e, default_alloc, ud);
1813  upb_env_seterrorfunc(e, default_err, NULL);
1814}
1815
1816void upb_env_uninit(upb_env *e) {
1817  cleanup_ent *ent = e->cleanup_head;
1818
1819  while (ent) {
1820    ent->cleanup(ent->ud);
1821    ent = ent->next;
1822  }
1823
1824  /* Must do this after running cleanup functions, because this will delete
1825     the memory we store our cleanup entries in! */
1826  if (e->alloc == default_alloc) {
1827    default_alloc_cleanup(e->alloc_ud);
1828  }
1829}
1830
1831UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1832                                          void *ud) {
1833  e->alloc = alloc;
1834  e->alloc_ud = ud;
1835}
1836
1837UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1838                                          void *ud) {
1839  e->err = func;
1840  e->err_ud = ud;
1841}
1842
1843void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
1844  e->err = write_err_to;
1845  e->err_ud = status;
1846}
1847
1848bool upb_env_ok(const upb_env *e) {
1849  return e->ok_;
1850}
1851
1852bool upb_env_reporterror(upb_env *e, const upb_status *status) {
1853  e->ok_ = false;
1854  return e->err(e->err_ud, status);
1855}
1856
1857bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
1858  cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
1859  if (!ent) return false;
1860
1861  ent->cleanup = func;
1862  ent->ud = ud;
1863  ent->next = e->cleanup_head;
1864  e->cleanup_head = ent;
1865
1866  return true;
1867}
1868
1869void *upb_env_malloc(upb_env *e, size_t size) {
1870  e->bytes_allocated += size;
1871  if (e->alloc == seeded_alloc) {
1872    /* This is equivalent to the next branch, but allows inlining for a
1873     * measurable perf benefit. */
1874    return seeded_alloc(e->alloc_ud, NULL, 0, size);
1875  } else {
1876    return e->alloc(e->alloc_ud, NULL, 0, size);
1877  }
1878}
1879
1880void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
1881  char *ret;
1882  assert(oldsize <= size);
1883  ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
1884
1885#ifndef NDEBUG
1886  /* Overwrite non-preserved memory to ensure callers are passing the oldsize
1887   * that they truly require. */
1888  memset(ret + oldsize, 0xff, size - oldsize);
1889#endif
1890
1891  return ret;
1892}
1893
1894size_t upb_env_bytesallocated(const upb_env *e) {
1895  return e->bytes_allocated;
1896}
1897
1898
1899/* upb_seededalloc ************************************************************/
1900
/* Allocation granularity.  16 is a conservative choice in case anyone is
 * using SSE. */
static const size_t maxalign = 16;

/* Rounds `size` up to the nearest multiple of maxalign. */
static size_t align_up(size_t size) {
  /* maxalign is a power of two, so adding (maxalign - 1) and clearing the low
   * bits gives the same result as dividing and multiplying back. */
  const size_t mask = maxalign - 1;
  return (size + mask) & ~mask;
}
1907
1908UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1909                                          size_t size) {
1910  upb_seededalloc *a = ud;
1911
1912  size = align_up(size);
1913
1914  assert(a->mem_limit >= a->mem_ptr);
1915
1916  if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
1917    /* Fast path: we can satisfy from the initial allocation. */
1918    void *ret = a->mem_ptr;
1919    a->mem_ptr += size;
1920    return ret;
1921  } else {
1922    char *chptr = ptr;
1923    /* Slow path: fallback to other allocator. */
1924    a->need_cleanup = true;
1925    /* Is `ptr` part of the user-provided initial block? Don't pass it to the
1926     * default allocator if so; otherwise, it may try to realloc() the block. */
1927    if (chptr >= a->mem_base && chptr < a->mem_limit) {
1928      void *ret;
1929      assert(chptr + oldsize <= a->mem_limit);
1930      ret = a->alloc(a->alloc_ud, NULL, 0, size);
1931      if (ret) memcpy(ret, ptr, oldsize);
1932      return ret;
1933    } else {
1934      return a->alloc(a->alloc_ud, ptr, oldsize, size);
1935    }
1936  }
1937}
1938
1939void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
1940  default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
1941  a->mem_base = mem;
1942  a->mem_ptr = mem;
1943  a->mem_limit = (char*)mem + len;
1944  a->need_cleanup = false;
1945  a->returned_allocfunc = false;
1946
1947  ud->head = NULL;
1948
1949  upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
1950}
1951
1952void upb_seededalloc_uninit(upb_seededalloc *a) {
1953  if (a->alloc == default_alloc && a->need_cleanup) {
1954    default_alloc_cleanup(a->alloc_ud);
1955  }
1956}
1957
1958UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
1959                                                      upb_alloc_func *alloc,
1960                                                      void *ud) {
1961  assert(!a->returned_allocfunc);
1962  a->alloc = alloc;
1963  a->alloc_ud = ud;
1964}
1965
1966upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
1967  a->returned_allocfunc = true;
1968  return seeded_alloc;
1969}
1970/*
1971** TODO(haberman): it's unclear whether a lot of the consistency checks should
1972** assert() or return false.
1973*/
1974
1975
1976#include <stdlib.h>
1977#include <string.h>
1978
1979
1980
/* Defined for the sole purpose of having a unique pointer value for
 * UPB_NO_CLOSURE.  The variable is never assigned anywhere in this part of
 * the file. */
char _upb_noclosure;
1984
1985static void freehandlers(upb_refcounted *r) {
1986  upb_handlers *h = (upb_handlers*)r;
1987
1988  upb_inttable_iter i;
1989  upb_inttable_begin(&i, &h->cleanup_);
1990  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
1991    void *val = (void*)upb_inttable_iter_key(&i);
1992    upb_value func_val = upb_inttable_iter_value(&i);
1993    upb_handlerfree *func = upb_value_getfptr(func_val);
1994    func(val);
1995  }
1996
1997  upb_inttable_uninit(&h->cleanup_);
1998  upb_msgdef_unref(h->msg, h);
1999  free(h->sub);
2000  free(h);
2001}
2002
2003static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
2004                          void *closure) {
2005  const upb_handlers *h = (const upb_handlers*)r;
2006  upb_msg_field_iter i;
2007  for(upb_msg_field_begin(&i, h->msg);
2008      !upb_msg_field_done(&i);
2009      upb_msg_field_next(&i)) {
2010    upb_fielddef *f = upb_msg_iter_field(&i);
2011    const upb_handlers *sub;
2012    if (!upb_fielddef_issubmsg(f)) continue;
2013    sub = upb_handlers_getsubhandlers(h, f);
2014    if (sub) visit(r, upb_handlers_upcast(sub), closure);
2015  }
2016}
2017
/* Refcounted vtable for upb_handlers, in the order {visit, free}; passed to
 * upb_refcounted_init() by upb_handlers_new(). */
static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2019
/* State shared by the recursive newformsg() calls made on behalf of
 * upb_handlers_newfrozen(). */
typedef struct {
  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
  upb_handlers_callback *callback;  /* Called once per new handlers object. */
  const void *closure;              /* First argument passed to callback. */
} dfs_state;
2025
2026/* TODO(haberman): discard upb_handlers* objects that do not actually have any
2027 * handlers set and cannot reach any upb_handlers* object that does.  This is
2028 * slightly tricky to do correctly. */
2029static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2030                               dfs_state *s) {
2031  upb_msg_field_iter i;
2032  upb_handlers *h = upb_handlers_new(m, owner);
2033  if (!h) return NULL;
2034  if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2035
2036  s->callback(s->closure, h);
2037
2038  /* For each submessage field, get or create a handlers object and set it as
2039   * the subhandlers. */
2040  for(upb_msg_field_begin(&i, m);
2041      !upb_msg_field_done(&i);
2042      upb_msg_field_next(&i)) {
2043    upb_fielddef *f = upb_msg_iter_field(&i);
2044    const upb_msgdef *subdef;
2045    upb_value subm_ent;
2046
2047    if (!upb_fielddef_issubmsg(f)) continue;
2048
2049    subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
2050    if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2051      upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2052    } else {
2053      upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2054      if (!sub_mh) goto oom;
2055      upb_handlers_setsubhandlers(h, f, sub_mh);
2056      upb_handlers_unref(sub_mh, &sub_mh);
2057    }
2058  }
2059  return h;
2060
2061oom:
2062  upb_handlers_unref(h, owner);
2063  return NULL;
2064}
2065
/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
 * subhandlers for this submessage field.  Expands to an lvalue, so it can be
 * assigned to.  Arguments are parenthesized so that any expression (e.g.
 * `&obj` or `a + b`) can be passed safely. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
2072
2073static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
2074                         upb_handlertype_t type) {
2075  upb_selector_t sel;
2076  assert(!upb_handlers_isfrozen(h));
2077  if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
2078    upb_status_seterrf(
2079        &h->status_, "type mismatch: field %s does not belong to message %s",
2080        upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
2081    return -1;
2082  }
2083  if (!upb_handlers_getselector(f, type, &sel)) {
2084    upb_status_seterrf(
2085        &h->status_,
2086        "type mismatch: cannot register handler type %d for field %s",
2087        type, upb_fielddef_name(f));
2088    return -1;
2089  }
2090  return sel;
2091}
2092
2093static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2094                             upb_handlertype_t type) {
2095  int32_t sel = trygetsel(h, f, type);
2096  assert(sel >= 0);
2097  return sel;
2098}
2099
2100static const void **returntype(upb_handlers *h, const upb_fielddef *f,
2101                               upb_handlertype_t type) {
2102  return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_;
2103}
2104
/* Common implementation behind every handler setter: stores `func` (with the
 * attributes in `attr`, or the defaults when `attr` is NULL) in table slot
 * `sel` of `h`.
 *
 *   sel:  selector to fill; a negative value means the caller's lookup
 *         failed, which is reported as an error.
 *   f:    the field the handler is for, or NULL for message-level handlers.
 *   type: the kind of handler being registered.
 *
 * Returns true on success.  On failure returns false and records a message in
 * h->status_.  Failure cases: negative selector, slot already filled, closure
 * type conflicting with the one established for the surrounding context, or
 * return closure type conflicting with one already recorded for the slot. */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  upb_handlerattr *attr) {
  upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
  const void *closure_type;
  const void **context_closure_type;

  assert(!upb_handlers_isfrozen(h));

  if (sel < 0) {
    upb_status_seterrmsg(&h->status_,
                         "incorrect handler type for this field.");
    return false;
  }

  /* A handler may only be set once per slot. */
  if (h->table[sel].func) {
    upb_status_seterrmsg(&h->status_,
                         "cannot change handler once it has been set.");
    return false;
  }

  /* Work on a private copy so the caller's attr is never modified. */
  if (attr) {
    set_attr = *attr;
  }

  /* Check that the given closure type matches the closure type that has been
   * established for this context (if any). */
  closure_type = upb_handlerattr_closuretype(&set_attr);

  /* Pick the slot holding the closure type this handler will receive:
   * the STARTSTR return type for STRING handlers, the STARTSEQ return type
   * for handlers inside a sequence, otherwise the message's top-level type. */
  if (type == UPB_HANDLER_STRING) {
    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    context_closure_type = &h->top_closure_type;
  }

  if (closure_type && *context_closure_type &&
      closure_type != *context_closure_type) {
    /* TODO(haberman): better message for debugging. */
    if (f) {
      upb_status_seterrf(&h->status_,
                         "closure type does not match for field %s",
                         upb_fielddef_name(f));
    } else {
      upb_status_seterrmsg(
          &h->status_, "closure type does not match for message-level handler");
    }
    return false;
  }

  /* Record this handler's closure type as the type for its context.  Note
   * that this happens before the return-type check below, so it is kept even
   * if that check fails. */
  if (closure_type)
    *context_closure_type = closure_type;

  /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
   * matches any pre-existing expectations about what type is expected. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
    const void *table_return_type =
        upb_handlerattr_returnclosuretype(&h->table[sel].attr);
    if (return_type && table_return_type && return_type != table_return_type) {
      upb_status_seterrmsg(&h->status_, "closure return type does not match");
      return false;
    }

    /* Keep an expectation that was recorded earlier when this handler does
     * not declare a return type of its own; the attr assignment below would
     * otherwise wipe it out. */
    if (table_return_type && !return_type)
      upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
  }

  h->table[sel].func = (upb_func*)func;
  h->table[sel].attr = set_attr;
  return true;
}
2180
2181/* Returns the effective closure type for this handler (which will propagate
2182 * from outer frames if this frame has no START* handler).  Not implemented for
2183 * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
2184 * the effective closure type is unspecified (either no handler was registered
2185 * to specify it or the handler that was registered did not specify the closure
2186 * type). */
2187const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2188                                   upb_handlertype_t type) {
2189  const void *ret;
2190  upb_selector_t sel;
2191
2192  assert(type != UPB_HANDLER_STRING);
2193  ret = h->top_closure_type;
2194
2195  if (upb_fielddef_isseq(f) &&
2196      type != UPB_HANDLER_STARTSEQ &&
2197      type != UPB_HANDLER_ENDSEQ &&
2198      h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2199    ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2200  }
2201
2202  if (type == UPB_HANDLER_STRING &&
2203      h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2204    ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2205  }
2206
2207  /* The effective type of the submessage; not used yet.
2208   * if (type == SUBMESSAGE &&
2209   *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2210   *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2211   * } */
2212
2213  return ret;
2214}
2215
2216/* Checks whether the START* handler specified by f & type is missing even
2217 * though it is required to convert the established type of an outer frame
2218 * ("closure_type") into the established type of an inner frame (represented in
2219 * the return closure type of this handler's attr. */
2220bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2221                upb_status *status) {
2222  const void *closure_type;
2223  const upb_handlerattr *attr;
2224  const void *return_closure_type;
2225
2226  upb_selector_t sel = handlers_getsel(h, f, type);
2227  if (h->table[sel].func) return true;
2228  closure_type = effective_closure_type(h, f, type);
2229  attr = &h->table[sel].attr;
2230  return_closure_type = upb_handlerattr_returnclosuretype(attr);
2231  if (closure_type && return_closure_type &&
2232      closure_type != return_closure_type) {
2233    upb_status_seterrf(status,
2234                       "expected start handler to return sub type for field %f",
2235                       upb_fielddef_name(f));
2236    return false;
2237  }
2238  return true;
2239}
2240
2241/* Public interface ***********************************************************/
2242
2243upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
2244  int extra;
2245  upb_handlers *h;
2246
2247  assert(upb_msgdef_isfrozen(md));
2248
2249  extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2250  h = calloc(sizeof(*h) + extra, 1);
2251  if (!h) return NULL;
2252
2253  h->msg = md;
2254  upb_msgdef_ref(h->msg, h);
2255  upb_status_clear(&h->status_);
2256  h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
2257  if (!h->sub) goto oom;
2258  if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2259    goto oom;
2260  if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2261
2262  /* calloc() above initialized all handlers to NULL. */
2263  return h;
2264
2265oom:
2266  freehandlers(upb_handlers_upcast_mutable(h));
2267  return NULL;
2268}
2269
2270const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2271                                           const void *owner,
2272                                           upb_handlers_callback *callback,
2273                                           const void *closure) {
2274  dfs_state state;
2275  upb_handlers *ret;
2276  bool ok;
2277  upb_refcounted *r;
2278
2279  state.callback = callback;
2280  state.closure = closure;
2281  if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2282
2283  ret = newformsg(m, owner, &state);
2284
2285  upb_inttable_uninit(&state.tab);
2286  if (!ret) return NULL;
2287
2288  r = upb_handlers_upcast_mutable(ret);
2289  ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
2290  UPB_ASSERT_VAR(ok, ok);
2291
2292  return ret;
2293}
2294
2295const upb_status *upb_handlers_status(upb_handlers *h) {
2296  assert(!upb_handlers_isfrozen(h));
2297  return &h->status_;
2298}
2299
2300void upb_handlers_clearerr(upb_handlers *h) {
2301  assert(!upb_handlers_isfrozen(h));
2302  upb_status_clear(&h->status_);
2303}
2304
/* Generates the public setter upb_handlers_set<name>(h, f, func, attr) for
 * one handler type.  Each generated function looks up the selector for
 * (f, handlertype) with trygetsel() and passes the result, negative on
 * failure, to doset(), which does all validation and error reporting.
 *
 *   name:         suffix of the generated function name.
 *   handlerctype: C function-pointer type accepted for `func`.
 *   handlertype:  upb_handlertype_t value the setter registers for. */
#define SETTER(name, handlerctype, handlertype) \
  bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
                                handlerctype func, upb_handlerattr *attr) { \
    int32_t sel = trygetsel(h, f, handlertype); \
    return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
  }

/* One setter per field-level handler type. */
SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)

#undef SETTER
2328
2329bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2330                              upb_handlerattr *attr) {
2331  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2332               (upb_func *)func, attr);
2333}
2334
2335bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
2336                            upb_handlerattr *attr) {
2337  assert(!upb_handlers_isfrozen(h));
2338  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2339               (upb_func *)func, attr);
2340}
2341
2342bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
2343                                 const upb_handlers *sub) {
2344  assert(sub);
2345  assert(!upb_handlers_isfrozen(h));
2346  assert(upb_fielddef_issubmsg(f));
2347  if (SUBH_F(h, f)) return false;  /* Can't reset. */
2348  if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
2349    return false;
2350  }
2351  SUBH_F(h, f) = sub;
2352  upb_ref2(sub, h);
2353  return true;
2354}
2355
2356const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
2357                                                const upb_fielddef *f) {
2358  assert(upb_fielddef_issubmsg(f));
2359  return SUBH_F(h, f);
2360}
2361
2362bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
2363                          upb_handlerattr *attr) {
2364  if (!upb_handlers_gethandler(h, sel))
2365    return false;
2366  *attr = h->table[sel].attr;
2367  return true;
2368}
2369
2370const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
2371                                                    upb_selector_t sel) {
2372  /* STARTSUBMSG selector in sel is the field's selector base. */
2373  return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
2374}
2375
2376const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2377
2378bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
2379  bool ok;
2380  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
2381    return false;
2382  }
2383  ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
2384  UPB_ASSERT_VAR(ok, ok);
2385  return true;
2386}
2387
2388
2389/* "Static" methods ***********************************************************/
2390
2391bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
2392  /* TODO: verify we have a transitive closure. */
2393  int i;
2394  for (i = 0; i < n; i++) {
2395    upb_msg_field_iter j;
2396    upb_handlers *h = handlers[i];
2397
2398    if (!upb_ok(&h->status_)) {
2399      upb_status_seterrf(s, "handlers for message %s had error status: %s",
2400                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
2401                         upb_status_errmsg(&h->status_));
2402      return false;
2403    }
2404
2405    /* Check that there are no closure mismatches due to missing Start* handlers
2406     * or subhandlers with different type-level types. */
2407    for(upb_msg_field_begin(&j, h->msg);
2408        !upb_msg_field_done(&j);
2409        upb_msg_field_next(&j)) {
2410
2411      const upb_fielddef *f = upb_msg_iter_field(&j);
2412      if (upb_fielddef_isseq(f)) {
2413        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
2414          return false;
2415      }
2416
2417      if (upb_fielddef_isstring(f)) {
2418        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
2419          return false;
2420      }
2421
2422      if (upb_fielddef_issubmsg(f)) {
2423        bool hashandler = false;
2424        if (upb_handlers_gethandler(
2425                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
2426            upb_handlers_gethandler(
2427                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
2428          hashandler = true;
2429        }
2430
2431        if (upb_fielddef_isseq(f) &&
2432            (upb_handlers_gethandler(
2433                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
2434             upb_handlers_gethandler(
2435                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
2436          hashandler = true;
2437        }
2438
2439        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
2440          /* For now we add an empty subhandlers in this case.  It makes the
2441           * decoder code generator simpler, because it only has to handle two
2442           * cases (submessage has handlers or not) as opposed to three
2443           * (submessage has handlers in enclosing message but no subhandlers).
2444           *
2445           * This makes parsing less efficient in the case that we want to
2446           * notice a submessage but skip its contents (like if we're testing
2447           * for submessage presence or counting the number of repeated
2448           * submessages).  In this case we will end up parsing the submessage
2449           * field by field and throwing away the results for each, instead of
2450           * skipping the whole delimited thing at once.  If this is an issue we
2451           * can revisit it, but do remember that this only arises when you have
2452           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
2453           * submessage but no subhandlers.  The uses cases for this are
2454           * limited. */
2455          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
2456          upb_handlers_setsubhandlers(h, f, sub);
2457          upb_handlers_unref(sub, &sub);
2458        }
2459
2460        /* TODO(haberman): check type of submessage.
2461         * This is slightly tricky; also consider whether we should check that
2462         * they match at setsubhandlers time. */
2463      }
2464    }
2465  }
2466
2467  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
2468                             UPB_MAX_HANDLER_DEPTH)) {
2469    return false;
2470  }
2471
2472  return true;
2473}
2474
2475upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2476  switch (upb_fielddef_type(f)) {
2477    case UPB_TYPE_INT32:
2478    case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2479    case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2480    case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2481    case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2482    case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2483    case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2484    case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
2485    default: assert(false); return -1;  /* Invalid input. */
2486  }
2487}
2488
2489bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
2490                              upb_selector_t *s) {
2491  switch (type) {
2492    case UPB_HANDLER_INT32:
2493    case UPB_HANDLER_INT64:
2494    case UPB_HANDLER_UINT32:
2495    case UPB_HANDLER_UINT64:
2496    case UPB_HANDLER_FLOAT:
2497    case UPB_HANDLER_DOUBLE:
2498    case UPB_HANDLER_BOOL:
2499      if (!upb_fielddef_isprimitive(f) ||
2500          upb_handlers_getprimitivehandlertype(f) != type)
2501        return false;
2502      *s = f->selector_base;
2503      break;
2504    case UPB_HANDLER_STRING:
2505      if (upb_fielddef_isstring(f)) {
2506        *s = f->selector_base;
2507      } else if (upb_fielddef_lazy(f)) {
2508        *s = f->selector_base + 3;
2509      } else {
2510        return false;
2511      }
2512      break;
2513    case UPB_HANDLER_STARTSTR:
2514      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2515        *s = f->selector_base + 1;
2516      } else {
2517        return false;
2518      }
2519      break;
2520    case UPB_HANDLER_ENDSTR:
2521      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2522        *s = f->selector_base + 2;
2523      } else {
2524        return false;
2525      }
2526      break;
2527    case UPB_HANDLER_STARTSEQ:
2528      if (!upb_fielddef_isseq(f)) return false;
2529      *s = f->selector_base - 2;
2530      break;
2531    case UPB_HANDLER_ENDSEQ:
2532      if (!upb_fielddef_isseq(f)) return false;
2533      *s = f->selector_base - 1;
2534      break;
2535    case UPB_HANDLER_STARTSUBMSG:
2536      if (!upb_fielddef_issubmsg(f)) return false;
2537      /* Selectors for STARTSUBMSG are at the beginning of the table so that the
2538       * selector can also be used as an index into the "sub" array of
2539       * subhandlers.  The indexes for the two into these two tables are the
2540       * same, except that in the handler table the static selectors come first. */
2541      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
2542      break;
2543    case UPB_HANDLER_ENDSUBMSG:
2544      if (!upb_fielddef_issubmsg(f)) return false;
2545      *s = f->selector_base;
2546      break;
2547  }
2548  assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
2549  return true;
2550}
2551
2552uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
2553  return upb_fielddef_isseq(f) ? 2 : 0;
2554}
2555
2556uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
2557  uint32_t ret = 1;
2558  if (upb_fielddef_isseq(f)) ret += 2;    /* STARTSEQ/ENDSEQ */
2559  if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
2560  if (upb_fielddef_issubmsg(f)) {
2561    /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
2562    ret += 0;
2563    if (upb_fielddef_lazy(f)) {
2564      /* STARTSTR/ENDSTR/STRING (for lazy) */
2565      ret += 3;
2566    }
2567  }
2568  return ret;
2569}
2570
2571
2572/* upb_handlerattr ************************************************************/
2573
2574void upb_handlerattr_init(upb_handlerattr *attr) {
2575  upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER;
2576  memcpy(attr, &from, sizeof(*attr));
2577}
2578
2579void upb_handlerattr_uninit(upb_handlerattr *attr) {
2580  UPB_UNUSED(attr);
2581}
2582
2583bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
2584  attr->handler_data_ = hd;
2585  return true;
2586}
2587
2588bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
2589  attr->closure_type_ = type;
2590  return true;
2591}
2592
2593const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2594  return attr->closure_type_;
2595}
2596
2597bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
2598                                          const void *type) {
2599  attr->return_closure_type_ = type;
2600  return true;
2601}
2602
2603const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2604  return attr->return_closure_type_;
2605}
2606
2607bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
2608  attr->alwaysok_ = alwaysok;
2609  return true;
2610}
2611
2612bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2613  return attr->alwaysok_;
2614}
2615
2616/* upb_bufhandle **************************************************************/
2617
2618size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2619  return h->objofs_;
2620}
2621
2622/* upb_byteshandler ***********************************************************/
2623
2624void upb_byteshandler_init(upb_byteshandler* h) {
2625  memset(h, 0, sizeof(*h));
2626}
2627
2628/* For when we support handlerfree callbacks. */
2629void upb_byteshandler_uninit(upb_byteshandler* h) {
2630  UPB_UNUSED(h);
2631}
2632
2633bool upb_byteshandler_setstartstr(upb_byteshandler *h,
2634                                  upb_startstr_handlerfunc *func, void *d) {
2635  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
2636  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
2637  return true;
2638}
2639
2640bool upb_byteshandler_setstring(upb_byteshandler *h,
2641                                upb_string_handlerfunc *func, void *d) {
2642  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
2643  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
2644  return true;
2645}
2646
2647bool upb_byteshandler_setendstr(upb_byteshandler *h,
2648                                upb_endfield_handlerfunc *func, void *d) {
2649  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
2650  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
2651  return true;
2652}
2653/*
2654** upb::RefCounted Implementation
2655**
2656** Our key invariants are:
2657** 1. reference cycles never span groups
2658** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2659**
2660** The previous two are how we avoid leaking cycles.  Other important
2661** invariants are:
2662** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2663**    this implies group(from) == group(to).  (In practice, what we implement
2664**    is even stronger; "from" and "to" will share a group if there has *ever*
2665**    been a ref2(to, from), but all that is necessary for correctness is the
2666**    weaker one).
2667** 4. mutable and immutable objects are never in the same group.
2668*/
2669
2670
2671#include <setjmp.h>
2672#include <stdlib.h>
2673
/* Forward declaration; the definition is not in this part of the file. */
static void freeobj(upb_refcounted *o);

/* Sentinel owner value: track() and untrack() return immediately, without
 * recording anything, when the owner equals UPB_UNTRACKED_REF.  The variable
 * below exists only so the sentinel has an address of its own. */
const char untracked_val;
const void *UPB_UNTRACKED_REF = &untracked_val;
2678
2679/* arch-specific atomic primitives  *******************************************/
2680
/* atomic_inc(a) adds one to *a.  atomic_dec(a) subtracts one from *a and
 * returns true iff the new value is zero.  Every variant takes a uint32_t*,
 * because that is what the callers (refgroup/unrefgroup) pass. */

#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Plain, non-atomic versions for single-threaded builds. */
static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(WIN32) /*-------------------------------------------------------*/

#include <Windows.h>

/* The Interlocked* functions operate on a 32-bit LONG, so the uint32_t
 * counter is accessed through a LONG pointer. */
static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((volatile LONG *)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((volatile LONG *)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
2704
/* All static objects point to this refcount.
 * It is special-cased in ref/unref below: refgroup() and unrefgroup() compare
 * the group pointer against its address and never modify it.  It is
 * initialized to -1, which for a uint32_t is the maximum value. */
uint32_t static_refcount = -1;
2708
2709/* We can avoid atomic ops for statically-declared objects.
2710 * This is a minor optimization but nice since we can avoid degrading under
2711 * contention in this case. */
2712
2713static void refgroup(uint32_t *group) {
2714  if (group != &static_refcount)
2715    atomic_inc(group);
2716}
2717
2718static bool unrefgroup(uint32_t *group) {
2719  if (group == &static_refcount) {
2720    return false;
2721  } else {
2722    return atomic_dec(group);
2723  }
2724}
2725
2726
2727/* Reference tracking (debug only) ********************************************/
2728
2729#ifdef UPB_DEBUG_REFS
2730
/* upb_lock()/upb_unlock() bracket the reference-tracking updates in track()
 * and untrack(). */
#ifdef UPB_THREAD_UNSAFE

/* Thread-unsafe builds use empty lock functions. */
static void upb_lock() {}
static void upb_unlock() {}

#else

/* User must define functions that lock/unlock a global mutex and link this
 * file against them. */
void upb_lock();
void upb_unlock();

#endif
2744
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail.
 *
 * When `predicate` is false this asserts (active in debug builds) and then
 * calls exit(1), so the process stops even when asserts are compiled out.
 * `predicate` is expanded more than once.
 *
 * NOTE(review): the expansion is a bare `if { ... }` rather than a
 * `do { ... } while (0)`, so avoid using it as the body of an unbraced
 * if/else. */
#define CHECK_OOM(predicate) if (!(predicate)) { assert(predicate); exit(1); }
2750
/* Debug-only record of the reference(s) one owner holds on an object.  track()
 * stores one of these per owner in the object's "refs" table. */
typedef struct {
  int count;  /* How many refs there are (duplicates only allowed for ref2). */
  bool is_ref2;  /* The "ref2" flag that was passed to track() on creation. */
} trackedref;
2755
2756static trackedref *trackedref_new(bool is_ref2) {
2757  trackedref *ret = malloc(sizeof(*ret));
2758  CHECK_OOM(ret);
2759  ret->count = 1;
2760  ret->is_ref2 = is_ref2;
2761  return ret;
2762}
2763
2764static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2765  upb_value v;
2766
2767  assert(owner);
2768  if (owner == UPB_UNTRACKED_REF) return;
2769
2770  upb_lock();
2771  if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2772    trackedref *ref = upb_value_getptr(v);
2773    /* Since we allow multiple ref2's for the same to/from pair without
2774     * allocating separate memory for each one, we lose the fine-grained
2775     * tracking behavior we get with regular refs.  Since ref2s only happen
2776     * inside upb, we'll accept this limitation until/unless there is a really
2777     * difficult upb-internal bug that can't be figured out without it. */
2778    assert(ref2);
2779    assert(ref->is_ref2);
2780    ref->count++;
2781  } else {
2782    trackedref *ref = trackedref_new(ref2);
2783    bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
2784    CHECK_OOM(ok);
2785    if (ref2) {
2786      /* We know this cast is safe when it is a ref2, because it's coming from
2787       * another refcounted object. */
2788      const upb_refcounted *from = owner;
2789      assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2790      ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
2791      CHECK_OOM(ok);
2792    }
2793  }
2794  upb_unlock();
2795}
2796
2797static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2798  upb_value v;
2799  bool found;
2800  trackedref *ref;
2801
2802  assert(owner);
2803  if (owner == UPB_UNTRACKED_REF) return;
2804
2805  upb_lock();
2806  found = upb_inttable_lookupptr(r->refs, owner, &v);
2807  /* This assert will fail if an owner attempts to release a ref it didn't have. */
2808  UPB_ASSERT_VAR(found, found);
2809  ref = upb_value_getptr(v);
2810  assert(ref->is_ref2 == ref2);
2811  if (--ref->count == 0) {
2812    free(ref);
2813    upb_inttable_removeptr(r->refs, owner, NULL);
2814    if (ref2) {
2815      /* We know this cast is safe when it is a ref2, because it's coming from
2816       * another refcounted object. */
2817      const upb_refcounted *from = owner;
2818      bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2819      assert(removed);
2820    }
2821  }
2822  upb_unlock();
2823}
2824
2825static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
2826  upb_value v;
2827  bool found;
2828  trackedref *ref;
2829
2830  upb_lock();
2831  found = upb_inttable_lookupptr(r->refs, owner, &v);
2832  UPB_ASSERT_VAR(found, found);
2833  ref = upb_value_getptr(v);
2834  assert(ref->is_ref2 == ref2);
2835  upb_unlock();
2836}
2837
2838/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
2839 * originate from the given owner. */
2840static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
2841  upb_inttable_iter i;
2842
2843  upb_lock();
2844  upb_inttable_begin(&i, owner->ref2s);
2845  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
2846    upb_value v;
2847    upb_value count;
2848    trackedref *ref;
2849    bool ok;
2850    bool found;
2851
2852    upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
2853
2854    /* To get the count we need to look in the target's table. */
2855    found = upb_inttable_lookupptr(to->refs, owner, &v);
2856    assert(found);
2857    ref = upb_value_getptr(v);
2858    count = upb_value_int32(ref->count);
2859
2860    ok = upb_inttable_insertptr(tab, to, count);
2861    CHECK_OOM(ok);
2862  }
2863  upb_unlock();
2864}
2865
/* Closure passed to visit_check() while the debug visit() verifies an
 * object's visit callback. */
typedef struct {
  upb_inttable ref2;  /* subobj -> ref2 count still expected to be visited. */
  const upb_refcounted *obj;  /* The object whose visit() is being checked. */
} check_state;
2870
2871static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
2872                        void *closure) {
2873  check_state *s = closure;
2874  upb_inttable *ref2 = &s->ref2;
2875  upb_value v;
2876  bool removed;
2877  int32_t newcount;
2878
2879  assert(obj == s->obj);
2880  assert(subobj);
2881  removed = upb_inttable_removeptr(ref2, subobj, &v);
2882  /* The following assertion will fail if the visit() function visits a subobj
2883   * that it did not have a ref2 on, or visits the same subobj too many times. */
2884  assert(removed);
2885  newcount = upb_value_getint32(v) - 1;
2886  if (newcount > 0) {
2887    upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
2888  }
2889}
2890
2891static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
2892                  void *closure) {
2893  bool ok;
2894
2895  /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
2896   * exactly the set of nodes that visit() should visit.  So we verify visit()'s
2897   * correctness here. */
2898  check_state state;
2899  state.obj = r;
2900  ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
2901  CHECK_OOM(ok);
2902  getref2s(r, &state.ref2);
2903
2904  /* This should visit any children in the ref2 table. */
2905  if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
2906
2907  /* This assertion will fail if the visit() function missed any children. */
2908  assert(upb_inttable_count(&state.ref2) == 0);
2909  upb_inttable_uninit(&state.ref2);
2910  if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
2911}
2912
2913static bool trackinit(upb_refcounted *r) {
2914  r->refs = malloc(sizeof(*r->refs));
2915  r->ref2s = malloc(sizeof(*r->ref2s));
2916  if (!r->refs || !r->ref2s) goto err1;
2917
2918  if (!upb_inttable_init(r->refs, UPB_CTYPE_PTR)) goto err1;
2919  if (!upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) goto err2;
2920  return true;
2921
2922err2:
2923  upb_inttable_uninit(r->refs);
2924err1:
2925  free(r->refs);
2926  free(r->ref2s);
2927  return false;
2928}
2929
2930static void trackfree(const upb_refcounted *r) {
2931  upb_inttable_uninit(r->refs);
2932  upb_inttable_uninit(r->ref2s);
2933  free(r->refs);
2934  free(r->ref2s);
2935}
2936
2937#else
2938
2939static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2940  UPB_UNUSED(r);
2941  UPB_UNUSED(owner);
2942  UPB_UNUSED(ref2);
2943}
2944
2945static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2946  UPB_UNUSED(r);
2947  UPB_UNUSED(owner);
2948  UPB_UNUSED(ref2);
2949}
2950
2951static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
2952  UPB_UNUSED(r);
2953  UPB_UNUSED(owner);
2954  UPB_UNUSED(ref2);
2955}
2956
2957static bool trackinit(upb_refcounted *r) {
2958  UPB_UNUSED(r);
2959  return true;
2960}
2961
2962static void trackfree(const upb_refcounted *r) {
2963  UPB_UNUSED(r);
2964}
2965
2966static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
2967                  void *closure) {
2968  if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
2969}
2970
2971#endif  /* UPB_DEBUG_REFS */
2972
2973
2974/* freeze() *******************************************************************/
2975
2976/* The freeze() operation is by far the most complicated part of this scheme.
2977 * We compute strongly-connected components and then mutate the graph such that
2978 * we preserve the invariants documented at the top of this file.  And we must
2979 * handle out-of-memory errors gracefully (without leaving the graph
2980 * inconsistent), which adds to the fun. */
2981
/* The state used by the freeze operation (shared across many functions). */
typedef struct {
  int depth;     /* Current nesting depth of tarjan_visit() calls. */
  int maxdepth;  /* Exceeding this depth aborts the freeze with an error. */
  uint64_t index;  /* Next Tarjan index; assigned and incremented by push(). */
  /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
   * color. */
  upb_inttable objattr;
  upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
  upb_inttable groups;  /* array of uint32_t*, malloc'd refcounts for new groups */
  upb_status *status;   /* Where error messages are reported. */
  jmp_buf err;          /* longjmp() target used by err()/oom(). */
} tarjan;
2995
2996static void release_ref2(const upb_refcounted *obj,
2997                         const upb_refcounted *subobj,
2998                         void *closure);
2999
3000/* Node attributes -----------------------------------------------------------*/
3001
/* After our analysis phase all nodes will be either GRAY or WHITE.
 *
 * The color is stored in the low 2 bits of an object's attribute word (see
 * color()); an object with no attribute entry reads as BLACK. */

typedef enum {
  BLACK = 0,  /* Object has not been seen. */
  GRAY,   /* Object has been found via a refgroup but may not be reachable. */
  GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
  WHITE   /* Object is reachable and has been assigned a group (SCC). */
} color_t;
3010
3011UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
3012UPB_NORETURN static void oom(tarjan *t) {
3013  upb_status_seterrmsg(t->status, "out of memory");
3014  err(t);
3015}
3016
3017static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3018  upb_value v;
3019  return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3020      upb_value_getuint64(v) : 0;
3021}
3022
3023static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3024  upb_value v;
3025  bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3026  UPB_ASSERT_VAR(found, found);
3027  return upb_value_getuint64(v);
3028}
3029
3030static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
3031  upb_inttable_removeptr(&t->objattr, r, NULL);
3032  upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
3033}
3034
3035static color_t color(tarjan *t, const upb_refcounted *r) {
3036  return trygetattr(t, r) & 0x3;  /* Color is always stored in the low 2 bits. */
3037}
3038
3039static void set_gray(tarjan *t, const upb_refcounted *r) {
3040  assert(color(t, r) == BLACK);
3041  setattr(t, r, GRAY);
3042}
3043
3044/* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
3045static void push(tarjan *t, const upb_refcounted *r) {
3046  assert(color(t, r) == BLACK || color(t, r) == GRAY);
3047  /* This defines the attr layout for the GREEN state.  "index" and "lowlink"
3048   * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
3049  setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
3050  if (++t->index == 0x80000000) {
3051    upb_status_seterrmsg(t->status, "too many objects to freeze");
3052    err(t);
3053  }
3054  upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
3055}
3056
3057/* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3058 * SCC group. */
3059static upb_refcounted *pop(tarjan *t) {
3060  upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3061  assert(color(t, r) == GREEN);
3062  /* This defines the attr layout for nodes in the WHITE state.
3063   * Top of group stack is [group, NULL]; we point at group. */
3064  setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3065  return r;
3066}
3067
3068static void tarjan_newgroup(tarjan *t) {
3069  uint32_t *group = malloc(sizeof(*group));
3070  if (!group) oom(t);
3071  /* Push group and empty group leader (we'll fill in leader later). */
3072  if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
3073      !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
3074    free(group);
3075    oom(t);
3076  }
3077  *group = 0;
3078}
3079
3080static uint32_t idx(tarjan *t, const upb_refcounted *r) {
3081  assert(color(t, r) == GREEN);
3082  return (getattr(t, r) >> 2) & 0x7FFFFFFF;
3083}
3084
3085static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
3086  if (color(t, r) == GREEN) {
3087    return getattr(t, r) >> 33;
3088  } else {
3089    return UINT32_MAX;
3090  }
3091}
3092
3093static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
3094  assert(color(t, r) == GREEN);
3095  setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
3096}
3097
3098static uint32_t *group(tarjan *t, upb_refcounted *r) {
3099  uint64_t groupnum;
3100  upb_value v;
3101  bool found;
3102
3103  assert(color(t, r) == WHITE);
3104  groupnum = getattr(t, r) >> 8;
3105  found = upb_inttable_lookup(&t->groups, groupnum, &v);
3106  UPB_ASSERT_VAR(found, found);
3107  return upb_value_getptr(v);
3108}
3109
3110/* If the group leader for this object's group has not previously been set,
3111 * the given object is assigned to be its leader. */
3112static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
3113  uint64_t leader_slot;
3114  upb_value v;
3115  bool found;
3116
3117  assert(color(t, r) == WHITE);
3118  leader_slot = (getattr(t, r) >> 8) + 1;
3119  found = upb_inttable_lookup(&t->groups, leader_slot, &v);
3120  UPB_ASSERT_VAR(found, found);
3121  if (upb_value_getptr(v)) {
3122    return upb_value_getptr(v);
3123  } else {
3124    upb_inttable_remove(&t->groups, leader_slot, NULL);
3125    upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
3126    return r;
3127  }
3128}
3129
3130
3131/* Tarjan's algorithm --------------------------------------------------------*/
3132
3133/* See:
3134 *   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
3135static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3136
3137static void tarjan_visit(const upb_refcounted *obj,
3138                         const upb_refcounted *subobj,
3139                         void *closure) {
3140  tarjan *t = closure;
3141  if (++t->depth > t->maxdepth) {
3142    upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3143    err(t);
3144  } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
3145    /* Do nothing: we don't want to visit or color already-frozen nodes,
3146     * and WHITE nodes have already been assigned a SCC. */
3147  } else if (color(t, subobj) < GREEN) {
3148    /* Subdef has not yet been visited; recurse on it. */
3149    do_tarjan(subobj, t);
3150    set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3151  } else if (color(t, subobj) == GREEN) {
3152    /* Subdef is in the stack and hence in the current SCC. */
3153    set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3154  }
3155  --t->depth;
3156}
3157
3158static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
3159  if (color(t, obj) == BLACK) {
3160    /* We haven't seen this object's group; mark the whole group GRAY. */
3161    const upb_refcounted *o = obj;
3162    do { set_gray(t, o); } while ((o = o->next) != obj);
3163  }
3164
3165  push(t, obj);
3166  visit(obj, tarjan_visit, t);
3167  if (lowlink(t, obj) == idx(t, obj)) {
3168    tarjan_newgroup(t);
3169    while (pop(t) != obj)
3170      ;
3171  }
3172}
3173
3174
3175/* freeze() ------------------------------------------------------------------*/
3176
3177static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3178                     void *_t) {
3179  tarjan *t = _t;
3180  assert(color(t, r) > BLACK);
3181  if (color(t, subobj) > BLACK && r->group != subobj->group) {
3182    /* Previously this ref was not reflected in subobj->group because they
3183     * were in the same group; now that they are split a ref must be taken. */
3184    refgroup(subobj->group);
3185  }
3186}
3187
/* Freezes the objects reachable from the "n" objects in "roots".
 *
 * First runs Tarjan's algorithm (do_tarjan) from every root that has not been
 * visited yet; that phase only records attributes and allocates new group
 * refcounts.  If it completes, the object graph is rewritten in three passes
 * (see the comments below).
 *
 * Returns true on success.  Returns false if one of the work tables cannot be
 * initialized, or if the analysis phase bails out through err()/oom() (depth
 * above "maxdepth", too many objects, allocation failure); in the latter case
 * the refcounts allocated for new groups are freed again. */
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
                   int maxdepth) {
  /* volatile: read at err4 after a possible longjmp() back into this frame. */
  volatile bool ret = false;
  int i;
  upb_inttable_iter iter;

  /* We run in two passes so that we can allocate all memory before performing
   * any mutation of the input -- this allows us to leave the input unchanged
   * in the case of memory allocation failure. */
  tarjan t;
  t.index = 0;
  t.depth = 0;
  t.maxdepth = maxdepth;
  t.status = s;
  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
  /* err() and oom() longjmp() back here; "ret" is still false then, so the
   * cleanup at err4 also frees the group refcounts allocated so far. */
  if (setjmp(t.err) != 0) goto err4;


  for (i = 0; i < n; i++) {
    if (color(&t, roots[i]) < GREEN) {
      do_tarjan(roots[i], &t);
    }
  }

  /* If we've made it this far, no further errors are possible so it's safe to
   * mutate the objects without risk of leaving them in an inconsistent state. */
  ret = true;

  /* The transformation that follows requires care.  The preconditions are:
   * - all objects in attr map are WHITE or GRAY, and are in mutable groups
   *   (groups of all mutable objs)
   * - no ref2(to, from) refs have incremented count(to) if both "to" and
   *   "from" are in our attr map (this follows from invariants (2) and (3)) */

  /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
   * new groups  according to the SCC's we computed.  These new groups will
   * consist of only frozen objects.  None will be immediately collectible,
   * because WHITE objects are by definition reachable from one of "roots",
   * which the caller must own refs on. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* Since removal from a singly-linked list requires access to the object's
     * predecessor, we consider obj->next instead of obj for moving.  With the
     * while() loop we guarantee that we will visit every node's predecessor.
     * Proof:
     *  1. every node's predecessor is in our attr map.
     *  2. though the loop body may change a node's predecessor, it will only
     *     change it to be the node we are currently operating on, so with a
     *     while() loop we guarantee ourselves the chance to remove each node. */
    while (color(&t, obj->next) == WHITE &&
           group(&t, obj->next) != obj->next->group) {
      upb_refcounted *leader;

      /* Remove from old group. */
      upb_refcounted *move = obj->next;
      if (obj == move) {
        /* Removing the last object from a group. */
        assert(*obj->group == obj->individual_count);
        free(obj->group);
      } else {
        obj->next = move->next;
        /* This may decrease to zero; we'll collect GRAY objects (if any) that
         * remain in the group in the third pass. */
        assert(*move->group >= move->individual_count);
        *move->group -= move->individual_count;
      }

      /* Add to new group. */
      leader = groupleader(&t, move);
      if (move == leader) {
        /* First object added to new group is its leader. */
        move->group = group(&t, move);
        move->next = move;
        *move->group = move->individual_count;
      } else {
        /* Group already has at least one object in it. */
        assert(leader->group == group(&t, move));
        move->group = group(&t, move);
        move->next = leader->next;
        leader->next = move;
        *move->group += move->individual_count;
      }

      move->is_frozen = true;
    }
  }

  /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
   * increment count(to) if group(obj) != group(to) (which could now be the
   * case if "to" was just frozen). */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    visit(obj, crossref, &t);
  }

  /* Pass 3: GRAY objects are collected if their group's refcount dropped to
   * zero when we removed its white nodes.  This can happen if they had only
   * been kept alive by virtue of sharing a group with an object that was just
   * frozen.
   *
   * It is important that we do this last, since the GRAY object's free()
   * function could call unref2() on just-frozen objects, which will decrement
   * refs that were added in pass 2. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    if (obj->group == NULL || *obj->group == 0) {
      if (obj->group) {
        upb_refcounted *o;

        /* We eagerly free() the group's count (since we can't easily determine
         * the group's remaining size it's the easiest way to ensure it gets
         * done). */
        free(obj->group);

        /* Visit to release ref2's (done in a separate pass since release_ref2
         * depends on o->group being unmodified so it can test merged()). */
        o = obj;
        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);

        /* Mark "group" fields as NULL so we know to free the objects later in
         * this loop, but also don't try to delete the group twice. */
        o = obj;
        do { o->group = NULL; } while ((o = o->next) != obj);
      }
      freeobj(obj);
    }
  }

err4:
  if (!ret) {
    /* Failure: the new group refcounts were never attached to any object, so
     * release them here.  Leader slots are still NULL at this point, and
     * free(NULL) is a no-op. */
    upb_inttable_begin(&iter, &t.groups);
    for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
      free(upb_value_getptr(upb_inttable_iter_value(&iter)));
  }
  upb_inttable_uninit(&t.groups);
err3:
  upb_inttable_uninit(&t.stack);
err2:
  upb_inttable_uninit(&t.objattr);
err1:
  return ret;
}
3335
3336
3337/* Misc internal functions  ***************************************************/
3338
3339static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3340  return r->group == r2->group;
3341}
3342
3343static void merge(upb_refcounted *r, upb_refcounted *from) {
3344  upb_refcounted *base;
3345  upb_refcounted *tmp;
3346
3347  if (merged(r, from)) return;
3348  *r->group += *from->group;
3349  free(from->group);
3350  base = from;
3351
3352  /* Set all refcount pointers in the "from" chain to the merged refcount.
3353   *
3354   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
3355   * if the user continuously extends a group by one object.  Prevent this by
3356   * using one of the techniques in this paper:
3357   *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
3358  do { from->group = r->group; } while ((from = from->next) != base);
3359
3360  /* Merge the two circularly linked lists by swapping their next pointers. */
3361  tmp = r->next;
3362  r->next = base->next;
3363  base->next = tmp;
3364}
3365
3366static void unref(const upb_refcounted *r);
3367
3368static void release_ref2(const upb_refcounted *obj,
3369                         const upb_refcounted *subobj,
3370                         void *closure) {
3371  UPB_UNUSED(closure);
3372  untrack(subobj, obj, true);
3373  if (!merged(obj, subobj)) {
3374    assert(subobj->is_frozen);
3375    unref(subobj);
3376  }
3377}
3378
3379static void unref(const upb_refcounted *r) {
3380  if (unrefgroup(r->group)) {
3381    const upb_refcounted *o;
3382
3383    free(r->group);
3384
3385    /* In two passes, since release_ref2 needs a guarantee that any subobjs
3386     * are alive. */
3387    o = r;
3388    do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3389
3390    o = r;
3391    do {
3392      const upb_refcounted *next = o->next;
3393      assert(o->is_frozen || o->individual_count == 0);
3394      freeobj((upb_refcounted*)o);
3395      o = next;
3396    } while(o != r);
3397  }
3398}
3399
3400static void freeobj(upb_refcounted *o) {
3401  trackfree(o);
3402  o->vtbl->free((upb_refcounted*)o);
3403}
3404
3405
3406/* Public interface ***********************************************************/
3407
3408bool upb_refcounted_init(upb_refcounted *r,
3409                         const struct upb_refcounted_vtbl *vtbl,
3410                         const void *owner) {
3411#ifndef NDEBUG
3412  /* Endianness check.  This is unrelated to upb_refcounted, it's just a
3413   * convenient place to put the check that we can be assured will run for
3414   * basically every program using upb. */
3415  const int x = 1;
3416#ifdef UPB_BIG_ENDIAN
3417  assert(*(char*)&x != 1);
3418#else
3419  assert(*(char*)&x == 1);
3420#endif
3421#endif
3422
3423  r->next = r;
3424  r->vtbl = vtbl;
3425  r->individual_count = 0;
3426  r->is_frozen = false;
3427  r->group = malloc(sizeof(*r->group));
3428  if (!r->group) return false;
3429  *r->group = 0;
3430  if (!trackinit(r)) {
3431    free(r->group);
3432    return false;
3433  }
3434  upb_refcounted_ref(r, owner);
3435  return true;
3436}
3437
3438bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3439  return r->is_frozen;
3440}
3441
3442void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3443  track(r, owner, false);
3444  if (!r->is_frozen)
3445    ((upb_refcounted*)r)->individual_count++;
3446  refgroup(r->group);
3447}
3448
3449void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3450  untrack(r, owner, false);
3451  if (!r->is_frozen)
3452    ((upb_refcounted*)r)->individual_count--;
3453  unref(r);
3454}
3455
3456void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
3457  assert(!from->is_frozen);  /* Non-const pointer implies this. */
3458  track(r, from, true);
3459  if (r->is_frozen) {
3460    refgroup(r->group);
3461  } else {
3462    merge((upb_refcounted*)r, from);
3463  }
3464}
3465
3466void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
3467  assert(!from->is_frozen);  /* Non-const pointer implies this. */
3468  untrack(r, from, true);
3469  if (r->is_frozen) {
3470    unref(r);
3471  } else {
3472    assert(merged(r, from));
3473  }
3474}
3475
3476void upb_refcounted_donateref(
3477    const upb_refcounted *r, const void *from, const void *to) {
3478  assert(from != to);
3479  if (to != NULL)
3480    upb_refcounted_ref(r, to);
3481  if (from != NULL)
3482    upb_refcounted_unref(r, from);
3483}
3484
3485void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
3486  checkref(r, owner, false);
3487}
3488
3489bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
3490                           int maxdepth) {
3491  int i;
3492  for (i = 0; i < n; i++) {
3493    assert(!roots[i]->is_frozen);
3494  }
3495  return freeze(roots, n, s, maxdepth);
3496}
3497
3498
3499#include <stdlib.h>
3500
/* Fallback implementation if the shim is not specialized by the JIT.
 *
 * SHIM_WRITER(type, ctype) defines upb_shim_set<type>(c, hd, val).  The
 * generated function treats the closure "c" as a byte buffer and the handler
 * data "hd" as a upb_shim_data.  If d->hasbit is greater than zero it sets
 * that bit in the buffer, then it stores "val" as a ctype at byte offset
 * d->offset, and always returns true.
 *
 * NOTE(review): a hasbit of 0 is skipped just like a negative one; confirm
 * that bit 0 is never meant to be a usable hasbit.
 * NOTE(review): the store goes through a cast pointer, so it presumably relies
 * on d->offset being suitably aligned for ctype -- confirm against callers. */
#define SHIM_WRITER(type, ctype)                                              \
  bool upb_shim_set ## type (void *c, const void *hd, ctype val) {            \
    uint8_t *m = c;                                                           \
    const upb_shim_data *d = hd;                                              \
    if (d->hasbit > 0)                                                        \
      *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
    *(ctype*)&m[d->offset] = val;                                             \
    return true;                                                              \
  }                                                                           \

/* One writer per scalar C type handled by upb_shim_set(). */
SHIM_WRITER(double, double)
SHIM_WRITER(float,  float)
SHIM_WRITER(int32,  int32_t)
SHIM_WRITER(int64,  int64_t)
SHIM_WRITER(uint32, uint32_t)
SHIM_WRITER(uint64, uint64_t)
SHIM_WRITER(bool,   bool)
#undef SHIM_WRITER
3520
3521bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3522                  int32_t hasbit) {
3523  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3524  bool ok;
3525
3526  upb_shim_data *d = malloc(sizeof(*d));
3527  if (!d) return false;
3528  d->offset = offset;
3529  d->hasbit = hasbit;
3530
3531  upb_handlerattr_sethandlerdata(&attr, d);
3532  upb_handlerattr_setalwaysok(&attr, true);
3533  upb_handlers_addcleanup(h, d, free);
3534
3535#define TYPE(u, l) \
3536  case UPB_TYPE_##u: \
3537    ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3538
3539  ok = false;
3540
3541  switch (upb_fielddef_type(f)) {
3542    TYPE(INT64,  int64);
3543    TYPE(INT32,  int32);
3544    TYPE(ENUM,   int32);
3545    TYPE(UINT64, uint64);
3546    TYPE(UINT32, uint32);
3547    TYPE(DOUBLE, double);
3548    TYPE(FLOAT,  float);
3549    TYPE(BOOL,   bool);
3550    default: assert(false); break;
3551  }
3552#undef TYPE
3553
3554  upb_handlerattr_uninit(&attr);
3555  return ok;
3556}
3557
3558const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3559                                      upb_fieldtype_t *type) {
3560  upb_func *f = upb_handlers_gethandler(h, s);
3561
3562  if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3563    *type = UPB_TYPE_INT64;
3564  } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3565    *type = UPB_TYPE_INT32;
3566  } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3567    *type = UPB_TYPE_UINT64;
3568  } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3569    *type = UPB_TYPE_UINT32;
3570  } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3571    *type = UPB_TYPE_DOUBLE;
3572  } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3573    *type = UPB_TYPE_FLOAT;
3574  } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3575    *type = UPB_TYPE_BOOL;
3576  } else {
3577    return NULL;
3578  }
3579
3580  return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3581}
3582
3583
3584#include <stdlib.h>
3585#include <string.h>
3586
3587static void upb_symtab_free(upb_refcounted *r) {
3588  upb_symtab *s = (upb_symtab*)r;
3589  upb_strtable_iter i;
3590  upb_strtable_begin(&i, &s->symtab);
3591  for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
3592    const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
3593    upb_def_unref(def, s);
3594  }
3595  upb_strtable_uninit(&s->symtab);
3596  free(s);
3597}
3598
3599
3600upb_symtab *upb_symtab_new(const void *owner) {
3601  static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3602  upb_symtab *s = malloc(sizeof(*s));
3603  upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
3604  upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3605  return s;
3606}
3607
3608void upb_symtab_freeze(upb_symtab *s) {
3609  upb_refcounted *r;
3610  bool ok;
3611
3612  assert(!upb_symtab_isfrozen(s));
3613  r = upb_symtab_upcast_mutable(s);
3614  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
3615   * defs cannot refer back to the table and therefore cannot create cycles.  So
3616   * 0 will suffice for maxdepth here. */
3617  ok = upb_refcounted_freeze(&r, 1, NULL, 0);
3618  UPB_ASSERT_VAR(ok, ok);
3619}
3620
3621const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3622  upb_value v;
3623  upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3624      upb_value_getptr(v) : NULL;
3625  return ret;
3626}
3627
3628const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3629  upb_value v;
3630  upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3631      upb_value_getptr(v) : NULL;
3632  return def ? upb_dyncast_msgdef(def) : NULL;
3633}
3634
3635const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3636  upb_value v;
3637  upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3638      upb_value_getptr(v) : NULL;
3639  return def ? upb_dyncast_enumdef(def) : NULL;
3640}
3641
3642/* Given a symbol and the base symbol inside which it is defined, find the
3643 * symbol's definition in t. */
3644static upb_def *upb_resolvename(const upb_strtable *t,
3645                                const char *base, const char *sym) {
3646  if(strlen(sym) == 0) return NULL;
3647  if(sym[0] == '.') {
3648    /* Symbols starting with '.' are absolute, so we do a single lookup.
3649     * Slice to omit the leading '.' */
3650    upb_value v;
3651    return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3652  } else {
3653    /* Remove components from base until we find an entry or run out.
3654     * TODO: This branch is totally broken, but currently not used. */
3655    (void)base;
3656    assert(false);
3657    return NULL;
3658  }
3659}
3660
3661const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3662                                  const char *sym) {
3663  upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3664  return ret;
3665}
3666
3667/* Starts a depth-first traversal at "def", recursing into any subdefs
3668 * (ie. submessage types).  Adds duplicates of existing defs to addtab
3669 * wherever necessary, so that the resulting symtab will be consistent once
3670 * addtab is added.
3671 *
3672 * More specifically, if any def D is found in the DFS that:
3673 *
3674 *   1. can reach a def that is being replaced by something in addtab, AND
3675 *
3676 *   2. is not itself being replaced already (ie. this name doesn't already
3677 *      exist in addtab)
3678 *
3679 * ...then a duplicate (new copy) of D will be added to addtab.
3680 *
3681 * Returns true if this happened for any def reachable from "def."
3682 *
3683 * It is slightly tricky to do this correctly in the presence of cycles.  If we
3684 * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3685 * our stack can reach a def in addtab or not.  Once we figure this out, that
3686 * answer needs to apply to *all* defs in these SCCs, even if we visited them
3687 * already.  So a straight up one-pass cycle-detecting DFS won't work.
3688 *
3689 * To work around this problem, we traverse each SCC (which we already
3690 * computed, since these defs are frozen) as a single node.  We first compute
3691 * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3692 * the entire SCC.  This requires breaking the encapsulation of upb_refcounted,
3693 * since that is where we get the data about what SCC we are in. */
3694static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3695                            const void *new_owner, upb_inttable *seen,
3696                            upb_status *s) {
3697  upb_value v;
3698  bool need_dup;
3699  const upb_def *base;
3700  const void* memoize_key;
3701
3702  /* Memoize results of this function for efficiency (since we're traversing a
3703   * DAG this is not needed to limit the depth of the search).
3704   *
3705   * We memoize by SCC instead of by individual def. */
3706  memoize_key = def->base.group;
3707
3708  if (upb_inttable_lookupptr(seen, memoize_key, &v))
3709    return upb_value_getbool(v);
3710
3711  /* Visit submessages for all messages in the SCC. */
3712  need_dup = false;
3713  base = def;
3714  do {
3715    upb_value v;
3716    const upb_msgdef *m;
3717
3718    assert(upb_def_isfrozen(def));
3719    if (def->type == UPB_DEF_FIELD) continue;
3720    if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3721      need_dup = true;
3722    }
3723
3724    /* For messages, continue the recursion by visiting all subdefs, but only
3725     * ones in different SCCs. */
3726    m = upb_dyncast_msgdef(def);
3727    if (m) {
3728      upb_msg_field_iter i;
3729      for(upb_msg_field_begin(&i, m);
3730          !upb_msg_field_done(&i);
3731          upb_msg_field_next(&i)) {
3732        upb_fielddef *f = upb_msg_iter_field(&i);
3733        const upb_def *subdef;
3734
3735        if (!upb_fielddef_hassubdef(f)) continue;
3736        subdef = upb_fielddef_subdef(f);
3737
3738        /* Skip subdefs in this SCC. */
3739        if (def->base.group == subdef->base.group) continue;
3740
3741        /* |= to avoid short-circuit; we need its side-effects. */
3742        need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
3743        if (!upb_ok(s)) return false;
3744      }
3745    }
3746  } while ((def = (upb_def*)def->base.next) != base);
3747
3748  if (need_dup) {
3749    /* Dup all defs in this SCC that don't already have entries in addtab. */
3750    def = base;
3751    do {
3752      const char *name;
3753
3754      if (def->type == UPB_DEF_FIELD) continue;
3755      name = upb_def_fullname(def);
3756      if (!upb_strtable_lookup(addtab, name, NULL)) {
3757        upb_def *newdef = upb_def_dup(def, new_owner);
3758        if (!newdef) goto oom;
3759        newdef->came_from_user = false;
3760        if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3761          goto oom;
3762      }
3763    } while ((def = (upb_def*)def->base.next) != base);
3764  }
3765
3766  upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
3767  return need_dup;
3768
3769oom:
3770  upb_status_seterrmsg(s, "out of memory");
3771  return false;
3772}
3773
3774/* TODO(haberman): we need a lot more testing of error conditions.
3775 * The came_from_user stuff in particular is not tested. */
3776bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
3777                    upb_status *status) {
3778  int i;
3779  upb_strtable_iter iter;
3780  upb_def **add_defs = NULL;
3781  upb_strtable addtab;
3782  upb_inttable seen;
3783
3784  assert(!upb_symtab_isfrozen(s));
3785  if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3786    upb_status_seterrmsg(status, "out of memory");
3787    return false;
3788  }
3789
3790  /* Add new defs to our "add" set. */
3791  for (i = 0; i < n; i++) {
3792    upb_def *def = defs[i];
3793    const char *fullname;
3794    upb_fielddef *f;
3795
3796    if (upb_def_isfrozen(def)) {
3797      upb_status_seterrmsg(status, "added defs must be mutable");
3798      goto err;
3799    }
3800    assert(!upb_def_isfrozen(def));
3801    fullname = upb_def_fullname(def);
3802    if (!fullname) {
3803      upb_status_seterrmsg(
3804          status, "Anonymous defs cannot be added to a symtab");
3805      goto err;
3806    }
3807
3808    f = upb_dyncast_fielddef_mutable(def);
3809
3810    if (f) {
3811      if (!upb_fielddef_containingtypename(f)) {
3812        upb_status_seterrmsg(status,
3813                             "Standalone fielddefs must have a containing type "
3814                             "(extendee) name set");
3815        goto err;
3816      }
3817    } else {
3818      if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3819        upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3820        goto err;
3821      }
3822      /* We need this to back out properly, because if there is a failure we
3823       * need to donate the ref back to the caller. */
3824      def->came_from_user = true;
3825      upb_def_donateref(def, ref_donor, s);
3826      if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3827        goto oom_err;
3828    }
3829  }
3830
3831  /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
3832   * If the appropriate message only exists in the existing symtab, duplicate
3833   * it so we have a mutable copy we can add the fields to. */
3834  for (i = 0; i < n; i++) {
3835    upb_def *def = defs[i];
3836    upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
3837    const char *msgname;
3838    upb_value v;
3839    upb_msgdef *m;
3840
3841    if (!f) continue;
3842    msgname = upb_fielddef_containingtypename(f);
3843    /* We validated this earlier in this function. */
3844    assert(msgname);
3845
3846    /* If the extendee name is absolutely qualified, move past the initial ".".
3847     * TODO(haberman): it is not obvious what it would mean if this was not
3848     * absolutely qualified. */
3849    if (msgname[0] == '.') {
3850      msgname++;
3851    }
3852
3853    if (upb_strtable_lookup(&addtab, msgname, &v)) {
3854      /* Extendee is in the set of defs the user asked us to add. */
3855      m = upb_value_getptr(v);
3856    } else {
3857      /* Need to find and dup the extendee from the existing symtab. */
3858      const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
3859      if (!frozen_m) {
3860        upb_status_seterrf(status,
3861                           "Tried to extend message %s that does not exist "
3862                           "in this SymbolTable.",
3863                           msgname);
3864        goto err;
3865      }
3866      m = upb_msgdef_dup(frozen_m, s);
3867      if (!m) goto oom_err;
3868      if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
3869        upb_msgdef_unref(m, s);
3870        goto oom_err;
3871      }
3872    }
3873
3874    if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
3875      goto err;
3876    }
3877  }
3878
3879  /* Add dups of any existing def that can reach a def with the same name as
3880   * anything in our "add" set. */
3881  if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
3882  upb_strtable_begin(&iter, &s->symtab);
3883  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3884    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3885    upb_resolve_dfs(def, &addtab, s, &seen, status);
3886    if (!upb_ok(status)) goto err;
3887  }
3888  upb_inttable_uninit(&seen);
3889
3890  /* Now using the table, resolve symbolic references for subdefs. */
3891  upb_strtable_begin(&iter, &addtab);
3892  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3893    const char *base;
3894    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3895    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
3896    upb_msg_field_iter j;
3897
3898    if (!m) continue;
3899    /* Type names are resolved relative to the message in which they appear. */
3900    base = upb_msgdef_fullname(m);
3901
3902    for(upb_msg_field_begin(&j, m);
3903        !upb_msg_field_done(&j);
3904        upb_msg_field_next(&j)) {
3905      upb_fielddef *f = upb_msg_iter_field(&j);
3906      const char *name = upb_fielddef_subdefname(f);
3907      if (name && !upb_fielddef_subdef(f)) {
3908        /* Try the lookup in the current set of to-be-added defs first. If not
3909         * there, try existing defs. */
3910        upb_def *subdef = upb_resolvename(&addtab, base, name);
3911        if (subdef == NULL) {
3912          subdef = upb_resolvename(&s->symtab, base, name);
3913        }
3914        if (subdef == NULL) {
3915          upb_status_seterrf(
3916              status, "couldn't resolve name '%s' in message '%s'", name, base);
3917          goto err;
3918        } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
3919          goto err;
3920        }
3921      }
3922    }
3923  }
3924
3925  /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
3926  add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
3927  if (add_defs == NULL) goto oom_err;
3928  upb_strtable_begin(&iter, &addtab);
3929  for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3930    add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
3931  }
3932
3933  if (!upb_def_freeze(add_defs, n, status)) goto err;
3934
3935  /* This must be delayed until all errors have been detected, since error
3936   * recovery code uses this table to cleanup defs. */
3937  upb_strtable_uninit(&addtab);
3938
3939  /* TODO(haberman) we don't properly handle errors after this point (like
3940   * OOM in upb_strtable_insert() below). */
3941  for (i = 0; i < n; i++) {
3942    upb_def *def = add_defs[i];
3943    const char *name = upb_def_fullname(def);
3944    upb_value v;
3945    bool success;
3946
3947    if (upb_strtable_remove(&s->symtab, name, &v)) {
3948      const upb_def *def = upb_value_getptr(v);
3949      upb_def_unref(def, s);
3950    }
3951    success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
3952    UPB_ASSERT_VAR(success, success == true);
3953  }
3954  free(add_defs);
3955  return true;
3956
3957oom_err:
3958  upb_status_seterrmsg(status, "out of memory");
3959err: {
3960    /* For defs the user passed in, we need to donate the refs back.  For defs
3961     * we dup'd, we need to just unref them. */
3962    upb_strtable_begin(&iter, &addtab);
3963    for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3964      upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3965      bool came_from_user = def->came_from_user;
3966      def->came_from_user = false;
3967      if (came_from_user) {
3968        upb_def_donateref(def, s, ref_donor);
3969      } else {
3970        upb_def_unref(def, s);
3971      }
3972    }
3973  }
3974  upb_strtable_uninit(&addtab);
3975  free(add_defs);
3976  assert(!upb_ok(status));
3977  return false;
3978}
3979
3980/* Iteration. */
3981
3982static void advance_to_matching(upb_symtab_iter *iter) {
3983  if (iter->type == UPB_DEF_ANY)
3984    return;
3985
3986  while (!upb_strtable_done(&iter->iter) &&
3987         iter->type != upb_symtab_iter_def(iter)->type) {
3988    upb_strtable_next(&iter->iter);
3989  }
3990}
3991
3992void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
3993                      upb_deftype_t type) {
3994  upb_strtable_begin(&iter->iter, &s->symtab);
3995  iter->type = type;
3996  advance_to_matching(iter);
3997}
3998
3999void upb_symtab_next(upb_symtab_iter *iter) {
4000  upb_strtable_next(&iter->iter);
4001  advance_to_matching(iter);
4002}
4003
4004bool upb_symtab_done(const upb_symtab_iter *iter) {
4005  return upb_strtable_done(&iter->iter);
4006}
4007
4008const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4009  return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4010}
4011/*
4012** upb_table Implementation
4013**
4014** Implementation is heavily inspired by Lua's ltable.c.
4015*/
4016
4017
4018#include <stdlib.h>
4019#include <string.h>
4020
/* Upper bound on the value returned by log2ceil(), i.e. a log2 size cap. */
#define UPB_MAXARRSIZE 16  /* 64k. */

/* From Chromium.
 *
 * Number of elements in the array x.  The second divisor evaluates to zero
 * when sizeof(x) is not a multiple of the element size, turning such misuse
 * into a division by zero. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* A hash table counts as full (see isfull()) once adding one more entry would
 * push count/size above this ratio. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
4033
/* Returns true if v has at most one bit set.  Note that 0 is reported as a
 * power of two. */
bool is_pow2(uint64_t v) {
  if (v == 0) {
    return true;
  }
  return (v & (v - 1)) == 0;
}
4035
4036int log2ceil(uint64_t v) {
4037  int ret = 0;
4038  bool pow2 = is_pow2(v);
4039  while (v >>= 1) ret++;
4040  ret = pow2 ? ret : ret + 1;  /* Ceiling. */
4041  return UPB_MIN(UPB_MAXARRSIZE, ret);
4042}
4043
/* Returns a newly malloc()'d copy of the NUL-terminated string s, or NULL on
 * allocation failure.  The caller owns the result and must free() it. */
char *upb_strdup(const char *s) {
  size_t len = strlen(s);
  return upb_strdup2(s, len);
}
4047
/* Returns a newly malloc()'d buffer holding the first len bytes of s followed
 * by a NUL byte.  s may be raw binary data and need not be NUL-terminated.
 * Returns NULL if len is SIZE_MAX or if allocation fails.  The caller owns the
 * result and must free() it. */
char *upb_strdup2(const char *s, size_t len) {
  char *copy;

  /* len + 1 would wrap around to zero. */
  if (len == SIZE_MAX) return NULL;

  copy = malloc(len + 1);
  if (copy == NULL) return NULL;

  /* Copy exactly len bytes and terminate ourselves instead of trusting the
   * input to carry a terminator. */
  memcpy(copy, s, len);
  copy[len] = 0;
  return copy;
}
4064
/* A type to represent the lookup key of either a strtable or an inttable.
 * Integer-keyed lookups fill in "num" (see intkey()); string-keyed lookups
 * fill in "str" (see strkey2()). */
typedef union {
  uintptr_t num;      /* Integer key. */
  struct {
    const char *str;  /* Key bytes; only the pointer is stored here. */
    size_t len;       /* Number of bytes in the key. */
  } str;
} lookupkey_t;
4073
4074static lookupkey_t strkey2(const char *str, size_t len) {
4075  lookupkey_t k;
4076  k.str.str = str;
4077  k.str.len = len;
4078  return k;
4079}
4080
4081static lookupkey_t intkey(uintptr_t key) {
4082  lookupkey_t k;
4083  k.num = key;
4084  return k;
4085}
4086
/* Computes the 32-bit hash of a key as it is stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Returns true if the stored key k1 matches the lookup key k2. */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4089
4090/* Base table (shared code) ***************************************************/
4091
4092/* For when we need to cast away const. */
4093static upb_tabent *mutable_entries(upb_table *t) {
4094  return (upb_tabent*)t->entries;
4095}
4096
4097static bool isfull(upb_table *t) {
4098  return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
4099}
4100
4101static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
4102  size_t bytes;
4103
4104  t->count = 0;
4105  t->ctype = ctype;
4106  t->size_lg2 = size_lg2;
4107  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
4108  bytes = upb_table_size(t) * sizeof(upb_tabent);
4109  if (bytes > 0) {
4110    t->entries = malloc(bytes);
4111    if (!t->entries) return false;
4112    memset(mutable_entries(t), 0, bytes);
4113  } else {
4114    t->entries = NULL;
4115  }
4116  return true;
4117}
4118
4119static void uninit(upb_table *t) { free(mutable_entries(t)); }
4120
4121static upb_tabent *emptyent(upb_table *t) {
4122  upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4123  while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4124}
4125
4126static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4127  return (upb_tabent*)upb_getentry(t, hash);
4128}
4129
4130static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4131                                   uint32_t hash, eqlfunc_t *eql) {
4132  const upb_tabent *e;
4133
4134  if (t->size_lg2 == 0) return NULL;
4135  e = upb_getentry(t, hash);
4136  if (upb_tabent_isempty(e)) return NULL;
4137  while (1) {
4138    if (eql(e->key, key)) return e;
4139    if ((e = e->next) == NULL) return NULL;
4140  }
4141}
4142
4143static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4144                                     uint32_t hash, eqlfunc_t *eql) {
4145  return (upb_tabent*)findentry(t, key, hash, eql);
4146}
4147
4148static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
4149                   uint32_t hash, eqlfunc_t *eql) {
4150  const upb_tabent *e = findentry(t, key, hash, eql);
4151  if (e) {
4152    if (v) {
4153      _upb_value_setval(v, e->val.val, t->ctype);
4154    }
4155    return true;
4156  } else {
4157    return false;
4158  }
4159}
4160
/* Inserts a new entry into the table.  The given key must not already exist in
 * the table (asserted), and the table must have at least one empty slot.
 *
 *   key:      lookup form of the key; only used by the assertions.
 *   tabkey:   the key as it will be stored in the entry.
 *   val:      value to store; its ctype must match the table's (asserted).
 *   hash:     hash of the new key.
 *   hashfunc: rehashes a stored key, to find where a colliding entry belongs.
 *   eql:      key equality; only used by the assertions. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  /* key and eql are only read inside assert(), which may compile to nothing. */
  UPB_UNUSED(eql);
  UPB_UNUSED(key);
  assert(findentry(t, key, hash, eql) == NULL);
  assert(val.ctype == t->ctype);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e;  /* copies next. */
      /* Find the evicted entry's predecessor so it can be re-linked to the
       * entry's new location. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        assert(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  our_e->key = tabkey;
  our_e->val.val = val.val;
  assert(findentry(t, key, hash, eql) == our_e);
}
4209
4210static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
4211               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
4212  upb_tabent *chain = getentry_mutable(t, hash);
4213  if (upb_tabent_isempty(chain)) return false;
4214  if (eql(chain->key, key)) {
4215    /* Element to remove is at the head of its chain. */
4216    t->count--;
4217    if (val) {
4218      _upb_value_setval(val, chain->val.val, t->ctype);
4219    }
4220    if (chain->next) {
4221      upb_tabent *move = (upb_tabent*)chain->next;
4222      *chain = *move;
4223      if (removed) *removed = move->key;
4224      move->key = 0;  /* Make the slot empty. */
4225    } else {
4226      if (removed) *removed = chain->key;
4227      chain->key = 0;  /* Make the slot empty. */
4228    }
4229    return true;
4230  } else {
4231    /* Element to remove is either in a non-head position or not in the
4232     * table. */
4233    while (chain->next && !eql(chain->next->key, key))
4234      chain = (upb_tabent*)chain->next;
4235    if (chain->next) {
4236      /* Found element to remove. */
4237      upb_tabent *rm;
4238
4239      if (val) {
4240        _upb_value_setval(val, chain->next->val.val, t->ctype);
4241      }
4242      rm = (upb_tabent*)chain->next;
4243      if (removed) *removed = rm->key;
4244      rm->key = 0;
4245      chain->next = rm->next;
4246      t->count--;
4247      return true;
4248    } else {
4249      return false;
4250    }
4251  }
4252}
4253
4254static size_t next(const upb_table *t, size_t i) {
4255  do {
4256    if (++i >= upb_table_size(t))
4257      return SIZE_MAX;
4258  } while(upb_tabent_isempty(&t->entries[i]));
4259
4260  return i;
4261}
4262
4263static size_t begin(const upb_table *t) {
4264  return next(t, -1);
4265}
4266
4267
4268/* upb_strtable ***************************************************************/
4269
4270/* A simple "subclass" of upb_table that only adds a hash function for strings. */
4271
4272static upb_tabkey strcopy(lookupkey_t k2) {
4273  char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
4274  if (str == NULL) return 0;
4275  memcpy(str, &k2.str.len, sizeof(uint32_t));
4276  memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4277  return (uintptr_t)str;
4278}
4279
4280static uint32_t strhash(upb_tabkey key) {
4281  uint32_t len;
4282  char *str = upb_tabstr(key, &len);
4283  return MurmurHash2(str, len, 0);
4284}
4285
4286static bool streql(upb_tabkey k1, lookupkey_t k2) {
4287  uint32_t len;
4288  char *str = upb_tabstr(k1, &len);
4289  return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
4290}
4291
4292bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
4293  return init(&t->t, ctype, 2);
4294}
4295
4296void upb_strtable_uninit(upb_strtable *t) {
4297  size_t i;
4298  for (i = 0; i < upb_table_size(&t->t); i++)
4299    free((void*)t->t.entries[i].key);
4300  uninit(&t->t);
4301}
4302
4303bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
4304  upb_strtable new_table;
4305  upb_strtable_iter i;
4306
4307  if (!init(&new_table.t, t->t.ctype, size_lg2))
4308    return false;
4309  upb_strtable_begin(&i, t);
4310  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
4311    upb_strtable_insert2(
4312        &new_table,
4313        upb_strtable_iter_key(&i),
4314        upb_strtable_iter_keylength(&i),
4315        upb_strtable_iter_value(&i));
4316  }
4317  upb_strtable_uninit(t);
4318  *t = new_table;
4319  return true;
4320}
4321
4322bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
4323                          upb_value v) {
4324  lookupkey_t key;
4325  upb_tabkey tabkey;
4326  uint32_t hash;
4327
4328  if (isfull(&t->t)) {
4329    /* Need to resize.  New table of double the size, add old elements to it. */
4330    if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
4331      return false;
4332    }
4333  }
4334
4335  key = strkey2(k, len);
4336  tabkey = strcopy(key);
4337  if (tabkey == 0) return false;
4338
4339  hash = MurmurHash2(key.str.str, key.str.len, 0);
4340  insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
4341  return true;
4342}
4343
4344bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
4345                          upb_value *v) {
4346  uint32_t hash = MurmurHash2(key, len, 0);
4347  return lookup(&t->t, strkey2(key, len), v, hash, &streql);
4348}
4349
4350bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
4351                         upb_value *val) {
4352  uint32_t hash = MurmurHash2(key, strlen(key), 0);
4353  upb_tabkey tabkey;
4354  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
4355    free((void*)tabkey);
4356    return true;
4357  } else {
4358    return false;
4359  }
4360}
4361
4362/* Iteration */
4363
4364static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4365  return &i->t->t.entries[i->index];
4366}
4367
4368void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
4369  i->t = t;
4370  i->index = begin(&t->t);
4371}
4372
4373void upb_strtable_next(upb_strtable_iter *i) {
4374  i->index = next(&i->t->t, i->index);
4375}
4376
4377bool upb_strtable_done(const upb_strtable_iter *i) {
4378  return i->index >= upb_table_size(&i->t->t) ||
4379         upb_tabent_isempty(str_tabent(i));
4380}
4381
4382const char *upb_strtable_iter_key(upb_strtable_iter *i) {
4383  assert(!upb_strtable_done(i));
4384  return upb_tabstr(str_tabent(i)->key, NULL);
4385}
4386
4387size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
4388  uint32_t len;
4389  assert(!upb_strtable_done(i));
4390  upb_tabstr(str_tabent(i)->key, &len);
4391  return len;
4392}
4393
4394upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4395  assert(!upb_strtable_done(i));
4396  return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
4397}
4398
4399void upb_strtable_iter_setdone(upb_strtable_iter *i) {
4400  i->index = SIZE_MAX;
4401}
4402
4403bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4404                               const upb_strtable_iter *i2) {
4405  if (upb_strtable_done(i1) && upb_strtable_done(i2))
4406    return true;
4407  return i1->t == i2->t && i1->index == i2->index;
4408}
4409
4410
4411/* upb_inttable ***************************************************************/
4412
4413/* For inttables we use a hybrid structure where small keys are kept in an
4414 * array and large keys are put in the hash table. */
4415
4416static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
4417
4418static bool inteql(upb_tabkey k1, lookupkey_t k2) {
4419  return k1 == k2.num;
4420}
4421
4422static upb_tabval *mutable_array(upb_inttable *t) {
4423  return (upb_tabval*)t->array;
4424}
4425
4426static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
4427  if (key < t->array_size) {
4428    return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4429  } else {
4430    upb_tabent *e =
4431        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4432    return e ? &e->val : NULL;
4433  }
4434}
4435
4436static const upb_tabval *inttable_val_const(const upb_inttable *t,
4437                                            uintptr_t key) {
4438  return inttable_val((upb_inttable*)t, key);
4439}
4440
4441size_t upb_inttable_count(const upb_inttable *t) {
4442  return t->t.count + t->array_count;
4443}
4444
4445static void check(upb_inttable *t) {
4446  UPB_UNUSED(t);
4447#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
4448  {
4449    /* This check is very expensive (makes inserts/deletes O(N)). */
4450    size_t count = 0;
4451    upb_inttable_iter i;
4452    upb_inttable_begin(&i, t);
4453    for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
4454      assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
4455    }
4456    assert(count == upb_inttable_count(t));
4457  }
4458#endif
4459}
4460
4461bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
4462                            size_t asize, int hsize_lg2) {
4463  size_t array_bytes;
4464
4465  if (!init(&t->t, ctype, hsize_lg2)) return false;
4466  /* Always make the array part at least 1 long, so that we know key 0
4467   * won't be in the hash part, which simplifies things. */
4468  t->array_size = UPB_MAX(1, asize);
4469  t->array_count = 0;
4470  array_bytes = t->array_size * sizeof(upb_value);
4471  t->array = malloc(array_bytes);
4472  if (!t->array) {
4473    uninit(&t->t);
4474    return false;
4475  }
4476  memset(mutable_array(t), 0xff, array_bytes);
4477  check(t);
4478  return true;
4479}
4480
4481bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
4482  return upb_inttable_sizedinit(t, ctype, 0, 4);
4483}
4484
4485void upb_inttable_uninit(upb_inttable *t) {
4486  uninit(&t->t);
4487  free(mutable_array(t));
4488}
4489
/* Inserts val under the integer key, which must not already be present.
 * Keys below array_size go into the array part; all other keys go into the
 * hash part, which is grown first if it is full.  Returns false if growing the
 * hash part fails for lack of memory; the table is then unchanged. */
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
  /* XXX: Table can't store value (uint64_t)-1.  Need to somehow statically
   * guarantee that this is not necessary, or fix the limitation. */
  upb_tabval tabval;
  tabval.val = val.val;
  /* tabval is only read by the assert below. */
  UPB_UNUSED(tabval);
  assert(upb_arrhas(tabval));

  if (key < t->array_size) {
    assert(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
  } else {
    if (isfull(&t->t)) {
      /* Need to resize the hash part, but we re-use the array part. */
      size_t i;
      upb_table new_table;
      if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1))
        return false;
      /* Re-insert every existing hash entry into the larger table.  Integer
       * keys are stored by value, so nothing has to be copied or freed. */
      for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
        const upb_tabent *e = &t->t.entries[i];
        uint32_t hash;
        upb_value v;

        _upb_value_setval(&v, e->val.val, t->t.ctype);
        hash = upb_inthash(e->key);
        insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
      }

      assert(t->t.count == new_table.count);

      /* Swap the new hash part in for the old one. */
      uninit(&t->t);
      t->t = new_table;
    }
    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  }
  check(t);
  return true;
}
4529
4530bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
4531  const upb_tabval *table_v = inttable_val_const(t, key);
4532  if (!table_v) return false;
4533  if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
4534  return true;
4535}
4536
4537bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
4538  upb_tabval *table_v = inttable_val(t, key);
4539  if (!table_v) return false;
4540  table_v->val = val.val;
4541  return true;
4542}
4543
4544bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
4545  bool success;
4546  if (key < t->array_size) {
4547    if (upb_arrhas(t->array[key])) {
4548      upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
4549      t->array_count--;
4550      if (val) {
4551        _upb_value_setval(val, t->array[key].val, t->t.ctype);
4552      }
4553      mutable_array(t)[key] = empty;
4554      success = true;
4555    } else {
4556      success = false;
4557    }
4558  } else {
4559    upb_tabkey removed;
4560    uint32_t hash = upb_inthash(key);
4561    success = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
4562  }
4563  check(t);
4564  return success;
4565}
4566
4567bool upb_inttable_push(upb_inttable *t, upb_value val) {
4568  return upb_inttable_insert(t, upb_inttable_count(t), val);
4569}
4570
4571upb_value upb_inttable_pop(upb_inttable *t) {
4572  upb_value val;
4573  bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4574  UPB_ASSERT_VAR(ok, ok);
4575  return val;
4576}
4577
4578bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
4579  return upb_inttable_insert(t, (uintptr_t)key, val);
4580}
4581
4582bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
4583                            upb_value *v) {
4584  return upb_inttable_lookup(t, (uintptr_t)key, v);
4585}
4586
4587bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
4588  return upb_inttable_remove(t, (uintptr_t)key, val);
4589}
4590
4591void upb_inttable_compact(upb_inttable *t) {
4592  /* Create a power-of-two histogram of the table keys. */
4593  int counts[UPB_MAXARRSIZE + 1] = {0};
4594  uintptr_t max_key = 0;
4595  upb_inttable_iter i;
4596  size_t arr_size;
4597  int arr_count;
4598  upb_inttable new_t;
4599
4600  upb_inttable_begin(&i, t);
4601  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4602    uintptr_t key = upb_inttable_iter_key(&i);
4603    if (key > max_key) {
4604      max_key = key;
4605    }
4606    counts[log2ceil(key)]++;
4607  }
4608
4609  arr_size = 1;
4610  arr_count = upb_inttable_count(t);
4611
4612  if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
4613    /* We can put 100% of the entries in the array part. */
4614    arr_size = max_key + 1;
4615  } else {
4616    /* Find the largest power of two that satisfies the MIN_DENSITY
4617     * definition. */
4618    int size_lg2;
4619    for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
4620      arr_size = 1 << size_lg2;
4621      arr_count -= counts[size_lg2];
4622      if (arr_count >= arr_size * MIN_DENSITY) {
4623        break;
4624      }
4625    }
4626  }
4627
4628  /* Array part must always be at least 1 entry large to catch lookups of key
4629   * 0.  Key 0 must always be in the array part because "0" in the hash part
4630   * denotes an empty entry. */
4631  arr_size = UPB_MAX(arr_size, 1);
4632
4633  {
4634    /* Insert all elements into new, perfectly-sized table. */
4635    int hash_count = upb_inttable_count(t) - arr_count;
4636    int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
4637    int hashsize_lg2 = log2ceil(hash_size);
4638
4639    assert(hash_count >= 0);
4640    upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2);
4641    upb_inttable_begin(&i, t);
4642    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4643      uintptr_t k = upb_inttable_iter_key(&i);
4644      upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i));
4645    }
4646    assert(new_t.array_size == arr_size);
4647    assert(new_t.t.size_lg2 == hashsize_lg2);
4648  }
4649  upb_inttable_uninit(t);
4650  *t = new_t;
4651}
4652
4653/* Iteration. */
4654
4655static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4656  assert(!i->array_part);
4657  return &i->t->t.entries[i->index];
4658}
4659
4660static upb_tabval int_arrent(const upb_inttable_iter *i) {
4661  assert(i->array_part);
4662  return i->t->array[i->index];
4663}
4664
4665void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
4666  i->t = t;
4667  i->index = -1;
4668  i->array_part = true;
4669  upb_inttable_next(i);
4670}
4671
4672void upb_inttable_next(upb_inttable_iter *iter) {
4673  const upb_inttable *t = iter->t;
4674  if (iter->array_part) {
4675    while (++iter->index < t->array_size) {
4676      if (upb_arrhas(int_arrent(iter))) {
4677        return;
4678      }
4679    }
4680    iter->array_part = false;
4681    iter->index = begin(&t->t);
4682  } else {
4683    iter->index = next(&t->t, iter->index);
4684  }
4685}
4686
4687bool upb_inttable_done(const upb_inttable_iter *i) {
4688  if (i->array_part) {
4689    return i->index >= i->t->array_size ||
4690           !upb_arrhas(int_arrent(i));
4691  } else {
4692    return i->index >= upb_table_size(&i->t->t) ||
4693           upb_tabent_isempty(int_tabent(i));
4694  }
4695}
4696
4697uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4698  assert(!upb_inttable_done(i));
4699  return i->array_part ? i->index : int_tabent(i)->key;
4700}
4701
4702upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4703  assert(!upb_inttable_done(i));
4704  return _upb_value_val(
4705      i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
4706      i->t->t.ctype);
4707}
4708
4709void upb_inttable_iter_setdone(upb_inttable_iter *i) {
4710  i->index = SIZE_MAX;
4711  i->array_part = false;
4712}
4713
4714bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4715                                          const upb_inttable_iter *i2) {
4716  if (upb_inttable_done(i1) && upb_inttable_done(i2))
4717    return true;
4718  return i1->t == i2->t && i1->index == i2->index &&
4719         i1->array_part == i2->array_part;
4720}
4721
4722#ifdef UPB_UNALIGNED_READS_OK
/* -----------------------------------------------------------------------------
 * MurmurHash2, by Austin Appleby (released as public domain).
 * Reformatted and C99-ified by Joshua Haberman.
 *
 * Hashes the |len| bytes at |key|, starting from |seed|, and returns the
 * 32-bit result.
 *
 * Notes:
 *   1. Input is consumed as native-endian 32-bit words, so it will not produce
 *      the same results on little-endian and big-endian machines.
 *   2. Words are loaded with memcpy(), so |key| may have any alignment and no
 *      object is read through an incompatible pointer type.
 *   3. The original sizeof(int) == 4 limitation is removed by using uint32_t.
 *   4. It will not work incrementally. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value.  Only the low 32 bits of |len|
   * take part, which is what the former implicit conversion produced. */
  uint32_t h = seed ^ (uint32_t)len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t *data = (const uint8_t *)key;
  while (len >= 4) {
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch (len) {
    case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m; break;
    default: break;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
4773
4774#else /* !UPB_UNALIGNED_READS_OK */
4775
4776/* -----------------------------------------------------------------------------
4777 * MurmurHashAligned2, by Austin Appleby
4778 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
4779 * on certain platforms.
4780 * Performance will be lower than MurmurHash2 */
4781
/* Folds the 32-bit word k into the running hash h using multiplier m.
 * Note that the expansion also uses a variable named `r` that must be in scope
 * at the point of use, and that it modifies k. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* Hashes the |len| bytes at |key|, starting from |seed|, and returns the
 * 32-bit result.
 *
 * When |key| is not 4-byte aligned and at least 4 bytes long, the input is
 * read one aligned word at a time and the words the hash needs are
 * reassembled by shifting across two neighbouring reads (t = previous word,
 * d = current word).  Otherwise words are read directly. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  /* Offset of |data| within its 4-byte word (0 means already aligned). */
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Gather the (4 - align) bytes that precede the next aligned address.
     * Every case intentionally falls through to the ones below it. */
    switch(align) {
      case 1: t |= data[2] << 16;
      case 2: t |= data[1] << 8;
      case 3: t |= data[0];
    }

    /* Pre-shift so that the `t >> sr` below brings these bytes back down to
     * the low end of the word. */
    t <<= (8 * align);

    /* |data| is 4-byte aligned from here on. */
    data += 4-align;
    len -= 4-align;

    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      /* Aligned read; combine the carried-over bytes in t with the low bytes
       * of the new word, then carry the new word forward. */
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      /* Enough bytes remain to complete one more full word with the bytes
       * still held in t. */
      uint32_t k;

      /* Intentional fallthrough: read exactly |align| bytes. */
      switch(align) {
        case 3: d |= data[2] << 16;
        case 2: d |= data[1] << 8;
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      /* Intentional fallthrough; nothing is mixed when len == 0. */
      switch(len) {
        case 3: h ^= data[2] << 16;
        case 2: h ^= data[1] << 8;
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      /* Fewer than |align| bytes remain: merge them with the bytes held in t
       * and fold the partial word into h.  Intentional fallthrough; case 0 is
       * always reached. */
      switch(len) {
        case 3: d |= data[2] << 16;
        case 2: d |= data[1] << 8;
        case 1: d |= data[0];
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* Reached when |data| is 4-byte aligned or when len < 4; in the latter
     * case the loop body never runs, so no word read is performed. */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    /* Intentional fallthrough; nothing is mixed when len == 0. */
    switch(len) {
      case 3: h ^= data[2] << 16;
      case 2: h ^= data[1] << 8;
      case 1: h ^= data[0]; h *= m;
    };

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
4897
4898#endif /* UPB_UNALIGNED_READS_OK */
4899
4900#include <errno.h>
4901#include <stdarg.h>
4902#include <stddef.h>
4903#include <stdint.h>
4904#include <stdio.h>
4905#include <stdlib.h>
4906#include <string.h>
4907
4908bool upb_dumptostderr(void *closure, const upb_status* status) {
4909  UPB_UNUSED(closure);
4910  fprintf(stderr, "%s\n", upb_status_errmsg(status));
4911  return false;
4912}
4913
4914/* Guarantee null-termination and provide ellipsis truncation.
4915 * It may be tempting to "optimize" this by initializing these final
4916 * four bytes up-front and then being careful never to overwrite them,
4917 * this is safer and simpler. */
4918static void nullz(upb_status *status) {
4919  const char *ellipsis = "...";
4920  size_t len = strlen(ellipsis);
4921  assert(sizeof(status->msg) > len);
4922  memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
4923}
4924
4925void upb_status_clear(upb_status *status) {
4926  if (!status) return;
4927  status->ok_ = true;
4928  status->code_ = 0;
4929  status->msg[0] = '\0';
4930}
4931
4932bool upb_ok(const upb_status *status) { return status->ok_; }
4933
4934upb_errorspace *upb_status_errspace(const upb_status *status) {
4935  return status->error_space_;
4936}
4937
4938int upb_status_errcode(const upb_status *status) { return status->code_; }
4939
4940const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
4941
4942void upb_status_seterrmsg(upb_status *status, const char *msg) {
4943  if (!status) return;
4944  status->ok_ = false;
4945  strncpy(status->msg, msg, sizeof(status->msg));
4946  nullz(status);
4947}
4948
4949void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
4950  va_list args;
4951  va_start(args, fmt);
4952  upb_status_vseterrf(status, fmt, args);
4953  va_end(args);
4954}
4955
4956void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
4957  if (!status) return;
4958  status->ok_ = false;
4959  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
4960  nullz(status);
4961}
4962
4963void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
4964                           int code) {
4965  if (!status) return;
4966  status->ok_ = false;
4967  status->error_space_ = space;
4968  status->code_ = code;
4969  space->set_message(status, code);
4970}
4971
4972void upb_status_copy(upb_status *to, const upb_status *from) {
4973  if (!to) return;
4974  *to = *from;
4975}
4976/* This file was generated by upbc (the upb compiler).
4977 * Do not edit -- your changes will be discarded when the file is
4978 * regenerated. */
4979
4980
/* Forward declarations of the static tables defined below.  They are declared
 * up front because the initializers reference one another (for example,
 * fields[] points into msgs[] and enums[], and strentries[] points into
 * fields[]). */
static const upb_msgdef msgs[20];
static const upb_fielddef fields[81];
static const upb_enumdef enums[4];
static const upb_tabent strentries[236];
static const upb_tabent intentries[14];
static const upb_tabval arrays[232];

/* reftables[] is only declared when UPB_DEBUG_REFS is defined, although the
 * initializers below name it unconditionally.
 * NOTE(review): presumably the *_INIT macros discard those arguments in
 * non-debug builds -- confirm against the macro definitions. */
#ifdef UPB_DEBUG_REFS
static upb_inttable reftables[212];
#endif
4991
/* Message definitions for the google.protobuf descriptor messages, one
 * UPB_MSGDEF_INIT per message, ordered by full name.  Each entry names an
 * integer table (UPB_INTTABLE_INIT, backed by arrays[] and, for some entries,
 * intentries[]) and a string table (UPB_STRTABLE_INIT, backed by
 * strentries[]).  Entries are referenced by index from fields[], so their
 * order must not change.
 * NOTE(review): the meaning of the two integers following the name is defined
 * by UPB_MSGDEF_INIT, which is not visible here. */
static const upb_msgdef msgs[20] = {
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
  UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
  UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
  UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UPB_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]),
  UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]),
  UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]),
  UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]),
  UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]),
  UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[128]),&reftables[32], &reftables[33]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]),
};
5014
/* Field definitions for the descriptor messages, one UPB_FIELDDEF_INIT per
 * field, ordered by field name.  Each entry carries a label, a type, the field
 * name, a pointer into msgs[], and, for message- and enum-typed fields, a
 * second pointer into msgs[] or enums[] (NULL otherwise).  Entries are
 * referenced by index from strentries[], so their order must not change.
 * NOTE(review): the integer after the name looks like the protobuf field
 * number and the first msgs[] pointer like the containing message; the exact
 * meaning of every argument is defined by UPB_FIELDDEF_INIT, which is not
 * visible here. */
static const upb_fielddef fields[81] = {
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], (const upb_def*)(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
};
5098
/* Enum definitions used by the descriptor messages, one UPB_ENUMDEF_INIT per
 * enum, ordered by full name.  Each entry pairs a string table
 * (UPB_CTYPE_INT32 values, backed by strentries[]) with an integer table
 * (UPB_CTYPE_CSTR values, backed by arrays[]).  Entries are referenced by
 * index from fields[], so their order must not change. */
static const upb_enumdef enums[4] = {
  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]),
  UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]),
};
5105
/*
 * Shared backing storage for the string-keyed tables that are statically
 * initialized in this file. Individual tables are carved out of this array by
 * pointing at an offset (e.g. &strentries[160] in enums[0]).
 *
 * Every element is a {key, value, pointer} triple:
 *   - key:     UPB_TABKEY_NONE for an unused slot, otherwise UPB_TABKEY_STR.
 *              The four leading one-byte literals match the key's length
 *              (e.g. "\011" == 9 for "extension"); the exact encoding is
 *              defined by the UPB_TABKEY_STR macro, which is not visible here.
 *   - value:   a pointer into fields[]/msgs[]/enums[], or an enum number.
 *   - pointer: NULL or the address of another slot in this array.
 *              NOTE(review): presumably the next entry in a hash collision
 *              chain -- confirm against the upb table implementation.
 *
 * Layout that can be established from the references visible in this chunk:
 *   [0..159]   field name -> &fields[n]. NOTE(review): presumably the
 *              per-message name tables referenced from msgs[], which is
 *              defined outside this chunk.
 *   [160..163] value names of FieldDescriptorProto.Label   (enums[0])
 *   [164..195] value names of FieldDescriptorProto.Type    (enums[1])
 *   [196..199] value names of FieldOptions.CType           (enums[2])
 *   [200..203] value names of FileOptions.OptimizeMode     (enums[3])
 *   [204..235] fully-qualified name -> message/enum def    (symtab)
 *
 * Slot positions and chain pointers are part of the data: do not reorder,
 * insert, or remove entries by hand.
 */
static const upb_tabent strentries[236] = {
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
  {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[9]), &strentries[14]},
  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
  {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[40]), &strentries[22]},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[30]},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[49]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
  {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[48]},
  {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_TABVALUE_PTR_INIT(&fields[11]), &strentries[67]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
  {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[32]), NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[82]},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
  {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[61]), &strentries[81]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[24]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[23]), &strentries[102]},
  {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
  {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[25]), NULL},
  {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
  {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[54]), &strentries[122]},
  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[33]), NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[121]},
  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[65]), &strentries[139]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
  {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[29]), &strentries[137]},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[43]), NULL},
  {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
  {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[67]), &strentries[154]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[42]), NULL},
  /* [160..163] FieldDescriptorProto.Label: value name -> number (enums[0]). */
  {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[162]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
  {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
  /* [164..195] FieldDescriptorProto.Type: value name -> number (enums[1]). */
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[193]},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
  {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[194]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[191]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[190]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
  /* [196..199] FieldOptions.CType: value name -> number (enums[2]). */
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[197]},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
  /* [200..203] FileOptions.OptimizeMode: value name -> number (enums[3]). */
  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[203]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
  /* [204..235] symtab: fully-qualified name -> message or enum definition. */
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
  {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
  {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
  {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
  {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
  {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
  {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
  {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL},
  {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]},
  {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
  {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL},
  {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]},
  {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]},
  {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL},
  {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
  {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]},
  {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
  {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
  {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]},
  {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
};
5344
/*
 * Backing storage for integer-keyed table entries.
 *
 * The array consists of seven {empty slot, key 999} pairs. Every populated
 * slot maps the number 999 to one of the fields[] elements that the
 * strentries[] table names "uninterpreted_option".
 *
 * NOTE(review): presumably each pair is the two-slot hash part of one options
 * message's number -> field table, holding 999 because that number does not
 * fit in the table's array part in arrays[]. The referencing msgs[]
 * initializers are outside this chunk -- confirm there.
 */
static const upb_tabent intentries[14] = {
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
};
5361
/*
 * Shared backing storage for the array parts of the statically initialized
 * int tables in this file. Individual tables point at an offset into this
 * array (e.g. &arrays[202] in enums[0]).
 *
 * For the enum tables the slot at (base + n) holds the name of the value
 * whose number is n, and UPB_TABVALUE_EMPTY_INIT marks a number with no value
 * (e.g. arrays[202] is empty and arrays[203] is "LABEL_OPTIONAL", number 1).
 *
 * Layout that can be established from the references visible in this chunk:
 *   [0..201]   pointers into fields[]. NOTE(review): presumably the
 *              per-message field number -> field tables referenced from
 *              msgs[], which is defined outside this chunk.
 *   [202..205] FieldDescriptorProto.Label: number -> name   (enums[0])
 *   [206..224] FieldDescriptorProto.Type: number -> name    (enums[1])
 *   [225..227] FieldOptions.CType: number -> name           (enums[2])
 *   [228..231] FileOptions.OptimizeMode: number -> name     (enums[3])
 *
 * Slot positions are part of the data: do not reorder, insert, or remove
 * entries by hand.
 */
static const upb_tabval arrays[232] = {
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[38]),
  UPB_TABVALUE_PTR_INIT(&fields[16]),
  UPB_TABVALUE_PTR_INIT(&fields[44]),
  UPB_TABVALUE_PTR_INIT(&fields[9]),
  UPB_TABVALUE_PTR_INIT(&fields[15]),
  UPB_TABVALUE_PTR_INIT(&fields[14]),
  UPB_TABVALUE_PTR_INIT(&fields[49]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[66]),
  UPB_TABVALUE_PTR_INIT(&fields[8]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[40]),
  UPB_TABVALUE_PTR_INIT(&fields[78]),
  UPB_TABVALUE_PTR_INIT(&fields[50]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[1]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[37]),
  UPB_TABVALUE_PTR_INIT(&fields[47]),
  UPB_TABVALUE_PTR_INIT(&fields[52]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[41]),
  UPB_TABVALUE_PTR_INIT(&fields[12]),
  UPB_TABVALUE_PTR_INIT(&fields[46]),
  UPB_TABVALUE_PTR_INIT(&fields[27]),
  UPB_TABVALUE_PTR_INIT(&fields[69]),
  UPB_TABVALUE_PTR_INIT(&fields[70]),
  UPB_TABVALUE_PTR_INIT(&fields[4]),
  UPB_TABVALUE_PTR_INIT(&fields[51]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[3]),
  UPB_TABVALUE_PTR_INIT(&fields[58]),
  UPB_TABVALUE_PTR_INIT(&fields[6]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[28]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[11]),
  UPB_TABVALUE_PTR_INIT(&fields[79]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[34]),
  UPB_TABVALUE_PTR_INIT(&fields[57]),
  UPB_TABVALUE_PTR_INIT(&fields[5]),
  UPB_TABVALUE_PTR_INIT(&fields[32]),
  UPB_TABVALUE_PTR_INIT(&fields[10]),
  UPB_TABVALUE_PTR_INIT(&fields[63]),
  UPB_TABVALUE_PTR_INIT(&fields[13]),
  UPB_TABVALUE_PTR_INIT(&fields[53]),
  UPB_TABVALUE_PTR_INIT(&fields[64]),
  UPB_TABVALUE_PTR_INIT(&fields[61]),
  UPB_TABVALUE_PTR_INIT(&fields[80]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[17]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[26]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[25]),
  UPB_TABVALUE_PTR_INIT(&fields[48]),
  UPB_TABVALUE_PTR_INIT(&fields[24]),
  UPB_TABVALUE_PTR_INIT(&fields[18]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[2]),
  UPB_TABVALUE_PTR_INIT(&fields[23]),
  UPB_TABVALUE_PTR_INIT(&fields[62]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[22]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[31]),
  UPB_TABVALUE_PTR_INIT(&fields[45]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[39]),
  UPB_TABVALUE_PTR_INIT(&fields[20]),
  UPB_TABVALUE_PTR_INIT(&fields[56]),
  UPB_TABVALUE_PTR_INIT(&fields[55]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[35]),
  UPB_TABVALUE_PTR_INIT(&fields[33]),
  UPB_TABVALUE_PTR_INIT(&fields[54]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[30]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[59]),
  UPB_TABVALUE_PTR_INIT(&fields[65]),
  UPB_TABVALUE_PTR_INIT(&fields[29]),
  UPB_TABVALUE_PTR_INIT(&fields[68]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[36]),
  UPB_TABVALUE_PTR_INIT(&fields[19]),
  UPB_TABVALUE_PTR_INIT(&fields[60]),
  UPB_TABVALUE_PTR_INIT(&fields[43]),
  UPB_TABVALUE_PTR_INIT(&fields[7]),
  UPB_TABVALUE_PTR_INIT(&fields[67]),
  UPB_TABVALUE_PTR_INIT(&fields[0]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[42]),
  UPB_TABVALUE_PTR_INIT(&fields[21]),
  /* [202..205] FieldDescriptorProto.Label: number -> name (enums[0]). */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
  UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
  UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
  /* [206..224] FieldDescriptorProto.Type: number -> name (enums[1]). */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
  UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
  UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
  UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
  UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
  UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
  /* [225..227] FieldOptions.CType: number -> name (enums[2]). */
  UPB_TABVALUE_PTR_INIT("STRING"),
  UPB_TABVALUE_PTR_INIT("CORD"),
  UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
  /* [228..231] FileOptions.OptimizeMode: number -> name (enums[3]). */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("SPEED"),
  UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
  UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
};
5596
/*
 * The statically initialized symbol table for descriptor.proto. Its string
 * table is rooted at strentries[204], whose entries map fully-qualified names
 * to elements of msgs[] and enums[]; reftables[210] and reftables[211] are its
 * two reftables[] slots.
 */
static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_CTYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]);
5598
5599const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
5600  upb_symtab_ref(&symtab, owner);
5601  return &symtab;
5602}
5603
#ifdef UPB_DEBUG_REFS
/* All 212 slots start out as empty pointer-valued inttables.  The helper
 * macros below exist only to spell out the identical initializers compactly:
 * 3*64 + 16 + 4 = 212 entries. */
#define REFTABLE_X1 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR)
#define REFTABLE_X4 REFTABLE_X1, REFTABLE_X1, REFTABLE_X1, REFTABLE_X1
#define REFTABLE_X16 REFTABLE_X4, REFTABLE_X4, REFTABLE_X4, REFTABLE_X4
#define REFTABLE_X64 REFTABLE_X16, REFTABLE_X16, REFTABLE_X16, REFTABLE_X16
static upb_inttable reftables[212] = {
  REFTABLE_X64, REFTABLE_X64, REFTABLE_X64,  /* entries   0..191 */
  REFTABLE_X16,                              /* entries 192..207 */
  REFTABLE_X4                                /* entries 208..211 */
};
#undef REFTABLE_X64
#undef REFTABLE_X16
#undef REFTABLE_X4
#undef REFTABLE_X1
#endif
5820
5821/*
5822** XXX: The routines in this file that consume a string do not currently
5823** support having the string span buffers.  In the future, as upb_sink and
5824** its buffering/sharing functionality evolve there should be an easy and
5825** idiomatic way of correctly handling this case.  For now, we accept this
5826** limitation since we currently only parse descriptors from single strings.
5827*/
5828
5829
5830#include <errno.h>
5831#include <stdlib.h>
5832#include <string.h>
5833
5834/* upb_deflist is an internal-only dynamic array for storing a growing list of
5835 * upb_defs. */
5836typedef struct {
5837  upb_def **defs;
5838  size_t len;
5839  size_t size;
5840  bool owned;
5841} upb_deflist;
5842
/* One entry of the reader's scope stack.  The reader tracks every message
 * scope it is currently inside, plus the top-level file scope, so that the
 * definitions contained in a scope can be qualified correctly. */
typedef struct {
  /* Name of the message or package: a bare name, not qualified by any
   * enclosing scopes. */
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
} upb_descreader_frame;
5853
/* The maximum number of nested declarations that are allowed, i.e.
 * message Foo {
 *   message Bar {
 *     message Baz {
 *     }
 *   }
 * }
 *
 * This is a resource limit that affects how big our runtime stack can grow.
 * TODO: make this a runtime-settable property of the Reader instance. */
5864#define UPB_MAX_MESSAGE_NESTING 64
5865
5866struct upb_descreader {
5867  upb_sink sink;
5868  upb_deflist defs;
5869  upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
5870  int stack_len;
5871
5872  uint32_t number;
5873  char *name;
5874  bool saw_number;
5875  bool saw_name;
5876
5877  char *default_string;
5878
5879  upb_fielddef *f;
5880};
5881
/* Returns a newly allocated, NUL-terminated copy of the first n bytes of buf,
 * or NULL if the allocation fails.  The caller frees the result. */
static char *upb_strndup(const char *buf, size_t n) {
  char *copy = malloc(n + 1);
  if (copy != NULL) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
5889
/* Returns a newly allocated string that joins input strings together, for
 * example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 * A NULL base is treated like an empty one.  Returns NULL if the joined string
 * cannot be allocated.  The caller owns the returned string and must free()
 * it. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *ret;

  if (!base || base[0] == '\0') {
    return upb_strdup(name);
  }

  base_len = strlen(base);
  name_len = strlen(name);
  ret = malloc(base_len + name_len + 2);  /* +2: '.' separator and NUL. */
  if (!ret) return NULL;

  /* The lengths are already known, so copy directly instead of re-scanning
   * the buffer with strcat(). */
  memcpy(ret, base, base_len);
  ret[base_len] = '.';
  memcpy(ret + base_len + 1, name, name_len + 1);  /* Includes the NUL. */
  return ret;
}
5907
5908
5909/* upb_deflist ****************************************************************/
5910
5911void upb_deflist_init(upb_deflist *l) {
5912  l->size = 0;
5913  l->defs = NULL;
5914  l->len = 0;
5915  l->owned = true;
5916}
5917
5918void upb_deflist_uninit(upb_deflist *l) {
5919  size_t i;
5920  if (l->owned)
5921    for(i = 0; i < l->len; i++)
5922      upb_def_unref(l->defs[i], l);
5923  free(l->defs);
5924}
5925
5926bool upb_deflist_push(upb_deflist *l, upb_def *d) {
5927  if(++l->len >= l->size) {
5928    size_t new_size = UPB_MAX(l->size, 4);
5929    new_size *= 2;
5930    l->defs = realloc(l->defs, new_size * sizeof(void *));
5931    if (!l->defs) return false;
5932    l->size = new_size;
5933  }
5934  l->defs[l->len - 1] = d;
5935  return true;
5936}
5937
5938void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
5939  size_t i;
5940  assert(l->owned);
5941  for (i = 0; i < l->len; i++)
5942    upb_def_donateref(l->defs[i], l, owner);
5943  l->owned = false;
5944}
5945
5946static upb_def *upb_deflist_last(upb_deflist *l) {
5947  return l->defs[l->len-1];
5948}
5949
5950/* Qualify the defname for all defs starting with offset "start" with "str". */
5951static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
5952  uint32_t i;
5953  for (i = start; i < l->len; i++) {
5954    upb_def *def = l->defs[i];
5955    char *name = upb_join(str, upb_def_fullname(def));
5956    upb_def_setfullname(def, name, NULL);
5957    free(name);
5958  }
5959}
5960
5961
5962/* upb_descreader  ************************************************************/
5963
5964static upb_msgdef *upb_descreader_top(upb_descreader *r) {
5965  int index;
5966  assert(r->stack_len > 1);
5967  index = r->stack[r->stack_len-1].start - 1;
5968  assert(index >= 0);
5969  return upb_downcast_msgdef_mutable(r->defs.defs[index]);
5970}
5971
5972static upb_def *upb_descreader_last(upb_descreader *r) {
5973  return upb_deflist_last(&r->defs);
5974}
5975
5976/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
5977 * entities that have names and can contain sub-definitions. */
5978void upb_descreader_startcontainer(upb_descreader *r) {
5979  upb_descreader_frame *f = &r->stack[r->stack_len++];
5980  f->start = r->defs.len;
5981  f->name = NULL;
5982}
5983
5984void upb_descreader_endcontainer(upb_descreader *r) {
5985  upb_descreader_frame *f = &r->stack[--r->stack_len];
5986  upb_deflist_qualify(&r->defs, f->name, f->start);
5987  free(f->name);
5988  f->name = NULL;
5989}
5990
5991void upb_descreader_setscopename(upb_descreader *r, char *str) {
5992  upb_descreader_frame *f = &r->stack[r->stack_len-1];
5993  free(f->name);
5994  f->name = str;
5995}
5996
5997/* Handlers for google.protobuf.FileDescriptorProto. */
5998static bool file_startmsg(void *r, const void *hd) {
5999  UPB_UNUSED(hd);
6000  upb_descreader_startcontainer(r);
6001  return true;
6002}
6003
6004static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
6005  upb_descreader *r = closure;
6006  UPB_UNUSED(hd);
6007  UPB_UNUSED(status);
6008  upb_descreader_endcontainer(r);
6009  return true;
6010}
6011
6012static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6013                             size_t n, const upb_bufhandle *handle) {
6014  upb_descreader *r = closure;
6015  UPB_UNUSED(hd);
6016  UPB_UNUSED(handle);
6017  /* XXX: see comment at the top of the file. */
6018  upb_descreader_setscopename(r, upb_strndup(buf, n));
6019  return n;
6020}
6021
6022/* Handlers for google.protobuf.EnumValueDescriptorProto. */
6023static bool enumval_startmsg(void *closure, const void *hd) {
6024  upb_descreader *r = closure;
6025  UPB_UNUSED(hd);
6026  r->saw_number = false;
6027  r->saw_name = false;
6028  return true;
6029}
6030
6031static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6032                             size_t n, const upb_bufhandle *handle) {
6033  upb_descreader *r = closure;
6034  UPB_UNUSED(hd);
6035  UPB_UNUSED(handle);
6036  /* XXX: see comment at the top of the file. */
6037  free(r->name);
6038  r->name = upb_strndup(buf, n);
6039  r->saw_name = true;
6040  return n;
6041}
6042
6043static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
6044  upb_descreader *r = closure;
6045  UPB_UNUSED(hd);
6046  r->number = val;
6047  r->saw_number = true;
6048  return true;
6049}
6050
6051static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
6052  upb_descreader *r = closure;
6053  upb_enumdef *e;
6054  UPB_UNUSED(hd);
6055
6056  if(!r->saw_number || !r->saw_name) {
6057    upb_status_seterrmsg(status, "Enum value missing name or number.");
6058    return false;
6059  }
6060  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
6061  upb_enumdef_addval(e, r->name, r->number, status);
6062  free(r->name);
6063  r->name = NULL;
6064  return true;
6065}
6066
6067
6068/* Handlers for google.protobuf.EnumDescriptorProto. */
6069static bool enum_startmsg(void *closure, const void *hd) {
6070  upb_descreader *r = closure;
6071  UPB_UNUSED(hd);
6072  upb_deflist_push(&r->defs,
6073                   upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
6074  return true;
6075}
6076
6077static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
6078  upb_descreader *r = closure;
6079  upb_enumdef *e;
6080  UPB_UNUSED(hd);
6081
6082  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
6083  if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
6084    upb_status_seterrmsg(status, "Enum had no name.");
6085    return false;
6086  }
6087  if (upb_enumdef_numvals(e) == 0) {
6088    upb_status_seterrmsg(status, "Enum had no values.");
6089    return false;
6090  }
6091  return true;
6092}
6093
6094static size_t enum_onname(void *closure, const void *hd, const char *buf,
6095                          size_t n, const upb_bufhandle *handle) {
6096  upb_descreader *r = closure;
6097  char *fullname = upb_strndup(buf, n);
6098  UPB_UNUSED(hd);
6099  UPB_UNUSED(handle);
6100  /* XXX: see comment at the top of the file. */
6101  upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6102  free(fullname);
6103  return n;
6104}
6105
6106/* Handlers for google.protobuf.FieldDescriptorProto */
6107static bool field_startmsg(void *closure, const void *hd) {
6108  upb_descreader *r = closure;
6109  UPB_UNUSED(hd);
6110  r->f = upb_fielddef_new(&r->defs);
6111  free(r->default_string);
6112  r->default_string = NULL;
6113
6114  /* fielddefs default to packed, but descriptors default to non-packed. */
6115  upb_fielddef_setpacked(r->f, false);
6116  return true;
6117}
6118
6119/* Converts the default value in string "str" into "d".  Passes a ref on str.
6120 * Returns true on success. */
6121static bool parse_default(char *str, upb_fielddef *f) {
6122  bool success = true;
6123  char *end;
6124  switch (upb_fielddef_type(f)) {
6125    case UPB_TYPE_INT32: {
6126      long val = strtol(str, &end, 0);
6127      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6128        success = false;
6129      else
6130        upb_fielddef_setdefaultint32(f, val);
6131      break;
6132    }
6133    case UPB_TYPE_INT64: {
6134      /* XXX: Need to write our own strtoll, since it's not available in c89. */
6135      long long val = strtol(str, &end, 0);
6136      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6137        success = false;
6138      else
6139        upb_fielddef_setdefaultint64(f, val);
6140      break;
6141    }
6142    case UPB_TYPE_UINT32: {
6143      unsigned long val = strtoul(str, &end, 0);
6144      if (val > UINT32_MAX || errno == ERANGE || *end)
6145        success = false;
6146      else
6147        upb_fielddef_setdefaultuint32(f, val);
6148      break;
6149    }
6150    case UPB_TYPE_UINT64: {
6151      /* XXX: Need to write our own strtoull, since it's not available in c89. */
6152      unsigned long long val = strtoul(str, &end, 0);
6153      if (val > UINT64_MAX || errno == ERANGE || *end)
6154        success = false;
6155      else
6156        upb_fielddef_setdefaultuint64(f, val);
6157      break;
6158    }
6159    case UPB_TYPE_DOUBLE: {
6160      double val = strtod(str, &end);
6161      if (errno == ERANGE || *end)
6162        success = false;
6163      else
6164        upb_fielddef_setdefaultdouble(f, val);
6165      break;
6166    }
6167    case UPB_TYPE_FLOAT: {
6168      /* XXX: Need to write our own strtof, since it's not available in c89. */
6169      float val = strtod(str, &end);
6170      if (errno == ERANGE || *end)
6171        success = false;
6172      else
6173        upb_fielddef_setdefaultfloat(f, val);
6174      break;
6175    }
6176    case UPB_TYPE_BOOL: {
6177      if (strcmp(str, "false") == 0)
6178        upb_fielddef_setdefaultbool(f, false);
6179      else if (strcmp(str, "true") == 0)
6180        upb_fielddef_setdefaultbool(f, true);
6181      else
6182        success = false;
6183      break;
6184    }
6185    default: abort();
6186  }
6187  return success;
6188}
6189
6190static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
6191  upb_descreader *r = closure;
6192  upb_fielddef *f = r->f;
6193  UPB_UNUSED(hd);
6194
6195  /* TODO: verify that all required fields were present. */
6196  assert(upb_fielddef_number(f) != 0);
6197  assert(upb_fielddef_name(f) != NULL);
6198  assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
6199
6200  if (r->default_string) {
6201    if (upb_fielddef_issubmsg(f)) {
6202      upb_status_seterrmsg(status, "Submessages cannot have defaults.");
6203      return false;
6204    }
6205    if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
6206      upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
6207    } else {
6208      if (r->default_string && !parse_default(r->default_string, f)) {
6209        /* We don't worry too much about giving a great error message since the
6210         * compiler should have ensured this was correct. */
6211        upb_status_seterrmsg(status, "Error converting default value.");
6212        return false;
6213      }
6214    }
6215  }
6216  return true;
6217}
6218
6219static bool field_onlazy(void *closure, const void *hd, bool val) {
6220  upb_descreader *r = closure;
6221  UPB_UNUSED(hd);
6222
6223  upb_fielddef_setlazy(r->f, val);
6224  return true;
6225}
6226
6227static bool field_onpacked(void *closure, const void *hd, bool val) {
6228  upb_descreader *r = closure;
6229  UPB_UNUSED(hd);
6230
6231  upb_fielddef_setpacked(r->f, val);
6232  return true;
6233}
6234
6235static bool field_ontype(void *closure, const void *hd, int32_t val) {
6236  upb_descreader *r = closure;
6237  UPB_UNUSED(hd);
6238
6239  upb_fielddef_setdescriptortype(r->f, val);
6240  return true;
6241}
6242
6243static bool field_onlabel(void *closure, const void *hd, int32_t val) {
6244  upb_descreader *r = closure;
6245  UPB_UNUSED(hd);
6246
6247  upb_fielddef_setlabel(r->f, val);
6248  return true;
6249}
6250
6251static bool field_onnumber(void *closure, const void *hd, int32_t val) {
6252  upb_descreader *r = closure;
6253  bool ok = upb_fielddef_setnumber(r->f, val, NULL);
6254  UPB_UNUSED(hd);
6255
6256  UPB_ASSERT_VAR(ok, ok);
6257  return true;
6258}
6259
6260static size_t field_onname(void *closure, const void *hd, const char *buf,
6261                           size_t n, const upb_bufhandle *handle) {
6262  upb_descreader *r = closure;
6263  char *name = upb_strndup(buf, n);
6264  UPB_UNUSED(hd);
6265  UPB_UNUSED(handle);
6266
6267  /* XXX: see comment at the top of the file. */
6268  upb_fielddef_setname(r->f, name, NULL);
6269  free(name);
6270  return n;
6271}
6272
6273static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6274                               size_t n, const upb_bufhandle *handle) {
6275  upb_descreader *r = closure;
6276  char *name = upb_strndup(buf, n);
6277  UPB_UNUSED(hd);
6278  UPB_UNUSED(handle);
6279
6280  /* XXX: see comment at the top of the file. */
6281  upb_fielddef_setsubdefname(r->f, name, NULL);
6282  free(name);
6283  return n;
6284}
6285
6286static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6287                               size_t n, const upb_bufhandle *handle) {
6288  upb_descreader *r = closure;
6289  char *name = upb_strndup(buf, n);
6290  UPB_UNUSED(hd);
6291  UPB_UNUSED(handle);
6292
6293  /* XXX: see comment at the top of the file. */
6294  upb_fielddef_setcontainingtypename(r->f, name, NULL);
6295  free(name);
6296  return n;
6297}
6298
6299static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6300                                 size_t n, const upb_bufhandle *handle) {
6301  upb_descreader *r = closure;
6302  UPB_UNUSED(hd);
6303  UPB_UNUSED(handle);
6304
6305  /* Have to convert from string to the correct type, but we might not know the
6306   * type yet, so we save it as a string until the end of the field.
6307   * XXX: see comment at the top of the file. */
6308  free(r->default_string);
6309  r->default_string = upb_strndup(buf, n);
6310  return n;
6311}
6312
6313/* Handlers for google.protobuf.DescriptorProto (representing a message). */
6314static bool msg_startmsg(void *closure, const void *hd) {
6315  upb_descreader *r = closure;
6316  UPB_UNUSED(hd);
6317
6318  upb_deflist_push(&r->defs,
6319                   upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs)));
6320  upb_descreader_startcontainer(r);
6321  return true;
6322}
6323
6324static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
6325  upb_descreader *r = closure;
6326  upb_msgdef *m = upb_descreader_top(r);
6327  UPB_UNUSED(hd);
6328
6329  if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
6330    upb_status_seterrmsg(status, "Encountered message with no name.");
6331    return false;
6332  }
6333  upb_descreader_endcontainer(r);
6334  return true;
6335}
6336
6337static size_t msg_onname(void *closure, const void *hd, const char *buf,
6338                         size_t n, const upb_bufhandle *handle) {
6339  upb_descreader *r = closure;
6340  upb_msgdef *m = upb_descreader_top(r);
6341  /* XXX: see comment at the top of the file. */
6342  char *name = upb_strndup(buf, n);
6343  UPB_UNUSED(hd);
6344  UPB_UNUSED(handle);
6345
6346  upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6347  upb_descreader_setscopename(r, name);  /* Passes ownership of name. */
6348  return n;
6349}
6350
6351static bool msg_onendfield(void *closure, const void *hd) {
6352  upb_descreader *r = closure;
6353  upb_msgdef *m = upb_descreader_top(r);
6354  UPB_UNUSED(hd);
6355
6356  upb_msgdef_addfield(m, r->f, &r->defs, NULL);
6357  r->f = NULL;
6358  return true;
6359}
6360
6361static bool pushextension(void *closure, const void *hd) {
6362  upb_descreader *r = closure;
6363  UPB_UNUSED(hd);
6364
6365  assert(upb_fielddef_containingtypename(r->f));
6366  upb_fielddef_setisextension(r->f, true);
6367  upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
6368  r->f = NULL;
6369  return true;
6370}
6371
6372#define D(name) upbdefs_google_protobuf_ ## name(s)
6373
6374static void reghandlers(const void *closure, upb_handlers *h) {
6375  const upb_symtab *s = closure;
6376  const upb_msgdef *m = upb_handlers_msgdef(h);
6377
6378  if (m == D(DescriptorProto)) {
6379    upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
6380    upb_handlers_setendmsg(h, &msg_endmsg, NULL);
6381    upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
6382    upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
6383                              NULL);
6384    upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
6385                              NULL);
6386  } else if (m == D(FileDescriptorProto)) {
6387    upb_handlers_setstartmsg(h, &file_startmsg, NULL);
6388    upb_handlers_setendmsg(h, &file_endmsg, NULL);
6389    upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
6390                           NULL);
6391    upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextension,
6392                              NULL);
6393  } else if (m == D(EnumValueDescriptorProto)) {
6394    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
6395    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
6396    upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL);
6397    upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumber,
6398                          NULL);
6399  } else if (m == D(EnumDescriptorProto)) {
6400    upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
6401    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
6402    upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
6403  } else if (m == D(FieldDescriptorProto)) {
6404    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
6405    upb_handlers_setendmsg(h, &field_endmsg, NULL);
6406    upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
6407                          NULL);
6408    upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
6409                          NULL);
6410    upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
6411                          NULL);
6412    upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
6413                           NULL);
6414    upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
6415                           &field_ontypename, NULL);
6416    upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
6417                           &field_onextendee, NULL);
6418    upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
6419                           &field_ondefaultval, NULL);
6420  } else if (m == D(FieldOptions)) {
6421    upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
6422    upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
6423  }
6424}
6425
6426#undef D
6427
6428void descreader_cleanup(void *_r) {
6429  upb_descreader *r = _r;
6430  free(r->name);
6431  upb_deflist_uninit(&r->defs);
6432  free(r->default_string);
6433  while (r->stack_len > 0) {
6434    upb_descreader_frame *f = &r->stack[--r->stack_len];
6435    free(f->name);
6436  }
6437}
6438
6439
6440/* Public API  ****************************************************************/
6441
6442upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6443  upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6444  if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6445    return NULL;
6446  }
6447
6448  upb_deflist_init(&r->defs);
6449  upb_sink_reset(upb_descreader_input(r), h, r);
6450  r->stack_len = 0;
6451  r->name = NULL;
6452  r->default_string = NULL;
6453
6454  return r;
6455}
6456
6457upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6458  *n = r->defs.len;
6459  upb_deflist_donaterefs(&r->defs, owner);
6460  return r->defs.defs;
6461}
6462
6463upb_sink *upb_descreader_input(upb_descreader *r) {
6464  return &r->sink;
6465}
6466
6467const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6468  const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6469  const upb_handlers *h = upb_handlers_newfrozen(
6470      upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s);
6471  upb_symtab_unref(s, &s);
6472  return h;
6473}
6474/*
6475** protobuf decoder bytecode compiler
6476**
6477** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6478** according to that specific schema and destination handlers.
6479**
6480** Compiling to bytecode is always the first step.  If we are using the
6481** interpreted decoder we leave it as bytecode and interpret that.  If we are
6482** using a JIT decoder we use a code generator to turn the bytecode into native
6483** code, LLVM IR, etc.
6484**
6485** Bytecode definition is in decoder.int.h.
6486*/
6487
6488#include <stdarg.h>
6489
6490#ifdef UPB_DUMP_BYTECODE
6491#include <stdio.h>
6492#endif
6493
/* Number of slots in the compiler's local-label tables; valid label numbers
 * are checked against this bound. */
#define MAXLABEL 5
/* Marks a label slot that currently holds no bytecode offset.  Parenthesized
 * so the negative constant expands safely inside any expression. */
#define EMPTYLABEL (-1)
6496
6497/* mgroup *********************************************************************/
6498
6499static void freegroup(upb_refcounted *r) {
6500  mgroup *g = (mgroup*)r;
6501  upb_inttable_uninit(&g->methods);
6502#ifdef UPB_USE_JIT_X64
6503  upb_pbdecoder_freejit(g);
6504#endif
6505  free(g->bytecode);
6506  free(g);
6507}
6508
6509static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
6510                       void *closure) {
6511  const mgroup *g = (const mgroup*)r;
6512  upb_inttable_iter i;
6513  upb_inttable_begin(&i, &g->methods);
6514  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6515    upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
6516    visit(r, upb_pbdecodermethod_upcast(method), closure);
6517  }
6518}
6519
6520mgroup *newgroup(const void *owner) {
6521  mgroup *g = malloc(sizeof(*g));
6522  static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
6523  upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
6524  upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6525  g->bytecode = NULL;
6526  g->bytecode_end = NULL;
6527  return g;
6528}
6529
6530
6531/* upb_pbdecodermethod ********************************************************/
6532
6533static void freemethod(upb_refcounted *r) {
6534  upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
6535
6536  if (method->dest_handlers_) {
6537    upb_handlers_unref(method->dest_handlers_, method);
6538  }
6539
6540  upb_inttable_uninit(&method->dispatch);
6541  free(method);
6542}
6543
6544static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
6545                        void *closure) {
6546  const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
6547  visit(r, m->group, closure);
6548}
6549
6550static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6551                                      mgroup *group) {
6552  static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
6553  upb_pbdecodermethod *ret = malloc(sizeof(*ret));
6554  upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
6555  upb_byteshandler_init(&ret->input_handler_);
6556
6557  /* The method references the group and vice-versa, in a circular reference. */
6558  upb_ref2(ret, group);
6559  upb_ref2(group, ret);
6560  upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
6561  upb_pbdecodermethod_unref(ret, &ret);
6562
6563  ret->group = mgroup_upcast_mutable(group);
6564  ret->dest_handlers_ = dest_handlers;
6565  ret->is_native_ = false;  /* If we JIT, it will update this later. */
6566  upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6567
6568  if (ret->dest_handlers_) {
6569    upb_handlers_ref(ret->dest_handlers_, ret);
6570  }
6571  return ret;
6572}
6573
6574const upb_handlers *upb_pbdecodermethod_desthandlers(
6575    const upb_pbdecodermethod *m) {
6576  return m->dest_handlers_;
6577}
6578
6579const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6580    const upb_pbdecodermethod *m) {
6581  return &m->input_handler_;
6582}
6583
6584bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6585  return m->is_native_;
6586}
6587
6588const upb_pbdecodermethod *upb_pbdecodermethod_new(
6589    const upb_pbdecodermethodopts *opts, const void *owner) {
6590  const upb_pbdecodermethod *ret;
6591  upb_pbcodecache cache;
6592
6593  upb_pbcodecache_init(&cache);
6594  ret = upb_pbcodecache_getdecodermethod(&cache, opts);
6595  upb_pbdecodermethod_ref(ret, owner);
6596  upb_pbcodecache_uninit(&cache);
6597  return ret;
6598}
6599
6600
6601/* bytecode compiler **********************************************************/
6602
/* Data used only at compilation time. */
typedef struct {
  /* Group whose bytecode buffer is being written. */
  mgroup *group;

  /* Write cursor into group->bytecode (next instruction word to emit). */
  uint32_t *pc;
  /* Per-label bytecode offset of the most recent unresolved forward reference,
   * or EMPTYLABEL if there is none. */
  int fwd_labels[MAXLABEL];
  /* Per-label bytecode offset at which the label was last defined, or
   * EMPTYLABEL if it has not been defined yet. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
6614
6615static compiler *newcompiler(mgroup *group, bool lazy) {
6616  compiler *ret = malloc(sizeof(*ret));
6617  int i;
6618
6619  ret->group = group;
6620  ret->lazy = lazy;
6621  for (i = 0; i < MAXLABEL; i++) {
6622    ret->fwd_labels[i] = EMPTYLABEL;
6623    ret->back_labels[i] = EMPTYLABEL;
6624  }
6625  return ret;
6626}
6627
6628static void freecompiler(compiler *c) {
6629  free(c);
6630}
6631
6632const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
6633
6634/* How many words an instruction is. */
6635static int instruction_len(uint32_t instr) {
6636  switch (getop(instr)) {
6637    case OP_SETDISPATCH: return 1 + ptr_words;
6638    case OP_TAGN: return 3;
6639    case OP_SETBIGGROUPNUM: return 2;
6640    default: return 1;
6641  }
6642}
6643
6644bool op_has_longofs(int32_t instruction) {
6645  switch (getop(instruction)) {
6646    case OP_CALL:
6647    case OP_BRANCH:
6648    case OP_CHECKDELIM:
6649      return true;
6650    /* The "tag" instructions only have 8 bytes available for the jump target,
6651     * but that is ok because these opcodes only require short jumps. */
6652    case OP_TAG1:
6653    case OP_TAG2:
6654    case OP_TAGN:
6655      return false;
6656    default:
6657      assert(false);
6658      return false;
6659  }
6660}
6661
/* Extracts the signed jump offset from an instruction word: a sign-extended
 * 8-bit field for short-offset opcodes, otherwise the upper 24 bits. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
6669
/* Writes jump offset "ofs" into "*instruction", preserving the opcode (and,
 * for short-offset opcodes, every bit outside the 8-bit offset field).
 *
 * Backward jumps have a negative "ofs"; the shift is therefore done on the
 * unsigned representation, since left-shifting a negative signed value is
 * undefined behavior in C. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  if (op_has_longofs(*instruction)) {
    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
  } else {
    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6678
6679static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
6680
6681/* Defines a local label at the current PC location.  All previous forward
6682 * references are updated to point to this location.  The location is noted
6683 * for any future backward references. */
6684static void label(compiler *c, unsigned int label) {
6685  int val;
6686  uint32_t *codep;
6687
6688  assert(label < MAXLABEL);
6689  val = c->fwd_labels[label];
6690  codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6691  while (codep) {
6692    int ofs = getofs(*codep);
6693    setofs(codep, c->pc - codep - instruction_len(*codep));
6694    codep = ofs ? codep + ofs : NULL;
6695  }
6696  c->fwd_labels[label] = EMPTYLABEL;
6697  c->back_labels[label] = pcofs(c);
6698}
6699
6700/* Creates a reference to a numbered label; either a forward reference
6701 * (positive arg) or backward reference (negative arg).  For forward references
6702 * the value returned now is actually a "next" pointer into a linked list of all
6703 * instructions that use this label and will be patched later when the label is
6704 * defined with label().
6705 *
6706 * The returned value is the offset that should be written into the instruction.
6707 */
6708static int32_t labelref(compiler *c, int label) {
6709  assert(label < MAXLABEL);
6710  if (label == LABEL_DISPATCH) {
6711    /* No resolving required. */
6712    return 0;
6713  } else if (label < 0) {
6714    /* Backward local label.  Relative to the next instruction. */
6715    uint32_t from = (c->pc + 1) - c->group->bytecode;
6716    return c->back_labels[-label] - from;
6717  } else {
6718    /* Forward local label: prepend to (possibly-empty) linked list. */
6719    int *lptr = &c->fwd_labels[label];
6720    int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6721    *lptr = pcofs(c);
6722    return ret;
6723  }
6724}
6725
6726static void put32(compiler *c, uint32_t v) {
6727  mgroup *g = c->group;
6728  if (c->pc == g->bytecode_end) {
6729    int ofs = pcofs(c);
6730    size_t oldsize = g->bytecode_end - g->bytecode;
6731    size_t newsize = UPB_MAX(oldsize * 2, 64);
6732    /* TODO(haberman): handle OOM. */
6733    g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
6734    g->bytecode_end = g->bytecode + newsize;
6735    c->pc = g->bytecode + ofs;
6736  }
6737  *c->pc++ = v;
6738}
6739
6740static void putop(compiler *c, opcode op, ...) {
6741  va_list ap;
6742  va_start(ap, op);
6743
6744  switch (op) {
6745    case OP_SETDISPATCH: {
6746      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
6747      put32(c, OP_SETDISPATCH);
6748      put32(c, ptr);
6749      if (sizeof(uintptr_t) > sizeof(uint32_t))
6750        put32(c, (uint64_t)ptr >> 32);
6751      break;
6752    }
6753    case OP_STARTMSG:
6754    case OP_ENDMSG:
6755    case OP_PUSHLENDELIM:
6756    case OP_POP:
6757    case OP_SETDELIM:
6758    case OP_HALT:
6759    case OP_RET:
6760    case OP_DISPATCH:
6761      put32(c, op);
6762      break;
6763    case OP_PARSE_DOUBLE:
6764    case OP_PARSE_FLOAT:
6765    case OP_PARSE_INT64:
6766    case OP_PARSE_UINT64:
6767    case OP_PARSE_INT32:
6768    case OP_PARSE_FIXED64:
6769    case OP_PARSE_FIXED32:
6770    case OP_PARSE_BOOL:
6771    case OP_PARSE_UINT32:
6772    case OP_PARSE_SFIXED32:
6773    case OP_PARSE_SFIXED64:
6774    case OP_PARSE_SINT32:
6775    case OP_PARSE_SINT64:
6776    case OP_STARTSEQ:
6777    case OP_ENDSEQ:
6778    case OP_STARTSUBMSG:
6779    case OP_ENDSUBMSG:
6780    case OP_STARTSTR:
6781    case OP_STRING:
6782    case OP_ENDSTR:
6783    case OP_PUSHTAGDELIM:
6784      put32(c, op | va_arg(ap, upb_selector_t) << 8);
6785      break;
6786    case OP_SETBIGGROUPNUM:
6787      put32(c, op);
6788      put32(c, va_arg(ap, int));
6789      break;
6790    case OP_CALL: {
6791      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
6792      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
6793      break;
6794    }
6795    case OP_CHECKDELIM:
6796    case OP_BRANCH: {
6797      uint32_t instruction = op;
6798      int label = va_arg(ap, int);
6799      setofs(&instruction, labelref(c, label));
6800      put32(c, instruction);
6801      break;
6802    }
6803    case OP_TAG1:
6804    case OP_TAG2: {
6805      int label = va_arg(ap, int);
6806      uint64_t tag = va_arg(ap, uint64_t);
6807      uint32_t instruction = op | (tag << 16);
6808      assert(tag <= 0xffff);
6809      setofs(&instruction, labelref(c, label));
6810      put32(c, instruction);
6811      break;
6812    }
6813    case OP_TAGN: {
6814      int label = va_arg(ap, int);
6815      uint64_t tag = va_arg(ap, uint64_t);
6816      uint32_t instruction = op | (upb_value_size(tag) << 16);
6817      setofs(&instruction, labelref(c, label));
6818      put32(c, instruction);
6819      put32(c, tag);
6820      put32(c, tag >> 32);
6821      break;
6822    }
6823  }
6824
6825  va_end(ap);
6826}
6827
6828#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
6829
/* Returns the name of opcode "op" as a string literal of the form "OP_<NAME>",
 * or "<unknown op>" if "op" matches none of the cases listed below. */
const char *upb_pbdecoder_getopname(unsigned int op) {
/* Helper macros: OP(x) expands to a case label for OP_x that returns the
 * stringified identifier; T(x) is shorthand for OP(PARSE_x).  Only OP and T
 * are #undef'd at the end of the function; QUOTE, EXPAND_AND_QUOTE and OPNAME
 * remain defined afterwards. */
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
  }
  /* Reached only when no case above matched. */
  return "<unknown op>";
#undef OP
#undef T
}
6850
6851#endif
6852
6853#ifdef UPB_DUMP_BYTECODE
6854
6855static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6856
6857  uint32_t *begin = p;
6858
6859  while (p < end) {
6860    fprintf(f, "%p  %8tx", p, p - begin);
6861    uint32_t instr = *p++;
6862    uint8_t op = getop(instr);
6863    fprintf(f, " %s", upb_pbdecoder_getopname(op));
6864    switch ((opcode)op) {
6865      case OP_SETDISPATCH: {
6866        const upb_inttable *dispatch;
6867        memcpy(&dispatch, p, sizeof(void*));
6868        p += ptr_words;
6869        const upb_pbdecodermethod *method =
6870            (void *)((char *)dispatch -
6871                     offsetof(upb_pbdecodermethod, dispatch));
6872        fprintf(f, " %s", upb_msgdef_fullname(
6873                              upb_handlers_msgdef(method->dest_handlers_)));
6874        break;
6875      }
6876      case OP_DISPATCH:
6877      case OP_STARTMSG:
6878      case OP_ENDMSG:
6879      case OP_PUSHLENDELIM:
6880      case OP_POP:
6881      case OP_SETDELIM:
6882      case OP_HALT:
6883      case OP_RET:
6884        break;
6885      case OP_PARSE_DOUBLE:
6886      case OP_PARSE_FLOAT:
6887      case OP_PARSE_INT64:
6888      case OP_PARSE_UINT64:
6889      case OP_PARSE_INT32:
6890      case OP_PARSE_FIXED64:
6891      case OP_PARSE_FIXED32:
6892      case OP_PARSE_BOOL:
6893      case OP_PARSE_UINT32:
6894      case OP_PARSE_SFIXED32:
6895      case OP_PARSE_SFIXED64:
6896      case OP_PARSE_SINT32:
6897      case OP_PARSE_SINT64:
6898      case OP_STARTSEQ:
6899      case OP_ENDSEQ:
6900      case OP_STARTSUBMSG:
6901      case OP_ENDSUBMSG:
6902      case OP_STARTSTR:
6903      case OP_STRING:
6904      case OP_ENDSTR:
6905      case OP_PUSHTAGDELIM:
6906        fprintf(f, " %d", instr >> 8);
6907        break;
6908      case OP_SETBIGGROUPNUM:
6909        fprintf(f, " %d", *p++);
6910        break;
6911      case OP_CHECKDELIM:
6912      case OP_CALL:
6913      case OP_BRANCH:
6914        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6915        break;
6916      case OP_TAG1:
6917      case OP_TAG2: {
6918        fprintf(f, " tag:0x%x", instr >> 16);
6919        if (getofs(instr)) {
6920          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6921        }
6922        break;
6923      }
6924      case OP_TAGN: {
6925        uint64_t tag = *p++;
6926        tag |= (uint64_t)*p++ << 32;
6927        fprintf(f, " tag:0x%llx", (long long)tag);
6928        fprintf(f, " n:%d", instr >> 16);
6929        if (getofs(instr)) {
6930          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6931        }
6932        break;
6933      }
6934    }
6935    fputs("\n", f);
6936  }
6937}
6938
6939#endif
6940
6941static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
6942  uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
6943  uint64_t encoded_tag = upb_vencode32(tag);
6944  /* No tag should be greater than 5 bytes. */
6945  assert(encoded_tag <= 0xffffffffff);
6946  return encoded_tag;
6947}
6948
6949static void putchecktag(compiler *c, const upb_fielddef *f,
6950                        int wire_type, int dest) {
6951  uint64_t tag = get_encoded_tag(f, wire_type);
6952  switch (upb_value_size(tag)) {
6953    case 1:
6954      putop(c, OP_TAG1, dest, tag);
6955      break;
6956    case 2:
6957      putop(c, OP_TAG2, dest, tag);
6958      break;
6959    default:
6960      putop(c, OP_TAGN, dest, tag);
6961      break;
6962  }
6963}
6964
6965static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6966  upb_selector_t selector;
6967  bool ok = upb_handlers_getselector(f, type, &selector);
6968  UPB_ASSERT_VAR(ok, ok);
6969  return selector;
6970}
6971
6972/* Takes an existing, primary dispatch table entry and repacks it with a
6973 * different alternate wire type.  Called when we are inserting a secondary
6974 * dispatch table entry for an alternate wire type. */
6975static uint64_t repack(uint64_t dispatch, int new_wt2) {
6976  uint64_t ofs;
6977  uint8_t wt1;
6978  uint8_t old_wt2;
6979  upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6980  assert(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
6981  return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6982}
6983
6984/* Marks the current bytecode position as the dispatch target for this message,
6985 * field, and wire type. */
6986static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6987                           const upb_fielddef *f, int wire_type) {
6988  /* Offset is relative to msg base. */
6989  uint64_t ofs = pcofs(c) - method->code_base.ofs;
6990  uint32_t fn = upb_fielddef_number(f);
6991  upb_inttable *d = &method->dispatch;
6992  upb_value v;
6993  if (upb_inttable_remove(d, fn, &v)) {
6994    /* TODO: prioritize based on packed setting in .proto file. */
6995    uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6996    upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6997    upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6998  } else {
6999    uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7000    upb_inttable_insert(d, fn, upb_value_uint64(val));
7001  }
7002}
7003
7004static void putpush(compiler *c, const upb_fielddef *f) {
7005  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
7006    putop(c, OP_PUSHLENDELIM);
7007  } else {
7008    uint32_t fn = upb_fielddef_number(f);
7009    if (fn >= 1 << 24) {
7010      putop(c, OP_PUSHTAGDELIM, 0);
7011      putop(c, OP_SETBIGGROUPNUM, fn);
7012    } else {
7013      putop(c, OP_PUSHTAGDELIM, fn);
7014    }
7015  }
7016}
7017
7018static upb_pbdecodermethod *find_submethod(const compiler *c,
7019                                           const upb_pbdecodermethod *method,
7020                                           const upb_fielddef *f) {
7021  const upb_handlers *sub =
7022      upb_handlers_getsubhandlers(method->dest_handlers_, f);
7023  upb_value v;
7024  return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7025             ? upb_value_getptr(v)
7026             : NULL;
7027}
7028
7029static void putsel(compiler *c, opcode op, upb_selector_t sel,
7030                   const upb_handlers *h) {
7031  if (upb_handlers_gethandler(h, sel)) {
7032    putop(c, op, sel);
7033  }
7034}
7035
7036/* Puts an opcode to call a callback, but only if a callback actually exists for
7037 * this field and handler type. */
7038static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7039                     const upb_fielddef *f, upb_handlertype_t type) {
7040  putsel(c, op, getsel(f, type), h);
7041}
7042
7043static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7044  if (!upb_fielddef_lazy(f))
7045    return false;
7046
7047  return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7048         upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7049         upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7050}
7051
7052
7053/* bytecode compiler code generation ******************************************/
7054
/* Symbolic names for our local labels.  These are the label numbers passed to
 * label() and, via putop(), to labelref(); a negated value requests a backward
 * reference.  Each must stay below MAXLABEL. */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
7060
7061/* Generates bytecode to parse a single non-lazy message field. */
7062static void generate_msgfield(compiler *c, const upb_fielddef *f,
7063                              upb_pbdecodermethod *method) {
7064  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7065  const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
7066  int wire_type;
7067
7068  if (!sub_m) {
7069    /* Don't emit any code for this field at all; it will be parsed as an
7070     * unknown field. */
7071    return;
7072  }
7073
7074  label(c, LABEL_FIELD);
7075
7076  wire_type =
7077      (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
7078          ? UPB_WIRE_TYPE_DELIMITED
7079          : UPB_WIRE_TYPE_START_GROUP;
7080
7081  if (upb_fielddef_isseq(f)) {
7082    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7083    putchecktag(c, f, wire_type, LABEL_DISPATCH);
7084   dispatchtarget(c, method, f, wire_type);
7085    putop(c, OP_PUSHTAGDELIM, 0);
7086    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7087   label(c, LABEL_LOOPSTART);
7088    putpush(c, f);
7089    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7090    putop(c, OP_CALL, sub_m);
7091    putop(c, OP_POP);
7092    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7093    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7094      putop(c, OP_SETDELIM);
7095    }
7096    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7097    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7098    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7099   label(c, LABEL_LOOPBREAK);
7100    putop(c, OP_POP);
7101    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7102  } else {
7103    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7104    putchecktag(c, f, wire_type, LABEL_DISPATCH);
7105   dispatchtarget(c, method, f, wire_type);
7106    putpush(c, f);
7107    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7108    putop(c, OP_CALL, sub_m);
7109    putop(c, OP_POP);
7110    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7111    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7112      putop(c, OP_SETDELIM);
7113    }
7114  }
7115}
7116
7117/* Generates bytecode to parse a single string or lazy submessage field. */
7118static void generate_delimfield(compiler *c, const upb_fielddef *f,
7119                                upb_pbdecodermethod *method) {
7120  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7121
7122  label(c, LABEL_FIELD);
7123  if (upb_fielddef_isseq(f)) {
7124    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7125    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7126   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7127    putop(c, OP_PUSHTAGDELIM, 0);
7128    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7129   label(c, LABEL_LOOPSTART);
7130    putop(c, OP_PUSHLENDELIM);
7131    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7132    /* Need to emit even if no handler to skip past the string. */
7133    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7134    putop(c, OP_POP);
7135    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7136    putop(c, OP_SETDELIM);
7137    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7138    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
7139    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7140   label(c, LABEL_LOOPBREAK);
7141    putop(c, OP_POP);
7142    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7143  } else {
7144    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7145    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7146   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7147    putop(c, OP_PUSHLENDELIM);
7148    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7149    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7150    putop(c, OP_POP);
7151    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7152    putop(c, OP_SETDELIM);
7153  }
7154}
7155
7156/* Generates bytecode to parse a single primitive field. */
7157static void generate_primitivefield(compiler *c, const upb_fielddef *f,
7158                                    upb_pbdecodermethod *method) {
7159  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7160  upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
7161  opcode parse_type;
7162  upb_selector_t sel;
7163  int wire_type;
7164
7165  label(c, LABEL_FIELD);
7166
7167  /* From a decoding perspective, ENUM is the same as INT32. */
7168  if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
7169    descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
7170
7171  parse_type = (opcode)descriptor_type;
7172
7173  /* TODO(haberman): generate packed or non-packed first depending on "packed"
7174   * setting in the fielddef.  This will favor (in speed) whichever was
7175   * specified. */
7176
7177  assert((int)parse_type >= 0 && parse_type <= OP_MAX);
7178  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
7179  wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
7180  if (upb_fielddef_isseq(f)) {
7181    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7182    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7183   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7184    putop(c, OP_PUSHLENDELIM);
7185    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
7186   label(c, LABEL_LOOPSTART);
7187    putop(c, parse_type, sel);
7188    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7189    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7190   dispatchtarget(c, method, f, wire_type);
7191    putop(c, OP_PUSHTAGDELIM, 0);
7192    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
7193   label(c, LABEL_LOOPSTART);
7194    putop(c, parse_type, sel);
7195    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7196    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7197    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7198   label(c, LABEL_LOOPBREAK);
7199    putop(c, OP_POP);  /* Packed and non-packed join. */
7200    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7201    putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
7202  } else {
7203    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7204    putchecktag(c, f, wire_type, LABEL_DISPATCH);
7205   dispatchtarget(c, method, f, wire_type);
7206    putop(c, parse_type, sel);
7207  }
7208}
7209
7210/* Adds bytecode for parsing the given message to the given decoderplan,
7211 * while adding all dispatch targets to this message's dispatch table. */
7212static void compile_method(compiler *c, upb_pbdecodermethod *method) {
7213  const upb_handlers *h;
7214  const upb_msgdef *md;
7215  uint32_t* start_pc;
7216  upb_msg_field_iter i;
7217  upb_value val;
7218
7219  assert(method);
7220
7221  /* Clear all entries in the dispatch table. */
7222  upb_inttable_uninit(&method->dispatch);
7223  upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
7224
7225  h = upb_pbdecodermethod_desthandlers(method);
7226  md = upb_handlers_msgdef(h);
7227
7228 method->code_base.ofs = pcofs(c);
7229  putop(c, OP_SETDISPATCH, &method->dispatch);
7230  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
7231 label(c, LABEL_FIELD);
7232  start_pc = c->pc;
7233  for(upb_msg_field_begin(&i, md);
7234      !upb_msg_field_done(&i);
7235      upb_msg_field_next(&i)) {
7236    const upb_fielddef *f = upb_msg_iter_field(&i);
7237    upb_fieldtype_t type = upb_fielddef_type(f);
7238
7239    if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
7240      generate_msgfield(c, f, method);
7241    } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
7242               type == UPB_TYPE_MESSAGE) {
7243      generate_delimfield(c, f, method);
7244    } else {
7245      generate_primitivefield(c, f, method);
7246    }
7247  }
7248
7249  /* If there were no fields, or if no handlers were defined, we need to
7250   * generate a non-empty loop body so that we can at least dispatch for unknown
7251   * fields and check for the end of the message. */
7252  if (c->pc == start_pc) {
7253    /* Check for end-of-message. */
7254    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7255    /* Unconditionally dispatch. */
7256    putop(c, OP_DISPATCH, 0);
7257  }
7258
7259  /* For now we just loop back to the last field of the message (or if none,
7260   * the DISPATCH opcode for the message). */
7261  putop(c, OP_BRANCH, -LABEL_FIELD);
7262
7263  /* Insert both a label and a dispatch table entry for this end-of-msg. */
7264 label(c, LABEL_ENDMSG);
7265  val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
7266  upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
7267
7268  putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
7269  putop(c, OP_RET);
7270
7271  upb_inttable_compact(&method->dispatch);
7272}
7273
7274/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7275 * Returns the method for these handlers.
7276 *
7277 * Generates a new method for every destination handlers reachable from "h". */
7278static void find_methods(compiler *c, const upb_handlers *h) {
7279  upb_value v;
7280  upb_msg_field_iter i;
7281  const upb_msgdef *md;
7282
7283  if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7284    return;
7285  newmethod(h, c->group);
7286
7287  /* Find submethods. */
7288  md = upb_handlers_msgdef(h);
7289  for(upb_msg_field_begin(&i, md);
7290      !upb_msg_field_done(&i);
7291      upb_msg_field_next(&i)) {
7292    const upb_fielddef *f = upb_msg_iter_field(&i);
7293    const upb_handlers *sub_h;
7294    if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7295        (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
7296      /* We only generate a decoder method for submessages with handlers.
7297       * Others will be parsed as unknown fields. */
7298      find_methods(c, sub_h);
7299    }
7300  }
7301}
7302
7303/* (Re-)compile bytecode for all messages in "msgs."
7304 * Overwrites any existing bytecode in "c". */
7305static void compile_methods(compiler *c) {
7306  upb_inttable_iter i;
7307
7308  /* Start over at the beginning of the bytecode. */
7309  c->pc = c->group->bytecode;
7310
7311  upb_inttable_begin(&i, &c->group->methods);
7312  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7313    upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7314    compile_method(c, method);
7315  }
7316}
7317
7318static void set_bytecode_handlers(mgroup *g) {
7319  upb_inttable_iter i;
7320  upb_inttable_begin(&i, &g->methods);
7321  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7322    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
7323    upb_byteshandler *h = &m->input_handler_;
7324
7325    m->code_base.ptr = g->bytecode + m->code_base.ofs;
7326
7327    upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
7328    upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
7329    upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
7330  }
7331}
7332
7333
7334/* JIT setup. *****************************************************************/
7335
7336#ifdef UPB_USE_JIT_X64
7337
7338static void sethandlers(mgroup *g, bool allowjit) {
7339  g->jit_code = NULL;
7340  if (allowjit) {
7341    /* Compile byte-code into machine code, create handlers. */
7342    upb_pbdecoder_jit(g);
7343  } else {
7344    set_bytecode_handlers(g);
7345  }
7346}
7347
7348#else  /* UPB_USE_JIT_X64 */
7349
7350static void sethandlers(mgroup *g, bool allowjit) {
7351  /* No JIT compiled in; use bytecode handlers unconditionally. */
7352  UPB_UNUSED(allowjit);
7353  set_bytecode_handlers(g);
7354}
7355
7356#endif  /* UPB_USE_JIT_X64 */
7357
7358
7359/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
7360 * handlers and other mgroups (but verify we have a transitive closure). */
7361const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
7362                         const void *owner) {
7363  mgroup *g;
7364  compiler *c;
7365
7366  UPB_UNUSED(allowjit);
7367  assert(upb_handlers_isfrozen(dest));
7368
7369  g = newgroup(owner);
7370  c = newcompiler(g, lazy);
7371  find_methods(c, dest);
7372
7373  /* We compile in two passes:
7374   * 1. all messages are assigned relative offsets from the beginning of the
7375   *    bytecode (saved in method->code_base).
7376   * 2. forwards OP_CALL instructions can be correctly linked since message
7377   *    offsets have been previously assigned.
7378   *
7379   * Could avoid the second pass by linking OP_CALL instructions somehow. */
7380  compile_methods(c);
7381  compile_methods(c);
7382  g->bytecode_end = c->pc;
7383  freecompiler(c);
7384
7385#ifdef UPB_DUMP_BYTECODE
7386  {
7387    FILE *f = fopen("/tmp/upb-bytecode", "wb");
7388    assert(f);
7389    dumpbc(g->bytecode, g->bytecode_end, stderr);
7390    dumpbc(g->bytecode, g->bytecode_end, f);
7391    fclose(f);
7392  }
7393#endif
7394
7395  sethandlers(g, allowjit);
7396  return g;
7397}
7398
7399
7400/* upb_pbcodecache ************************************************************/
7401
7402void upb_pbcodecache_init(upb_pbcodecache *c) {
7403  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
7404  c->allow_jit_ = true;
7405}
7406
7407void upb_pbcodecache_uninit(upb_pbcodecache *c) {
7408  upb_inttable_iter i;
7409  upb_inttable_begin(&i, &c->groups);
7410  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7411    const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
7412    mgroup_unref(group, c);
7413  }
7414  upb_inttable_uninit(&c->groups);
7415}
7416
/* Returns the cache's current allow-JIT flag (true after
 * upb_pbcodecache_init() unless changed by upb_pbcodecache_setallowjit()). */
bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
  return c->allow_jit_;
}
7420
7421bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
7422  if (upb_inttable_count(&c->groups) > 0)
7423    return false;
7424  c->allow_jit_ = allow;
7425  return true;
7426}
7427
7428const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
7429    upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
7430  upb_value v;
7431  bool ok;
7432
7433  /* Right now we build a new DecoderMethod every time.
7434   * TODO(haberman): properly cache methods by their true key. */
7435  const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
7436  upb_inttable_push(&c->groups, upb_value_constptr(g));
7437
7438  ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
7439  UPB_ASSERT_VAR(ok, ok);
7440  return upb_value_getptr(v);
7441}
7442
7443
7444/* upb_pbdecodermethodopts ****************************************************/
7445
7446void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
7447                                  const upb_handlers *h) {
7448  opts->handlers = h;
7449  opts->lazy = false;
7450}
7451
/* Sets the "lazy" option in "opts"; the value is later forwarded to
 * mgroup_new() by upb_pbcodecache_getdecodermethod(). */
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
  opts->lazy = lazy;
}
7455/*
7456** upb::Decoder (Bytecode Decoder VM)
7457**
7458** Bytecode must previously have been generated using the bytecode compiler in
7459** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
7460** parse the input.
7461**
7462** Decoding is fully resumable; we just keep a pointer to the current bytecode
7463** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
7466** sometimes requires keeping a few trailing bytes from the last buffer around
7467** in the "residual" buffer.
7468*/
7469
7470#include <inttypes.h>
7471#include <stddef.h>
7472
7473#ifdef UPB_DUMP_BYTECODE
7474#include <stdio.h>
7475#endif
7476
/* Evaluates "x"; when it is false, suspends the decoder and returns the result
 * of upb_pbdecoder_suspend(d) from the enclosing function.  Requires a
 * variable named "d" in scope.
 * NOTE(review): this expands to a bare "if" statement plus ';', so it must not
 * be used as the unbraced body of an if/else. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
7478
/* Error messages that are shared between the bytecode and JIT decoders.
 * These have external linkage (no "static"); in this file they are reported
 * by decoder_push(). */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file (used by both varint decoding
 * paths). */
static const char *kUnterminatedVarint = "Unterminated varint.";
7486
7487/* upb_pbdecoder **************************************************************/
7488
/* A one-instruction program consisting of OP_HALT.  Its address is installed
 * as the bottom entry of the decoder call stack (see upb_pbdecoder_startbc),
 * so the outermost OP_RET lands on an instruction that stops the VM. */
static opcode halt = OP_HALT;
7490
7491/* Whether an op consumes any of the input buffer. */
7492static bool consumes_input(opcode op) {
7493  switch (op) {
7494    case OP_SETDISPATCH:
7495    case OP_STARTMSG:
7496    case OP_ENDMSG:
7497    case OP_STARTSEQ:
7498    case OP_ENDSEQ:
7499    case OP_STARTSUBMSG:
7500    case OP_ENDSUBMSG:
7501    case OP_STARTSTR:
7502    case OP_ENDSTR:
7503    case OP_PUSHTAGDELIM:
7504    case OP_POP:
7505    case OP_SETDELIM:
7506    case OP_SETBIGGROUPNUM:
7507    case OP_CHECKDELIM:
7508    case OP_CALL:
7509    case OP_RET:
7510    case OP_BRANCH:
7511      return false;
7512    default:
7513      return true;
7514  }
7515}
7516
7517static size_t stacksize(upb_pbdecoder *d, size_t entries) {
7518  UPB_UNUSED(d);
7519  return entries * sizeof(upb_pbdecoder_frame);
7520}
7521
7522static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
7523  UPB_UNUSED(d);
7524
7525#ifdef UPB_USE_JIT_X64
7526  if (d->method_->is_native_) {
7527    /* Each native stack frame needs two pointers, plus we need a few frames for
7528     * the enter/exit trampolines. */
7529    size_t ret = entries * sizeof(void*) * 2;
7530    ret += sizeof(void*) * 10;
7531    return ret;
7532  }
7533#endif
7534
7535  return entries * sizeof(uint32_t*);
7536}
7537
7538
7539static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7540
7541/* It's unfortunate that we have to micro-manage the compiler with
7542 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7543 * specific to one hardware configuration.  But empirically on a Core i7,
7544 * performance increases 30-50% with these annotations.  Every instance where
7545 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7546 * benchmarks. */
7547
7548static void seterr(upb_pbdecoder *d, const char *msg) {
7549  upb_status status = UPB_STATUS_INIT;
7550  upb_status_seterrmsg(&status, msg);
7551  upb_env_reporterror(d->env, &status);
7552}
7553
/* Externally visible wrapper around the file-local seterr(): reports "msg" as
 * an error on decoder "d". */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  seterr(d, msg);
}
7557
7558
7559/* Buffering ******************************************************************/
7560
7561/* We operate on one buffer at a time, which is either the user's buffer passed
7562 * to our "decode" callback or some residual bytes from the previous buffer. */
7563
7564/* How many bytes can be safely read from d->ptr without reading past end-of-buf
7565 * or past the current delimited end. */
7566static size_t curbufleft(const upb_pbdecoder *d) {
7567  assert(d->data_end >= d->ptr);
7568  return d->data_end - d->ptr;
7569}
7570
7571/* How many bytes are available before end-of-buffer. */
7572static size_t bufleft(const upb_pbdecoder *d) {
7573  return d->end - d->ptr;
7574}
7575
7576/* Overall stream offset of d->ptr. */
7577uint64_t offset(const upb_pbdecoder *d) {
7578  return d->bufstart_ofs + (d->ptr - d->buf);
7579}
7580
7581/* How many bytes are available before the end of this delimited region. */
7582size_t delim_remaining(const upb_pbdecoder *d) {
7583  return d->top->end_ofs - offset(d);
7584}
7585
/* Advances d->ptr by "len" bytes.  The caller must ensure that at least "len"
 * bytes remain before d->data_end; this is only checked by assert(). */
static void advance(upb_pbdecoder *d, size_t len) {
  assert(curbufleft(d) >= len);
  d->ptr += len;
}
7591
/* Returns true when "p" lies in the closed range [buf, end].  Note that the
 * upper bound is inclusive: a pointer equal to "end" counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  const bool before_start = p < buf;
  const bool past_end = p > end;
  return !(before_start || past_end);
}
7595
/* Returns true when "p" points into the decoder's residual buffer, i.e. within
 * [d->residual, d->residual_end] (both ends inclusive, see in_buf()). */
static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
  return in_buf(p, d->residual, d->residual_end);
}
7599
7600/* Calculates the delim_end value, which is affected by both the current buffer
7601 * and the parsing stack, so must be called whenever either is updated. */
7602static void set_delim_end(upb_pbdecoder *d) {
7603  size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7604  if (delim_ofs <= (size_t)(d->end - d->buf)) {
7605    d->delim_end = d->buf + delim_ofs;
7606    d->data_end = d->delim_end;
7607  } else {
7608    d->data_end = d->end;
7609    d->delim_end = NULL;
7610  }
7611}
7612
7613static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
7614  d->ptr = buf;
7615  d->buf = buf;
7616  d->end = end;
7617  set_delim_end(d);
7618}
7619
7620static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
7621  assert(curbufleft(d) == 0);
7622  d->bufstart_ofs += (d->end - d->buf);
7623  switchtobuf(d, buf, buf + len);
7624}
7625
/* Records the current read position as the decoder's checkpoint.  The suspend
 * functions in this file use d->checkpoint to decide how many user bytes to
 * report as consumed. */
static void checkpoint(upb_pbdecoder *d) {
  /* The assertion here is in the interests of efficiency, not correctness.
   * We are trying to ensure that we don't checkpoint() more often than
   * necessary. */
  assert(d->checkpoint != d->ptr);
  d->checkpoint = d->ptr;
}
7633
/* Skips "bytes" bytes in the stream, which may be more than available.  If we
 * skip more bytes than are available, we return a long read count to the caller
 * indicating how many bytes can be skipped over before passing actual data
 * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
 * won't actually be read.
 *
 * Outcomes:
 *   - "bytes" exceeds what remains of the enclosing delimited region: an error
 *     is reported and the result of upb_pbdecoder_suspend() is returned.
 *   - the current buffer holds strictly more than "bytes": d->ptr is advanced
 *     and DECODE_OK is returned.
 *   - otherwise: the shortfall is stored in d->skip, the decoder switches to
 *     an emptied residual buffer, and d->size_param + d->skip is returned.
 */
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
  assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
  assert(d->skip == 0);
  if (bytes > delim_remaining(d)) {
    seterr(d, "Skipped value extended beyond enclosing submessage.");
    return upb_pbdecoder_suspend(d);
  } else if (bufleft(d) > bytes) {
    /* Skipped data is all in current buffer, and more is still available. */
    advance(d, bytes);
    d->skip = 0;
    return DECODE_OK;
  } else {
    /* Skipped data extends beyond currently available buffers. */
    d->pc = d->last;  /* Restart at the current instruction on resume. */
    d->skip = bytes - curbufleft(d);
    /* Account for the whole current buffer as consumed, then continue from an
     * empty residual buffer. */
    d->bufstart_ofs += (d->end - d->buf);
    d->residual_end = d->residual;
    switchtobuf(d, d->residual, d->residual_end);
    return d->size_param + d->skip;
  }
}
7661
7662
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 *   d      - decoder state.
 *   p      - unused here.
 *   buf    - start of the caller's buffer; may be NULL only when a pending
 *            skip covers all of it.
 *   size   - number of bytes at "buf".
 *   handle - buffer handle; stored in d->handle.
 *
 * Returns DECODE_OK when decoding can proceed.  Other return values come from
 * the CHECK_RETURN() early exits (macro defined elsewhere) or from
 * upb_pbdecoder_suspend() on the NULL-buffer error path. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* Remember the caller's buffer; the suspend/peek helpers refer back to it. */
  d->buf_param = buf;
  d->size_param = size;
  d->handle = handle;

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    assert(d->ptr == d->residual);
  } else {
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  if (d->skip) {
    /* Finish a skip left over from the previous call.  skip() asserts that
     * d->skip is zero on entry, hence the copy-and-clear. */
    size_t skip_bytes = d->skip;
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    d->checkpoint = d->ptr;
  }

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return upb_pbdecoder_suspend(d);
  }

  if (d->top->groupnum < 0) {
    /* A negative groupnum marks an unknown group that was being skipped when
     * we suspended; keep skipping (fieldnum -1 means "read the next tag"). */
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    d->checkpoint = d->ptr;
  }

  return DECODE_OK;
}
7702
/* Suspends the decoder at the last checkpoint, without saving any residual
 * bytes.  If there are any unconsumed bytes, returns a short byte count.
 *
 * Returns the number of bytes of the user's buffer that were consumed: 0 when
 * the checkpoint is at the start of the residual buffer, otherwise the
 * distance from the start of the user's buffer to the checkpoint. */
size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
  d->pc = d->last;  /* Restart at the current instruction on resume. */
  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; no user bytes were consumed. */
    d->ptr = d->residual;
    return 0;
  } else {
    size_t consumed;
    assert(!in_residual_buf(d, d->checkpoint));
    assert(d->buf == d->buf_param);

    consumed = d->checkpoint - d->buf;
    d->bufstart_ofs += consumed;
    /* Empty the residual buffer and make it current, so the next resume
     * switches to the buffer it is given. */
    d->residual_end = d->residual;
    switchtobuf(d, d->residual, d->residual_end);
    return consumed;
  }
}
7723
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
 * bytes in our residual buffer.  This is necessary if we need more user
 * bytes to form a complete value, which might not be contiguous in the
 * user's buffers.  Always consumes all user bytes.
 *
 * Returns d->size_param (the full size of the user's buffer).  The capacity of
 * d->residual is only checked by assert(). */
static size_t suspend_save(upb_pbdecoder *d) {
  /* We hit end-of-buffer before we could parse a full value.
   * Save any unconsumed bytes (if any) to the residual buffer. */
  d->pc = d->last;

  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
    assert((d->residual_end - d->residual) + d->size_param <=
           sizeof(d->residual));
    if (!in_residual_buf(d, d->ptr)) {
      /* We had already moved on to the user's buffer, which advanced
       * bufstart_ofs past the residual bytes; undo that, since the residual
       * buffer becomes the current buffer again below. */
      d->bufstart_ofs -= (d->residual_end - d->residual);
    }
    memcpy(d->residual_end, d->buf_param, d->size_param);
    d->residual_end += d->size_param;
  } else {
    /* Checkpoint was in user buf; old residual bytes not needed. */
    size_t save;
    assert(!in_residual_buf(d, d->checkpoint));

    /* Rewind to the checkpoint and keep everything from there up to
     * d->data_end. */
    d->ptr = d->checkpoint;
    save = curbufleft(d);
    assert(save <= sizeof(d->residual));
    memcpy(d->residual, d->ptr, save);
    d->residual_end = d->residual + save;
    d->bufstart_ofs = offset(d);
  }

  switchtobuf(d, d->residual, d->residual_end);
  return d->size_param;
}
7758
/* Copies the next "bytes" bytes into "buf" and advances the stream.
 * Requires that this many bytes are available in the current buffer (checked
 * only by assert()). */
UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
                                         size_t bytes) {
  assert(bytes <= curbufleft(d));
  memcpy(buf, d->ptr, bytes);
  advance(d, bytes);
}
7767
/* Slow path for getting the next "bytes" bytes, regardless of whether they are
 * available in the current buffer or not.  Returns a status code as described
 * in decoder.int.h.
 *
 * Expects to be called only when the current buffer holds fewer than "bytes"
 * bytes (asserted after the first copy).  Returns DECODE_OK once "buf" has
 * been filled completely; otherwise returns the result of a suspend, in which
 * case "buf" may have been partially written. */
UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
                                          size_t bytes) {
  /* Take whatever the current buffer still has. */
  const size_t avail = curbufleft(d);
  consumebytes(d, buf, avail);
  bytes -= avail;
  assert(bytes > 0);
  if (in_residual_buf(d, d->ptr)) {
    /* Residual buffer exhausted; continue with the user's buffer. */
    advancetobuf(d, d->buf_param, d->size_param);
  }
  if (curbufleft(d) >= bytes) {
    consumebytes(d, (char *)buf + avail, bytes);
    return DECODE_OK;
  } else if (d->data_end == d->delim_end) {
    /* The readable data stops at the delimited end, so more input cannot
     * complete this value. */
    seterr(d, "Submessage ended in the middle of a value or group");
    return upb_pbdecoder_suspend(d);
  } else {
    /* Ran out of buffer; stash the partial value and wait for more input. */
    return suspend_save(d);
  }
}
7790
7791/* Gets the next "bytes" bytes, regardless of whether they are available in the
7792 * current buffer or not.  Returns a status code as described in decoder.int.h.
7793 */
7794UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7795                                        size_t bytes) {
7796  if (curbufleft(d) >= bytes) {
7797    /* Buffer has enough data to satisfy. */
7798    consumebytes(d, buf, bytes);
7799    return DECODE_OK;
7800  } else {
7801    return getbytes_slow(d, buf, bytes);
7802  }
7803}
7804
7805UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7806                                          size_t bytes) {
7807  size_t ret = curbufleft(d);
7808  memcpy(buf, d->ptr, ret);
7809  if (in_residual_buf(d, d->ptr)) {
7810    size_t copy = UPB_MIN(bytes - ret, d->size_param);
7811    memcpy((char *)buf + ret, d->buf_param, copy);
7812    ret += copy;
7813  }
7814  return ret;
7815}
7816
7817UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7818                                        size_t bytes) {
7819  if (curbufleft(d) >= bytes) {
7820    memcpy(buf, d->ptr, bytes);
7821    return bytes;
7822  } else {
7823    return peekbytes_slow(d, buf, bytes);
7824  }
7825}
7826
7827
7828/* Decoding of wire types *****************************************************/
7829
/* Slow path for decoding a varint from the current buffer position.
 * Returns a status code as described in decoder.int.h.
 *
 * Reads one byte at a time through getbytes(), so it works across buffer
 * seams.  On DECODE_OK, *u64 holds the decoded value.  If getbytes() suspends
 * or fails, its non-negative return value is passed straight through and *u64
 * holds only a partial value. */
UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
                                                      uint64_t *u64) {
  /* Start with the continuation bit set so the loop runs at least once. */
  uint8_t byte = 0x80;
  int bitpos;
  *u64 = 0;
  /* At most 10 bytes (bitpos 0, 7, ..., 63), each contributing 7 bits. */
  for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
    int32_t ret = getbytes(d, &byte, 1);
    if (ret >= 0) return ret;
    *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
  }
  /* Ten bytes read and the last one still has its continuation bit set. */
  if(bitpos == 70 && (byte & 0x80)) {
    seterr(d, kUnterminatedVarint);
    return upb_pbdecoder_suspend(d);
  }
  return DECODE_OK;
}
7848
7849/* Decodes a varint from the current buffer position.
7850 * Returns a status code as described in decoder.int.h. */
7851UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7852  if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7853    *u64 = *d->ptr;
7854    advance(d, 1);
7855    return DECODE_OK;
7856  } else if (curbufleft(d) >= 10) {
7857    /* Fast case. */
7858    upb_decoderet r = upb_vdecode_fast(d->ptr);
7859    if (r.p == NULL) {
7860      seterr(d, kUnterminatedVarint);
7861      return upb_pbdecoder_suspend(d);
7862    }
7863    advance(d, r.p - d->ptr);
7864    *u64 = r.val;
7865    return DECODE_OK;
7866  } else {
7867    /* Slow case -- varint spans buffer seam. */
7868    return upb_pbdecoder_decode_varint_slow(d, u64);
7869  }
7870}
7871
7872/* Decodes a 32-bit varint from the current buffer position.
7873 * Returns a status code as described in decoder.int.h. */
7874UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7875  uint64_t u64;
7876  int32_t ret = decode_varint(d, &u64);
7877  if (ret >= 0) return ret;
7878  if (u64 > UINT32_MAX) {
7879    seterr(d, "Unterminated 32-bit varint");
7880    /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7881     * so we know this path will always be treated as error by our caller.
7882     * Right now the size_t -> int32_t can overflow and produce negative values.
7883     */
7884    *u32 = 0;
7885    return upb_pbdecoder_suspend(d);
7886  }
7887  *u32 = u64;
7888  return DECODE_OK;
7889}
7890
/* Decodes a fixed32 from the current buffer position.
 * Returns a status code as described in decoder.int.h.
 * The 4 bytes are copied into *u32 exactly as they appear in the stream, with
 * no byte-order conversion.
 * TODO: proper byte swapping for big-endian machines. */
UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
  return getbytes(d, u32, 4);
}
7897
/* Decodes a fixed64 from the current buffer position.
 * Returns a status code as described in decoder.int.h.
 * The 8 bytes are copied into *u64 exactly as they appear in the stream, with
 * no byte-order conversion.
 * TODO: proper byte swapping for big-endian machines. */
UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
  return getbytes(d, u64, 8);
}
7904
/* Non-static versions of the above functions.
 * These are called by the JIT for fallback paths. */

/* Externally visible wrapper around decode_fixed32(); same return value. */
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
  return decode_fixed32(d, u32);
}
7910
/* Externally visible wrapper around decode_fixed64(); same return value. */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  return decode_fixed64(d, u64);
}
7914
/* Reinterprets the 8 bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}

/* Reinterprets the 4 bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
7917
7918/* Pushes a frame onto the decoder stack. */
7919static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7920  upb_pbdecoder_frame *fr = d->top;
7921
7922  if (end > fr->end_ofs) {
7923    seterr(d, kPbDecoderSubmessageTooLong);
7924    return false;
7925  } else if (fr == d->limit) {
7926    seterr(d, kPbDecoderStackOverflow);
7927    return false;
7928  }
7929
7930  fr++;
7931  fr->end_ofs = end;
7932  fr->dispatch = NULL;
7933  fr->groupnum = 0;
7934  d->top = fr;
7935  return true;
7936}
7937
7938static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
7939  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
7940   * field number) prior to hitting any enclosing submessage end, pushing our
7941   * existing delim end prevents us from continuing to parse values from a
7942   * corrupt proto that doesn't give us an END tag in time. */
7943  if (!decoder_push(d, d->top->end_ofs))
7944    return false;
7945  d->top->groupnum = arg;
7946  return true;
7947}
7948
/* Pops a frame from the decoder stack.  No underflow check is done here;
 * callers must know the stack is not empty. */
static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7951
/* Checks whether the upcoming bytes of the stream equal the encoded tag
 * "expected", handling the case where the tag straddles a buffer seam.
 *
 * Returns DECODE_OK (after consuming the tag) on a full match,
 * DECODE_MISMATCH when the available bytes differ from the tag, and the result
 * of suspend_save() when the bytes seen so far match but more input is needed.
 *
 * NOTE(review): upb_value_size() is defined elsewhere; presumably it yields
 * the number of significant bytes in "expected" -- confirm. */
UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
                                                 uint64_t expected) {
  uint64_t data = 0;
  size_t bytes = upb_value_size(expected);
  size_t read = peekbytes(d, &data, bytes);
  if (read == bytes && data == expected) {
    /* Advance past matched bytes. */
    int32_t ok = getbytes(d, &data, read);
    UPB_ASSERT_VAR(ok, ok < 0);
    return DECODE_OK;
  } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
    /* Partial match: the prefix agrees, but the tag is not complete yet. */
    return suspend_save(d);
  } else {
    return DECODE_MISMATCH;
  }
}
7968
/* Skips over an unknown field, including whole unknown groups.
 *
 *   fieldnum  - field number of a tag that has already been read, or a
 *               negative value to make this function read the next tag from
 *               the stream first.
 *   wire_type - wire type of the already-read tag (ignored when "fieldnum" is
 *               negative).
 *
 * Unknown groups are tracked by pushing a frame whose groupnum is the negated
 * field number; while the top frame's groupnum is negative the function keeps
 * looping over fields, checkpointing after each one.
 *
 * Returns DECODE_OK once a field has been skipped and the top frame is not an
 * unknown group, DECODE_ENDGROUP when an END_GROUP tag matches the top frame's
 * (non-negated) groupnum, or a suspend/error result otherwise. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* Caller already parsed the tag: jump into the loop body past tag decoding. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    /* TODO: deliver to unknown field callback. */
    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* Decode and discard the value. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Negated field number marks the new frame as an unknown group. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* End of the unknown group we were skipping. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* End of a known group; the caller handles it. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
8031
8032static void goto_endmsg(upb_pbdecoder *d) {
8033  upb_value v;
8034  bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
8035  UPB_ASSERT_VAR(found, found);
8036  d->pc = d->top->base + upb_value_getuint64(v);
8037}
8038
8039/* Parses a tag and jumps to the corresponding bytecode instruction for this
8040 * field.
8041 *
8042 * If the tag is unknown (or the wire type doesn't match), parses the field as
8043 * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
8044 * instruction for the end of message. */
8045static int32_t dispatch(upb_pbdecoder *d) {
8046  upb_inttable *dispatch = d->top->dispatch;
8047  uint32_t tag;
8048  uint8_t wire_type;
8049  uint32_t fieldnum;
8050  upb_value val;
8051  int32_t retval;
8052
8053  /* Decode tag. */
8054  CHECK_RETURN(decode_v32(d, &tag));
8055  wire_type = tag & 0x7;
8056  fieldnum = tag >> 3;
8057
8058  /* Lookup tag.  Because of packed/non-packed compatibility, we have to
8059   * check the wire type against two possibilities. */
8060  if (fieldnum != DISPATCH_ENDMSG &&
8061      upb_inttable_lookup32(dispatch, fieldnum, &val)) {
8062    uint64_t v = upb_value_getuint64(val);
8063    if (wire_type == (v & 0xff)) {
8064      d->pc = d->top->base + (v >> 16);
8065      return DECODE_OK;
8066    } else if (wire_type == ((v >> 8) & 0xff)) {
8067      bool found =
8068          upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
8069      UPB_ASSERT_VAR(found, found);
8070      d->pc = d->top->base + upb_value_getuint64(val);
8071      return DECODE_OK;
8072    }
8073  }
8074
8075  /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
8076   * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
8077   * we need to back up to, so that when we're done skipping unknown data we
8078   * can re-check the delimited end. */
8079  d->last--;  /* Necessary if we get suspended */
8080  d->pc = d->last;
8081  assert(getop(*d->last) == OP_CHECKDELIM);
8082
8083  /* Unknown field or ENDGROUP. */
8084  retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
8085
8086  CHECK_RETURN(retval);
8087
8088  if (retval == DECODE_ENDGROUP) {
8089    goto_endmsg(d);
8090    return DECODE_OK;
8091  }
8092
8093  return DECODE_OK;
8094}
8095
/* Returns the frame directly below the top of the decoder stack.
 * Callers know that the stack is more than one deep because the opcodes that
 * call this only occur after PUSH operations.  This is only verified by
 * assert(). */
upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
  assert(d->top != d->stack);
  return d->top - 1;
}
8102
8103
8104/* The main decoding loop *****************************************************/
8105
/* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
 * switch() statement.
 *
 *   d      - decoder state; d->pc is the next instruction to execute.
 *   group  - method group; only referenced by the UPB_DUMP_BYTECODE trace.
 *   handle - buffer handle passed through to upb_sink_putstring().
 *
 * The loop runs until an instruction returns: OP_HALT returns d->size_param,
 * and every other exit goes through a suspend, skip() or a CHECK_RETURN() /
 * CHECK_SUSPEND() early return.  Before each instruction d->last is set to the
 * instruction's address, which is what the suspend helpers restore d->pc
 * from. */
size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
                      const upb_bufhandle* handle) {

/* VMCASE wraps one opcode handler; after the handler runs, a checkpoint is
 * taken if the opcode is one that consumes input.  A "break" inside "code"
 * therefore skips the checkpoint. */
#define VMCASE(op, code) \
  case op: { code; if (consumes_input(op)) checkpoint(d); break; }
/* PRIMITIVE_OP generates the handler for a scalar field: decode with
 * decode_<wt>() into a "ctype" temporary, convert with "convfunc", and deliver
 * through upb_sink_put<name>(). */
#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
  VMCASE(OP_PARSE_ ## type, { \
    ctype val; \
    CHECK_RETURN(decode_ ## wt(d, &val)); \
    upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
  })

  while(1) {
    int32_t instruction;
    opcode op;
    uint32_t arg;
    int32_t longofs;

    /* Fetch and split the instruction: opcode via getop(), argument in the
     * bits above the low 8. */
    d->last = d->pc;
    instruction = *d->pc++;
    op = getop(instruction);
    arg = instruction >> 8;
    longofs = arg;
    assert(d->ptr != d->residual_end);
    UPB_UNUSED(group);
#ifdef UPB_DUMP_BYTECODE
    fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
                    "%x %s (%d)\n",
            (int)offset(d),
            (int)(d->ptr - d->buf),
            (int)(d->data_end - d->ptr),
            (int)(d->end - d->ptr),
            (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
            (int)(d->pc - 1 - group->bytecode),
            upb_pbdecoder_getopname(op),
            arg);
#endif
    switch (op) {
      /* Technically, we are losing data if we see a 32-bit varint that is not
       * properly sign-extended.  We could detect this and error about the data
       * loss, but proto2 does not do this, so we pass. */
      PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
      PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
      PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
      PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
      PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
      PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
      PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
      PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
      PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
      PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
      PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
      PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
      PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)

      VMCASE(OP_SETDISPATCH,
        /* The dispatch table pointer is stored inline in the bytecode right
         * after this instruction; load it and step over it. */
        d->top->base = d->pc - 1;
        memcpy(&d->top->dispatch, d->pc, sizeof(void*));
        d->pc += sizeof(void*) / sizeof(uint32_t);
      )
      VMCASE(OP_STARTMSG,
        CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
      )
      VMCASE(OP_ENDMSG,
        CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
      )
      VMCASE(OP_STARTSEQ,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
      )
      VMCASE(OP_ENDSEQ,
        CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
      )
      VMCASE(OP_STARTSUBMSG,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
      )
      VMCASE(OP_ENDSUBMSG,
        CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
      )
      VMCASE(OP_STARTSTR,
        /* The size hint is the full remaining length of the delimited
         * region. */
        uint32_t len = delim_remaining(d);
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
        if (len == 0) {
          d->pc++;  /* Skip OP_STRING. */
        }
      )
      VMCASE(OP_STRING,
        /* Hand the sink everything readable in the current buffer; "n" is how
         * much the sink reports as handled. */
        uint32_t len = curbufleft(d);
        size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
        if (n > len) {
          /* The sink asked to skip beyond the data it was given. */
          if (n > delim_remaining(d)) {
            seterr(d, "Tried to skip past end of string.");
            return upb_pbdecoder_suspend(d);
          } else {
            int32_t ret = skip(d, n);
            /* This shouldn't return DECODE_OK, because n > len. */
            assert(ret >= 0);
            return ret;
          }
        }
        advance(d, n);
        if (n < len || d->delim_end == NULL) {
          /* We aren't finished with this string yet. */
          d->pc--;  /* Repeat OP_STRING. */
          if (n > 0) checkpoint(d);
          return upb_pbdecoder_suspend(d);
        }
      )
      VMCASE(OP_ENDSTR,
        CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
      )
      VMCASE(OP_PUSHTAGDELIM,
        CHECK_SUSPEND(pushtagdelim(d, arg));
      )
      VMCASE(OP_SETBIGGROUPNUM,
        /* The group number occupies the following bytecode word. */
        d->top->groupnum = *d->pc++;
      )
      VMCASE(OP_POP,
        assert(d->top > d->stack);
        decoder_pop(d);
      )
      VMCASE(OP_PUSHLENDELIM,
        /* Read the length prefix and push a frame ending that many bytes past
         * the current stream offset. */
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_SUSPEND(decoder_push(d, offset(d) + len));
        set_delim_end(d);
      )
      VMCASE(OP_SETDELIM,
        set_delim_end(d);
      )
      VMCASE(OP_CHECKDELIM,
        /* We are guaranteed of this assert because we never allow ourselves to
         * consume bytes beyond data_end, which covers delim_end when non-NULL.
         */
        assert(!(d->delim_end && d->ptr > d->delim_end));
        /* Branch by "longofs" when the delimited region is exhausted. */
        if (d->ptr == d->delim_end)
          d->pc += longofs;
      )
      VMCASE(OP_CALL,
        /* Save the return address, then jump. */
        d->callstack[d->call_len++] = d->pc;
        d->pc += longofs;
      )
      VMCASE(OP_RET,
        assert(d->call_len > 0);
        d->pc = d->callstack[--d->call_len];
      )
      VMCASE(OP_BRANCH,
        d->pc += longofs;
      )
      VMCASE(OP_TAG1,
        /* arg layout: low 8 bits are a signed branch offset (or
         * LABEL_DISPATCH), the next 8 bits are the expected tag byte. */
        uint8_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xff;
        if (*d->ptr == expected) {
          advance(d, 1);
        } else {
          int8_t shortofs;
          /* Shared mismatch handler; OP_TAG2 and OP_TAGN jump here too. */
         badtag:
          shortofs = arg;
          if (shortofs == LABEL_DISPATCH) {
            CHECK_RETURN(dispatch(d));
          } else {
            d->pc += shortofs;
            break; /* Avoid checkpoint(). */
          }
        }
      )
      VMCASE(OP_TAG2,
        /* Same as OP_TAG1 but with a two-byte expected tag in bits 8-23. */
        uint16_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xffff;
        if (curbufleft(d) >= 2) {
          uint16_t actual;
          memcpy(&actual, d->ptr, 2);
          if (expected == actual) {
            advance(d, 2);
          } else {
            goto badtag;
          }
        } else {
          /* Only one byte is readable; the tag may straddle a buffer seam. */
          int32_t result = upb_pbdecoder_checktag_slow(d, expected);
          if (result == DECODE_MISMATCH) goto badtag;
          if (result >= 0) return result;
        }
      )
      VMCASE(OP_TAGN, {
        /* The expected tag is stored in the two bytecode words that follow. */
        uint64_t expected;
        int32_t result;
        memcpy(&expected, d->pc, 8);
        d->pc += 2;
        result = upb_pbdecoder_checktag_slow(d, expected);
        if (result == DECODE_MISMATCH) goto badtag;
        if (result >= 0) return result;
      })
      VMCASE(OP_DISPATCH, {
        CHECK_RETURN(dispatch(d));
      })
      VMCASE(OP_HALT, {
        /* Report the whole user buffer as consumed. */
        return d->size_param;
      })
    }
  }
}
8313
8314
8315/* BytesHandler handlers ******************************************************/
8316
8317void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
8318  upb_pbdecoder *d = closure;
8319  UPB_UNUSED(size_hint);
8320  d->top->end_ofs = UINT64_MAX;
8321  d->bufstart_ofs = 0;
8322  d->call_len = 1;
8323  d->callstack[0] = &halt;
8324  d->pc = pc;
8325  d->skip = 0;
8326  return d;
8327}
8328
8329void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
8330  upb_pbdecoder *d = closure;
8331  UPB_UNUSED(hd);
8332  UPB_UNUSED(size_hint);
8333  d->top->end_ofs = UINT64_MAX;
8334  d->bufstart_ofs = 0;
8335  d->call_len = 0;
8336  d->skip = 0;
8337  return d;
8338}
8339
/* Bytes-handler "end" callback: the caller signals that no more input will
 * arrive.
 *
 * Reports an error (via seterr) and returns false if the decoder still holds
 * residual bytes, has a pending skip, or the top frame has a finite end
 * offset.  Otherwise marks the current offset as the end of the message, runs
 * the decoder once more with an empty buffer so it can finish, and returns
 * true only if the call stack is empty afterwards. */
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
  upb_pbdecoder *d = closure;
  const upb_pbdecodermethod *method = handler_data;
  uint64_t end;
  char dummy;  /* Only its address is used, as a zero-length input buffer. */

  if (d->residual_end > d->residual) {
    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
    return false;
  }

  if (d->skip) {
    seterr(d, "Unexpected EOF inside skipped data");
    return false;
  }

  if (d->top->end_ofs != UINT64_MAX) {
    seterr(d, "Unexpected EOF inside delimited string");
    return false;
  }

  /* The user's end() call indicates that the message ends here. */
  end = offset(d);
  d->top->end_ofs = end;

#ifdef UPB_USE_JIT_X64
  if (method->is_native_) {
    /* JIT path: invoke the generated code with a zero-length buffer. */
    const mgroup *group = (const mgroup*)method->group;
    if (d->top != d->stack)
      d->stack->end_ofs = 0;
    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
  } else
#endif
  {
    /* Bytecode path. */
    const uint32_t *p = d->pc;
    d->stack->end_ofs = end;
    /* Check the previous bytecode, but guard against beginning. */
    if (p != method->code_base.ptr) p--;
    if (getop(*p) == OP_CHECKDELIM) {
      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
      assert(getop(*d->pc) == OP_TAG1 ||
             getop(*d->pc) == OP_TAG2 ||
             getop(*d->pc) == OP_TAGN ||
             getop(*d->pc) == OP_DISPATCH);
      d->pc = p;
    }
    /* Run the interpreter on an empty buffer; the return value is ignored and
     * success is judged by call_len below. */
    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
  }

  /* A non-empty call stack means we stopped inside a nested construct. */
  if (d->call_len != 0) {
    seterr(d, "Unexpected EOF inside submessage or group");
    return false;
  }

  return true;
}
8396
/* Bytes-handler "string" callback for the bytecode interpreter: feeds
 * "size" bytes at "buf" to the decoder.
 *
 * First hands the buffer to upb_pbdecoder_resume(), then continues in
 * run_decoder_vm().  NOTE(review): goto_endmsg() and CHECK_RETURN() are
 * macros defined outside this view; CHECK_RETURN presumably returns early from
 * this function for some values of "result" -- confirm against the macro
 * definition. */
size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
                            size_t size, const upb_bufhandle *handle) {
  int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);

  if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
  CHECK_RETURN(result);

  return run_decoder_vm(decoder, group, handle);
}
8406
8407
8408/* Public API *****************************************************************/
8409
8410void upb_pbdecoder_reset(upb_pbdecoder *d) {
8411  d->top = d->stack;
8412  d->top->groupnum = 0;
8413  d->ptr = d->residual;
8414  d->buf = d->residual;
8415  d->end = d->residual;
8416  d->residual_end = d->residual;
8417}
8418
8419upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8420                                    upb_sink *sink) {
8421  const size_t default_max_nesting = 64;
8422#ifndef NDEBUG
8423  size_t size_before = upb_env_bytesallocated(e);
8424#endif
8425
8426  upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8427  if (!d) return NULL;
8428
8429  d->method_ = m;
8430  d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8431  d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8432  if (!d->stack || !d->callstack) {
8433    return NULL;
8434  }
8435
8436  d->env = e;
8437  d->limit = d->stack + default_max_nesting - 1;
8438  d->stack_size = default_max_nesting;
8439
8440  upb_pbdecoder_reset(d);
8441  upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8442
8443  assert(sink);
8444  if (d->method_->dest_handlers_) {
8445    if (sink->handlers != d->method_->dest_handlers_)
8446      return NULL;
8447  }
8448  upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
8449
8450  /* If this fails, increase the value in decoder.h. */
8451  assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8452  return d;
8453}
8454
8455uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
8456  return offset(d);
8457}
8458
8459const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8460  return d->method_;
8461}
8462
8463upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8464  return &d->input_;
8465}
8466
8467size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8468  return d->stack_size;
8469}
8470
8471bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
8472  assert(d->top >= d->stack);
8473
8474  if (max < (size_t)(d->top - d->stack)) {
8475    /* Can't set a limit smaller than what we are currently at. */
8476    return false;
8477  }
8478
8479  if (max > d->stack_size) {
8480    /* Need to reallocate stack and callstack to accommodate. */
8481    size_t old_size = stacksize(d, d->stack_size);
8482    size_t new_size = stacksize(d, max);
8483    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
8484    if (!p) {
8485      return false;
8486    }
8487    d->stack = p;
8488
8489    old_size = callstacksize(d, d->stack_size);
8490    new_size = callstacksize(d, max);
8491    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
8492    if (!p) {
8493      return false;
8494    }
8495    d->callstack = p;
8496
8497    d->stack_size = max;
8498  }
8499
8500  d->limit = d->stack + max - 1;
8501  return true;
8502}
8503/*
8504** upb::Encoder
8505**
8506** Since we are implementing pure handlers (ie. without any out-of-band access
8507** to pre-computed lengths), we have to buffer all submessages before we can
8508** emit even their first byte.
8509**
8510** Not knowing the size of submessages also means we can't write a perfect
8511** zero-copy implementation, even with buffering.  Lengths are stored as
8512** varints, which means that we don't know how many bytes to reserve for the
8513** length until we know what the length is.
8514**
8515** This leaves us with three main choices:
8516**
8517** 1. buffer all submessage data in a temporary buffer, then copy it exactly
8518**    once into the output buffer.
8519**
8520** 2. attempt to buffer data directly into the output buffer, estimating how
8521**    many bytes each length will take.  When our guesses are wrong, use
8522**    memmove() to grow or shrink the allotted space.
8523**
8524** 3. buffer directly into the output buffer, allocating a max length
8525**    ahead-of-time for each submessage length.  If we overallocated, we waste
8526**    space, but no memcpy() or memmove() is required.  This approach requires
8527**    defining a maximum size for submessages and rejecting submessages that
8528**    exceed that size.
8529**
8530** (2) and (3) have the potential to have better performance, but they are more
8531** complicated and subtle to implement:
8532**
8533**   (3) requires making an arbitrary choice of the maximum message size; it
8534**       wastes space when submessages are shorter than this and fails
8535**       completely when they are longer.  This makes it more finicky and
8536**       requires configuration based on the input.  It also makes it impossible
8537**       to perfectly match the output of reference encoders that always use the
8538**       optimal amount of space for each length.
8539**
**   (2) requires guessing the size upfront, and if multiple lengths are
8541**       guessed wrong the minimum required number of memmove() operations may
8542**       be complicated to compute correctly.  Implemented properly, it may have
8543**       a useful amortized or average cost, but more investigation is required
8544**       to determine this and what the optimal algorithm is to achieve it.
8545**
8546**   (1) makes you always pay for exactly one copy, but its implementation is
8547**       the simplest and its performance is predictable.
8548**
8549** So for now, we implement (1) only.  If we wish to optimize later, we should
8550** be able to do it without affecting users.
8551**
8552** The strategy is to buffer the segments of data that do *not* depend on
8553** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8554** and lengths.  When the top-level submessage ends, we can go beginning to end,
8555** alternating the writing of lengths with memcpy() of the rest of the data.
8556** At the top level though, no buffering is required.
8557*/
8558
8559
8560#include <stdlib.h>
8561
8562/* The output buffer is divided into segments; a segment is a string of data
8563 * that is "ready to go" -- it does not need any varint lengths inserted into
8564 * the middle.  The seams between segments are where varints will be inserted
8565 * once they are known.
8566 *
8567 * We also use the concept of a "run", which is a range of encoded bytes that
8568 * occur at a single submessage level.  Every segment contains one or more runs.
8569 *
8570 * A segment can span messages.  Consider:
8571 *
8572 *                  .--Submessage lengths---------.
8573 *                  |       |                     |
8574 *                  |       V                     V
8575 *                  V      | |---------------    | |-----------------
8576 * Submessages:    | |-----------------------------------------------
8577 * Top-level msg: ------------------------------------------------------------
8578 *
8579 * Segments:          -----   -------------------   -----------------
8580 * Runs:              *----   *--------------*---   *----------------
8581 * (* marks the start)
8582 *
 * Note that the top-level message is not in any segment because it does not
8584 * have any length preceding it.
8585 *
8586 * A segment is only interrupted when another length needs to be inserted.  So
8587 * observe how the second segment spans both the inner submessage and part of
8588 * the next enclosing message. */
/* Bookkeeping for one segment of buffered output.  When the buffered data is
 * finally emitted, each segment is written as varint(msglen) followed by
 * seglen bytes taken from the data buffer. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
8593
/* State for one protobuf binary encoder instance. */
struct upb_pb_encoder {
  /* Environment used for all of this encoder's allocations. */
  upb_env *env;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink *output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf" is
   * allocated from the env when the encoder is created and is reallocated by
   * reserve() if we need to grow beyond the current size. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating: start of the array, the segment
   * currently being filled, and one past the end of the allocation. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry in the stack is the index
   * (into segbuf) of the segment where this submessage's length is being
   * accumulated.  "top" is NULL when we are not inside any delimited region. */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
8624
8625/* low-level buffering ********************************************************/
8626
8627/* Low-level functions for interacting with the output buffer. */
8628
8629/* TODO(haberman): handle pushback */
8630static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8631  size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8632  UPB_ASSERT_VAR(n, n == len);
8633}
8634
8635static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8636  return &e->segbuf[*e->top];
8637}
8638
8639/* Call to ensure that at least "bytes" bytes are available for writing at
8640 * e->ptr.  Returns false if the bytes could not be allocated. */
8641static bool reserve(upb_pb_encoder *e, size_t bytes) {
8642  if ((size_t)(e->limit - e->ptr) < bytes) {
8643    /* Grow buffer. */
8644    char *new_buf;
8645    size_t needed = bytes + (e->ptr - e->buf);
8646    size_t old_size = e->limit - e->buf;
8647
8648    size_t new_size = old_size;
8649
8650    while (new_size < needed) {
8651      new_size *= 2;
8652    }
8653
8654    new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
8655
8656    if (new_buf == NULL) {
8657      return false;
8658    }
8659
8660    e->ptr = new_buf + (e->ptr - e->buf);
8661    e->runbegin = new_buf + (e->runbegin - e->buf);
8662    e->limit = new_buf + new_size;
8663    e->buf = new_buf;
8664  }
8665
8666  return true;
8667}
8668
/* Call when "bytes" bytes have been written at e->ptr.  The caller *must* have
8670 * previously called reserve() with at least this many bytes. */
8671static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8672  assert((size_t)(e->limit - e->ptr) >= bytes);
8673  e->ptr += bytes;
8674}
8675
8676/* Call when all of the bytes for a handler have been written.  Flushes the
8677 * bytes if possible and necessary, returning false if this failed. */
8678static bool commit(upb_pb_encoder *e) {
8679  if (!e->top) {
8680    /* We aren't inside a delimited region.  Flush our accumulated bytes to
8681     * the output.
8682     *
8683     * TODO(haberman): in the future we may want to delay flushing for
8684     * efficiency reasons. */
8685    putbuf(e, e->buf, e->ptr - e->buf);
8686    e->ptr = e->buf;
8687  }
8688
8689  return true;
8690}
8691
8692/* Writes the given bytes to the buffer, handling reserve/advance. */
8693static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8694  if (!reserve(e, len)) {
8695    return false;
8696  }
8697
8698  memcpy(e->ptr, data, len);
8699  encoder_advance(e, len);
8700  return true;
8701}
8702
8703/* Finish the current run by adding the run totals to the segment and message
8704 * length. */
8705static void accumulate(upb_pb_encoder *e) {
8706  size_t run_len;
8707  assert(e->ptr >= e->runbegin);
8708  run_len = e->ptr - e->runbegin;
8709  e->segptr->seglen += run_len;
8710  top(e)->msglen += run_len;
8711  e->runbegin = e->ptr;
8712}
8713
8714/* Call to indicate the start of delimited region for which the full length is
8715 * not yet known.  All data will be buffered until the length is known.
8716 * Delimited regions may be nested; their lengths will all be tracked properly. */
8717static bool start_delim(upb_pb_encoder *e) {
8718  if (e->top) {
8719    /* We are already buffering, advance to the next segment and push it on the
8720     * stack. */
8721    accumulate(e);
8722
8723    if (++e->top == e->stacklimit) {
8724      /* TODO(haberman): grow stack? */
8725      return false;
8726    }
8727
8728    if (++e->segptr == e->seglimit) {
8729      /* Grow segment buffer. */
8730      size_t old_size =
8731          (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8732      size_t new_size = old_size * 2;
8733      upb_pb_encoder_segment *new_buf =
8734          upb_env_realloc(e->env, e->segbuf, old_size, new_size);
8735
8736      if (new_buf == NULL) {
8737        return false;
8738      }
8739
8740      e->segptr = new_buf + (e->segptr - e->segbuf);
8741      e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8742      e->segbuf = new_buf;
8743    }
8744  } else {
8745    /* We were previously at the top level, start buffering. */
8746    e->segptr = e->segbuf;
8747    e->top = e->stack;
8748    e->runbegin = e->ptr;
8749  }
8750
8751  *e->top = e->segptr - e->segbuf;
8752  e->segptr->seglen = 0;
8753  e->segptr->msglen = 0;
8754
8755  return true;
8756}
8757
8758/* Call to indicate the end of a delimited region.  We now know the length of
8759 * the delimited region.  If we are not nested inside any other delimited
8760 * regions, we can now emit all of the buffered data we accumulated. */
8761static bool end_delim(upb_pb_encoder *e) {
8762  size_t msglen;
8763  accumulate(e);
8764  msglen = top(e)->msglen;
8765
8766  if (e->top == e->stack) {
8767    /* All lengths are now available, emit all buffered data. */
8768    char buf[UPB_PB_VARINT_MAX_LEN];
8769    upb_pb_encoder_segment *s;
8770    const char *ptr = e->buf;
8771    for (s = e->segbuf; s <= e->segptr; s++) {
8772      size_t lenbytes = upb_vencode64(s->msglen, buf);
8773      putbuf(e, buf, lenbytes);
8774      putbuf(e, ptr, s->seglen);
8775      ptr += s->seglen;
8776    }
8777
8778    e->ptr = e->buf;
8779    e->top = NULL;
8780  } else {
8781    /* Need to keep buffering; propagate length info into enclosing
8782     * submessages. */
8783    --e->top;
8784    top(e)->msglen += msglen + upb_varint_size(msglen);
8785  }
8786
8787  return true;
8788}
8789
8790
8791/* tag_t **********************************************************************/
8792
8793/* A precomputed (pre-encoded) tag and length. */
8794
typedef struct {
  uint8_t bytes;  /* Number of valid bytes in "tag". */
  char tag[7];    /* The varint-encoded tag, as written by upb_vencode64(). */
} tag_t;
8799
8800/* Allocates a new tag for this field, and sets it in these handlerattr. */
8801static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8802                    upb_handlerattr *attr) {
8803  uint32_t n = upb_fielddef_number(f);
8804
8805  tag_t *tag = malloc(sizeof(tag_t));
8806  tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8807
8808  upb_handlerattr_init(attr);
8809  upb_handlerattr_sethandlerdata(attr, tag);
8810  upb_handlers_addcleanup(h, tag, free);
8811}
8812
8813static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
8814  return encode_bytes(e, tag->tag, tag->bytes);
8815}
8816
8817
8818/* encoding of wire types *****************************************************/
8819
8820static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
8821  /* TODO(haberman): byte-swap for big endian. */
8822  return encode_bytes(e, &val, sizeof(uint64_t));
8823}
8824
8825static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
8826  /* TODO(haberman): byte-swap for big endian. */
8827  return encode_bytes(e, &val, sizeof(uint32_t));
8828}
8829
8830static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
8831  if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
8832    return false;
8833  }
8834
8835  encoder_advance(e, upb_vencode64(val, e->ptr));
8836  return true;
8837}
8838
/* Returns the object representation of a double reinterpreted as a 64-bit
 * unsigned integer (no numeric conversion takes place). */
static uint64_t dbl2uint64(double d) {
  union {
    double as_double;
    uint64_t as_bits;
  } pun;
  pun.as_double = d;
  return pun.as_bits;
}
8844
/* Returns the object representation of a float reinterpreted as a 32-bit
 * unsigned integer (no numeric conversion takes place). */
static uint32_t flt2uint32(float d) {
  union {
    float as_float;
    uint32_t as_bits;
  } pun;
  pun.as_float = d;
  return pun.as_bits;
}
8850
8851
8852/* encoding of proto types ****************************************************/
8853
8854static bool startmsg(void *c, const void *hd) {
8855  upb_pb_encoder *e = c;
8856  UPB_UNUSED(hd);
8857  if (e->depth++ == 0) {
8858    upb_bytessink_start(e->output_, 0, &e->subc);
8859  }
8860  return true;
8861}
8862
8863static bool endmsg(void *c, const void *hd, upb_status *status) {
8864  upb_pb_encoder *e = c;
8865  UPB_UNUSED(hd);
8866  UPB_UNUSED(status);
8867  if (--e->depth == 0) {
8868    upb_bytessink_end(e->output_);
8869  }
8870  return true;
8871}
8872
8873static void *encode_startdelimfield(void *c, const void *hd) {
8874  bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8875  return ok ? c : UPB_BREAK;
8876}
8877
/* End handler for length-delimited fields: closes the delimited region.  The
 * handler data is not needed. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;
  UPB_UNUSED(hd);
  closed = end_delim(c);
  return closed;
}
8882
8883static void *encode_startgroup(void *c, const void *hd) {
8884  return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8885}
8886
/* End handler for groups: writes the tag passed as handler data and commits
 * it. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
8890
/* startstr handler: a string is encoded like any other delimited field, so
 * this simply forwards to encode_startdelimfield().  The size hint is
 * ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
8895
8896static size_t encode_strbuf(void *c, const void *hd, const char *buf,
8897                            size_t len, const upb_bufhandle *h) {
8898  UPB_UNUSED(hd);
8899  UPB_UNUSED(h);
8900  return encode_bytes(c, buf, len) ? len : 0;
8901}
8902
/* Generates the two value handlers for one primitive proto type:
 *
 *   encode_scalar_<type>: writes the field's tag (taken from the handler
 *     data), then the value, then commits.
 *   encode_packed_<type>: writes only the value; no tag and no commit.
 *
 * "ctype" is the C type the handler receives, "convert" is a type name or
 * function applied to the value as (convert)(val), and "encode" is the
 * wire-level encoder that receives the converted value.
 *
 * NOTE(review): for int32 and enum the value is cast to uint32_t before being
 * passed to encode_varint(), so negative values are zero-extended rather than
 * sign-extended to 64 bits -- confirm this matches the intended wire output. */
#define T(type, ctype, convert, encode)                                  \
  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
  }                                                                      \
  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
    UPB_UNUSED(hd);                                                      \
    return encode(e, (convert)(val));                                    \
  }

T(double,   double,   dbl2uint64,   encode_fixed64)
T(float,    float,    flt2uint32,   encode_fixed32)
T(int64,    int64_t,  uint64_t,     encode_varint)
T(int32,    int32_t,  uint32_t,     encode_varint)
T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
T(bool,     bool,     bool,         encode_varint)
T(uint32,   uint32_t, uint32_t,     encode_varint)
T(uint64,   uint64_t, uint64_t,     encode_varint)
T(enum,     int32_t,  uint32_t,     encode_varint)
T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
T(sint64,   int64_t,  upb_zzenc_64, encode_varint)

#undef T
8928
8929
8930/* code to build the handlers *************************************************/
8931
/* Callback passed to upb_handlers_newfrozen(): registers the encoder's
 * handlers on "h" for every field of the message type "h" describes.
 *
 * Each field gets a pre-encoded tag as its handler data.  Packed repeated
 * primitive fields get start/end-sequence handlers that open and close a
 * delimited region, plus the tag-less "packed" value handler; all other
 * primitives get the "scalar" value handler.  Strings/bytes and submessages
 * are written as delimited regions; groups are bracketed by a start tag and a
 * separately encoded END_GROUP tag.  "closure" is unused. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
  const upb_msgdef *m;
  upb_msg_field_iter i;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);

  m = upb_handlers_msgdef(h);
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
                  upb_fielddef_packed(f);
    upb_handlerattr attr;
    /* A packed field is written as one delimited blob, whatever the element
     * type's own wire type is. */
    upb_wiretype_t wt =
        packed ? UPB_WIRE_TYPE_DELIMITED
               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];

    /* Pre-encode the tag for this field. */
    new_tag(h, f, wt, &attr);

    if (packed) {
      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
    }

/* Expands to one switch case that installs the packed or scalar value handler
 * for a primitive descriptor type. */
#define T(upper, lower, upbtype)                                     \
  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
    if (packed) {                                                    \
      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
    } else {                                                         \
      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
    }                                                                \
    break;

    switch (upb_fielddef_descriptortype(f)) {
      T(DOUBLE,   double,   double);
      T(FLOAT,    float,    float);
      T(INT64,    int64,    int64);
      T(INT32,    int32,    int32);
      T(FIXED64,  fixed64,  uint64);
      T(FIXED32,  fixed32,  uint32);
      T(BOOL,     bool,     bool);
      T(UINT32,   uint32,   uint32);
      T(UINT64,   uint64,   uint64);
      T(ENUM,     enum,     int32);
      T(SFIXED32, sfixed32, int32);
      T(SFIXED64, sfixed64, int64);
      T(SINT32,   sint32,   int32);
      T(SINT64,   sint64,   int64);
      case UPB_DESCRIPTOR_TYPE_STRING:
      case UPB_DESCRIPTOR_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
        upb_handlers_setstring(h, f, encode_strbuf, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:
        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_GROUP: {
        /* Endgroup takes a different tag (wire_type = END_GROUP). */
        upb_handlerattr attr2;
        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);

        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);

        upb_handlerattr_uninit(&attr2);
        break;
      }
    }

#undef T

    upb_handlerattr_uninit(&attr);
  }
}
9013
9014void upb_pb_encoder_reset(upb_pb_encoder *e) {
9015  e->segptr = NULL;
9016  e->top = NULL;
9017  e->depth = 0;
9018}
9019
9020
9021/* public API *****************************************************************/
9022
9023const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
9024                                               const void *owner) {
9025  return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9026}
9027
9028upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9029                                      upb_bytessink *output) {
9030  const size_t initial_bufsize = 256;
9031  const size_t initial_segbufsize = 16;
9032  /* TODO(haberman): make this configurable. */
9033  const size_t stack_size = 64;
9034#ifndef NDEBUG
9035  const size_t size_before = upb_env_bytesallocated(env);
9036#endif
9037
9038  upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9039  if (!e) return NULL;
9040
9041  e->buf = upb_env_malloc(env, initial_bufsize);
9042  e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9043  e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9044
9045  if (!e->buf || !e->segbuf || !e->stack) {
9046    return NULL;
9047  }
9048
9049  e->limit = e->buf + initial_bufsize;
9050  e->seglimit = e->segbuf + initial_segbufsize;
9051  e->stacklimit = e->stack + stack_size;
9052
9053  upb_pb_encoder_reset(e);
9054  upb_sink_reset(&e->input_, h, e);
9055
9056  e->env = env;
9057  e->output_ = output;
9058  e->subc = output->closure;
9059  e->ptr = e->buf;
9060
9061  /* If this fails, increase the value in encoder.h. */
9062  assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9063  return e;
9064}
9065
9066upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
9067
9068
9069#include <stdio.h>
9070#include <stdlib.h>
9071#include <string.h>
9072
9073upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
9074                                        void *owner, upb_status *status) {
9075  /* Create handlers. */
9076  const upb_pbdecodermethod *decoder_m;
9077  const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
9078  upb_env env;
9079  upb_pbdecodermethodopts opts;
9080  upb_pbdecoder *decoder;
9081  upb_descreader *reader;
9082  bool ok;
9083  upb_def **ret = NULL;
9084  upb_def **defs;
9085
9086  upb_pbdecodermethodopts_init(&opts, reader_h);
9087  decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9088
9089  upb_env_init(&env);
9090  upb_env_reporterrorsto(&env, status);
9091
9092  reader = upb_descreader_create(&env, reader_h);
9093  decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
9094
9095  /* Push input data. */
9096  ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
9097
9098  if (!ok) goto cleanup;
9099  defs = upb_descreader_getdefs(reader, owner, n);
9100  ret = malloc(sizeof(upb_def*) * (*n));
9101  memcpy(ret, defs, sizeof(upb_def*) * (*n));
9102
9103cleanup:
9104  upb_env_uninit(&env);
9105  upb_handlers_unref(reader_h, &reader_h);
9106  upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9107  return ret;
9108}
9109
9110bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
9111                                     upb_status *status) {
9112  int n;
9113  bool success;
9114  upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
9115  if (!defs) return false;
9116  success = upb_symtab_add(s, defs, n, &defs, status);
9117  free(defs);
9118  return success;
9119}
9120
/* Reads the whole of "filename" into a freshly malloc()ed buffer.
 *
 * On success returns the buffer, which the caller must free(), and stores the
 * file size in *len when "len" is non-NULL.  The buffer holds one extra byte
 * beyond the file contents, set to '\0', so text files can be used as C
 * strings.  Returns NULL if the file cannot be opened, sized, or read, or if
 * memory cannot be allocated. */
char *upb_readfile(const char *filename, size_t *len) {
  long size;
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if(!f) return NULL;
  if(fseek(f, 0, SEEK_END) != 0) goto error;
  size = ftell(f);
  if(size < 0) goto error;
  if(fseek(f, 0, SEEK_SET) != 0) goto error;
  buf = malloc((size_t)size + 1);
  if(!buf) goto error;
  if(size && fread(buf, size, 1, f) != 1) goto error;
  buf[size] = '\0';
  fclose(f);
  if (len) *len = size;
  return buf;

error:
  /* buf is still NULL if we failed before allocating; free(NULL) is a no-op. */
  free(buf);
  fclose(f);
  return NULL;
}
9140
9141bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
9142                                          upb_status *status) {
9143  size_t len;
9144  bool success;
9145  char *data = upb_readfile(fname, &len);
9146  if (!data) {
9147    if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
9148    return false;
9149  }
9150  success = upb_load_descriptor_into_symtab(symtab, data, len, status);
9151  free(data);
9152  return success;
9153}
9154/*
9155 * upb::pb::TextPrinter
9156 *
9157 * OPT: This is not optimized at all.  It uses printf() which parses the format
9158 * string every time, and it allocates memory for every put.
9159 */
9160
9161
9162#include <ctype.h>
9163#include <float.h>
9164#include <inttypes.h>
9165#include <stdarg.h>
9166#include <stdio.h>
9167#include <stdlib.h>
9168#include <string.h>
9169
9170
/* State for one text-format printer instance. */
struct upb_textprinter {
  upb_sink input_;         /* Sink that receives the data to print. */
  upb_bytessink *output_;  /* Where the formatted text is written. */
  int indent_depth_;       /* Number of two-space indents emitted by indent(). */
  bool single_line_;       /* If set, fields end with ' ' instead of '\n' and
                            * no indentation is written. */
  void *subc;              /* Closure passed to the output sink's putbuf. */
};
9178
/* Jumps to the enclosing function's "err" label when (x) evaluates to a
 * negative value.  The expansion already ends in ';' and is a bare "if", so
 * take care when using it directly before an "else". */
#define CHECK(x) if ((x) < 0) goto err;
9180
/* Returns the part of "longname" after its last '.', or all of "longname" if
 * it contains no '.'.  The result points into the argument. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
9185
9186static int indent(upb_textprinter *p) {
9187  int i;
9188  if (!p->single_line_)
9189    for (i = 0; i < p->indent_depth_; i++)
9190      upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
9191  return 0;
9192}
9193
9194static int endfield(upb_textprinter *p) {
9195  const char ch = (p->single_line_ ? ' ' : '\n');
9196  upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
9197  return 0;
9198}
9199
9200static int putescaped(upb_textprinter *p, const char *buf, size_t len,
9201                      bool preserve_utf8) {
9202  /* Based on CEscapeInternal() from Google's protobuf release. */
9203  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
9204  const char *end = buf + len;
9205
9206  /* I think hex is prettier and more useful, but proto2 uses octal; should
9207   * investigate whether it can parse hex also. */
9208  const bool use_hex = false;
9209  bool last_hex_escape = false; /* true if last output char was \xNN */
9210
9211  for (; buf < end; buf++) {
9212    bool is_hex_escape;
9213
9214    if (dstend - dst < 4) {
9215      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9216      dst = dstbuf;
9217    }
9218
9219    is_hex_escape = false;
9220    switch (*buf) {
9221      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
9222      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
9223      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
9224      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
9225      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
9226      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
9227      default:
9228        /* Note that if we emit \xNN and the buf character after that is a hex
9229         * digit then that digit must be escaped too to prevent it being
9230         * interpreted as part of the character code by C. */
9231        if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
9232            (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
9233          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
9234          is_hex_escape = use_hex;
9235          dst += 4;
9236        } else {
9237          *(dst++) = *buf; break;
9238        }
9239    }
9240    last_hex_escape = is_hex_escape;
9241  }
9242  /* Flush remaining data. */
9243  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9244  return 0;
9245}
9246
9247bool putf(upb_textprinter *p, const char *fmt, ...) {
9248  va_list args;
9249  va_list args_copy;
9250  char *str;
9251  int written;
9252  int len;
9253  bool ok;
9254
9255  va_start(args, fmt);
9256
9257  /* Run once to get the length of the string. */
9258  _upb_va_copy(args_copy, args);
9259  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
9260  va_end(args_copy);
9261
9262  /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
9263  str = malloc(len + 1);
9264  if (!str) return false;
9265  written = vsprintf(str, fmt, args);
9266  va_end(args);
9267  UPB_ASSERT_VAR(written, written == len);
9268
9269  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
9270  free(str);
9271  return ok;
9272}
9273
9274
9275/* handlers *******************************************************************/
9276
9277static bool textprinter_startmsg(void *c, const void *hd) {
9278  upb_textprinter *p = c;
9279  UPB_UNUSED(hd);
9280  if (p->indent_depth_ == 0) {
9281    upb_bytessink_start(p->output_, 0, &p->subc);
9282  }
9283  return true;
9284}
9285
9286static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
9287  upb_textprinter *p = c;
9288  UPB_UNUSED(hd);
9289  UPB_UNUSED(s);
9290  if (p->indent_depth_ == 0) {
9291    upb_bytessink_end(p->output_);
9292  }
9293  return true;
9294}
9295
/* Defines the value handler textprinter_put<name>() for a scalar of C type
 * |ctype|.  The handler writes the current indentation, then
 * "<field name>: <value>" formatted with the printf conversion |fmt|, then
 * the field separator.  |handler_data| must be the field's upb_fielddef.
 * The handler returns false only if indent() or endfield() reports an error;
 * the result of putf() is not checked. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
}
9308
9309static bool textprinter_putbool(void *closure, const void *handler_data,
9310                                bool val) {
9311  upb_textprinter *p = closure;
9312  const upb_fielddef *f = handler_data;
9313  CHECK(indent(p));
9314  putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9315  CHECK(endfield(p));
9316  return true;
9317err:
9318  return false;
9319}
9320
/* Two-level stringification: STRINGIFY_MACROVAL(x) macro-expands |x| before
 * STRINGIFY_HELPER turns it into a string literal, so a macro such as FLT_DIG
 * is replaced by its definition rather than by its name. */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Instantiate textprinter_put<name>() for each numeric type.  Floating-point
 * values are printed with "%g" using FLT_DIG / DBL_DIG as the precision. */
TYPE(int32,  int32_t,  "%" PRId32)
TYPE(int64,  int64_t,  "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
9332
9333/* Output a symbolic value from the enum if found, else just print as int32. */
9334static bool textprinter_putenum(void *closure, const void *handler_data,
9335                                int32_t val) {
9336  upb_textprinter *p = closure;
9337  const upb_fielddef *f = handler_data;
9338  const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
9339  const char *label = upb_enumdef_iton(enum_def, val);
9340  if (label) {
9341    indent(p);
9342    putf(p, "%s: %s", upb_fielddef_name(f), label);
9343    endfield(p);
9344  } else {
9345    if (!textprinter_putint32(closure, handler_data, val))
9346      return false;
9347  }
9348  return true;
9349}
9350
9351static void *textprinter_startstr(void *closure, const void *handler_data,
9352                      size_t size_hint) {
9353  upb_textprinter *p = closure;
9354  const upb_fielddef *f = handler_data;
9355  UPB_UNUSED(size_hint);
9356  indent(p);
9357  putf(p, "%s: \"", upb_fielddef_name(f));
9358  return p;
9359}
9360
9361static bool textprinter_endstr(void *closure, const void *handler_data) {
9362  upb_textprinter *p = closure;
9363  UPB_UNUSED(handler_data);
9364  putf(p, "\"");
9365  endfield(p);
9366  return true;
9367}
9368
9369static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
9370                                 size_t len, const upb_bufhandle *handle) {
9371  upb_textprinter *p = closure;
9372  const upb_fielddef *f = hd;
9373  UPB_UNUSED(handle);
9374  CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
9375  return len;
9376err:
9377  return 0;
9378}
9379
9380static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
9381  upb_textprinter *p = closure;
9382  const char *name = handler_data;
9383  CHECK(indent(p));
9384  putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
9385  p->indent_depth_++;
9386  return p;
9387err:
9388  return UPB_BREAK;
9389}
9390
9391static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
9392  upb_textprinter *p = closure;
9393  UPB_UNUSED(handler_data);
9394  p->indent_depth_--;
9395  CHECK(indent(p));
9396  upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
9397  CHECK(endfield(p));
9398  return true;
9399err:
9400  return false;
9401}
9402
9403static void onmreg(const void *c, upb_handlers *h) {
9404  const upb_msgdef *m = upb_handlers_msgdef(h);
9405  upb_msg_field_iter i;
9406  UPB_UNUSED(c);
9407
9408  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
9409  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
9410
9411  for(upb_msg_field_begin(&i, m);
9412      !upb_msg_field_done(&i);
9413      upb_msg_field_next(&i)) {
9414    upb_fielddef *f = upb_msg_iter_field(&i);
9415    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
9416    upb_handlerattr_sethandlerdata(&attr, f);
9417    switch (upb_fielddef_type(f)) {
9418      case UPB_TYPE_INT32:
9419        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
9420        break;
9421      case UPB_TYPE_INT64:
9422        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
9423        break;
9424      case UPB_TYPE_UINT32:
9425        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
9426        break;
9427      case UPB_TYPE_UINT64:
9428        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
9429        break;
9430      case UPB_TYPE_FLOAT:
9431        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
9432        break;
9433      case UPB_TYPE_DOUBLE:
9434        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
9435        break;
9436      case UPB_TYPE_BOOL:
9437        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
9438        break;
9439      case UPB_TYPE_STRING:
9440      case UPB_TYPE_BYTES:
9441        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
9442        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
9443        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
9444        break;
9445      case UPB_TYPE_MESSAGE: {
9446        const char *name =
9447            upb_fielddef_istagdelim(f)
9448                ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
9449                : upb_fielddef_name(f);
9450        upb_handlerattr_sethandlerdata(&attr, name);
9451        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
9452        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
9453        break;
9454      }
9455      case UPB_TYPE_ENUM:
9456        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
9457        break;
9458    }
9459  }
9460}
9461
9462static void textprinter_reset(upb_textprinter *p, bool single_line) {
9463  p->single_line_ = single_line;
9464  p->indent_depth_ = 0;
9465}
9466
9467
9468/* Public API *****************************************************************/
9469
9470upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
9471                                        upb_bytessink *output) {
9472  upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
9473  if (!p) return NULL;
9474
9475  p->output_ = output;
9476  upb_sink_reset(&p->input_, h, p);
9477  textprinter_reset(p, false);
9478
9479  return p;
9480}
9481
9482const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
9483                                                const void *owner) {
9484  return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
9485}
9486
9487upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
9488
9489void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
9490  p->single_line_ = single_line;
9491}
9492
9493
/* Maps a descriptor type (the array index) to the wire type used for that
 * type.  The trailing comment on each entry names the descriptor type at that
 * index.
 * NOTE(review): index 0 is labelled ENDGROUP and looks like a placeholder,
 * since descriptor types presumably start at 1 (DOUBLE) -- confirm. */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
9516
/* A basic branch-based decoder, uses 32-bit values to get good performance
 * on 32-bit architectures (but performs well on 64-bits also).
 * This scheme comes from the original Google Protobuf implementation
 * (proto2).
 *
 * Reads up to eight varint bytes starting at r.p.  The 7-bit payload of the
 * first byte read is placed at bit 14, so r.val is presumably expected to
 * already hold the payloads of two earlier bytes in bits 0-13 (confirm
 * against the caller).
 *
 * On success returns r with r.val updated to the combined value and r.p
 * advanced past the last byte consumed.  If all eight bytes have their
 * continuation bit (0x80) set, returns {NULL, 0}. */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
  upb_decoderet err = {NULL, 0};
  const char *p = r.p;
  /* The result is assembled in two 32-bit halves. */
  uint32_t low = (uint32_t)r.val;
  uint32_t high = 0;
  uint32_t b;
  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  /* This byte straddles the halves: its low 4 payload bits land in bits
   * 28-31 of |low| and the remaining 3 bits start |high|. */
  b = *(p++); low  |= (b & 0x7fU) << 28;
              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
  /* Eight bytes read and the last one still has its continuation bit set. */
  return err;

done:
  r.val = ((uint64_t)high << 32) | low;
  r.p = p;
  return r;
}
9543
/* Like the previous, but uses 64-bit values.
 *
 * Reads up to eight varint bytes starting at r.p, OR-ing each byte's 7-bit
 * payload into r.val at bit offsets 14, 21, ..., 63.  On success returns r
 * with r.val updated and r.p advanced past the last byte consumed.  If all
 * eight bytes have their continuation bit (0x80) set, returns {NULL, 0}. */
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
  const char *p = r.p;
  uint64_t val = r.val;
  uint64_t b;
  upb_decoderet err = {NULL, 0};
  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
  /* Only the lowest payload bit of this byte fits below bit 64. */
  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
  /* Eight bytes read and the last one still has its continuation bit set. */
  return err;

done:
  r.val = val;
  r.p = p;
  return r;
}
9565
/* Locates the end of the varint encoded in |v|.
 *
 * The result has exactly one bit set: the 0x80 bit position of the lowest
 * byte of |v| whose continuation bit is clear.  Subtracting one from it gives
 * a mask covering only the bits that belong to the varint.  If every byte of
 * |v| has its continuation bit set (an unterminated varint), the result is 0. */
static uint64_t upb_get_vstopbit(uint64_t v) {
  const uint64_t payload_mask = 0x7f7f7f7f7f7f7f7fULL;
  /* With all payload bits forced to 1, adding 1 carries through every byte
   * whose continuation bit is set and stops at the first clear one. */
  uint64_t filled = v | payload_mask;
  uint64_t carried = filled + 1;
  return carried & ~filled;
}
9574
/* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling.
 *
 * Always copies 8 bytes from r.p, so at least 8 bytes must be readable there.
 * The varint's payload bits are compacted into a contiguous value, shifted
 * left by 7 and OR-ed into r.val; the returned pointer is advanced past the
 * byte containing the stop bit.  Returns {NULL, 0} if none of the 8 bytes
 * terminates the varint.
 * NOTE(review): treating the first byte in memory as the least significant
 * byte of |b| appears to assume a little-endian host -- confirm. */
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  uint64_t b;
  uint64_t stop_bit;
  upb_decoderet my_r;
  memcpy(&b, r.p, sizeof(b));
  stop_bit = upb_get_vstopbit(b);
  /* Drop the continuation bits and everything beyond the varint's end. */
  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
  /* Close the gaps left by the continuation bits, merging groups of
   * increasing width at each step. */
  b +=       b & 0x007f007f007f007fULL;
  b +=  3 * (b & 0x0000ffff0000ffffULL);
  b += 15 * (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    /* Error: unterminated varint. */
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  /* (bit index of the stop bit + 1) / 8 is the number of bytes consumed. */
  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
                            r.val | (b << 7));
  return my_r;
}
9595
/* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling.
 *
 * Always copies 8 bytes from r.p, so at least 8 bytes must be readable there.
 * The varint's payload bits are compacted into a contiguous value, shifted
 * left by 14 and OR-ed into r.val; the returned pointer is advanced past the
 * byte containing the stop bit.  Returns {NULL, 0} if none of the 8 bytes
 * terminates the varint.
 * NOTE(review): treating the first byte in memory as the least significant
 * byte of |b| appears to assume a little-endian host -- confirm. */
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  uint64_t b;
  uint64_t stop_bit;
  upb_decoderet my_r;
  memcpy(&b, r.p, sizeof(b));
  stop_bit = upb_get_vstopbit(b);
  /* Keep only the bits below the stop bit. */
  b &= (stop_bit - 1);
  /* Squeeze out the continuation-bit gaps: first within each 16-bit pair,
   * then within each 32-bit half, then across the whole word. */
  b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
  b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
  b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    /* Error: unterminated varint. */
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  /* (bit index of the stop bit + 1) / 8 is the number of bytes consumed. */
  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
                            r.val | (b << 14));
  return my_r;
}
9616
9617#line 1 "upb/json/parser.rl"
9618/*
9619** upb::json::Parser (upb_json_parser)
9620**
9621** A parser that uses the Ragel State Machine Compiler to generate
9622** the finite automata.
9623**
9624** Ragel only natively handles regular languages, but we can manually
9625** program it a bit to handle context-free languages like JSON, by using
9626** the "fcall" and "fret" constructs.
9627**
9628** This parser can handle the basics, but needs several things to be fleshed
9629** out:
9630**
9631** - handling of unicode escape sequences (including high surrogate pairs).
9632** - properly check and report errors for unknown fields, stack overflow,
9633**   improper array nesting (or lack of nesting).
9634** - handling of base64 sequences with padding characters.
9635** - handling of push-back (non-success returns from sink functions).
9636** - handling of keys/escape-sequences/etc that span input buffers.
9637*/
9638
9639#include <stdio.h>
9640#include <stdint.h>
9641#include <assert.h>
9642#include <string.h>
9643#include <stdlib.h>
9644#include <errno.h>
9645
9646
9647#define UPB_JSON_MAX_DEPTH 64
9648
/* One entry of the parser's scope stack (see upb_json_parser.stack). */
typedef struct {
  /* Sink that string data parsed in this frame is pushed to (see the
   * upb_sink_putstring() calls on p->top->sink). */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;
} upb_jsonparser_frame;
9674
/* Complete state of one JSON parser instance. */
struct upb_json_parser {
  /* Environment used for buffer reallocation and for reporting errors. */
  upb_env *env;
  upb_byteshandler input_handler_;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  /* The innermost (current) frame. */
  upb_jsonparser_frame *top;
  /* check_stack() reports "Nesting too deep" once top + 1 reaches this. */
  upb_jsonparser_frame *limit;

  /* Filled in with an error message before it is reported to |env|. */
  upb_status status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl. */
  /* |accumulated| points either into the input buffer or at |accumulate_buf|;
   * it is NULL while nothing has been accumulated. */
  const char *accumulated;
  size_t accumulated_len;
  /* Heap buffer (and its capacity) used when the data must be copied. */
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl. */
  /* One of the MULTIPART_* states. */
  int multipart_state;
  /* Selector used when pushing string data in MULTIPART_PUSHEAGERLY state. */
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;
};
9711
/* Returns false from the enclosing function if |x| evaluates to false.
 * Expands to a bare "if" statement, so use it only as a complete statement
 * and never directly before an "else". */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false

/* Used to signal that a capture has been suspended.
 * NOTE(review): presumably only the address of this object is used, as a
 * sentinel value for p->capture -- confirm against the capture functions. */
static char suspend_capture;
9716
9717static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9718                                             upb_handlertype_t type) {
9719  upb_selector_t sel;
9720  bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9721  UPB_ASSERT_VAR(ok, ok);
9722  return sel;
9723}
9724
9725static upb_selector_t parser_getsel(upb_json_parser *p) {
9726  return getsel_for_handlertype(
9727      p, upb_handlers_getprimitivehandlertype(p->top->f));
9728}
9729
9730static bool check_stack(upb_json_parser *p) {
9731  if ((p->top + 1) == p->limit) {
9732    upb_status_seterrmsg(&p->status, "Nesting too deep");
9733    upb_env_reporterror(p->env, &p->status);
9734    return false;
9735  }
9736
9737  return true;
9738}
9739
/* There are GCC/Clang built-ins for overflow checking which we could start
 * using if there was any performance benefit to it. */

/* Stores a + b in *c and returns true, unless the sum would not fit in a
 * size_t, in which case it returns false and leaves *c untouched. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
9748
/* Returns a * b, or SIZE_MAX if the product does not fit in a size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* a * b overflows exactly when a exceeds the largest multiplier that keeps
   * the product within SIZE_MAX. */
  if (b != 0 && a > SIZE_MAX / b) {
    return SIZE_MAX;
  }
  return a * b;
}
9757
9758
9759/* Base64 decoding ************************************************************/
9760
9761/* TODO(haberman): make this streaming. */
9762
/* Base64 decoding table, indexed by input byte value (256 entries).  Each
 * entry is the 6-bit value of that character in the standard base64 alphabet
 * ('A'-'Z', 'a'-'z', '0'-'9', '+', '/'), or -1 for every other byte.  Note
 * that the padding character '=' also maps to -1. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
9797
9798/* Returns the table value sign-extended to 32 bits.  Knowing that the upper
9799 * bits will be 1 for unrecognized characters makes it easier to check for
9800 * this error condition later (see below). */
9801int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9802
/* Returns true if |ch| is neither a base64 alphabet character nor the padding
 * character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
9806
9807static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
9808                        size_t len) {
9809  const char *limit = ptr + len;
9810  for (; ptr < limit; ptr += 4) {
9811    uint32_t val;
9812    char output[3];
9813
9814    if (limit - ptr < 4) {
9815      upb_status_seterrf(&p->status,
9816                         "Base64 input for bytes field not a multiple of 4: %s",
9817                         upb_fielddef_name(p->top->f));
9818      upb_env_reporterror(p->env, &p->status);
9819      return false;
9820    }
9821
9822    val = b64lookup(ptr[0]) << 18 |
9823          b64lookup(ptr[1]) << 12 |
9824          b64lookup(ptr[2]) << 6  |
9825          b64lookup(ptr[3]);
9826
9827    /* Test the upper bit; returns true if any of the characters returned -1. */
9828    if (val & 0x80000000) {
9829      goto otherchar;
9830    }
9831
9832    output[0] = val >> 16;
9833    output[1] = (val >> 8) & 0xff;
9834    output[2] = val & 0xff;
9835    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
9836  }
9837  return true;
9838
9839otherchar:
9840  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
9841      nonbase64(ptr[3]) ) {
9842    upb_status_seterrf(&p->status,
9843                       "Non-base64 characters in bytes field: %s",
9844                       upb_fielddef_name(p->top->f));
9845    upb_env_reporterror(p->env, &p->status);
9846    return false;
9847  } if (ptr[2] == '=') {
9848    uint32_t val;
9849    char output;
9850
9851    /* Last group contains only two input bytes, one output byte. */
9852    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
9853      goto badpadding;
9854    }
9855
9856    val = b64lookup(ptr[0]) << 18 |
9857          b64lookup(ptr[1]) << 12;
9858
9859    assert(!(val & 0x80000000));
9860    output = val >> 16;
9861    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
9862    return true;
9863  } else {
9864    uint32_t val;
9865    char output[2];
9866
9867    /* Last group contains only three input bytes, two output bytes. */
9868    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
9869      goto badpadding;
9870    }
9871
9872    val = b64lookup(ptr[0]) << 18 |
9873          b64lookup(ptr[1]) << 12 |
9874          b64lookup(ptr[2]) << 6;
9875
9876    output[0] = val >> 16;
9877    output[1] = (val >> 8) & 0xff;
9878    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
9879    return true;
9880  }
9881
9882badpadding:
9883  upb_status_seterrf(&p->status,
9884                     "Incorrect base64 padding for field: %s (%.*s)",
9885                     upb_fielddef_name(p->top->f),
9886                     4, ptr);
9887  upb_env_reporterror(p->env, &p->status);
9888  return false;
9889}
9890
9891
9892/* Accumulate buffer **********************************************************/
9893
9894/* Functionality for accumulating a buffer.
9895 *
9896 * Some parts of the parser need an entire value as a contiguous string.  For
9897 * example, to look up a member name in a hash table, or to turn a string into
9898 * a number, the relevant library routines need the input string to be in
9899 * contiguous memory, even if the value spanned two or more buffers in the
9900 * input.  These routines handle that.
9901 *
9902 * In the common case we can just point to the input buffer to get this
9903 * contiguous string and avoid any actual copy.  So we optimistically begin
9904 * this way.  But there are a few cases where we must instead copy into a
9905 * separate buffer:
9906 *
9907 *   1. The string was not contiguous in the input (it spanned buffers).
9908 *
9909 *   2. The string included escape sequences that need to be interpreted to get
9910 *      the true value in a contiguous buffer. */
9911
9912static void assert_accumulate_empty(upb_json_parser *p) {
9913  UPB_UNUSED(p);
9914  assert(p->accumulated == NULL);
9915  assert(p->accumulated_len == 0);
9916}
9917
9918static void accumulate_clear(upb_json_parser *p) {
9919  p->accumulated = NULL;
9920  p->accumulated_len = 0;
9921}
9922
9923/* Used internally by accumulate_append(). */
9924static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9925  void *mem;
9926  size_t old_size = p->accumulate_buf_size;
9927  size_t new_size = UPB_MAX(old_size, 128);
9928  while (new_size < need) {
9929    new_size = saturating_multiply(new_size, 2);
9930  }
9931
9932  mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
9933  if (!mem) {
9934    upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
9935    upb_env_reporterror(p->env, &p->status);
9936    return false;
9937  }
9938
9939  p->accumulate_buf = mem;
9940  p->accumulate_buf_size = new_size;
9941  return true;
9942}
9943
9944/* Logically appends the given data to the append buffer.
9945 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9946 * must be valid until the next accumulate_append() call (if any). */
9947static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9948                              bool can_alias) {
9949  size_t need;
9950
9951  if (!p->accumulated && can_alias) {
9952    p->accumulated = buf;
9953    p->accumulated_len = len;
9954    return true;
9955  }
9956
9957  if (!checked_add(p->accumulated_len, len, &need)) {
9958    upb_status_seterrmsg(&p->status, "Integer overflow.");
9959    upb_env_reporterror(p->env, &p->status);
9960    return false;
9961  }
9962
9963  if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9964    return false;
9965  }
9966
9967  if (p->accumulated != p->accumulate_buf) {
9968    memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9969    p->accumulated = p->accumulate_buf;
9970  }
9971
9972  memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9973  p->accumulated_len += len;
9974  return true;
9975}
9976
9977/* Returns a pointer to the data accumulated since the last accumulate_clear()
9978 * call, and writes the length to *len.  This with point either to the input
9979 * buffer or a temporary accumulate buffer. */
9980static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9981  assert(p->accumulated);
9982  *len = p->accumulated_len;
9983  return p->accumulated;
9984}
9985
9986
/* Multi-part text data *******************************************************/
9988
9989/* When we have text data in the input, it can often come in multiple segments.
9990 * For example, there may be some raw string data followed by an escape
9991 * sequence.  The two segments are processed with different logic.  Also buffer
9992 * seams in the input can cause multiple segments.
9993 *
9994 * As we see segments, there are two main cases for how we want to process them:
9995 *
9996 *  1. we want to push the captured input directly to string handlers.
9997 *
9998 *  2. we need to accumulate all the parts into a contiguous buffer for further
9999 *     processing (field name lookup, string->number conversion, etc). */
10000
/* This is the set of states for p->multipart_state.  The state is set by
 * multipart_startaccum() / multipart_start(), selects how multipart_text()
 * handles each segment, and is reset by multipart_end(). */
enum {
  /* We are not currently processing multipart data. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
10014
10015/* Start a multi-part text value where we accumulate the data for processing at
10016 * the end. */
10017static void multipart_startaccum(upb_json_parser *p) {
10018  assert_accumulate_empty(p);
10019  assert(p->multipart_state == MULTIPART_INACTIVE);
10020  p->multipart_state = MULTIPART_ACCUMULATE;
10021}
10022
10023/* Start a multi-part text value where we immediately push text data to a string
10024 * value with the given selector. */
10025static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10026  assert_accumulate_empty(p);
10027  assert(p->multipart_state == MULTIPART_INACTIVE);
10028  p->multipart_state = MULTIPART_PUSHEAGERLY;
10029  p->string_selector = sel;
10030}
10031
10032static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
10033                           bool can_alias) {
10034  switch (p->multipart_state) {
10035    case MULTIPART_INACTIVE:
10036      upb_status_seterrmsg(
10037          &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
10038      upb_env_reporterror(p->env, &p->status);
10039      return false;
10040
10041    case MULTIPART_ACCUMULATE:
10042      if (!accumulate_append(p, buf, len, can_alias)) {
10043        return false;
10044      }
10045      break;
10046
10047    case MULTIPART_PUSHEAGERLY: {
10048      const upb_bufhandle *handle = can_alias ? p->handle : NULL;
10049      upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
10050      break;
10051    }
10052  }
10053
10054  return true;
10055}
10056
10057/* Note: this invalidates the accumulate buffer!  Call only after reading its
10058 * contents. */
10059static void multipart_end(upb_json_parser *p) {
10060  assert(p->multipart_state != MULTIPART_INACTIVE);
10061  p->multipart_state = MULTIPART_INACTIVE;
10062  accumulate_clear(p);
10063}
10064
10065
10066/* Input capture **************************************************************/
10067
10068/* Functionality for capturing a region of the input as text.  Gracefully
10069 * handles the case where a buffer seam occurs in the middle of the captured
10070 * region. */
10071
10072static void capture_begin(upb_json_parser *p, const char *ptr) {
10073  assert(p->multipart_state != MULTIPART_INACTIVE);
10074  assert(p->capture == NULL);
10075  p->capture = ptr;
10076}
10077
10078static bool capture_end(upb_json_parser *p, const char *ptr) {
10079  assert(p->capture);
10080  if (multipart_text(p, p->capture, ptr - p->capture, true)) {
10081    p->capture = NULL;
10082    return true;
10083  } else {
10084    return false;
10085  }
10086}
10087
10088/* This is called at the end of each input buffer (ie. when we have hit a
10089 * buffer seam).  If we are in the middle of capturing the input, this
10090 * processes the unprocessed capture region. */
10091static void capture_suspend(upb_json_parser *p, const char **ptr) {
10092  if (!p->capture) return;
10093
10094  if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
10095    /* We use this as a signal that we were in the middle of capturing, and
10096     * that capturing should resume at the beginning of the next buffer.
10097     *
10098     * We can't use *ptr here, because we have no guarantee that this pointer
10099     * will be valid when we resume (if the underlying memory is freed, then
10100     * using the pointer at all, even to compare to NULL, is likely undefined
10101     * behavior). */
10102    p->capture = &suspend_capture;
10103  } else {
10104    /* Need to back up the pointer to the beginning of the capture, since
10105     * we were not able to actually preserve it. */
10106    *ptr = p->capture;
10107  }
10108}
10109
10110static void capture_resume(upb_json_parser *p, const char *ptr) {
10111  if (p->capture) {
10112    assert(p->capture == &suspend_capture);
10113    p->capture = ptr;
10114  }
10115}
10116
10117
10118/* Callbacks from the parser **************************************************/
10119
10120/* These are the functions called directly from the parser itself.
10121 * We define these in the same order as their declarations in the parser. */
10122
/* Maps the character that follows a backslash in a JSON string escape to the
 * character it stands for.  Only r, t, n, f, b, '/', '"' and '\\' are
 * expected; any other input trips an assert (and yields 'x' when asserts are
 * compiled out). */
static char escape_char(char in) {
  switch (in) {
    /* These three escapes stand for themselves. */
    case '/':
    case '"':
    case '\\':
      return in;

    /* Control-character escapes. */
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';

    default:
      break;
  }

  assert(0);
  return 'x';
}
10138
10139static bool escape(upb_json_parser *p, const char *ptr) {
10140  char ch = escape_char(*ptr);
10141  return multipart_text(p, &ch, 1, false);
10142}
10143
10144static void start_hex(upb_json_parser *p) {
10145  p->digit = 0;
10146}
10147
10148static void hexdigit(upb_json_parser *p, const char *ptr) {
10149  char ch = *ptr;
10150
10151  p->digit <<= 4;
10152
10153  if (ch >= '0' && ch <= '9') {
10154    p->digit += (ch - '0');
10155  } else if (ch >= 'a' && ch <= 'f') {
10156    p->digit += ((ch - 'a') + 10);
10157  } else {
10158    assert(ch >= 'A' && ch <= 'F');
10159    p->digit += ((ch - 'A') + 10);
10160  }
10161}
10162
10163static bool end_hex(upb_json_parser *p) {
10164  uint32_t codepoint = p->digit;
10165
10166  /* emit the codepoint as UTF-8. */
10167  char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
10168  int length = 0;
10169  if (codepoint <= 0x7F) {
10170    utf8[0] = codepoint;
10171    length = 1;
10172  } else if (codepoint <= 0x07FF) {
10173    utf8[1] = (codepoint & 0x3F) | 0x80;
10174    codepoint >>= 6;
10175    utf8[0] = (codepoint & 0x1F) | 0xC0;
10176    length = 2;
10177  } else /* codepoint <= 0xFFFF */ {
10178    utf8[2] = (codepoint & 0x3F) | 0x80;
10179    codepoint >>= 6;
10180    utf8[1] = (codepoint & 0x3F) | 0x80;
10181    codepoint >>= 6;
10182    utf8[0] = (codepoint & 0x0F) | 0xE0;
10183    length = 3;
10184  }
10185  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
10186   * we have to wait for the next escape to get the full code point). */
10187
10188  return multipart_text(p, utf8, length, false);
10189}
10190
10191static void start_text(upb_json_parser *p, const char *ptr) {
10192  capture_begin(p, ptr);
10193}
10194
10195static bool end_text(upb_json_parser *p, const char *ptr) {
10196  return capture_end(p, ptr);
10197}
10198
10199static void start_number(upb_json_parser *p, const char *ptr) {
10200  multipart_startaccum(p);
10201  capture_begin(p, ptr);
10202}
10203
10204static bool parse_number(upb_json_parser *p);
10205
10206static bool end_number(upb_json_parser *p, const char *ptr) {
10207  if (!capture_end(p, ptr)) {
10208    return false;
10209  }
10210
10211  return parse_number(p);
10212}
10213
10214static bool parse_number(upb_json_parser *p) {
10215  size_t len;
10216  const char *buf;
10217  const char *myend;
10218  char *end;
10219
10220  /* strtol() and friends unfortunately do not support specifying the length of
10221   * the input string, so we need to force a copy into a NULL-terminated buffer. */
10222  if (!multipart_text(p, "\0", 1, false)) {
10223    return false;
10224  }
10225
10226  buf = accumulate_getptr(p, &len);
10227  myend = buf + len - 1;  /* One for NULL. */
10228
10229  /* XXX: We are using strtol to parse integers, but this is wrong as even
10230   * integers can be represented as 1e6 (for example), which strtol can't
10231   * handle correctly.
10232   *
10233   * XXX: Also, we can't handle large integers properly because strto[u]ll
10234   * isn't in C89.
10235   *
10236   * XXX: Also, we don't properly check floats for overflow, since strtof
10237   * isn't in C89. */
10238  switch (upb_fielddef_type(p->top->f)) {
10239    case UPB_TYPE_ENUM:
10240    case UPB_TYPE_INT32: {
10241      long val = strtol(p->accumulated, &end, 0);
10242      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
10243        goto err;
10244      else
10245        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
10246      break;
10247    }
10248    case UPB_TYPE_INT64: {
10249      long long val = strtol(p->accumulated, &end, 0);
10250      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
10251        goto err;
10252      else
10253        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
10254      break;
10255    }
10256    case UPB_TYPE_UINT32: {
10257      unsigned long val = strtoul(p->accumulated, &end, 0);
10258      if (val > UINT32_MAX || errno == ERANGE || end != myend)
10259        goto err;
10260      else
10261        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
10262      break;
10263    }
10264    case UPB_TYPE_UINT64: {
10265      unsigned long long val = strtoul(p->accumulated, &end, 0);
10266      if (val > UINT64_MAX || errno == ERANGE || end != myend)
10267        goto err;
10268      else
10269        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
10270      break;
10271    }
10272    case UPB_TYPE_DOUBLE: {
10273      double val = strtod(p->accumulated, &end);
10274      if (errno == ERANGE || end != myend)
10275        goto err;
10276      else
10277        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
10278      break;
10279    }
10280    case UPB_TYPE_FLOAT: {
10281      float val = strtod(p->accumulated, &end);
10282      if (errno == ERANGE || end != myend)
10283        goto err;
10284      else
10285        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
10286      break;
10287    }
10288    default:
10289      assert(false);
10290  }
10291
10292  multipart_end(p);
10293
10294  return true;
10295
10296err:
10297  upb_status_seterrf(&p->status, "error parsing number: %s", buf);
10298  upb_env_reporterror(p->env, &p->status);
10299  multipart_end(p);
10300  return false;
10301}
10302
10303static bool parser_putbool(upb_json_parser *p, bool val) {
10304  bool ok;
10305
10306  if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
10307    upb_status_seterrf(&p->status,
10308                       "Boolean value specified for non-bool field: %s",
10309                       upb_fielddef_name(p->top->f));
10310    upb_env_reporterror(p->env, &p->status);
10311    return false;
10312  }
10313
10314  ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
10315  UPB_ASSERT_VAR(ok, ok);
10316
10317  return true;
10318}
10319
10320static bool start_stringval(upb_json_parser *p) {
10321  assert(p->top->f);
10322
10323  if (upb_fielddef_isstring(p->top->f)) {
10324    upb_jsonparser_frame *inner;
10325    upb_selector_t sel;
10326
10327    if (!check_stack(p)) return false;
10328
10329    /* Start a new parser frame: parser frames correspond one-to-one with
10330     * handler frames, and string events occur in a sub-frame. */
10331    inner = p->top + 1;
10332    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10333    upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
10334    inner->m = p->top->m;
10335    inner->f = p->top->f;
10336    inner->is_map = false;
10337    inner->is_mapentry = false;
10338    p->top = inner;
10339
10340    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
10341      /* For STRING fields we push data directly to the handlers as it is
10342       * parsed.  We don't do this yet for BYTES fields, because our base64
10343       * decoder is not streaming.
10344       *
10345       * TODO(haberman): make base64 decoding streaming also. */
10346      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
10347      return true;
10348    } else {
10349      multipart_startaccum(p);
10350      return true;
10351    }
10352  } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
10353    /* No need to push a frame -- symbolic enum names in quotes remain in the
10354     * current parser frame.
10355     *
10356     * Enum string values must accumulate so we can look up the value in a table
10357     * once it is complete. */
10358    multipart_startaccum(p);
10359    return true;
10360  } else {
10361    upb_status_seterrf(&p->status,
10362                       "String specified for non-string/non-enum field: %s",
10363                       upb_fielddef_name(p->top->f));
10364    upb_env_reporterror(p->env, &p->status);
10365    return false;
10366  }
10367}
10368
10369static bool end_stringval(upb_json_parser *p) {
10370  bool ok = true;
10371
10372  switch (upb_fielddef_type(p->top->f)) {
10373    case UPB_TYPE_BYTES:
10374      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10375                       p->accumulated, p->accumulated_len)) {
10376        return false;
10377      }
10378      /* Fall through. */
10379
10380    case UPB_TYPE_STRING: {
10381      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10382      upb_sink_endstr(&p->top->sink, sel);
10383      p->top--;
10384      break;
10385    }
10386
10387    case UPB_TYPE_ENUM: {
10388      /* Resolve enum symbolic name to integer value. */
10389      const upb_enumdef *enumdef =
10390          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
10391
10392      size_t len;
10393      const char *buf = accumulate_getptr(p, &len);
10394
10395      int32_t int_val = 0;
10396      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
10397
10398      if (ok) {
10399        upb_selector_t sel = parser_getsel(p);
10400        upb_sink_putint32(&p->top->sink, sel, int_val);
10401      } else {
10402        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf);
10403        upb_env_reporterror(p->env, &p->status);
10404      }
10405
10406      break;
10407    }
10408
10409    default:
10410      assert(false);
10411      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
10412      upb_env_reporterror(p->env, &p->status);
10413      ok = false;
10414      break;
10415  }
10416
10417  multipart_end(p);
10418
10419  return ok;
10420}
10421
10422static void start_member(upb_json_parser *p) {
10423  assert(!p->top->f);
10424  multipart_startaccum(p);
10425}
10426
10427/* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10428 * field based on the current contents of the accumulate buffer. */
10429static bool parse_mapentry_key(upb_json_parser *p) {
10430
10431  size_t len;
10432  const char *buf = accumulate_getptr(p, &len);
10433
10434  /* Emit the key field. We do a bit of ad-hoc parsing here because the
10435   * parser state machine has already decided that this is a string field
10436   * name, and we are reinterpreting it as some arbitrary key type. In
10437   * particular, integer and bool keys are quoted, so we need to parse the
10438   * quoted string contents here. */
10439
10440  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10441  if (p->top->f == NULL) {
10442    upb_status_seterrmsg(&p->status, "mapentry message has no key");
10443    upb_env_reporterror(p->env, &p->status);
10444    return false;
10445  }
10446  switch (upb_fielddef_type(p->top->f)) {
10447    case UPB_TYPE_INT32:
10448    case UPB_TYPE_INT64:
10449    case UPB_TYPE_UINT32:
10450    case UPB_TYPE_UINT64:
10451      /* Invoke end_number. The accum buffer has the number's text already. */
10452      if (!parse_number(p)) {
10453        return false;
10454      }
10455      break;
10456    case UPB_TYPE_BOOL:
10457      if (len == 4 && !strncmp(buf, "true", 4)) {
10458        if (!parser_putbool(p, true)) {
10459          return false;
10460        }
10461      } else if (len == 5 && !strncmp(buf, "false", 5)) {
10462        if (!parser_putbool(p, false)) {
10463          return false;
10464        }
10465      } else {
10466        upb_status_seterrmsg(&p->status,
10467                             "Map bool key not 'true' or 'false'");
10468        upb_env_reporterror(p->env, &p->status);
10469        return false;
10470      }
10471      multipart_end(p);
10472      break;
10473    case UPB_TYPE_STRING:
10474    case UPB_TYPE_BYTES: {
10475      upb_sink subsink;
10476      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10477      upb_sink_startstr(&p->top->sink, sel, len, &subsink);
10478      sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10479      upb_sink_putstring(&subsink, sel, buf, len, NULL);
10480      sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10481      upb_sink_endstr(&subsink, sel);
10482      multipart_end(p);
10483      break;
10484    }
10485    default:
10486      upb_status_seterrmsg(&p->status, "Invalid field type for map key");
10487      upb_env_reporterror(p->env, &p->status);
10488      return false;
10489  }
10490
10491  return true;
10492}
10493
10494/* Helper: emit one map entry (as a submessage in the map field sequence). This
10495 * is invoked from end_membername(), at the end of the map entry's key string,
10496 * with the map key in the accumulate buffer. It parses the key from that
10497 * buffer, emits the handler calls to start the mapentry submessage (setting up
10498 * its subframe in the process), and sets up state in the subframe so that the
10499 * value parser (invoked next) will emit the mapentry's value field and then
10500 * end the mapentry message. */
10501
10502static bool handle_mapentry(upb_json_parser *p) {
10503  const upb_fielddef *mapfield;
10504  const upb_msgdef *mapentrymsg;
10505  upb_jsonparser_frame *inner;
10506  upb_selector_t sel;
10507
10508  /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10509   * for the mapentry itself, and then set |f| in that frame so that the map
10510   * value field is parsed, and also set a flag to end the frame after the
10511   * map-entry value is parsed. */
10512  if (!check_stack(p)) return false;
10513
10514  mapfield = p->top->mapfield;
10515  mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10516
10517  inner = p->top + 1;
10518  p->top->f = mapfield;
10519  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10520  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10521  inner->m = mapentrymsg;
10522  inner->mapfield = mapfield;
10523  inner->is_map = false;
10524
10525  /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10526   * the key field value to the sink, and these handlers will pop the frame
10527   * if they see is_mapentry (when invoked by the parser state machine, they
10528   * would have just seen the map-entry value, not key). */
10529  inner->is_mapentry = false;
10530  p->top = inner;
10531
10532  /* send STARTMSG in submsg frame. */
10533  upb_sink_startmsg(&p->top->sink);
10534
10535  parse_mapentry_key(p);
10536
10537  /* Set up the value field to receive the map-entry value. */
10538  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10539  p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
10540  p->top->mapfield = mapfield;
10541  if (p->top->f == NULL) {
10542    upb_status_seterrmsg(&p->status, "mapentry message has no value");
10543    upb_env_reporterror(p->env, &p->status);
10544    return false;
10545  }
10546
10547  return true;
10548}
10549
10550static bool end_membername(upb_json_parser *p) {
10551  assert(!p->top->f);
10552
10553  if (p->top->is_map) {
10554    return handle_mapentry(p);
10555  } else {
10556    size_t len;
10557    const char *buf = accumulate_getptr(p, &len);
10558    const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
10559
10560    if (!f) {
10561      /* TODO(haberman): Ignore unknown fields if requested/configured to do
10562       * so. */
10563      upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
10564      upb_env_reporterror(p->env, &p->status);
10565      return false;
10566    }
10567
10568    p->top->f = f;
10569    multipart_end(p);
10570
10571    return true;
10572  }
10573}
10574
10575static void end_member(upb_json_parser *p) {
10576  /* If we just parsed a map-entry value, end that frame too. */
10577  if (p->top->is_mapentry) {
10578    upb_status s = UPB_STATUS_INIT;
10579    upb_selector_t sel;
10580    bool ok;
10581    const upb_fielddef *mapfield;
10582
10583    assert(p->top > p->stack);
10584    /* send ENDMSG on submsg. */
10585    upb_sink_endmsg(&p->top->sink, &s);
10586    mapfield = p->top->mapfield;
10587
10588    /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
10589    p->top--;
10590    ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
10591    UPB_ASSERT_VAR(ok, ok);
10592    upb_sink_endsubmsg(&p->top->sink, sel);
10593  }
10594
10595  p->top->f = NULL;
10596}
10597
10598static bool start_subobject(upb_json_parser *p) {
10599  assert(p->top->f);
10600
10601  if (upb_fielddef_ismap(p->top->f)) {
10602    upb_jsonparser_frame *inner;
10603    upb_selector_t sel;
10604
10605    /* Beginning of a map. Start a new parser frame in a repeated-field
10606     * context. */
10607    if (!check_stack(p)) return false;
10608
10609    inner = p->top + 1;
10610    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
10611    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
10612    inner->m = upb_fielddef_msgsubdef(p->top->f);
10613    inner->mapfield = p->top->f;
10614    inner->f = NULL;
10615    inner->is_map = true;
10616    inner->is_mapentry = false;
10617    p->top = inner;
10618
10619    return true;
10620  } else if (upb_fielddef_issubmsg(p->top->f)) {
10621    upb_jsonparser_frame *inner;
10622    upb_selector_t sel;
10623
10624    /* Beginning of a subobject. Start a new parser frame in the submsg
10625     * context. */
10626    if (!check_stack(p)) return false;
10627
10628    inner = p->top + 1;
10629
10630    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10631    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10632    inner->m = upb_fielddef_msgsubdef(p->top->f);
10633    inner->f = NULL;
10634    inner->is_map = false;
10635    inner->is_mapentry = false;
10636    p->top = inner;
10637
10638    return true;
10639  } else {
10640    upb_status_seterrf(&p->status,
10641                       "Object specified for non-message/group field: %s",
10642                       upb_fielddef_name(p->top->f));
10643    upb_env_reporterror(p->env, &p->status);
10644    return false;
10645  }
10646}
10647
10648static void end_subobject(upb_json_parser *p) {
10649  if (p->top->is_map) {
10650    upb_selector_t sel;
10651    p->top--;
10652    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
10653    upb_sink_endseq(&p->top->sink, sel);
10654  } else {
10655    upb_selector_t sel;
10656    p->top--;
10657    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
10658    upb_sink_endsubmsg(&p->top->sink, sel);
10659  }
10660}
10661
10662static bool start_array(upb_json_parser *p) {
10663  upb_jsonparser_frame *inner;
10664  upb_selector_t sel;
10665
10666  assert(p->top->f);
10667
10668  if (!upb_fielddef_isseq(p->top->f)) {
10669    upb_status_seterrf(&p->status,
10670                       "Array specified for non-repeated field: %s",
10671                       upb_fielddef_name(p->top->f));
10672    upb_env_reporterror(p->env, &p->status);
10673    return false;
10674  }
10675
10676  if (!check_stack(p)) return false;
10677
10678  inner = p->top + 1;
10679  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
10680  upb_sink_startseq(&p->top->sink, sel, &inner->sink);
10681  inner->m = p->top->m;
10682  inner->f = p->top->f;
10683  inner->is_map = false;
10684  inner->is_mapentry = false;
10685  p->top = inner;
10686
10687  return true;
10688}
10689
10690static void end_array(upb_json_parser *p) {
10691  upb_selector_t sel;
10692
10693  assert(p->top > p->stack);
10694
10695  p->top--;
10696  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
10697  upb_sink_endseq(&p->top->sink, sel);
10698}
10699
10700static void start_object(upb_json_parser *p) {
10701  if (!p->top->is_map) {
10702    upb_sink_startmsg(&p->top->sink);
10703  }
10704}
10705
10706static void end_object(upb_json_parser *p) {
10707  if (!p->top->is_map) {
10708    upb_status status;
10709    upb_status_clear(&status);
10710    upb_sink_endmsg(&p->top->sink, &status);
10711    if (!upb_ok(&status)) {
10712      upb_env_reporterror(p->env, &status);
10713    }
10714  }
10715}
10716
10717
/* Jumps to the enclosing function's `error` label when |x| evaluates to false.
 *
 * The body is wrapped in do { } while (0) so that the macro, followed by the
 * caller's ';', forms exactly one statement.  A bare `if` here would capture
 * a following `else` belonging to an outer `if` at the call site. */
#define CHECK_RETURN_TOP(x) \
  do {                      \
    if (!(x)) goto error;   \
  } while (0)
10719
10720
10721/* The actual parser **********************************************************/
10722
10723/* What follows is the Ragel parser itself.  The language is specified in Ragel
10724 * and the actions call our C functions above.
10725 *
10726 * Ragel has an extensive set of functionality, and we use only a small part of
10727 * it.  There are many action types but we only use a few:
10728 *
10729 *   ">" -- transition into a machine
10730 *   "%" -- transition out of a machine
10731 *   "@" -- transition into a final state of a machine.
10732 *
10733 * "@" transitions are tricky because a machine can transition into a final
10734 * state repeatedly.  But in some cases we know this can't happen, for example
10735 * a string which is delimited by a final '"' can only transition into its
10736 * final state once, when the closing '"' is seen. */
10737
10738
10739#line 1218 "upb/json/parser.rl"
10740
10741
10742
10743#line 1130 "upb/json/parser.c"
/* Action lists for the generated JSON state machine.
 *
 * parse() indexes this table with a value from _json_trans_actions.  The
 * entry at that offset is a count, and the following `count` entries are
 * action ids that select the `case` labels of the action switch in parse(). */
static const char _json_actions[] = {
	0, 1, 0, 1, 2, 1, 3, 1,
	5, 1, 6, 1, 7, 1, 8, 1,
	10, 1, 12, 1, 13, 1, 14, 1,
	15, 1, 16, 1, 17, 1, 21, 1,
	25, 1, 27, 2, 3, 8, 2, 4,
	5, 2, 6, 2, 2, 6, 8, 2,
	11, 9, 2, 13, 15, 2, 14, 15,
	2, 18, 1, 2, 19, 27, 2, 20,
	9, 2, 22, 27, 2, 23, 27, 2,
	24, 27, 2, 26, 27, 3, 14, 11,
	9
};
10757
/* Indexed by the current state (cs): offset into _json_trans_keys at which
 * that state's transition keys begin. */
static const unsigned char _json_key_offsets[] = {
	0, 0, 4, 9, 14, 15, 19, 24,
	29, 34, 38, 42, 45, 48, 50, 54,
	58, 60, 62, 67, 69, 71, 80, 86,
	92, 98, 104, 106, 115, 116, 116, 116,
	121, 126, 131, 132, 133, 134, 135, 135,
	136, 137, 138, 138, 139, 140, 141, 141,
	146, 151, 152, 156, 161, 166, 171, 175,
	175, 178, 178, 178
};
10768
/* Transition keys, compared against the current input byte (*p) in parse().
 *
 * A state's slice (located via _json_key_offsets) holds its single-character
 * keys first (_json_single_lengths[cs] of them), followed by its range keys
 * stored as (low, high) pairs (_json_range_lengths[cs] pairs).  parse()
 * binary-searches each group. */
static const char _json_trans_keys[] = {
	32, 123, 9, 13, 32, 34, 125, 9,
	13, 32, 34, 125, 9, 13, 34, 32,
	58, 9, 13, 32, 93, 125, 9, 13,
	32, 44, 125, 9, 13, 32, 44, 125,
	9, 13, 32, 34, 9, 13, 45, 48,
	49, 57, 48, 49, 57, 46, 69, 101,
	48, 57, 69, 101, 48, 57, 43, 45,
	48, 57, 48, 57, 48, 57, 46, 69,
	101, 48, 57, 34, 92, 34, 92, 34,
	47, 92, 98, 102, 110, 114, 116, 117,
	48, 57, 65, 70, 97, 102, 48, 57,
	65, 70, 97, 102, 48, 57, 65, 70,
	97, 102, 48, 57, 65, 70, 97, 102,
	34, 92, 34, 45, 91, 102, 110, 116,
	123, 48, 57, 34, 32, 93, 125, 9,
	13, 32, 44, 93, 9, 13, 32, 93,
	125, 9, 13, 97, 108, 115, 101, 117,
	108, 108, 114, 117, 101, 32, 34, 125,
	9, 13, 32, 34, 125, 9, 13, 34,
	32, 58, 9, 13, 32, 93, 125, 9,
	13, 32, 44, 125, 9, 13, 32, 44,
	125, 9, 13, 32, 34, 9, 13, 32,
	9, 13, 0
};
10794
/* Indexed by the current state (cs): number of single-character keys that
 * state has in _json_trans_keys. */
static const char _json_single_lengths[] = {
	0, 2, 3, 3, 1, 2, 3, 3,
	3, 2, 2, 1, 3, 0, 2, 2,
	0, 0, 3, 2, 2, 9, 0, 0,
	0, 0, 2, 7, 1, 0, 0, 3,
	3, 3, 1, 1, 1, 1, 0, 1,
	1, 1, 0, 1, 1, 1, 0, 3,
	3, 1, 2, 3, 3, 3, 2, 0,
	1, 0, 0, 0
};
10805
/* Indexed by the current state (cs): number of (low, high) range pairs that
 * state has in _json_trans_keys, stored after its single-character keys. */
static const char _json_range_lengths[] = {
	0, 1, 1, 1, 0, 1, 1, 1,
	1, 1, 1, 1, 0, 1, 1, 1,
	1, 1, 1, 0, 0, 0, 3, 3,
	3, 3, 0, 1, 0, 0, 0, 1,
	1, 1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 1,
	1, 0, 1, 1, 1, 1, 1, 0,
	1, 0, 0, 0
};
10816
/* Indexed by the current state (cs): base position of that state's entries in
 * _json_indicies.  parse() adds the position of the matching key to this
 * base, or the total number of keys when no key matches. */
static const short _json_index_offsets[] = {
	0, 0, 4, 9, 14, 16, 20, 25,
	30, 35, 39, 43, 46, 50, 52, 56,
	60, 62, 64, 69, 72, 75, 85, 89,
	93, 97, 101, 104, 113, 115, 116, 117,
	122, 127, 132, 134, 136, 138, 140, 141,
	143, 145, 147, 148, 150, 152, 154, 155,
	160, 165, 167, 171, 176, 181, 186, 190,
	191, 194, 195, 196
};
10827
/* Maps a per-state key position (computed from _json_index_offsets) to a
 * transition number, which parse() then uses to index _json_trans_targs and
 * _json_trans_actions. */
static const char _json_indicies[] = {
	0, 2, 0, 1, 3, 4, 5, 3,
	1, 6, 7, 8, 6, 1, 9, 1,
	10, 11, 10, 1, 11, 1, 1, 11,
	12, 13, 14, 15, 13, 1, 16, 17,
	8, 16, 1, 17, 7, 17, 1, 18,
	19, 20, 1, 19, 20, 1, 22, 23,
	23, 21, 24, 1, 23, 23, 24, 21,
	25, 25, 26, 1, 26, 1, 26, 21,
	22, 23, 23, 20, 21, 28, 29, 27,
	31, 32, 30, 33, 33, 33, 33, 33,
	33, 33, 33, 34, 1, 35, 35, 35,
	1, 36, 36, 36, 1, 37, 37, 37,
	1, 38, 38, 38, 1, 40, 41, 39,
	42, 43, 44, 45, 46, 47, 48, 43,
	1, 49, 1, 50, 51, 53, 54, 1,
	53, 52, 55, 56, 54, 55, 1, 56,
	1, 1, 56, 52, 57, 1, 58, 1,
	59, 1, 60, 1, 61, 62, 1, 63,
	1, 64, 1, 65, 66, 1, 67, 1,
	68, 1, 69, 70, 71, 72, 70, 1,
	73, 74, 75, 73, 1, 76, 1, 77,
	78, 77, 1, 78, 1, 1, 78, 79,
	80, 81, 82, 80, 1, 83, 84, 75,
	83, 1, 84, 74, 84, 1, 85, 86,
	86, 1, 1, 1, 1, 0
};
10855
/* Indexed by transition number (from _json_indicies): the state that parse()
 * stores in cs when the transition is taken. */
static const char _json_trans_targs[] = {
	1, 0, 2, 3, 4, 56, 3, 4,
	56, 5, 5, 6, 7, 8, 9, 56,
	8, 9, 11, 12, 18, 57, 13, 15,
	14, 16, 17, 20, 58, 21, 20, 58,
	21, 19, 22, 23, 24, 25, 26, 20,
	58, 21, 28, 30, 31, 34, 39, 43,
	47, 29, 59, 59, 32, 31, 29, 32,
	33, 35, 36, 37, 38, 59, 40, 41,
	42, 59, 44, 45, 46, 59, 48, 49,
	55, 48, 49, 55, 50, 50, 51, 52,
	53, 54, 55, 53, 54, 59, 56
};
10869
/* Indexed by transition number (from _json_indicies): offset into
 * _json_actions of the action list to run when the transition is taken.  A
 * value of 0 means the transition has no actions and parse() skips the action
 * switch. */
static const char _json_trans_actions[] = {
	0, 0, 0, 21, 77, 53, 0, 47,
	23, 17, 0, 0, 15, 19, 19, 50,
	0, 0, 0, 0, 0, 1, 0, 0,
	0, 0, 0, 3, 13, 0, 0, 35,
	5, 11, 0, 38, 7, 7, 7, 41,
	44, 9, 62, 56, 25, 0, 0, 0,
	31, 29, 33, 59, 15, 0, 27, 0,
	0, 0, 0, 0, 0, 68, 0, 0,
	0, 71, 0, 0, 0, 65, 21, 77,
	53, 0, 47, 23, 17, 0, 0, 15,
	19, 19, 50, 0, 0, 74, 0
};
10883
/* State numbers of the generated machine.  json_start is the start state;
 * the json_en_* constants are the entry states of the named sub-machines.
 * The action code in parse() enters the number, string and value machines by
 * assigning the same literals (10, 19, 27) to cs. */
static const int json_start = 1;

static const int json_en_number_machine = 10;
static const int json_en_string_machine = 19;
static const int json_en_value_machine = 27;
static const int json_en_main = 1;
10890
10891
10892#line 1221 "upb/json/parser.rl"
10893
10894size_t parse(void *closure, const void *hd, const char *buf, size_t size,
10895             const upb_bufhandle *handle) {
10896  upb_json_parser *parser = closure;
10897
10898  /* Variables used by Ragel's generated code. */
10899  int cs = parser->current_state;
10900  int *stack = parser->parser_stack;
10901  int top = parser->parser_top;
10902
10903  const char *p = buf;
10904  const char *pe = buf + size;
10905
10906  parser->handle = handle;
10907
10908  UPB_UNUSED(hd);
10909  UPB_UNUSED(handle);
10910
10911  capture_resume(parser, buf);
10912
10913
10914#line 1301 "upb/json/parser.c"
10915	{
10916	int _klen;
10917	unsigned int _trans;
10918	const char *_acts;
10919	unsigned int _nacts;
10920	const char *_keys;
10921
10922	if ( p == pe )
10923		goto _test_eof;
10924	if ( cs == 0 )
10925		goto _out;
10926_resume:
10927	_keys = _json_trans_keys + _json_key_offsets[cs];
10928	_trans = _json_index_offsets[cs];
10929
10930	_klen = _json_single_lengths[cs];
10931	if ( _klen > 0 ) {
10932		const char *_lower = _keys;
10933		const char *_mid;
10934		const char *_upper = _keys + _klen - 1;
10935		while (1) {
10936			if ( _upper < _lower )
10937				break;
10938
10939			_mid = _lower + ((_upper-_lower) >> 1);
10940			if ( (*p) < *_mid )
10941				_upper = _mid - 1;
10942			else if ( (*p) > *_mid )
10943				_lower = _mid + 1;
10944			else {
10945				_trans += (unsigned int)(_mid - _keys);
10946				goto _match;
10947			}
10948		}
10949		_keys += _klen;
10950		_trans += _klen;
10951	}
10952
10953	_klen = _json_range_lengths[cs];
10954	if ( _klen > 0 ) {
10955		const char *_lower = _keys;
10956		const char *_mid;
10957		const char *_upper = _keys + (_klen<<1) - 2;
10958		while (1) {
10959			if ( _upper < _lower )
10960				break;
10961
10962			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
10963			if ( (*p) < _mid[0] )
10964				_upper = _mid - 2;
10965			else if ( (*p) > _mid[1] )
10966				_lower = _mid + 2;
10967			else {
10968				_trans += (unsigned int)((_mid - _keys)>>1);
10969				goto _match;
10970			}
10971		}
10972		_trans += _klen;
10973	}
10974
10975_match:
10976	_trans = _json_indicies[_trans];
10977	cs = _json_trans_targs[_trans];
10978
10979	if ( _json_trans_actions[_trans] == 0 )
10980		goto _again;
10981
10982	_acts = _json_actions + _json_trans_actions[_trans];
10983	_nacts = (unsigned int) *_acts++;
10984	while ( _nacts-- > 0 )
10985	{
10986		switch ( *_acts++ )
10987		{
10988	case 0:
10989#line 1133 "upb/json/parser.rl"
10990	{ p--; {cs = stack[--top]; goto _again;} }
10991	break;
10992	case 1:
10993#line 1134 "upb/json/parser.rl"
10994	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
10995	break;
10996	case 2:
10997#line 1138 "upb/json/parser.rl"
10998	{ start_text(parser, p); }
10999	break;
11000	case 3:
11001#line 1139 "upb/json/parser.rl"
11002	{ CHECK_RETURN_TOP(end_text(parser, p)); }
11003	break;
11004	case 4:
11005#line 1145 "upb/json/parser.rl"
11006	{ start_hex(parser); }
11007	break;
11008	case 5:
11009#line 1146 "upb/json/parser.rl"
11010	{ hexdigit(parser, p); }
11011	break;
11012	case 6:
11013#line 1147 "upb/json/parser.rl"
11014	{ CHECK_RETURN_TOP(end_hex(parser)); }
11015	break;
11016	case 7:
11017#line 1153 "upb/json/parser.rl"
11018	{ CHECK_RETURN_TOP(escape(parser, p)); }
11019	break;
11020	case 8:
11021#line 1159 "upb/json/parser.rl"
11022	{ p--; {cs = stack[--top]; goto _again;} }
11023	break;
11024	case 9:
11025#line 1162 "upb/json/parser.rl"
11026	{ {stack[top++] = cs; cs = 19; goto _again;} }
11027	break;
11028	case 10:
11029#line 1164 "upb/json/parser.rl"
11030	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
11031	break;
11032	case 11:
11033#line 1169 "upb/json/parser.rl"
11034	{ start_member(parser); }
11035	break;
11036	case 12:
11037#line 1170 "upb/json/parser.rl"
11038	{ CHECK_RETURN_TOP(end_membername(parser)); }
11039	break;
11040	case 13:
11041#line 1173 "upb/json/parser.rl"
11042	{ end_member(parser); }
11043	break;
11044	case 14:
11045#line 1179 "upb/json/parser.rl"
11046	{ start_object(parser); }
11047	break;
11048	case 15:
11049#line 1182 "upb/json/parser.rl"
11050	{ end_object(parser); }
11051	break;
11052	case 16:
11053#line 1188 "upb/json/parser.rl"
11054	{ CHECK_RETURN_TOP(start_array(parser)); }
11055	break;
11056	case 17:
11057#line 1192 "upb/json/parser.rl"
11058	{ end_array(parser); }
11059	break;
11060	case 18:
11061#line 1197 "upb/json/parser.rl"
11062	{ start_number(parser, p); }
11063	break;
11064	case 19:
11065#line 1198 "upb/json/parser.rl"
11066	{ CHECK_RETURN_TOP(end_number(parser, p)); }
11067	break;
11068	case 20:
11069#line 1200 "upb/json/parser.rl"
11070	{ CHECK_RETURN_TOP(start_stringval(parser)); }
11071	break;
11072	case 21:
11073#line 1201 "upb/json/parser.rl"
11074	{ CHECK_RETURN_TOP(end_stringval(parser)); }
11075	break;
11076	case 22:
11077#line 1203 "upb/json/parser.rl"
11078	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11079	break;
11080	case 23:
11081#line 1205 "upb/json/parser.rl"
11082	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11083	break;
11084	case 24:
11085#line 1207 "upb/json/parser.rl"
11086	{ /* null value */ }
11087	break;
11088	case 25:
11089#line 1209 "upb/json/parser.rl"
11090	{ CHECK_RETURN_TOP(start_subobject(parser)); }
11091	break;
11092	case 26:
11093#line 1210 "upb/json/parser.rl"
11094	{ end_subobject(parser); }
11095	break;
11096	case 27:
11097#line 1215 "upb/json/parser.rl"
11098	{ p--; {cs = stack[--top]; goto _again;} }
11099	break;
11100#line 1487 "upb/json/parser.c"
11101		}
11102	}
11103
11104_again:
11105	if ( cs == 0 )
11106		goto _out;
11107	if ( ++p != pe )
11108		goto _resume;
11109	_test_eof: {}
11110	_out: {}
11111	}
11112
11113#line 1242 "upb/json/parser.rl"
11114
11115  if (p != pe) {
11116    upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
11117    upb_env_reporterror(parser->env, &parser->status);
11118  } else {
11119    capture_suspend(parser, &p);
11120  }
11121
11122error:
11123  /* Save parsing state back to parser. */
11124  parser->current_state = cs;
11125  parser->parser_top = top;
11126
11127  return p - buf;
11128}
11129
11130bool end(void *closure, const void *hd) {
11131  UPB_UNUSED(closure);
11132  UPB_UNUSED(hd);
11133
11134  /* Prevent compile warning on unused static constants. */
11135  UPB_UNUSED(json_start);
11136  UPB_UNUSED(json_en_number_machine);
11137  UPB_UNUSED(json_en_string_machine);
11138  UPB_UNUSED(json_en_value_machine);
11139  UPB_UNUSED(json_en_main);
11140  return true;
11141}
11142
/* Puts |p| into its initial parsing state: the frame stack is rewound to a
 * single cleared bottom frame, the Ragel state-machine variables are set to
 * their start values, and the capture, accumulate, multipart and status state
 * is cleared. */
static void json_parser_reset(upb_json_parser *p) {
  int cs;
  int top;

  /* Rewind to the bottom frame and clear its fields. */
  p->top = p->stack;
  p->top->f = NULL;
  p->top->is_map = false;
  p->top->is_mapentry = false;

  /* Emit Ragel initialization of the parser. */

#line 1541 "upb/json/parser.c"
	{
	cs = json_start;
	top = 0;
	}

#line 1282 "upb/json/parser.rl"
  p->current_state = cs;
  p->parser_top = top;
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
  p->accumulated = NULL;
  upb_status_clear(&p->status);
}
11169
11170
11171/* Public API *****************************************************************/
11172
11173upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
11174#ifndef NDEBUG
11175  const size_t size_before = upb_env_bytesallocated(env);
11176#endif
11177  upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11178  if (!p) return false;
11179
11180  p->env = env;
11181  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11182  p->accumulate_buf = NULL;
11183  p->accumulate_buf_size = 0;
11184  upb_byteshandler_init(&p->input_handler_);
11185  upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
11186  upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
11187  upb_bytessink_reset(&p->input_, &p->input_handler_, p);
11188
11189  json_parser_reset(p);
11190  upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11191  p->top->m = upb_handlers_msgdef(output->handlers);
11192
11193  /* If this fails, uncomment and increase the value in parser.h. */
11194  /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
11195  assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11196  return p;
11197}
11198
11199upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11200  return &p->input_;
11201}
11202/*
11203** This currently uses snprintf() to format primitives, and could be optimized
11204** further.
11205*/
11206
11207
11208#include <stdlib.h>
11209#include <stdio.h>
11210#include <string.h>
11211#include <stdint.h>
11212
/* State of a JSON printer. */
struct upb_json_printer {
  /* NOTE(review): presumably the sink through which message data is pushed
   * into the printer; its users are not visible here -- confirm. */
  upb_sink input_;
  /* BytesSink closure. */
  void *subc_;
  /* Bytes sink that print_data() writes the generated JSON text to. */
  upb_bytessink *output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
11233
/* StringPiece; a pointer plus a length.  The struct does not itself say who
 * owns the pointed-to characters. */
typedef struct {
  const char *ptr;  /* First character of the string. */
  size_t len;       /* Number of characters at |ptr|. */
} strpc;
11239
11240strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
11241  strpc *ret = malloc(sizeof(*ret));
11242  ret->ptr = upb_fielddef_name(f);
11243  ret->len = strlen(ret->ptr);
11244  upb_handlers_addcleanup(h, ret, free);
11245  return ret;
11246}
11247
11248/* ------------ JSON string printing: values, maps, arrays ------------------ */
11249
11250static void print_data(
11251    upb_json_printer *p, const char *buf, unsigned int len) {
11252  /* TODO: Will need to change if we support pushback from the sink. */
11253  size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
11254  UPB_ASSERT_VAR(n, n == len);
11255}
11256
11257static void print_comma(upb_json_printer *p) {
11258  if (!p->first_elem_[p->depth_]) {
11259    print_data(p, ",", 1);
11260  }
11261  p->first_elem_[p->depth_] = false;
11262}
11263
11264/* Helpers that print properly formatted elements to the JSON output stream. */
11265
11266/* Used for escaping control chars in strings. */
11267static const char kControlCharLimit = 0x20;
11268
11269UPB_INLINE bool is_json_escaped(char c) {
11270  /* See RFC 4627. */
11271  unsigned char uc = (unsigned char)c;
11272  return uc < kControlCharLimit || uc == '"' || uc == '\\';
11273}
11274
11275UPB_INLINE char* json_nice_escape(char c) {
11276  switch (c) {
11277    case '"':  return "\\\"";
11278    case '\\': return "\\\\";
11279    case '\b': return "\\b";
11280    case '\f': return "\\f";
11281    case '\n': return "\\n";
11282    case '\r': return "\\r";
11283    case '\t': return "\\t";
11284    default:   return NULL;
11285  }
11286}
11287
11288/* Write a properly escaped string chunk. The surrounding quotes are *not*
11289 * printed; this is so that the caller has the option of emitting the string
11290 * content in chunks. */
11291static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
11292  const char* unescaped_run = NULL;
11293  unsigned int i;
11294  for (i = 0; i < len; i++) {
11295    char c = buf[i];
11296    /* Handle escaping. */
11297    if (is_json_escaped(c)) {
11298      /* Use a "nice" escape, like \n, if one exists for this character. */
11299      const char* escape = json_nice_escape(c);
11300      /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
11301       * escape. */
11302      char escape_buf[8];
11303      if (!escape) {
11304        unsigned char byte = (unsigned char)c;
11305        _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
11306        escape = escape_buf;
11307      }
11308
11309      /* N.B. that we assume that the input encoding is equal to the output
11310       * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
11311       * can simply pass the bytes through. */
11312
11313      /* If there's a current run of unescaped chars, print that run first. */
11314      if (unescaped_run) {
11315        print_data(p, unescaped_run, &buf[i] - unescaped_run);
11316        unescaped_run = NULL;
11317      }
11318      /* Then print the escape code. */
11319      print_data(p, escape, strlen(escape));
11320    } else {
11321      /* Add to the current unescaped run of characters. */
11322      if (unescaped_run == NULL) {
11323        unescaped_run = &buf[i];
11324      }
11325    }
11326  }
11327
11328  /* If the string ended in a run of unescaped characters, print that last run. */
11329  if (unescaped_run) {
11330    print_data(p, unescaped_run, &buf[len] - unescaped_run);
11331  }
11332}
11333
/* Returns -1 from the enclosing function when condition |x| is false.  The
 * expansion is a bare `if` statement, so use it only as a full statement of
 * its own (never as the unbraced body of an if/else). */
#define CHKLENGTH(x) if (!(x)) return -1;
11335
11336/* Helpers that format floating point values according to our custom formats.
11337 * Right now we use %.8g and %.17g for float/double, respectively, to match
11338 * proto2::util::JsonFormat's defaults.  May want to change this later. */
11339
/* Formats |val| with "%.17g" into |buf| (capacity |length|).  Returns the
 * number of characters written, or (size_t)-1 if nothing was produced or the
 * result did not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%.17g", val);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11345
/* Formats |val| with "%.8g" into |buf| (capacity |length|).  Returns the
 * number of characters written, or (size_t)-1 if nothing was produced or the
 * result did not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%.8g", val);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11351
/* Writes "true" or "false" into |buf| (capacity |length|).  Returns the
 * number of characters written, or (size_t)-1 if nothing was produced or the
 * result did not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11357
/* Formats the signed integer |val| in decimal into |buf| (capacity |length|).
 * Returns the number of characters written, or (size_t)-1 if nothing was
 * produced or the result did not fit.
 *
 * The parameter is `long long` (not `long`) because this helper is used for
 * int32_t, uint32_t and int64_t values: `long` is only 32 bits wide on some
 * platforms, which would truncate 64-bit values and turn large uint32_t
 * values negative. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
11363
/* Formats the unsigned integer |val| in decimal into |buf| (capacity
 * |length|).  Returns the number of characters written, or (size_t)-1 if
 * nothing was produced or the result did not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%llu", val);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11369
11370/* Print a map key given a field name. Called by scalar field handlers and by
11371 * startseq for repeated fields. */
11372static bool putkey(void *closure, const void *handler_data) {
11373  upb_json_printer *p = closure;
11374  const strpc *key = handler_data;
11375  print_comma(p);
11376  print_data(p, "\"", 1);
11377  putstring(p, key->ptr, key->len);
11378  print_data(p, "\":", 2);
11379  return true;
11380}
11381
/* CHKFMT: returns false from the enclosing function when |val| is the
 * (size_t)-1 failure value produced by the fmt_* helpers.
 * CHK:    returns false from the enclosing function when |val| is zero/false.
 * Both expand to a bare `if` statement, so use them only as full statements
 * of their own. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val)    if (!(val)) return false;
11384
/* Defines three value handlers for the C type |type|, all formatting through
 * |fmt_func|:
 *   put##type       formats the value into a 64-byte stack buffer and prints
 *                   it; returns false if |fmt_func| reports failure.
 *   scalar_##type   prints the field key (putkey) and then the value.
 *   repeated_##type prints a separating comma when needed and then the value.
 */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
11408
/* Defines putmapkey_##type, which prints a value of C type |type| as a map
 * key: the value (via put##type) wrapped in double quotes and followed by a
 * colon.  |fmt_func| is not referenced by the expansion; put##type must
 * already have been defined by TYPE_HANDLERS. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    print_data(p, "\"", 1);                                                  \
    CHK(put##type(closure, handler_data, val));                              \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
11418
/* Instantiate put*, scalar_* and repeated_* for every primitive value type.
 * Note that int32_t, uint32_t and int64_t all format through fmt_int64. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64)
TYPE_HANDLERS(uint32_t, fmt_int64)
TYPE_HANDLERS(int64_t,  fmt_int64)
TYPE_HANDLERS(uint64_t, fmt_uint64)

/* double and float are not allowed to be map keys. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)

/* The generator macros are not needed past this point. */
#undef TYPE_HANDLERS
#undef TYPE_HANDLERS_MAPKEY
11436
/* Handler data attached to enum field handlers (built by set_enum_hd). */
typedef struct {
  /* Key data handed to putkey(); set_enum_hd stores a strpc from newstrpc()
   * here. */
  void *keyname;
  /* Enum definition used to map numeric values to symbolic names. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
11441
11442static bool scalar_enum(void *closure, const void *handler_data,
11443                        int32_t val) {
11444  const EnumHandlerData *hd = handler_data;
11445  upb_json_printer *p = closure;
11446  const char *symbolic_name;
11447
11448  CHK(putkey(closure, hd->keyname));
11449
11450  symbolic_name = upb_enumdef_iton(hd->enumdef, val);
11451  if (symbolic_name) {
11452    print_data(p, "\"", 1);
11453    putstring(p, symbolic_name, strlen(symbolic_name));
11454    print_data(p, "\"", 1);
11455  } else {
11456    putint32_t(closure, NULL, val);
11457  }
11458
11459  return true;
11460}
11461
11462static void print_enum_symbolic_name(upb_json_printer *p,
11463                                     const upb_enumdef *def,
11464                                     int32_t val) {
11465  const char *symbolic_name = upb_enumdef_iton(def, val);
11466  if (symbolic_name) {
11467    print_data(p, "\"", 1);
11468    putstring(p, symbolic_name, strlen(symbolic_name));
11469    print_data(p, "\"", 1);
11470  } else {
11471    putint32_t(p, NULL, val);
11472  }
11473}
11474
11475static bool repeated_enum(void *closure, const void *handler_data,
11476                          int32_t val) {
11477  const EnumHandlerData *hd = handler_data;
11478  upb_json_printer *p = closure;
11479  print_comma(p);
11480
11481  print_enum_symbolic_name(p, hd->enumdef, val);
11482
11483  return true;
11484}
11485
11486static bool mapvalue_enum(void *closure, const void *handler_data,
11487                          int32_t val) {
11488  const EnumHandlerData *hd = handler_data;
11489  upb_json_printer *p = closure;
11490
11491  print_enum_symbolic_name(p, hd->enumdef, val);
11492
11493  return true;
11494}
11495
11496static void *scalar_startsubmsg(void *closure, const void *handler_data) {
11497  return putkey(closure, handler_data) ? closure : UPB_BREAK;
11498}
11499
/* Start handler for one element of a repeated submessage field: prints a
 * separating comma when needed and returns |closure| for the submessage. */
static void *repeated_startsubmsg(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  print_comma(closure);
  return closure;
}
11506
11507static void start_frame(upb_json_printer *p) {
11508  p->depth_++;
11509  p->first_elem_[p->depth_] = true;
11510  print_data(p, "{", 1);
11511}
11512
11513static void end_frame(upb_json_printer *p) {
11514  print_data(p, "}", 1);
11515  p->depth_--;
11516}
11517
11518static bool printer_startmsg(void *closure, const void *handler_data) {
11519  upb_json_printer *p = closure;
11520  UPB_UNUSED(handler_data);
11521  if (p->depth_ == 0) {
11522    upb_bytessink_start(p->output_, 0, &p->subc_);
11523  }
11524  start_frame(p);
11525  return true;
11526}
11527
11528static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
11529  upb_json_printer *p = closure;
11530  UPB_UNUSED(handler_data);
11531  UPB_UNUSED(s);
11532  end_frame(p);
11533  if (p->depth_ == 0) {
11534    upb_bytessink_end(p->output_);
11535  }
11536  return true;
11537}
11538
11539static void *startseq(void *closure, const void *handler_data) {
11540  upb_json_printer *p = closure;
11541  CHK(putkey(closure, handler_data));
11542  p->depth_++;
11543  p->first_elem_[p->depth_] = true;
11544  print_data(p, "[", 1);
11545  return closure;
11546}
11547
11548static bool endseq(void *closure, const void *handler_data) {
11549  upb_json_printer *p = closure;
11550  UPB_UNUSED(handler_data);
11551  print_data(p, "]", 1);
11552  p->depth_--;
11553  return true;
11554}
11555
11556static void *startmap(void *closure, const void *handler_data) {
11557  upb_json_printer *p = closure;
11558  CHK(putkey(closure, handler_data));
11559  p->depth_++;
11560  p->first_elem_[p->depth_] = true;
11561  print_data(p, "{", 1);
11562  return closure;
11563}
11564
11565static bool endmap(void *closure, const void *handler_data) {
11566  upb_json_printer *p = closure;
11567  UPB_UNUSED(handler_data);
11568  print_data(p, "}", 1);
11569  p->depth_--;
11570  return true;
11571}
11572
11573static size_t putstr(void *closure, const void *handler_data, const char *str,
11574                     size_t len, const upb_bufhandle *handle) {
11575  upb_json_printer *p = closure;
11576  UPB_UNUSED(handler_data);
11577  UPB_UNUSED(handle);
11578  putstring(p, str, len);
11579  return len;
11580}
11581
11582/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
11583static size_t putbytes(void *closure, const void *handler_data, const char *str,
11584                       size_t len, const upb_bufhandle *handle) {
11585  upb_json_printer *p = closure;
11586
11587  /* This is the regular base64, not the "web-safe" version. */
11588  static const char base64[] =
11589      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
11590
11591  /* Base64-encode. */
11592  char data[16000];
11593  const char *limit = data + sizeof(data);
11594  const unsigned char *from = (const unsigned char*)str;
11595  char *to = data;
11596  size_t remaining = len;
11597  size_t bytes;
11598
11599  UPB_UNUSED(handler_data);
11600  UPB_UNUSED(handle);
11601
11602  while (remaining > 2) {
11603    /* TODO(haberman): handle encoded lengths > sizeof(data) */
11604    UPB_ASSERT_VAR(limit, (limit - to) >= 4);
11605
11606    to[0] = base64[from[0] >> 2];
11607    to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11608    to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
11609    to[3] = base64[from[2] & 0x3f];
11610
11611    remaining -= 3;
11612    to += 4;
11613    from += 3;
11614  }
11615
11616  switch (remaining) {
11617    case 2:
11618      to[0] = base64[from[0] >> 2];
11619      to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11620      to[2] = base64[(from[1] & 0xf) << 2];
11621      to[3] = '=';
11622      to += 4;
11623      from += 2;
11624      break;
11625    case 1:
11626      to[0] = base64[from[0] >> 2];
11627      to[1] = base64[((from[0] & 0x3) << 4)];
11628      to[2] = '=';
11629      to[3] = '=';
11630      to += 4;
11631      from += 1;
11632      break;
11633  }
11634
11635  bytes = to - data;
11636  print_data(p, "\"", 1);
11637  putstring(p, data, bytes);
11638  print_data(p, "\"", 1);
11639  return len;
11640}
11641
11642static void *scalar_startstr(void *closure, const void *handler_data,
11643                             size_t size_hint) {
11644  upb_json_printer *p = closure;
11645  UPB_UNUSED(handler_data);
11646  UPB_UNUSED(size_hint);
11647  CHK(putkey(closure, handler_data));
11648  print_data(p, "\"", 1);
11649  return p;
11650}
11651
11652static size_t scalar_str(void *closure, const void *handler_data,
11653                         const char *str, size_t len,
11654                         const upb_bufhandle *handle) {
11655  CHK(putstr(closure, handler_data, str, len, handle));
11656  return len;
11657}
11658
/* End-of-string handler for a non-repeated string field: prints the closing
 * quote.  Always returns true. */
static bool scalar_endstr(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  print_data(closure, "\"", 1);
  return true;
}
11665
11666static void *repeated_startstr(void *closure, const void *handler_data,
11667                               size_t size_hint) {
11668  upb_json_printer *p = closure;
11669  UPB_UNUSED(handler_data);
11670  UPB_UNUSED(size_hint);
11671  print_comma(p);
11672  print_data(p, "\"", 1);
11673  return p;
11674}
11675
11676static size_t repeated_str(void *closure, const void *handler_data,
11677                           const char *str, size_t len,
11678                           const upb_bufhandle *handle) {
11679  CHK(putstr(closure, handler_data, str, len, handle));
11680  return len;
11681}
11682
/* End-of-string handler for an element of a repeated string field: prints the
 * closing quote.  Always returns true. */
static bool repeated_endstr(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  print_data(closure, "\"", 1);
  return true;
}
11689
11690static void *mapkeyval_startstr(void *closure, const void *handler_data,
11691                                size_t size_hint) {
11692  upb_json_printer *p = closure;
11693  UPB_UNUSED(handler_data);
11694  UPB_UNUSED(size_hint);
11695  print_data(p, "\"", 1);
11696  return p;
11697}
11698
11699static size_t mapkey_str(void *closure, const void *handler_data,
11700                         const char *str, size_t len,
11701                         const upb_bufhandle *handle) {
11702  CHK(putstr(closure, handler_data, str, len, handle));
11703  return len;
11704}
11705
/* End-of-string handler for a string map key: prints the closing quote
 * together with the ":" that separates the key from its value.  Always
 * returns true. */
static bool mapkey_endstr(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  print_data(closure, "\":", 2);
  return true;
}
11712
/* End-of-string handler for a string map value: prints the closing quote.
 * Always returns true. */
static bool mapvalue_endstr(void *closure, const void *handler_data) {
  UPB_UNUSED(handler_data);
  print_data(closure, "\"", 1);
  return true;
}
11719
11720static size_t scalar_bytes(void *closure, const void *handler_data,
11721                           const char *str, size_t len,
11722                           const upb_bufhandle *handle) {
11723  CHK(putkey(closure, handler_data));
11724  CHK(putbytes(closure, handler_data, str, len, handle));
11725  return len;
11726}
11727
11728static size_t repeated_bytes(void *closure, const void *handler_data,
11729                             const char *str, size_t len,
11730                             const upb_bufhandle *handle) {
11731  upb_json_printer *p = closure;
11732  print_comma(p);
11733  CHK(putbytes(closure, handler_data, str, len, handle));
11734  return len;
11735}
11736
11737static size_t mapkey_bytes(void *closure, const void *handler_data,
11738                           const char *str, size_t len,
11739                           const upb_bufhandle *handle) {
11740  upb_json_printer *p = closure;
11741  CHK(putbytes(closure, handler_data, str, len, handle));
11742  print_data(p, ":", 1);
11743  return len;
11744}
11745
11746static void set_enum_hd(upb_handlers *h,
11747                        const upb_fielddef *f,
11748                        upb_handlerattr *attr) {
11749  EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
11750  hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
11751  hd->keyname = newstrpc(h, f);
11752  upb_handlers_addcleanup(h, hd, free);
11753  upb_handlerattr_sethandlerdata(attr, hd);
11754}
11755
11756/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
11757 * in a map).
11758 *
11759 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
11760 * key or value cases properly. The right way to do this is to allocate a
11761 * temporary structure at the start of a mapentry submessage, store key and
11762 * value data in it as key and value handlers are called, and then print the
11763 * key/value pair once at the end of the submessage. If we don't do this, we
11764 * should at least detect the case and throw an error. However, so far all of
11765 * our sources that emit mapentry messages do so canonically (with one key
11766 * field, and then one value field), so this is not a pressing concern at the
11767 * moment. */
11768void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
11769  const upb_msgdef *md = upb_handlers_msgdef(h);
11770
11771  /* A mapentry message is printed simply as '"key": value'. Rather than
11772   * special-case key and value for every type below, we just handle both
11773   * fields explicitly here. */
11774  const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
11775  const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
11776
11777  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
11778
11779  UPB_UNUSED(closure);
11780
11781  switch (upb_fielddef_type(key_field)) {
11782    case UPB_TYPE_INT32:
11783      upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
11784      break;
11785    case UPB_TYPE_INT64:
11786      upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
11787      break;
11788    case UPB_TYPE_UINT32:
11789      upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
11790      break;
11791    case UPB_TYPE_UINT64:
11792      upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
11793      break;
11794    case UPB_TYPE_BOOL:
11795      upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
11796      break;
11797    case UPB_TYPE_STRING:
11798      upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
11799      upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
11800      upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
11801      break;
11802    case UPB_TYPE_BYTES:
11803      upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
11804      break;
11805    default:
11806      assert(false);
11807      break;
11808  }
11809
11810  switch (upb_fielddef_type(value_field)) {
11811    case UPB_TYPE_INT32:
11812      upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
11813      break;
11814    case UPB_TYPE_INT64:
11815      upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
11816      break;
11817    case UPB_TYPE_UINT32:
11818      upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
11819      break;
11820    case UPB_TYPE_UINT64:
11821      upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
11822      break;
11823    case UPB_TYPE_BOOL:
11824      upb_handlers_setbool(h, value_field, putbool, &empty_attr);
11825      break;
11826    case UPB_TYPE_FLOAT:
11827      upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
11828      break;
11829    case UPB_TYPE_DOUBLE:
11830      upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
11831      break;
11832    case UPB_TYPE_STRING:
11833      upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
11834      upb_handlers_setstring(h, value_field, putstr, &empty_attr);
11835      upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
11836      break;
11837    case UPB_TYPE_BYTES:
11838      upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
11839      break;
11840    case UPB_TYPE_ENUM: {
11841      upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
11842      set_enum_hd(h, value_field, &enum_attr);
11843      upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
11844      upb_handlerattr_uninit(&enum_attr);
11845      break;
11846    }
11847    case UPB_TYPE_MESSAGE:
11848      /* No handler necessary -- the submsg handlers will print the message
11849       * as appropriate. */
11850      break;
11851  }
11852
11853  upb_handlerattr_uninit(&empty_attr);
11854}
11855
11856void printer_sethandlers(const void *closure, upb_handlers *h) {
11857  const upb_msgdef *md = upb_handlers_msgdef(h);
11858  bool is_mapentry = upb_msgdef_mapentry(md);
11859  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
11860  upb_msg_field_iter i;
11861
11862  UPB_UNUSED(closure);
11863
11864  if (is_mapentry) {
11865    /* mapentry messages are sufficiently different that we handle them
11866     * separately. */
11867    printer_sethandlers_mapentry(closure, h);
11868    return;
11869  }
11870
11871  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
11872  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
11873
11874#define TYPE(type, name, ctype)                                               \
11875  case type:                                                                  \
11876    if (upb_fielddef_isseq(f)) {                                              \
11877      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
11878    } else {                                                                  \
11879      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
11880    }                                                                         \
11881    break;
11882
11883  upb_msg_field_begin(&i, md);
11884  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
11885    const upb_fielddef *f = upb_msg_iter_field(&i);
11886
11887    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
11888    upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
11889
11890    if (upb_fielddef_ismap(f)) {
11891      upb_handlers_setstartseq(h, f, startmap, &name_attr);
11892      upb_handlers_setendseq(h, f, endmap, &name_attr);
11893    } else if (upb_fielddef_isseq(f)) {
11894      upb_handlers_setstartseq(h, f, startseq, &name_attr);
11895      upb_handlers_setendseq(h, f, endseq, &empty_attr);
11896    }
11897
11898    switch (upb_fielddef_type(f)) {
11899      TYPE(UPB_TYPE_FLOAT,  float,  float);
11900      TYPE(UPB_TYPE_DOUBLE, double, double);
11901      TYPE(UPB_TYPE_BOOL,   bool,   bool);
11902      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
11903      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
11904      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
11905      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
11906      case UPB_TYPE_ENUM: {
11907        /* For now, we always emit symbolic names for enums. We may want an
11908         * option later to control this behavior, but we will wait for a real
11909         * need first. */
11910        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
11911        set_enum_hd(h, f, &enum_attr);
11912
11913        if (upb_fielddef_isseq(f)) {
11914          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
11915        } else {
11916          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
11917        }
11918
11919        upb_handlerattr_uninit(&enum_attr);
11920        break;
11921      }
11922      case UPB_TYPE_STRING:
11923        if (upb_fielddef_isseq(f)) {
11924          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
11925          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
11926          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
11927        } else {
11928          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
11929          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
11930          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
11931        }
11932        break;
11933      case UPB_TYPE_BYTES:
11934        /* XXX: this doesn't support strings that span buffers yet. The base64
11935         * encoder will need to be made resumable for this to work properly. */
11936        if (upb_fielddef_isseq(f)) {
11937          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
11938        } else {
11939          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
11940        }
11941        break;
11942      case UPB_TYPE_MESSAGE:
11943        if (upb_fielddef_isseq(f)) {
11944          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
11945        } else {
11946          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
11947        }
11948        break;
11949    }
11950
11951    upb_handlerattr_uninit(&name_attr);
11952  }
11953
11954  upb_handlerattr_uninit(&empty_attr);
11955#undef TYPE
11956}
11957
/* Resets the printer's depth_ counter to zero.  Called by
 * upb_json_printer_create() on a freshly allocated printer; no other field of
 * |p| is touched. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
11961
11962
11963/* Public API *****************************************************************/
11964
11965upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
11966                                          upb_bytessink *output) {
11967#ifndef NDEBUG
11968  size_t size_before = upb_env_bytesallocated(e);
11969#endif
11970
11971  upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
11972  if (!p) return NULL;
11973
11974  p->output_ = output;
11975  json_printer_reset(p);
11976  upb_sink_reset(&p->input_, h, p);
11977
11978  /* If this fails, increase the value in printer.h. */
11979  assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
11980  return p;
11981}
11982
/* Returns a pointer to the printer's input sink, i.e. the input_ member that
 * upb_json_printer_create() initialises with upb_sink_reset().  The returned
 * pointer refers to storage inside |p| itself. */
upb_sink *upb_json_printer_input(upb_json_printer *p) {
  return &p->input_;
}
11986
/* Builds the JSON printer handlers for message type |md| by calling
 * upb_handlers_newfrozen() with printer_sethandlers as the registration
 * callback and a NULL closure.
 *
 * md:    message definition the handlers are created for.
 * owner: passed straight through to upb_handlers_newfrozen() (presumably the
 *        owner of the returned reference -- confirm against handlers.h).
 *
 * Returns whatever upb_handlers_newfrozen() returns. */
const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
                                                 const void *owner) {
  return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
}
11991