1/* Extended regular expression matching and search library, version
2   0.12.  (Implements POSIX draft P10003.2/D11.2, except for
3   internationalization features.)
4
5   Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20   USA.	 */
21
22/* AIX requires this to be the first thing in the file. */
23#if defined (_AIX) && !defined (REGEX_MALLOC)
24  #pragma alloca
25#endif
26
27#undef	_GNU_SOURCE
28#define _GNU_SOURCE
29
30#include "cs_config.h"
31#include "util/osdep.h"
32
33#ifdef HAVE_CONFIG_H
34#include <config.h>
35#endif
36
37/* We need this for `regex.h', and perhaps for the Emacs include files.	 */
38#include <sys/types.h>
39
40/* This is for other GNU distributions with internationalized messages.	 */
41#if HAVE_LIBINTL_H || defined (_LIBC)
42# include <libintl.h>
43#else
44# define gettext(msgid) (msgid)
45#endif
46
47#ifndef gettext_noop
48/* This define is so xgettext can find the internationalizable
49   strings.  */
50#define gettext_noop(String) String
51#endif
52
53/* The `emacs' switch turns on certain matching commands
54   that make sense only in Emacs. */
55#ifdef emacs
56
57#include "lisp.h"
58#include "buffer.h"
59#include "syntax.h"
60
61#else  /* not emacs */
62
63/* If we are not linking with Emacs proper,
64   we can't use the relocating allocator
65   even if config.h says that we can.  */
66#undef REL_ALLOC
67
68#if defined (STDC_HEADERS) || defined (_LIBC)
69#include <stdlib.h>
70#else
71char *malloc ();
72char *realloc ();
73#endif
74
75/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
76   If nothing else has been done, use the method below.	 */
77#ifdef INHIBIT_STRING_HEADER
78#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
79#if !defined (bzero) && !defined (bcopy)
80#undef INHIBIT_STRING_HEADER
81#endif
82#endif
83#endif
84
85/* This is the normal way of making sure we have a bcopy and a bzero.
86   This is used in most programs--a few other programs avoid this
87   by defining INHIBIT_STRING_HEADER.  */
88#ifndef INHIBIT_STRING_HEADER
89#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
90#include <string.h>
91#ifndef bcmp
92#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
93#endif
94#ifndef bcopy
95#define bcopy(s, d, n)	memcpy ((d), (s), (n))
96#endif
97#ifndef bzero
98#define bzero(s, n)	memset ((s), 0, (n))
99#endif
100#else
101#include <strings.h>
102#endif
103#endif
104
105/* Define the syntax stuff for \<, \>, etc.  */
106
107/* This must be nonzero for the wordchar and notwordchar pattern
108   commands in re_match_2.  */
109#ifndef Sword
110#define Sword 1
111#endif
112
113#ifdef SWITCH_ENUM_BUG
114#define SWITCH_ENUM_CAST(x) ((int)(x))
115#else
116#define SWITCH_ENUM_CAST(x) (x)
117#endif
118
119#ifdef SYNTAX_TABLE
120
121extern char *re_syntax_table;
122
123#else /* not SYNTAX_TABLE */
124
125/* How many characters in the character set.  */
126#define CHAR_SET_SIZE 256
127
128static char re_syntax_table[CHAR_SET_SIZE];
129
130static void
131init_syntax_once ()
132{
133   register int c;
134   static int done = 0;
135
136   if (done)
137     return;
138
139   bzero (re_syntax_table, sizeof re_syntax_table);
140
141   for (c = 'a'; c <= 'z'; c++)
142     re_syntax_table[c] = Sword;
143
144   for (c = 'A'; c <= 'Z'; c++)
145     re_syntax_table[c] = Sword;
146
147   for (c = '0'; c <= '9'; c++)
148     re_syntax_table[c] = Sword;
149
150   re_syntax_table['_'] = Sword;
151
152   done = 1;
153}
154
155#endif /* not SYNTAX_TABLE */
156
157#define SYNTAX(c) re_syntax_table[c]
158
159#endif /* not emacs */
160
161/* Get the interface, including the syntax bits.  */
162#include "regex.h"
163
164/* isalpha etc. are used for the character classes.  */
165#include <ctype.h>
166
167/* Jim Meyering writes:
168
169   "... Some ctype macros are valid only for character codes that
170   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
171   using /bin/cc or gcc but without giving an ansi option).  So, all
172   ctype uses should be through macros like ISPRINT...	If
173   STDC_HEADERS is defined, then autoconf has verified that the ctype
174   macros don't need to be guarded with references to isascii. ...
175   Defining IN_CTYPE_DOMAIN to 1 should let any compiler worth its salt
176   eliminate the && through constant folding."	*/
177
178#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
179#define IN_CTYPE_DOMAIN(c) 1
180#else
181#define IN_CTYPE_DOMAIN(c) isascii(c)
182#endif
183
184#ifdef isblank
185#define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
186#else
187#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
188#endif
189#ifdef isgraph
190#define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
191#else
192#define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
193#endif
194
195#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
196#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
197#define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
198#define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
199#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
200#define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
201#define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
202#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
203#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
204#define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
205
206#ifndef NULL
207#define NULL (void *)0
208#endif
209
210/* We remove any previous definition of `SIGN_EXTEND_CHAR',
211   since ours (we hope) works properly with all combinations of
212   machines, compilers, `char' and `unsigned char' argument types.
213   (Per Bothner suggested the basic approach.)	*/
214#undef SIGN_EXTEND_CHAR
215#if __STDC__
216#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
217#else  /* not __STDC__ */
218/* As in Harbison and Steele.  */
219#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
220#endif
221
222/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
223   use `alloca' instead of `malloc'.  This is because using malloc in
224   re_search* or re_match* could cause memory leaks when C-g is used in
225   Emacs; also, malloc is slower and causes storage fragmentation.  On
226   the other hand, malloc is more portable, and easier to debug.
227
228   Because we sometimes use alloca, some routines have to be macros,
229   not functions -- `alloca'-allocated space disappears at the end of the
230   function it is called in.  */
231
232#ifdef REGEX_MALLOC
233
234#define REGEX_ALLOCATE malloc
235#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
236#define REGEX_FREE free
237
238#else /* not REGEX_MALLOC  */
239
240/* Emacs already defines alloca, sometimes.  */
241#ifndef alloca
242
243/* Make alloca work the best possible way.  */
244#ifdef __GNUC__
245#define alloca __builtin_alloca
246#else /* not __GNUC__ */
247#if HAVE_ALLOCA_H
248#include <alloca.h>
249#else /* not __GNUC__ or HAVE_ALLOCA_H */
250#if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
251#ifndef _AIX /* Already did AIX, up at the top.	 */
252char *alloca ();
253#endif /* not _AIX */
254#endif
255#endif /* not HAVE_ALLOCA_H */
256#endif /* not __GNUC__ */
257
258#endif /* not alloca */
259
260#define REGEX_ALLOCATE alloca
261
262/* Assumes a `char *destination' variable.  */
263#define REGEX_REALLOCATE(source, osize, nsize)				\
264  (destination = (char *) alloca (nsize),				\
265   bcopy (source, destination, osize),					\
266   destination)
267
268/* No need to do anything to free, after alloca.  */
269#define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
270
271#endif /* not REGEX_MALLOC */
272
273/* Define how to allocate the failure stack.  */
274
275#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
276
277#define REGEX_ALLOCATE_STACK(size)				\
278  r_alloc (&failure_stack_ptr, (size))
279#define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
280  r_re_alloc (&failure_stack_ptr, (nsize))
281#define REGEX_FREE_STACK(ptr)					\
282  r_alloc_free (&failure_stack_ptr)
283
284#else /* not using relocating allocator */
285
286#ifdef REGEX_MALLOC
287
288#define REGEX_ALLOCATE_STACK malloc
289#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
290#define REGEX_FREE_STACK free
291
292#else /* not REGEX_MALLOC */
293
294#define REGEX_ALLOCATE_STACK alloca
295
296#define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
297   REGEX_REALLOCATE (source, osize, nsize)
298/* No need to explicitly free anything.	 */
299#define REGEX_FREE_STACK(arg)
300
301#endif /* not REGEX_MALLOC */
302#endif /* not using relocating allocator */
303
304
305/* True if `size1' is non-NULL and PTR is pointing anywhere inside
306   `string1' or just past its end.  This works if PTR is NULL, which is
307   a good thing.  */
308#define FIRST_STRING_P(ptr)					\
309  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
310
311/* (Re)Allocate N items of type T using malloc, or fail.  */
312#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
313#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
314#define RETALLOC_IF(addr, n, t) \
315  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
316#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
317
318#define BYTEWIDTH 8 /* In bits.	 */
319
320#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
321
322#undef MAX
323#undef MIN
324#define MAX(a, b) ((a) > (b) ? (a) : (b))
325#define MIN(a, b) ((a) < (b) ? (a) : (b))
326
327typedef char boolean;
328#define false 0
329#define true 1
330
331static int re_match_2_internal ();
332
333/* These are the command codes that appear in compiled regular
334   expressions.	 Some opcodes are followed by argument bytes.  A
335   command code can specify any interpretation whatsoever for its
336   arguments.  Zero bytes may appear in the compiled regular expression.  */
337
338typedef enum
339{
340  no_op = 0,
341
342  /* Succeed right away--no more backtracking.	*/
343  succeed,
344
345	/* Followed by one byte giving n, then by n literal bytes.  */
346  exactn,
347
348	/* Matches any (more or less) character.  */
349  anychar,
350
351	/* Matches any one char belonging to specified set.  First
352	   following byte is number of bitmap bytes.  Then come bytes
353	   for a bitmap saying which chars are in.  Bits in each byte
354	   are ordered low-bit-first.  A character is in the set if its
355	   bit is 1.  A character too large to have a bit in the map is
356	   automatically not in the set.  */
357  charset,
358
359	/* Same parameters as charset, but match any character that is
360	   not one of those specified.	*/
361  charset_not,
362
363	/* Start remembering the text that is matched, for storing in a
364	   register.  Followed by one byte with the register number, in
365	   the range 0 to one less than the pattern buffer's re_nsub
366	   field.  Then followed by one byte with the number of groups
367	   inner to this one.  (This last has to be part of the
368	   start_memory only because we need it in the on_failure_jump
369	   of re_match_2.)  */
370  start_memory,
371
372	/* Stop remembering the text that is matched and store it in a
373	   memory register.  Followed by one byte with the register
374	   number, in the range 0 to one less than `re_nsub' in the
375	   pattern buffer, and one byte with the number of inner groups,
376	   just like `start_memory'.  (We need the number of inner
377	   groups here because we don't have any easy way of finding the
378	   corresponding start_memory when we're at a stop_memory.)  */
379  stop_memory,
380
381	/* Match a duplicate of something remembered. Followed by one
382	   byte containing the register number.	 */
383  duplicate,
384
385	/* Fail unless at beginning of line.  */
386  begline,
387
388	/* Fail unless at end of line.	*/
389  endline,
390
391	/* Succeeds if at beginning of buffer (if emacs) or at beginning
392	   of string to be matched (if not).  */
393  begbuf,
394
395	/* Analogously, for end of buffer/string.  */
396  endbuf,
397
398	/* Followed by two byte relative address to which to jump.  */
399  jump,
400
401	/* Same as jump, but marks the end of an alternative.  */
402  jump_past_alt,
403
404	/* Followed by two-byte relative address of place to resume at
405	   in case of failure.	*/
406  on_failure_jump,
407
408	/* Like on_failure_jump, but pushes a placeholder instead of the
409	   current string position when executed.  */
410  on_failure_keep_string_jump,
411
412	/* Throw away latest failure point and then jump to following
413	   two-byte relative address.  */
414  pop_failure_jump,
415
416	/* Change to pop_failure_jump if know won't have to backtrack to
417	   match; otherwise change to jump.  This is used to jump
418	   back to the beginning of a repeat.  If what follows this jump
419	   clearly won't match what the repeat does, such that we can be
420	   sure that there is no use backtracking out of repetitions
421	   already matched, then we change it to a pop_failure_jump.
422	   Followed by two-byte address.  */
423  maybe_pop_jump,
424
425	/* Jump to following two-byte address, and push a dummy failure
426	   point. This failure point will be thrown away if an attempt
427	   is made to use it for a failure.  A `+' construct makes this
428	   before the first repeat.  Also used as an intermediary kind
429	   of jump when compiling an alternative.  */
430  dummy_failure_jump,
431
432	/* Push a dummy failure point and continue.  Used at the end of
433	   alternatives.  */
434  push_dummy_failure,
435
436	/* Followed by two-byte relative address and two-byte number n.
437	   After matching N times, jump to the address upon failure.  */
438  succeed_n,
439
440	/* Followed by two-byte relative address, and two-byte number n.
441	   Jump to the address N times, then fail.  */
442  jump_n,
443
444	/* Set the following two-byte relative address to the
445	   subsequent two-byte number.	The address *includes* the two
446	   bytes of number.  */
447  set_number_at,
448
449  wordchar,	/* Matches any word-constituent character.  */
450  notwordchar,	/* Matches any char that is not a word-constituent.  */
451
452  wordbeg,	/* Succeeds if at word beginning.  */
453  wordend,	/* Succeeds if at word end.  */
454
455  wordbound,	/* Succeeds if at a word boundary.  */
456  notwordbound	/* Succeeds if not at a word boundary.	*/
457
458#ifdef emacs
459  ,before_dot,	/* Succeeds if before point.  */
460  at_dot,	/* Succeeds if at point.  */
461  after_dot,	/* Succeeds if after point.  */
462
463	/* Matches any character whose syntax is specified.  Followed by
464	   a byte which contains a syntax code, e.g., Sword.  */
465  syntaxspec,
466
467	/* Matches any character whose syntax is not that specified.  */
468  notsyntaxspec
469#endif /* emacs */
470} re_opcode_t;
471
472/* Common operations on the compiled pattern.  */
473
474/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
475
476#define STORE_NUMBER(destination, number)				\
477  do {									\
478    (destination)[0] = (number) & 0377;					\
479    (destination)[1] = (number) >> 8;					\
480  } while (0)
481
482/* Same as STORE_NUMBER, except increment DESTINATION to
483   the byte after where the number is stored.  Therefore, DESTINATION
484   must be an lvalue.  */
485
486#define STORE_NUMBER_AND_INCR(destination, number)			\
487  do {									\
488    STORE_NUMBER (destination, number);					\
489    (destination) += 2;							\
490  } while (0)
491
492/* Put into DESTINATION a number stored in two contiguous bytes starting
493   at SOURCE.  */
494
495#define EXTRACT_NUMBER(destination, source)				\
496  do {									\
497    (destination) = *(source) & 0377;					\
498    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
499  } while (0)
500
501#ifdef DEBUG
502static void
503extract_number (dest, source)
504    int *dest;
505    unsigned char *source;
506{
507  int temp = SIGN_EXTEND_CHAR (*(source + 1));
508  *dest = *source & 0377;
509  *dest += temp << 8;
510}
511
512#ifndef EXTRACT_MACROS /* To debug the macros.	*/
513#undef EXTRACT_NUMBER
514#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
515#endif /* not EXTRACT_MACROS */
516
517#endif /* DEBUG */
518
519/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
520   SOURCE must be an lvalue.  */
521
522#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
523  do {									\
524    EXTRACT_NUMBER (destination, source);				\
525    (source) += 2;							\
526  } while (0)
527
528#ifdef DEBUG
529static void
530extract_number_and_incr (destination, source)
531    int *destination;
532    unsigned char **source;
533{
534  extract_number (destination, *source);
535  *source += 2;
536}
537
538#ifndef EXTRACT_MACROS
539#undef EXTRACT_NUMBER_AND_INCR
540#define EXTRACT_NUMBER_AND_INCR(dest, src) \
541  extract_number_and_incr (&dest, &src)
542#endif /* not EXTRACT_MACROS */
543
544#endif /* DEBUG */
545
546/* If DEBUG is defined, Regex prints many voluminous messages about what
547   it is doing (if the variable `debug' is nonzero).  If linked with the
548   main program in `iregex.c', you can enter patterns and strings
549   interactively.  And if linked with the main program in `main.c' and
550   the other test files, you can run the already-written tests.	 */
551
552#ifdef DEBUG
553
554/* We use standard I/O for debugging.  */
555#include <stdio.h>
556
557/* It is useful to test things that ``must'' be true when debugging.  */
558#include <assert.h>
559
560static int debug = 0;
561
562#define DEBUG_STATEMENT(e) e
563#define DEBUG_PRINT1(x) if (debug) printf (x)
564#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
565#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
566#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
567#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
568  if (debug) print_partial_compiled_pattern (s, e)
569#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
570  if (debug) print_double_string (w, s1, sz1, s2, sz2)
571
572
573/* Print the fastmap in human-readable form.  */
574
575void
576print_fastmap (fastmap)
577    char *fastmap;
578{
579  unsigned was_a_range = 0;
580  unsigned i = 0;
581
582  while (i < (1 << BYTEWIDTH))
583    {
584      if (fastmap[i++])
585	{
586	  was_a_range = 0;
587	  putchar (i - 1);
588	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
589	    {
590	      was_a_range = 1;
591	      i++;
592	    }
593	  if (was_a_range)
594	    {
595	      printf ("-");
596	      putchar (i - 1);
597	    }
598	}
599    }
600  putchar ('\n');
601}
602
603
604/* Print a compiled pattern string in human-readable form, starting at
605   the START pointer into it and ending just before the pointer END.  */
606
607void
608print_partial_compiled_pattern (start, end)
609    unsigned char *start;
610    unsigned char *end;
611{
612  int mcnt, mcnt2;
613  unsigned char *p = start;
614  unsigned char *pend = end;
615
616  if (start == NULL)
617    {
618      printf ("(null)\n");
619      return;
620    }
621
622  /* Loop over pattern commands.  */
623  while (p < pend)
624    {
625      printf ("%d:\t", p - start);
626
627      switch ((re_opcode_t) *p++)
628	{
629	case no_op:
630	  printf ("/no_op");
631	  break;
632
633	case exactn:
634	  mcnt = *p++;
635	  printf ("/exactn/%d", mcnt);
636	  do
637	    {
638	      putchar ('/');
639	      putchar (*p++);
640	    }
641	  while (--mcnt);
642	  break;
643
644	case start_memory:
645	  mcnt = *p++;
646	  printf ("/start_memory/%d/%d", mcnt, *p++);
647	  break;
648
649	case stop_memory:
650	  mcnt = *p++;
651	  printf ("/stop_memory/%d/%d", mcnt, *p++);
652	  break;
653
654	case duplicate:
655	  printf ("/duplicate/%d", *p++);
656	  break;
657
658	case anychar:
659	  printf ("/anychar");
660	  break;
661
662	case charset:
663	case charset_not:
664	  {
665	    register int c, last = -100;
666	    register int in_range = 0;
667
668	    printf ("/charset [%s",
669		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
670
671	    assert (p + *p < pend);
672
673	    for (c = 0; c < 256; c++)
674	      if (c / 8 < *p
675		  && (p[1 + (c/8)] & (1 << (c % 8))))
676		{
677		  /* Are we starting a range?  */
678		  if (last + 1 == c && ! in_range)
679		    {
680		      putchar ('-');
681		      in_range = 1;
682		    }
683		  /* Have we broken a range?  */
684		  else if (last + 1 != c && in_range)
685	      {
686		      putchar (last);
687		      in_range = 0;
688		    }
689
690		  if (! in_range)
691		    putchar (c);
692
693		  last = c;
694	      }
695
696	    if (in_range)
697	      putchar (last);
698
699	    putchar (']');
700
701	    p += 1 + *p;
702	  }
703	  break;
704
705	case begline:
706	  printf ("/begline");
707	  break;
708
709	case endline:
710	  printf ("/endline");
711	  break;
712
713	case on_failure_jump:
714	  extract_number_and_incr (&mcnt, &p);
715	  printf ("/on_failure_jump to %d", p + mcnt - start);
716	  break;
717
718	case on_failure_keep_string_jump:
719	  extract_number_and_incr (&mcnt, &p);
720	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
721	  break;
722
723	case dummy_failure_jump:
724	  extract_number_and_incr (&mcnt, &p);
725	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
726	  break;
727
728	case push_dummy_failure:
729	  printf ("/push_dummy_failure");
730	  break;
731
732	case maybe_pop_jump:
733	  extract_number_and_incr (&mcnt, &p);
734	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
735	  break;
736
737	case pop_failure_jump:
738	  extract_number_and_incr (&mcnt, &p);
739	  printf ("/pop_failure_jump to %d", p + mcnt - start);
740	  break;
741
742	case jump_past_alt:
743	  extract_number_and_incr (&mcnt, &p);
744	  printf ("/jump_past_alt to %d", p + mcnt - start);
745	  break;
746
747	case jump:
748	  extract_number_and_incr (&mcnt, &p);
749	  printf ("/jump to %d", p + mcnt - start);
750	  break;
751
752	case succeed_n:
753	  extract_number_and_incr (&mcnt, &p);
754	  extract_number_and_incr (&mcnt2, &p);
755	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
756	  break;
757
758	case jump_n:
759	  extract_number_and_incr (&mcnt, &p);
760	  extract_number_and_incr (&mcnt2, &p);
761	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
762	  break;
763
764	case set_number_at:
765	  extract_number_and_incr (&mcnt, &p);
766	  extract_number_and_incr (&mcnt2, &p);
767	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
768	  break;
769
770	case wordbound:
771	  printf ("/wordbound");
772	  break;
773
774	case notwordbound:
775	  printf ("/notwordbound");
776	  break;
777
778	case wordbeg:
779	  printf ("/wordbeg");
780	  break;
781
782	case wordend:
783	  printf ("/wordend");
784
785#ifdef emacs
786	case before_dot:
787	  printf ("/before_dot");
788	  break;
789
790	case at_dot:
791	  printf ("/at_dot");
792	  break;
793
794	case after_dot:
795	  printf ("/after_dot");
796	  break;
797
798	case syntaxspec:
799	  printf ("/syntaxspec");
800	  mcnt = *p++;
801	  printf ("/%d", mcnt);
802	  break;
803
804	case notsyntaxspec:
805	  printf ("/notsyntaxspec");
806	  mcnt = *p++;
807	  printf ("/%d", mcnt);
808	  break;
809#endif /* emacs */
810
811	case wordchar:
812	  printf ("/wordchar");
813	  break;
814
815	case notwordchar:
816	  printf ("/notwordchar");
817	  break;
818
819	case begbuf:
820	  printf ("/begbuf");
821	  break;
822
823	case endbuf:
824	  printf ("/endbuf");
825	  break;
826
827	default:
828	  printf ("?%d", *(p-1));
829	}
830
831      putchar ('\n');
832    }
833
834  printf ("%d:\tend of pattern.\n", p - start);
835}
836
837
838void
839print_compiled_pattern (bufp)
840    struct re_pattern_buffer *bufp;
841{
842  unsigned char *buffer = bufp->buffer;
843
844  print_partial_compiled_pattern (buffer, buffer + bufp->used);
845  printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
846
847  if (bufp->fastmap_accurate && bufp->fastmap)
848    {
849      printf ("fastmap: ");
850      print_fastmap (bufp->fastmap);
851    }
852
853  printf ("re_nsub: %d\t", bufp->re_nsub);
854  printf ("regs_alloc: %d\t", bufp->regs_allocated);
855  printf ("can_be_null: %d\t", bufp->can_be_null);
856  printf ("newline_anchor: %d\n", bufp->newline_anchor);
857  printf ("no_sub: %d\t", bufp->no_sub);
858  printf ("not_bol: %d\t", bufp->not_bol);
859  printf ("not_eol: %d\t", bufp->not_eol);
860  printf ("syntax: %d\n", bufp->syntax);
861  /* Perhaps we should print the translate table?  */
862}
863
864
865void
866print_double_string (where, string1, size1, string2, size2)
867    const char *where;
868    const char *string1;
869    const char *string2;
870    int size1;
871    int size2;
872{
873  unsigned this_char;
874
875  if (where == NULL)
876    printf ("(null)");
877  else
878    {
879      if (FIRST_STRING_P (where))
880	{
881	  for (this_char = where - string1; this_char < size1; this_char++)
882	    putchar (string1[this_char]);
883
884	  where = string2;
885	}
886
887      for (this_char = where - string2; this_char < size2; this_char++)
888	putchar (string2[this_char]);
889    }
890}
891
892#else /* not DEBUG */
893
894#undef assert
895#define assert(e)
896
897#define DEBUG_STATEMENT(e)
898#define DEBUG_PRINT1(x)
899#define DEBUG_PRINT2(x1, x2)
900#define DEBUG_PRINT3(x1, x2, x3)
901#define DEBUG_PRINT4(x1, x2, x3, x4)
902#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
903#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
904
905#endif /* not DEBUG */
906
907/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
908   also be assigned to arbitrarily: each pattern buffer stores its own
909   syntax, so it can be changed between regex compilations.  */
910/* This has no initializer because initialized variables in Emacs
911   become read-only after dumping.  */
912reg_syntax_t re_syntax_options;
913
914
915/* Specify the precise syntax of regexps for compilation.  This provides
916   for compatibility for various utilities which historically have
917   different, incompatible syntaxes.
918
919   The argument SYNTAX is a bit mask comprised of the various bits
920   defined in regex.h.	We return the old syntax.  */
921
922reg_syntax_t
923re_set_syntax (syntax)
924    reg_syntax_t syntax;
925{
926  reg_syntax_t ret = re_syntax_options;
927
928  re_syntax_options = syntax;
929  return ret;
930}
931
932/* This table gives an error message for each of the error codes listed
933   in regex.h.	Obviously the order here has to be same as there.
934   POSIX doesn't require that we do anything for REG_NOERROR,
935   but why not be nice?	 */
936
937static const char *re_error_msgid[] =
938  {
939    gettext_noop ("Success"),	/* REG_NOERROR */
940    gettext_noop ("No match"),	/* REG_NOMATCH */
941    gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
942    gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
943    gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
944    gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
945    gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
946    gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
947    gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
948    gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
949    gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
950    gettext_noop ("Invalid range end"),	/* REG_ERANGE */
951    gettext_noop ("Memory exhausted"), /* REG_ESPACE */
952    gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
953    gettext_noop ("Premature end of regular expression"), /* REG_EEND */
954    gettext_noop ("Regular expression too big"), /* REG_ESIZE */
955    gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
956  };
957
958/* Avoiding alloca during matching, to placate r_alloc.	 */
959
960/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
961   searching and matching functions should not call alloca.  On some
962   systems, alloca is implemented in terms of malloc, and if we're
963   using the relocating allocator routines, then malloc could cause a
964   relocation, which might (if the strings being searched are in the
965   ralloc heap) shift the data out from underneath the regexp
966   routines.
967
968   Here's another reason to avoid allocation: Emacs
969   processes input from X in a signal handler; processing X input may
970   call malloc; if input arrives while a matching routine is calling
971   malloc, then we're scrod.  But Emacs can't just block input while
972   calling matching routines; then we don't notice interrupts when
973   they come in.  So, Emacs blocks input around all regexp calls
974   except the matching calls, which it leaves unprotected, in the
975   faith that they will not malloc.  */
976
977/* Normally, this is fine.  */
978#define MATCH_MAY_ALLOCATE
979
980/* When using GNU C, we are not REALLY using the C alloca, no matter
981   what config.h may say.  So don't take precautions for it.  */
982#ifdef __GNUC__
983#undef C_ALLOCA
984#endif
985
986/* The match routines may not allocate if (1) they would do it with malloc
987   and (2) it's not safe for them to use malloc.
988   Note that if REL_ALLOC is defined, matching would not use malloc for the
989   failure stack, but we would still use it for the register vectors;
990   so REL_ALLOC should not affect this.	 */
991#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
992#undef MATCH_MAY_ALLOCATE
993#endif
994
995
996/* Failure stack declarations and macros; both re_compile_fastmap and
997   re_match_2 use a failure stack.  These have to be macros because of
998   REGEX_ALLOCATE_STACK.  */
999
1000
1001/* Number of failure points for which to initially allocate space
1002   when matching.  If this number is exceeded, we allocate more
1003   space, so it is not a hard limit.  */
1004#ifndef INIT_FAILURE_ALLOC
1005#define INIT_FAILURE_ALLOC 5
1006#endif
1007
1008/* Roughly the maximum number of failure points on the stack.  Would be
1009   exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1010   This is a variable only so users of regex can assign to it; we never
1011   change it ourselves.	 */
1012#if defined (MATCH_MAY_ALLOCATE)
1013/* 4400 was enough to cause a crash on Alpha OSF/1,
1014   whose default stack limit is 2mb.  */
1015int re_max_failures = 20000;
1016#else
1017int re_max_failures = 2000;
1018#endif
1019
1020union fail_stack_elt
1021{
1022  unsigned char *pointer;
1023  int integer;
1024};
1025
1026typedef union fail_stack_elt fail_stack_elt_t;
1027
1028typedef struct
1029{
1030  fail_stack_elt_t *stack;
1031  unsigned size;
1032  unsigned avail;			/* Offset of next open position.  */
1033} fail_stack_type;
1034
1035#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1036#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1037#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1038
1039
1040/* Define macros to initialize and free the failure stack.
1041   Do `return -2' if the alloc fails.  */
1042
1043#ifdef MATCH_MAY_ALLOCATE
1044#define INIT_FAIL_STACK()						\
1045  do {									\
1046    fail_stack.stack = (fail_stack_elt_t *)				\
1047      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
1048									\
1049    if (fail_stack.stack == NULL)					\
1050      return -2;							\
1051									\
1052    fail_stack.size = INIT_FAILURE_ALLOC;				\
1053    fail_stack.avail = 0;						\
1054  } while (0)
1055
1056#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1057#else
1058#define INIT_FAIL_STACK()						\
1059  do {									\
1060    fail_stack.avail = 0;						\
1061  } while (0)
1062
1063#define RESET_FAIL_STACK()
1064#endif
1065
1066
1067/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1068
1069   Return 1 if succeeds, and 0 if either ran out of memory
1070   allocating space for it or it was already too large.
1071
1072   REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
1073
1074#define DOUBLE_FAIL_STACK(fail_stack)					\
1075  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
1076   ? 0									\
1077   : ((fail_stack).stack = (fail_stack_elt_t *)				\
1078	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
1079	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
1080	  ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1081									\
1082      (fail_stack).stack == NULL					\
1083      ? 0								\
1084      : ((fail_stack).size <<= 1,					\
1085	 1)))
1086
1087
1088/* Push pointer POINTER on FAIL_STACK.
1089   Return 1 if was able to do so and 0 if ran out of memory allocating
1090   space to do so.  */
1091#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1092  ((FAIL_STACK_FULL ()							\
1093    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1094   ? 0									\
1095   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1096      1))
1097
1098/* Push a pointer value onto the failure stack.
1099   Assumes the variable `fail_stack'.  Probably should only
1100   be called from within `PUSH_FAILURE_POINT'.	*/
1101#define PUSH_FAILURE_POINTER(item)					\
1102  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1103
1104/* This pushes an integer-valued item onto the failure stack.
1105   Assumes the variable `fail_stack'.  Probably should only
1106   be called from within `PUSH_FAILURE_POINT'.	*/
1107#define PUSH_FAILURE_INT(item)					\
1108  fail_stack.stack[fail_stack.avail++].integer = (item)
1109
1110/* Push a fail_stack_elt_t value onto the failure stack.
1111   Assumes the variable `fail_stack'.  Probably should only
1112   be called from within `PUSH_FAILURE_POINT'.	*/
1113#define PUSH_FAILURE_ELT(item)					\
1114  fail_stack.stack[fail_stack.avail++] =  (item)
1115
1116/* These three POP... operations complement the three PUSH... operations.
1117   All assume that `fail_stack' is nonempty.  */
1118#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1119#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1120#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1121
1122/* Used to omit pushing failure point id's when we're not debugging.  */
1123#ifdef DEBUG
1124#define DEBUG_PUSH PUSH_FAILURE_INT
1125#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1126#else
1127#define DEBUG_PUSH(item)
1128#define DEBUG_POP(item_addr)
1129#endif
1130
1131
1132/* Push the information about the state we will need
1133   if we ever fail back to it.
1134
1135   Requires variables fail_stack, regstart, regend, reg_info, and
1136   num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1137   declared.
1138
1139   Does `return FAILURE_CODE' if runs out of memory.  */
1140
1141#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1142  do {									\
1143    char *destination;							\
1144    /* Must be int, so when we don't save any registers, the arithmetic	\
1145       of 0 + -1 isn't done as unsigned.  */				\
1146    int this_reg;							\
1147									\
1148    DEBUG_STATEMENT (failure_id++);					\
1149    DEBUG_STATEMENT (nfailure_points_pushed++);				\
1150    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1151    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1152    DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
1153									\
1154    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
1155    DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
1156									\
1157    /* Ensure we have enough space allocated for what we will push.  */	\
1158    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1159      {									\
1160	if (!DOUBLE_FAIL_STACK (fail_stack))				\
1161	  return failure_code;						\
1162									\
1163	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1164		       (fail_stack).size);				\
1165	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1166      }									\
1167									\
1168    /* Push the info, starting with the registers.  */			\
1169    DEBUG_PRINT1 ("\n");						\
1170									\
1171    if (1)								\
1172      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1173	   this_reg++)							\
1174	{								\
1175	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
1176	  DEBUG_STATEMENT (num_regs_pushed++);				\
1177									\
1178	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
1179	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1180									\
1181	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
1182	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1183									\
1184	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
1185	  DEBUG_PRINT2 (" match_null=%d",				\
1186			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1187	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1188	  DEBUG_PRINT2 (" matched_something=%d",			\
1189			MATCHED_SOMETHING (reg_info[this_reg]));	\
1190	  DEBUG_PRINT2 (" ever_matched=%d",				\
1191			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1192	  DEBUG_PRINT1 ("\n");						\
1193	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1194	}								\
1195									\
1196    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1197    PUSH_FAILURE_INT (lowest_active_reg);				\
1198									\
1199    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1200    PUSH_FAILURE_INT (highest_active_reg);				\
1201									\
1202    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
1203    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1204    PUSH_FAILURE_POINTER (pattern_place);				\
1205									\
1206    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
1207    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
1208				 size2);				\
1209    DEBUG_PRINT1 ("'\n");						\
1210    PUSH_FAILURE_POINTER (string_place);				\
1211									\
1212    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1213    DEBUG_PUSH (failure_id);						\
1214  } while (0)
1215
1216/* This is the number of items that are pushed and popped on the stack
1217   for each register.  */
1218#define NUM_REG_ITEMS  3
1219
1220/* Individual items aside from the registers.  */
1221#ifdef DEBUG
1222#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1223#else
1224#define NUM_NONREG_ITEMS 4
1225#endif
1226
1227/* We push at most this many items on the stack.  */
1228/* We used to use (num_regs - 1), which is the number of registers
1229   this regexp will save; but that was changed to 5
1230   to avoid stack overflow for a regexp with lots of parens.  */
1231#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1232
1233/* We actually push this many items.  */
1234#define NUM_FAILURE_ITEMS				\
1235  (((0							\
1236     ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1237    * NUM_REG_ITEMS)					\
1238   + NUM_NONREG_ITEMS)
1239
1240/* How many items can still be added to the stack without overflowing it.  */
1241#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1242
1243
1244/* Pops what PUSH_FAIL_STACK pushes.
1245
1246   We restore into the parameters, all of which should be lvalues:
1247     STR -- the saved data position.
1248     PAT -- the saved pattern position.
1249     LOW_REG, HIGH_REG -- the highest and lowest active registers.
1250     REGSTART, REGEND -- arrays of string positions.
1251     REG_INFO -- array of information about each subexpression.
1252
1253   Also assumes the variables `fail_stack' and (if debugging), `bufp',
1254   `pend', `string1', `size1', `string2', and `size2'.	*/
1255
1256#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1257{									\
1258  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
1259  int this_reg;								\
1260  const unsigned char *string_temp;					\
1261									\
1262  assert (!FAIL_STACK_EMPTY ());					\
1263									\
1264  /* Remove failure points and point to how many regs pushed.  */	\
1265  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1266  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1267  DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
1268									\
1269  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1270									\
1271  DEBUG_POP (&failure_id);						\
1272  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1273									\
1274  /* If the saved string location is NULL, it came from an		\
1275     on_failure_keep_string_jump opcode, and we want to throw away the	\
1276     saved NULL, thus retaining our current position in the string.  */	\
1277  string_temp = POP_FAILURE_POINTER ();					\
1278  if (string_temp != NULL)						\
1279    str = (const char *) string_temp;					\
1280									\
1281  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
1282  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1283  DEBUG_PRINT1 ("'\n");							\
1284									\
1285  pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1286  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
1287  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1288									\
1289  /* Restore register info.  */						\
1290  high_reg = (unsigned) POP_FAILURE_INT ();				\
1291  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
1292									\
1293  low_reg = (unsigned) POP_FAILURE_INT ();				\
1294  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
1295									\
1296  if (1)								\
1297    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1298      {									\
1299	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
1300									\
1301	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1302	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
1303									\
1304	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1305	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
1306									\
1307	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1308	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
1309      }									\
1310  else									\
1311    {									\
1312      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1313	{								\
1314	  reg_info[this_reg].word.integer = 0;				\
1315	  regend[this_reg] = 0;						\
1316	  regstart[this_reg] = 0;					\
1317	}								\
1318      highest_active_reg = high_reg;					\
1319    }									\
1320									\
1321  set_regs_matched_done = 0;						\
1322  DEBUG_STATEMENT (nfailure_points_popped++);				\
1323} /* POP_FAILURE_POINT */
1324
1325
1326
1327/* Structure for per-register (a.k.a. per-group) information.
1328   Other register information, such as the
1329   starting and ending positions (which are addresses), and the list of
1330   inner groups (which is a bits list) are maintained in separate
1331   variables.
1332
1333   We are making a (strictly speaking) nonportable assumption here: that
1334   the compiler will pack our bit fields into something that fits into
1335   the type of `word', i.e., is something that fits into one item on the
1336   failure stack.  */
1337
1338typedef union
1339{
1340  fail_stack_elt_t word;
1341  struct
1342  {
1343      /* This field is one if this group can match the empty string,
1344	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1345#define MATCH_NULL_UNSET_VALUE 3
1346    unsigned match_null_string_p : 2;
1347    unsigned is_active : 1;
1348    unsigned matched_something : 1;
1349    unsigned ever_matched_something : 1;
1350  } bits;
1351} register_info_type;
1352
1353#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1354#define IS_ACTIVE(R)  ((R).bits.is_active)
1355#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1356#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1357
1358
1359/* Call this when have matched a real character; it sets `matched' flags
1360   for the subexpressions which we are currently inside.  Also records
1361   that those subexprs have matched.  */
1362#define SET_REGS_MATCHED()						\
1363  do									\
1364    {									\
1365      if (!set_regs_matched_done)					\
1366	{								\
1367	  unsigned r;							\
1368	  set_regs_matched_done = 1;					\
1369	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1370	    {								\
1371	      MATCHED_SOMETHING (reg_info[r])				\
1372		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1373		= 1;							\
1374	    }								\
1375	}								\
1376    }									\
1377  while (0)
1378
1379/* Registers are set to a sentinel when they haven't yet matched.  */
1380static char reg_unset_dummy;
1381#define REG_UNSET_VALUE (&reg_unset_dummy)
1382#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1383
1384/* Subroutine declarations and macros for regex_compile.  */
1385
1386static void store_op1 (), store_op2 ();
1387static void insert_op1 (), insert_op2 ();
1388static boolean at_begline_loc_p (), at_endline_loc_p ();
1389static boolean group_in_compile_stack ();
1390static reg_errcode_t compile_range ();
1391
1392/* Fetch the next character in the uncompiled pattern---translating it
1393   if necessary.  Also cast from a signed character in the constant
1394   string passed to us by the user to an unsigned char that we can use
1395   as an array index (in, e.g., `translate').  */
1396#ifndef PATFETCH
1397#define PATFETCH(c)							\
1398  do {if (p == pend) return REG_EEND;					\
1399    c = (unsigned char) *p++;						\
1400    if (translate) c = (unsigned char) translate[c];			\
1401  } while (0)
1402#endif
1403
1404/* Fetch the next character in the uncompiled pattern, with no
1405   translation.	 */
1406#define PATFETCH_RAW(c)							\
1407  do {if (p == pend) return REG_EEND;					\
1408    c = (unsigned char) *p++;						\
1409  } while (0)
1410
1411/* Go backwards one character in the pattern.  */
1412#define PATUNFETCH p--
1413
1414
1415/* If `translate' is non-null, return translate[D], else just D.  We
1416   cast the subscript to translate because some data is declared as
1417   `char *', to avoid warnings when a string constant is passed.  But
1418   when we use a character as a subscript we must make it unsigned.  */
1419#ifndef TRANSLATE
1420#define TRANSLATE(d) \
1421  (translate ? (char) translate[(unsigned char) (d)] : (d))
1422#endif
1423
1424
1425/* Macros for outputting the compiled pattern into `buffer'.  */
1426
1427/* If the buffer isn't allocated when it comes in, use this.  */
1428#define INIT_BUF_SIZE  32
1429
1430/* Make sure we have at least N more bytes of space in buffer.	*/
1431#define GET_BUFFER_SPACE(n)						\
1432    while (b - bufp->buffer + (n) > bufp->allocated)			\
1433      EXTEND_BUFFER ()
1434
1435/* Make sure we have one more byte of buffer space and then add C to it.  */
1436#define BUF_PUSH(c)							\
1437  do {									\
1438    GET_BUFFER_SPACE (1);						\
1439    *b++ = (unsigned char) (c);						\
1440  } while (0)
1441
1442
1443/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1444#define BUF_PUSH_2(c1, c2)						\
1445  do {									\
1446    GET_BUFFER_SPACE (2);						\
1447    *b++ = (unsigned char) (c1);					\
1448    *b++ = (unsigned char) (c2);					\
1449  } while (0)
1450
1451
1452/* As with BUF_PUSH_2, except for three bytes.	*/
1453#define BUF_PUSH_3(c1, c2, c3)						\
1454  do {									\
1455    GET_BUFFER_SPACE (3);						\
1456    *b++ = (unsigned char) (c1);					\
1457    *b++ = (unsigned char) (c2);					\
1458    *b++ = (unsigned char) (c3);					\
1459  } while (0)
1460
1461
1462/* Store a jump with opcode OP at LOC to location TO.  We store a
1463   relative address offset by the three bytes the jump itself occupies.	 */
1464#define STORE_JUMP(op, loc, to) \
1465  store_op1 (op, loc, (to) - (loc) - 3)
1466
1467/* Likewise, for a two-argument jump.  */
1468#define STORE_JUMP2(op, loc, to, arg) \
1469  store_op2 (op, loc, (to) - (loc) - 3, arg)
1470
1471/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.	 */
1472#define INSERT_JUMP(op, loc, to) \
1473  insert_op1 (op, loc, (to) - (loc) - 3, b)
1474
1475/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1476#define INSERT_JUMP2(op, loc, to, arg) \
1477  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1478
1479
1480/* This is not an arbitrary limit: the arguments which represent offsets
1481   into the pattern are two bytes long.	 So if 2^16 bytes turns out to
1482   be too small, many things would have to change.  */
1483#define MAX_BUF_SIZE (1L << 16)
1484
1485
1486/* Extend the buffer by twice its current size via realloc and
1487   reset the pointers that pointed into the old block to point to the
1488   correct places in the new one.  If extending the buffer results in it
1489   being larger than MAX_BUF_SIZE, then flag memory exhausted.	*/
1490#define EXTEND_BUFFER()							\
1491  do {									\
1492    unsigned char *old_buffer = bufp->buffer;				\
1493    if (bufp->allocated == MAX_BUF_SIZE)				\
1494      return REG_ESIZE;							\
1495    bufp->allocated <<= 1;						\
1496    if (bufp->allocated > MAX_BUF_SIZE)					\
1497      bufp->allocated = MAX_BUF_SIZE;					\
1498    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1499    if (bufp->buffer == NULL)						\
1500      return REG_ESPACE;						\
1501    /* If the buffer moved, move all the pointers into it.  */		\
1502    if (old_buffer != bufp->buffer)					\
1503      {									\
1504	b = (b - old_buffer) + bufp->buffer;				\
1505	begalt = (begalt - old_buffer) + bufp->buffer;			\
1506	if (fixup_alt_jump)						\
1507	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1508	if (laststart)							\
1509	  laststart = (laststart - old_buffer) + bufp->buffer;		\
1510	if (pending_exact)						\
1511	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1512      }									\
1513  } while (0)
1514
1515
1516/* Since we have one byte reserved for the register number argument to
1517   {start,stop}_memory, the maximum number of groups we can report
1518   things about is what fits in that byte.  */
1519#define MAX_REGNUM 255
1520
1521/* But patterns can have more than `MAX_REGNUM' registers.  We just
1522   ignore the excess.  */
1523typedef unsigned regnum_t;
1524
1525
1526/* Macros for the compile stack.  */
1527
1528/* Since offsets can go either forwards or backwards, this type needs to
1529   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.	 */
1530typedef int pattern_offset_t;
1531
1532typedef struct
1533{
1534  pattern_offset_t begalt_offset;
1535  pattern_offset_t fixup_alt_jump;
1536  pattern_offset_t inner_group_offset;
1537  pattern_offset_t laststart_offset;
1538  regnum_t regnum;
1539} compile_stack_elt_t;
1540
1541
1542typedef struct
1543{
1544  compile_stack_elt_t *stack;
1545  unsigned size;
1546  unsigned avail;			/* Offset of next open position.  */
1547} compile_stack_type;
1548
1549
1550#define INIT_COMPILE_STACK_SIZE 32
1551
1552#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1553#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1554
1555/* The next available element.	*/
1556#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1557
1558
1559/* Set the bit for character C in a list.  */
1560#define SET_LIST_BIT(c)				      \
1561  (b[((unsigned char) (c)) / BYTEWIDTH]		      \
1562   |= 1 << (((unsigned char) c) % BYTEWIDTH))
1563
1564
1565/* Get the next unsigned number in the uncompiled pattern.  */
1566#define GET_UNSIGNED_NUMBER(num)					\
1567  { if (p != pend)							\
1568     {									\
1569       PATFETCH (c);							\
1570       while (ISDIGIT (c))						\
1571	 {								\
1572	   if (num < 0)							\
1573	      num = 0;							\
1574	   num = num * 10 + c - '0';					\
1575	   if (p == pend)						\
1576	      break;							\
1577	   PATFETCH (c);						\
1578	 }								\
1579       }								\
1580    }
1581
1582#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1583
1584#define IS_CHAR_CLASS(string)						\
1585   (STREQ (string, "alpha") || STREQ (string, "upper")			\
1586    || STREQ (string, "lower") || STREQ (string, "digit")		\
1587    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1588    || STREQ (string, "space") || STREQ (string, "print")		\
1589    || STREQ (string, "punct") || STREQ (string, "graph")		\
1590    || STREQ (string, "cntrl") || STREQ (string, "blank"))
1591
1592#ifndef MATCH_MAY_ALLOCATE
1593
1594/* If we cannot allocate large objects within re_match_2_internal,
1595   we make the fail stack and register vectors global.
1596   The fail stack, we grow to the maximum size when a regexp
1597   is compiled.
1598   The register vectors, we adjust in size each time we
1599   compile a regexp, according to the number of registers it needs.  */
1600
1601static fail_stack_type fail_stack;
1602
1603/* Size with which the following vectors are currently allocated.
1604   That is so we can make them bigger as needed,
1605   but never make them smaller.	 */
1606static int regs_allocated_size;
1607
1608static const char **	 regstart, **	  regend;
1609static const char ** old_regstart, ** old_regend;
1610static const char **best_regstart, **best_regend;
1611static register_info_type *reg_info;
1612static const char **reg_dummy;
1613static register_info_type *reg_info_dummy;
1614
1615/* Make the register vectors big enough for NUM_REGS registers,
1616   but don't make them smaller.	 */
1617
1618static
1619regex_grow_registers (num_regs)
1620     int num_regs;
1621{
1622  if (num_regs > regs_allocated_size)
1623    {
1624      RETALLOC_IF (regstart,	 num_regs, const char *);
1625      RETALLOC_IF (regend,	 num_regs, const char *);
1626      RETALLOC_IF (old_regstart, num_regs, const char *);
1627      RETALLOC_IF (old_regend,	 num_regs, const char *);
1628      RETALLOC_IF (best_regstart, num_regs, const char *);
1629      RETALLOC_IF (best_regend,	 num_regs, const char *);
1630      RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1631      RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1632      RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1633
1634      regs_allocated_size = num_regs;
1635    }
1636}
1637
1638#endif /* not MATCH_MAY_ALLOCATE */
1639
1640/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1641   Returns one of error codes defined in `regex.h', or zero for success.
1642
1643   Assumes the `allocated' (and perhaps `buffer') and `translate'
1644   fields are set in BUFP on entry.
1645
1646   If it succeeds, results are put in BUFP (if it returns an error, the
1647   contents of BUFP are undefined):
1648     `buffer' is the compiled pattern;
1649     `syntax' is set to SYNTAX;
1650     `used' is set to the length of the compiled pattern;
1651     `fastmap_accurate' is zero;
1652     `re_nsub' is the number of subexpressions in PATTERN;
1653     `not_bol' and `not_eol' are zero;
1654
1655   The `fastmap' and `newline_anchor' fields are neither
1656   examined nor set.  */
1657
1658/* Return, freeing storage we allocated.  */
1659#define FREE_STACK_RETURN(value)		\
1660  return (free (compile_stack.stack), value)
1661
1662static reg_errcode_t
1663regex_compile (pattern, size, syntax, bufp)
1664     const char *pattern;
1665     int size;
1666     reg_syntax_t syntax;
1667     struct re_pattern_buffer *bufp;
1668{
1669  /* We fetch characters from PATTERN here.  Even though PATTERN is
1670     `char *' (i.e., signed), we declare these variables as unsigned, so
1671     they can be reliably used as array indices.  */
1672  register unsigned char c, c1;
1673
1674  /* A random temporary spot in PATTERN.  */
1675  const char *p1;
1676
1677  /* Points to the end of the buffer, where we should append.  */
1678  register unsigned char *b;
1679
1680  /* Keeps track of unclosed groups.  */
1681  compile_stack_type compile_stack;
1682
1683  /* Points to the current (ending) position in the pattern.  */
1684  const char *p = pattern;
1685  const char *pend = pattern + size;
1686
1687  /* How to translate the characters in the pattern.  */
1688  RE_TRANSLATE_TYPE translate = bufp->translate;
1689
1690  /* Address of the count-byte of the most recently inserted `exactn'
1691     command.  This makes it possible to tell if a new exact-match
1692     character can be added to that command or if the character requires
1693     a new `exactn' command.  */
1694  unsigned char *pending_exact = 0;
1695
1696  /* Address of start of the most recently finished expression.
1697     This tells, e.g., postfix * where to find the start of its
1698     operand.  Reset at the beginning of groups and alternatives.  */
1699  unsigned char *laststart = 0;
1700
1701  /* Address of beginning of regexp, or inside of last group.  */
1702  unsigned char *begalt;
1703
1704  /* Place in the uncompiled pattern (i.e., the {) to
1705     which to go back if the interval is invalid.  */
1706  const char *beg_interval;
1707
1708  /* Address of the place where a forward jump should go to the end of
1709     the containing expression.	 Each alternative of an `or' -- except the
1710     last -- ends with a forward jump of this sort.  */
1711  unsigned char *fixup_alt_jump = 0;
1712
1713  /* Counts open-groups as they are encountered.  Remembered for the
1714     matching close-group on the compile stack, so the same register
1715     number is put in the stop_memory as the start_memory.  */
1716  regnum_t regnum = 0;
1717
1718#ifdef DEBUG
1719  DEBUG_PRINT1 ("\nCompiling pattern: ");
1720  if (debug)
1721    {
1722      unsigned debug_count;
1723
1724      for (debug_count = 0; debug_count < size; debug_count++)
1725	putchar (pattern[debug_count]);
1726      putchar ('\n');
1727    }
1728#endif /* DEBUG */
1729
1730  /* Initialize the compile stack.  */
1731  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1732  if (compile_stack.stack == NULL)
1733    return REG_ESPACE;
1734
1735  compile_stack.size = INIT_COMPILE_STACK_SIZE;
1736  compile_stack.avail = 0;
1737
1738  /* Initialize the pattern buffer.  */
1739  bufp->syntax = syntax;
1740  bufp->fastmap_accurate = 0;
1741  bufp->not_bol = bufp->not_eol = 0;
1742
1743  /* Set `used' to zero, so that if we return an error, the pattern
1744     printer (for debugging) will think there's no pattern.  We reset it
1745     at the end.  */
1746  bufp->used = 0;
1747
1748  /* Always count groups, whether or not bufp->no_sub is set.  */
1749  bufp->re_nsub = 0;
1750
1751#if !defined (emacs) && !defined (SYNTAX_TABLE)
1752  /* Initialize the syntax table.  */
1753   init_syntax_once ();
1754#endif
1755
1756  if (bufp->allocated == 0)
1757    {
1758      if (bufp->buffer)
1759	{ /* If zero allocated, but buffer is non-null, try to realloc
1760	     enough space.  This loses if buffer's address is bogus, but
1761	     that is the user's responsibility.	 */
1762	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1763	}
1764      else
1765	{ /* Caller did not allocate a buffer.	Do it for them.	 */
1766	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1767	}
1768      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1769
1770      bufp->allocated = INIT_BUF_SIZE;
1771    }
1772
1773  begalt = b = bufp->buffer;
1774
1775  /* Loop through the uncompiled pattern until we're at the end.  */
1776  while (p != pend)
1777    {
1778      PATFETCH (c);
1779
1780      switch (c)
1781	{
1782	case '^':
1783	  {
1784	    if (   /* If at start of pattern, it's an operator.	 */
1785		   p == pattern + 1
1786		   /* If context independent, it's an operator.	 */
1787		|| syntax & RE_CONTEXT_INDEP_ANCHORS
1788		   /* Otherwise, depends on what's come before.	 */
1789		|| at_begline_loc_p (pattern, p, syntax))
1790	      BUF_PUSH (begline);
1791	    else
1792	      goto normal_char;
1793	  }
1794	  break;
1795
1796
1797	case '$':
1798	  {
1799	    if (   /* If at end of pattern, it's an operator.  */
1800		   p == pend
1801		   /* If context independent, it's an operator.	 */
1802		|| syntax & RE_CONTEXT_INDEP_ANCHORS
1803		   /* Otherwise, depends on what's next.  */
1804		|| at_endline_loc_p (p, pend, syntax))
1805	       BUF_PUSH (endline);
1806	     else
1807	       goto normal_char;
1808	   }
1809	   break;
1810
1811
1812	case '+':
1813	case '?':
1814	  if ((syntax & RE_BK_PLUS_QM)
1815	      || (syntax & RE_LIMITED_OPS))
1816	    goto normal_char;
1817	handle_plus:
1818	case '*':
1819	  /* If there is no previous pattern... */
1820	  if (!laststart)
1821	    {
1822	      if (syntax & RE_CONTEXT_INVALID_OPS)
1823		FREE_STACK_RETURN (REG_BADRPT);
1824	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1825		goto normal_char;
1826	    }
1827
1828	  {
1829	    /* Are we optimizing this jump?  */
1830	    boolean keep_string_p = false;
1831
1832	    /* 1 means zero (many) matches is allowed.	*/
1833	    char zero_times_ok = 0, many_times_ok = 0;
1834
1835	    /* If there is a sequence of repetition chars, collapse it
1836	       down to just one (the right one).  We can't combine
1837	       interval operators with these because of, e.g., `a{2}*',
1838	       which should only match an even number of `a's.	*/
1839
1840	    for (;;)
1841	      {
1842		zero_times_ok |= c != '+';
1843		many_times_ok |= c != '?';
1844
1845		if (p == pend)
1846		  break;
1847
1848		PATFETCH (c);
1849
1850		if (c == '*'
1851		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1852		  ;
1853
1854		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
1855		  {
1856		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1857
1858		    PATFETCH (c1);
1859		    if (!(c1 == '+' || c1 == '?'))
1860		      {
1861			PATUNFETCH;
1862			PATUNFETCH;
1863			break;
1864		      }
1865
1866		    c = c1;
1867		  }
1868		else
1869		  {
1870		    PATUNFETCH;
1871		    break;
1872		  }
1873
1874		/* If we get here, we found another repeat character.  */
1875	       }
1876
1877	    /* Star, etc. applied to an empty pattern is equivalent
1878	       to an empty pattern.  */
1879	    if (!laststart)
1880	      break;
1881
1882	    /* Now we know whether or not zero matches is allowed
1883	       and also whether or not two or more matches is allowed.	*/
1884	    if (many_times_ok)
1885	      { /* More than one repetition is allowed, so put in at the
1886		   end a backward relative jump from `b' to before the next
1887		   jump we're going to put in below (which jumps from
1888		   laststart to after this jump).
1889
1890		   But if we are at the `*' in the exact sequence `.*\n',
1891		   insert an unconditional jump backwards to the .,
1892		   instead of the beginning of the loop.  This way we only
1893		   push a failure point once, instead of every time
1894		   through the loop.  */
1895		assert (p - 1 > pattern);
1896
1897		/* Allocate the space for the jump.  */
1898		GET_BUFFER_SPACE (3);
1899
1900		/* We know we are not at the first character of the pattern,
1901		   because laststart was nonzero.  And we've already
1902		   incremented `p', by the way, to be the character after
1903		   the `*'.  Do we have to do something analogous here
1904		   for null bytes, because of RE_DOT_NOT_NULL?	*/
1905		if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1906		    && zero_times_ok
1907		    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1908		    && !(syntax & RE_DOT_NEWLINE))
1909		  { /* We have .*\n.  */
1910		    STORE_JUMP (jump, b, laststart);
1911		    keep_string_p = true;
1912		  }
1913		else
1914		  /* Anything else.  */
1915		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1916
1917		/* We've added more stuff to the buffer.  */
1918		b += 3;
1919	      }
1920
1921	    /* On failure, jump from laststart to b + 3, which will be the
1922	       end of the buffer after this jump is inserted.  */
1923	    GET_BUFFER_SPACE (3);
1924	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1925				       : on_failure_jump,
1926			 laststart, b + 3);
1927	    pending_exact = 0;
1928	    b += 3;
1929
1930	    if (!zero_times_ok)
1931	      {
1932		/* At least one repetition is required, so insert a
1933		   `dummy_failure_jump' before the initial
1934		   `on_failure_jump' instruction of the loop. This
1935		   effects a skip over that instruction the first time
1936		   we hit that loop.  */
1937		GET_BUFFER_SPACE (3);
1938		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1939		b += 3;
1940	      }
1941	    }
1942	  break;
1943
1944
1945	case '.':
1946	  laststart = b;
1947	  BUF_PUSH (anychar);
1948	  break;
1949
1950
1951	case '[':
1952	  {
1953	    boolean had_char_class = false;
1954
1955	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1956
1957	    /* Ensure that we have enough space to push a charset: the
1958	       opcode, the length count, and the bitset; 34 bytes in all.  */
1959	    GET_BUFFER_SPACE (34);
1960
1961	    laststart = b;
1962
1963	    /* We test `*p == '^' twice, instead of using an if
1964	       statement, so we only need one BUF_PUSH.	 */
1965	    BUF_PUSH (*p == '^' ? charset_not : charset);
1966	    if (*p == '^')
1967	      p++;
1968
1969	    /* Remember the first position in the bracket expression.  */
1970	    p1 = p;
1971
1972	    /* Push the number of bytes in the bitmap.	*/
1973	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1974
1975	    /* Clear the whole map.  */
1976	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1977
1978	    /* charset_not matches newline according to a syntax bit.  */
1979	    if ((re_opcode_t) b[-2] == charset_not
1980		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1981	      SET_LIST_BIT ('\n');
1982
1983	    /* Read in characters and ranges, setting map bits.	 */
1984	    for (;;)
1985	      {
1986		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1987
1988		PATFETCH (c);
1989
1990		/* \ might escape characters inside [...] and [^...].  */
1991		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1992		  {
1993		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1994
1995		    PATFETCH (c1);
1996		    SET_LIST_BIT (c1);
1997		    continue;
1998		  }
1999
2000		/* Could be the end of the bracket expression.	If it's
2001		   not (i.e., when the bracket expression is `[]' so
2002		   far), the ']' character bit gets set way below.  */
2003		if (c == ']' && p != p1 + 1)
2004		  break;
2005
2006		/* Look ahead to see if it's a range when the last thing
2007		   was a character class.  */
2008		if (had_char_class && c == '-' && *p != ']')
2009		  FREE_STACK_RETURN (REG_ERANGE);
2010
2011		/* Look ahead to see if it's a range when the last thing
2012		   was a character: if this is a hyphen not at the
2013		   beginning or the end of a list, then it's the range
2014		   operator.  */
2015		if (c == '-'
2016		    && !(p - 2 >= pattern && p[-2] == '[')
2017		    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2018		    && *p != ']')
2019		  {
2020		    reg_errcode_t ret
2021		      = compile_range (&p, pend, translate, syntax, b);
2022		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2023		  }
2024
2025		else if (p[0] == '-' && p[1] != ']')
2026		  { /* This handles ranges made up of characters only.	*/
2027		    reg_errcode_t ret;
2028
2029		    /* Move past the `-'.  */
2030		    PATFETCH (c1);
2031
2032		    ret = compile_range (&p, pend, translate, syntax, b);
2033		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2034		  }
2035
2036		/* See if we're at the beginning of a possible character
2037		   class.  */
2038
2039		else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2040		  { /* Leave room for the null.	 */
2041		    char str[CHAR_CLASS_MAX_LENGTH + 1];
2042
2043		    PATFETCH (c);
2044		    c1 = 0;
2045
2046		    /* If pattern is `[[:'.  */
2047		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2048
2049		    for (;;)
2050		      {
2051			PATFETCH (c);
2052			if (c == ':' || c == ']' || p == pend
2053			    || c1 == CHAR_CLASS_MAX_LENGTH)
2054			  break;
2055			str[c1++] = c;
2056		      }
2057		    str[c1] = '\0';
2058
2059		    /* If isn't a word bracketed by `[:' and:`]':
2060		       undo the ending character, the letters, and leave
2061		       the leading `:' and `[' (but set bits for them).	 */
2062		    if (c == ':' && *p == ']')
2063		      {
2064			int ch;
2065			boolean is_alnum = STREQ (str, "alnum");
2066			boolean is_alpha = STREQ (str, "alpha");
2067			boolean is_blank = STREQ (str, "blank");
2068			boolean is_cntrl = STREQ (str, "cntrl");
2069			boolean is_digit = STREQ (str, "digit");
2070			boolean is_graph = STREQ (str, "graph");
2071			boolean is_lower = STREQ (str, "lower");
2072			boolean is_print = STREQ (str, "print");
2073			boolean is_punct = STREQ (str, "punct");
2074			boolean is_space = STREQ (str, "space");
2075			boolean is_upper = STREQ (str, "upper");
2076			boolean is_xdigit = STREQ (str, "xdigit");
2077
2078			if (!IS_CHAR_CLASS (str))
2079			  FREE_STACK_RETURN (REG_ECTYPE);
2080
2081			/* Throw away the ] at the end of the character
2082			   class.  */
2083			PATFETCH (c);
2084
2085			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2086
2087			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2088			  {
2089			    int translated = TRANSLATE (ch);
2090			    /* This was split into 3 if's to
2091			       avoid an arbitrary limit in some compiler.  */
2092			    if (   (is_alnum  && ISALNUM (ch))
2093				|| (is_alpha  && ISALPHA (ch))
2094				|| (is_blank  && ISBLANK (ch))
2095				|| (is_cntrl  && ISCNTRL (ch)))
2096			      SET_LIST_BIT (translated);
2097			    if (   (is_digit  && ISDIGIT (ch))
2098				|| (is_graph  && ISGRAPH (ch))
2099				|| (is_lower  && ISLOWER (ch))
2100				|| (is_print  && ISPRINT (ch)))
2101			      SET_LIST_BIT (translated);
2102			    if (   (is_punct  && ISPUNCT (ch))
2103				|| (is_space  && ISSPACE (ch))
2104				|| (is_upper  && ISUPPER (ch))
2105				|| (is_xdigit && ISXDIGIT (ch)))
2106			      SET_LIST_BIT (translated);
2107			  }
2108			had_char_class = true;
2109		      }
2110		    else
2111		      {
2112			c1++;
2113			while (c1--)
2114			  PATUNFETCH;
2115			SET_LIST_BIT ('[');
2116			SET_LIST_BIT (':');
2117			had_char_class = false;
2118		      }
2119		  }
2120		else
2121		  {
2122		    had_char_class = false;
2123		    SET_LIST_BIT (c);
2124		  }
2125	      }
2126
2127	    /* Discard any (non)matching list bytes that are all 0 at the
2128	       end of the map.	Decrease the map-length byte too.  */
2129	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2130	      b[-1]--;
2131	    b += b[-1];
2132	  }
2133	  break;
2134
2135
2136	case '(':
2137	  if (syntax & RE_NO_BK_PARENS)
2138	    goto handle_open;
2139	  else
2140	    goto normal_char;
2141
2142
2143	case ')':
2144	  if (syntax & RE_NO_BK_PARENS)
2145	    goto handle_close;
2146	  else
2147	    goto normal_char;
2148
2149
2150	case '\n':
2151	  if (syntax & RE_NEWLINE_ALT)
2152	    goto handle_alt;
2153	  else
2154	    goto normal_char;
2155
2156
2157	case '|':
2158	  if (syntax & RE_NO_BK_VBAR)
2159	    goto handle_alt;
2160	  else
2161	    goto normal_char;
2162
2163
2164	case '{':
2165	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2166	     goto handle_interval;
2167	   else
2168	     goto normal_char;
2169
2170
2171	case '\\':
2172	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2173
2174	  /* Do not translate the character after the \, so that we can
2175	     distinguish, e.g., \B from \b, even if we normally would
2176	     translate, e.g., B to b.  */
2177	  PATFETCH_RAW (c);
2178
2179	  switch (c)
2180	    {
2181	    case '(':
2182	      if (syntax & RE_NO_BK_PARENS)
2183		goto normal_backslash;
2184
2185	    handle_open:
2186	      bufp->re_nsub++;
2187	      regnum++;
2188
2189	      if (COMPILE_STACK_FULL)
2190		{
2191		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
2192			    compile_stack_elt_t);
2193		  if (compile_stack.stack == NULL) return REG_ESPACE;
2194
2195		  compile_stack.size <<= 1;
2196		}
2197
2198	      /* These are the values to restore when we hit end of this
2199		 group.	 They are all relative offsets, so that if the
2200		 whole pattern moves because of realloc, they will still
2201		 be valid.  */
2202	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2203	      COMPILE_STACK_TOP.fixup_alt_jump
2204		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2205	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2206	      COMPILE_STACK_TOP.regnum = regnum;
2207
2208	      /* We will eventually replace the 0 with the number of
2209		 groups inner to this one.  But do not push a
2210		 start_memory for groups beyond the last one we can
2211		 represent in the compiled pattern.  */
2212	      if (regnum <= MAX_REGNUM)
2213		{
2214		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2215		  BUF_PUSH_3 (start_memory, regnum, 0);
2216		}
2217
2218	      compile_stack.avail++;
2219
2220	      fixup_alt_jump = 0;
2221	      laststart = 0;
2222	      begalt = b;
2223	      /* If we've reached MAX_REGNUM groups, then this open
2224		 won't actually generate any code, so we'll have to
2225		 clear pending_exact explicitly.  */
2226	      pending_exact = 0;
2227	      break;
2228
2229
2230	    case ')':
2231	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2232
2233	      if (COMPILE_STACK_EMPTY)
2234	      {
2235		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2236		  goto normal_backslash;
2237		else
2238		  FREE_STACK_RETURN (REG_ERPAREN);
2239	      }
2240
2241	    handle_close:
2242	      if (fixup_alt_jump)
2243		{ /* Push a dummy failure point at the end of the
2244		     alternative for a possible future
2245		     `pop_failure_jump' to pop.	 See comments at
2246		     `push_dummy_failure' in `re_match_2'.  */
2247		  BUF_PUSH (push_dummy_failure);
2248
2249		  /* We allocated space for this jump when we assigned
2250		     to `fixup_alt_jump', in the `handle_alt' case below.  */
2251		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2252		}
2253
2254	      /* See similar code for backslashed left paren above.  */
2255	      if (COMPILE_STACK_EMPTY)
2256	      {
2257		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2258		  goto normal_char;
2259		else
2260		  FREE_STACK_RETURN (REG_ERPAREN);
2261	      }
2262
2263	      /* Since we just checked for an empty stack above, this
2264		 ``can't happen''.  */
2265	      assert (compile_stack.avail != 0);
2266	      {
2267		/* We don't just want to restore into `regnum', because
2268		   later groups should continue to be numbered higher,
2269		   as in `(ab)c(de)' -- the second group is #2.	 */
2270		regnum_t this_group_regnum;
2271
2272		compile_stack.avail--;
2273		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2274		fixup_alt_jump
2275		  = COMPILE_STACK_TOP.fixup_alt_jump
2276		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2277		    : 0;
2278		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2279		this_group_regnum = COMPILE_STACK_TOP.regnum;
2280		/* If we've reached MAX_REGNUM groups, then this open
2281		   won't actually generate any code, so we'll have to
2282		   clear pending_exact explicitly.  */
2283		pending_exact = 0;
2284
2285		/* We're at the end of the group, so now we know how many
2286		   groups were inside this one.	 */
2287		if (this_group_regnum <= MAX_REGNUM)
2288		  {
2289		    unsigned char *inner_group_loc
2290		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2291
2292		    *inner_group_loc = regnum - this_group_regnum;
2293		    BUF_PUSH_3 (stop_memory, this_group_regnum,
2294				regnum - this_group_regnum);
2295		  }
2296	      }
2297	      break;
2298
2299
2300	    case '|':					/* `\|'.  */
2301	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2302		goto normal_backslash;
2303	    handle_alt:
2304	      if (syntax & RE_LIMITED_OPS)
2305		goto normal_char;
2306
2307	      /* Insert before the previous alternative a jump which
2308		 jumps to this alternative if the former fails.	 */
2309	      GET_BUFFER_SPACE (3);
2310	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
2311	      pending_exact = 0;
2312	      b += 3;
2313
2314	      /* The alternative before this one has a jump after it
2315		 which gets executed if it gets matched.  Adjust that
2316		 jump so it will jump to this alternative's analogous
2317		 jump (put in below, which in turn will jump to the next
2318		 (if any) alternative's such jump, etc.).  The last such
2319		 jump jumps to the correct final destination.  A picture:
2320			  _____ _____
2321			  |   | |   |
2322			  |   v |   v
2323			 a | b	 | c
2324
2325		 If we are at `b', then fixup_alt_jump right now points to a
2326		 three-byte space after `a'.  We'll put in the jump, set
2327		 fixup_alt_jump to right after `b', and leave behind three
2328		 bytes which we'll fill in when we get to after `c'.  */
2329
2330	      if (fixup_alt_jump)
2331		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2332
2333	      /* Mark and leave space for a jump after this alternative,
2334		 to be filled in later either by next alternative or
2335		 when know we're at the end of a series of alternatives.  */
2336	      fixup_alt_jump = b;
2337	      GET_BUFFER_SPACE (3);
2338	      b += 3;
2339
2340	      laststart = 0;
2341	      begalt = b;
2342	      break;
2343
2344
2345	    case '{':
2346	      /* If \{ is a literal.  */
2347	      if (!(syntax & RE_INTERVALS)
2348		     /* If we're at `\{' and it's not the open-interval
2349			operator.  */
2350		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2351		  || (p - 2 == pattern	&&  p == pend))
2352		goto normal_backslash;
2353
2354	    handle_interval:
2355	      {
2356		/* If got here, then the syntax allows intervals.  */
2357
2358		/* At least (most) this many matches must be made.  */
2359		int lower_bound = -1, upper_bound = -1;
2360
2361		beg_interval = p - 1;
2362
2363		if (p == pend)
2364		  {
2365		    if (syntax & RE_NO_BK_BRACES)
2366		      goto unfetch_interval;
2367		    else
2368		      FREE_STACK_RETURN (REG_EBRACE);
2369		  }
2370
2371		GET_UNSIGNED_NUMBER (lower_bound);
2372
2373		if (c == ',')
2374		  {
2375		    GET_UNSIGNED_NUMBER (upper_bound);
2376		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2377		  }
2378		else
2379		  /* Interval such as `{1}' => match exactly once. */
2380		  upper_bound = lower_bound;
2381
2382		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2383		    || lower_bound > upper_bound)
2384		  {
2385		    if (syntax & RE_NO_BK_BRACES)
2386		      goto unfetch_interval;
2387		    else
2388		      FREE_STACK_RETURN (REG_BADBR);
2389		  }
2390
2391		if (!(syntax & RE_NO_BK_BRACES))
2392		  {
2393		    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2394
2395		    PATFETCH (c);
2396		  }
2397
2398		if (c != '}')
2399		  {
2400		    if (syntax & RE_NO_BK_BRACES)
2401		      goto unfetch_interval;
2402		    else
2403		      FREE_STACK_RETURN (REG_BADBR);
2404		  }
2405
2406		/* We just parsed a valid interval.  */
2407
2408		/* If it's invalid to have no preceding re.  */
2409		if (!laststart)
2410		  {
2411		    if (syntax & RE_CONTEXT_INVALID_OPS)
2412		      FREE_STACK_RETURN (REG_BADRPT);
2413		    else if (syntax & RE_CONTEXT_INDEP_OPS)
2414		      laststart = b;
2415		    else
2416		      goto unfetch_interval;
2417		  }
2418
2419		/* If the upper bound is zero, don't want to succeed at
2420		   all; jump from `laststart' to `b + 3', which will be
2421		   the end of the buffer after we insert the jump.  */
2422		 if (upper_bound == 0)
2423		   {
2424		     GET_BUFFER_SPACE (3);
2425		     INSERT_JUMP (jump, laststart, b + 3);
2426		     b += 3;
2427		   }
2428
2429		 /* Otherwise, we have a nontrivial interval.  When
2430		    we're all done, the pattern will look like:
2431		      set_number_at <jump count> <upper bound>
2432		      set_number_at <succeed_n count> <lower bound>
2433		      succeed_n <after jump addr> <succeed_n count>
2434		      <body of loop>
2435		      jump_n <succeed_n addr> <jump count>
2436		    (The upper bound and `jump_n' are omitted if
2437		    `upper_bound' is 1, though.)  */
2438		 else
2439		   { /* If the upper bound is > 1, we need to insert
2440			more at the end of the loop.  */
2441		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
2442
2443		     GET_BUFFER_SPACE (nbytes);
2444
2445		     /* Initialize lower bound of the `succeed_n', even
2446			though it will be set during matching by its
2447			attendant `set_number_at' (inserted next),
2448			because `re_compile_fastmap' needs to know.
2449			Jump to the `jump_n' we might insert below.  */
2450		     INSERT_JUMP2 (succeed_n, laststart,
2451				   b + 5 + (upper_bound > 1) * 5,
2452				   lower_bound);
2453		     b += 5;
2454
2455		     /* Code to initialize the lower bound.  Insert
2456			before the `succeed_n'.	 The `5' is the last two
2457			bytes of this `set_number_at', plus 3 bytes of
2458			the following `succeed_n'.  */
2459		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2460		     b += 5;
2461
2462		     if (upper_bound > 1)
2463		       { /* More than one repetition is allowed, so
2464			    append a backward jump to the `succeed_n'
2465			    that starts this interval.
2466
2467			    When we've reached this during matching,
2468			    we'll have matched the interval once, so
2469			    jump back only `upper_bound - 1' times.  */
2470			 STORE_JUMP2 (jump_n, b, laststart + 5,
2471				      upper_bound - 1);
2472			 b += 5;
2473
2474			 /* The location we want to set is the second
2475			    parameter of the `jump_n'; that is `b-2' as
2476			    an absolute address.  `laststart' will be
2477			    the `set_number_at' we're about to insert;
2478			    `laststart+3' the number to set, the source
2479			    for the relative address.  But we are
2480			    inserting into the middle of the pattern --
2481			    so everything is getting moved up by 5.
2482			    Conclusion: (b - 2) - (laststart + 3) + 5,
2483			    i.e., b - laststart.
2484
2485			    We insert this at the beginning of the loop
2486			    so that if we fail during matching, we'll
2487			    reinitialize the bounds.  */
2488			 insert_op2 (set_number_at, laststart, b - laststart,
2489				     upper_bound - 1, b);
2490			 b += 5;
2491		       }
2492		   }
2493		pending_exact = 0;
2494		beg_interval = NULL;
2495	      }
2496	      break;
2497
2498	    unfetch_interval:
2499	      /* If an invalid interval, match the characters as literals.  */
2500	       assert (beg_interval);
2501	       p = beg_interval;
2502	       beg_interval = NULL;
2503
2504	       /* normal_char and normal_backslash need `c'.  */
2505	       PATFETCH (c);
2506
2507	       if (!(syntax & RE_NO_BK_BRACES))
2508		 {
2509		   if (p > pattern  &&	p[-1] == '\\')
2510		     goto normal_backslash;
2511		 }
2512	       goto normal_char;
2513
2514#ifdef emacs
2515	    /* There is no way to specify the before_dot and after_dot
2516	       operators.  rms says this is ok.	 --karl	 */
2517	    case '=':
2518	      BUF_PUSH (at_dot);
2519	      break;
2520
2521	    case 's':
2522	      laststart = b;
2523	      PATFETCH (c);
2524	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2525	      break;
2526
2527	    case 'S':
2528	      laststart = b;
2529	      PATFETCH (c);
2530	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2531	      break;
2532#endif /* emacs */
2533
2534
2535	    case 'w':
2536	      laststart = b;
2537	      BUF_PUSH (wordchar);
2538	      break;
2539
2540
2541	    case 'W':
2542	      laststart = b;
2543	      BUF_PUSH (notwordchar);
2544	      break;
2545
2546
2547	    case '<':
2548	      BUF_PUSH (wordbeg);
2549	      break;
2550
2551	    case '>':
2552	      BUF_PUSH (wordend);
2553	      break;
2554
2555	    case 'b':
2556	      BUF_PUSH (wordbound);
2557	      break;
2558
2559	    case 'B':
2560	      BUF_PUSH (notwordbound);
2561	      break;
2562
2563	    case '`':
2564	      BUF_PUSH (begbuf);
2565	      break;
2566
2567	    case '\'':
2568	      BUF_PUSH (endbuf);
2569	      break;
2570
2571	    case '1': case '2': case '3': case '4': case '5':
2572	    case '6': case '7': case '8': case '9':
2573	      if (syntax & RE_NO_BK_REFS)
2574		goto normal_char;
2575
2576	      c1 = c - '0';
2577
2578	      if (c1 > regnum)
2579		FREE_STACK_RETURN (REG_ESUBREG);
2580
2581	      /* Can't back reference to a subexpression if inside of it.  */
2582	      if (group_in_compile_stack (compile_stack, c1))
2583		goto normal_char;
2584
2585	      laststart = b;
2586	      BUF_PUSH_2 (duplicate, c1);
2587	      break;
2588
2589
2590	    case '+':
2591	    case '?':
2592	      if (syntax & RE_BK_PLUS_QM)
2593		goto handle_plus;
2594	      else
2595		goto normal_backslash;
2596
2597	    default:
2598	    normal_backslash:
2599	      /* You might think it would be useful for \ to mean
2600		 not to translate; but if we don't translate it
2601		 it will never match anything.	*/
2602	      c = TRANSLATE (c);
2603	      goto normal_char;
2604	    }
2605	  break;
2606
2607
2608	default:
2609	/* Expects the character in `c'.  */
2610	normal_char:
2611	      /* If no exactn currently being built.  */
2612	  if (!pending_exact
2613
2614	      /* If last exactn not at current position.  */
2615	      || pending_exact + *pending_exact + 1 != b
2616
2617	      /* We have only one byte following the exactn for the count.  */
2618	      || *pending_exact == (1 << BYTEWIDTH) - 1
2619
2620	      /* If followed by a repetition operator.	*/
2621	      || *p == '*' || *p == '^'
2622	      || ((syntax & RE_BK_PLUS_QM)
2623		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2624		  : (*p == '+' || *p == '?'))
2625	      || ((syntax & RE_INTERVALS)
2626		  && ((syntax & RE_NO_BK_BRACES)
2627		      ? *p == '{'
2628		      : (p[0] == '\\' && p[1] == '{'))))
2629	    {
2630	      /* Start building a new exactn.  */
2631
2632	      laststart = b;
2633
2634	      BUF_PUSH_2 (exactn, 0);
2635	      pending_exact = b - 1;
2636	    }
2637
2638	  BUF_PUSH (c);
2639	  (*pending_exact)++;
2640	  break;
2641	} /* switch (c) */
2642    } /* while p != pend */
2643
2644
2645  /* Through the pattern now.  */
2646
2647  if (fixup_alt_jump)
2648    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2649
2650  if (!COMPILE_STACK_EMPTY)
2651    FREE_STACK_RETURN (REG_EPAREN);
2652
2653  /* If we don't want backtracking, force success
2654     the first time we reach the end of the compiled pattern.  */
2655  if (syntax & RE_NO_POSIX_BACKTRACKING)
2656    BUF_PUSH (succeed);
2657
2658  free (compile_stack.stack);
2659
2660  /* We have succeeded; set the length of the buffer.  */
2661  bufp->used = b - bufp->buffer;
2662
2663#ifdef DEBUG
2664  if (debug)
2665    {
2666      DEBUG_PRINT1 ("\nCompiled pattern: \n");
2667      print_compiled_pattern (bufp);
2668    }
2669#endif /* DEBUG */
2670
2671#ifndef MATCH_MAY_ALLOCATE
2672  /* Initialize the failure stack to the largest possible stack.  This
2673     isn't necessary unless we're trying to avoid calling alloca in
2674     the search and match routines.  */
2675  {
2676    int num_regs = bufp->re_nsub + 1;
2677
2678    /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2679       is strictly greater than re_max_failures, the largest possible stack
2680       is 2 * re_max_failures failure points.  */
2681    if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2682      {
2683	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2684
2685#ifdef emacs
2686	if (! fail_stack.stack)
2687	  fail_stack.stack
2688	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2689					    * sizeof (fail_stack_elt_t));
2690	else
2691	  fail_stack.stack
2692	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2693					     (fail_stack.size
2694					      * sizeof (fail_stack_elt_t)));
2695#else /* not emacs */
2696	if (! fail_stack.stack)
2697	  fail_stack.stack
2698	    = (fail_stack_elt_t *) malloc (fail_stack.size
2699					   * sizeof (fail_stack_elt_t));
2700	else
2701	  fail_stack.stack
2702	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2703					    (fail_stack.size
2704					     * sizeof (fail_stack_elt_t)));
2705#endif /* not emacs */
2706      }
2707
2708    regex_grow_registers (num_regs);
2709  }
2710#endif /* not MATCH_MAY_ALLOCATE */
2711
2712  return REG_NOERROR;
2713} /* regex_compile */
2714
2715/* Subroutines for `regex_compile'.  */
2716
2717/* Store OP at LOC followed by two-byte integer parameter ARG.	*/
2718
2719static void
2720store_op1 (op, loc, arg)
2721    re_opcode_t op;
2722    unsigned char *loc;
2723    int arg;
2724{
2725  *loc = (unsigned char) op;
2726  STORE_NUMBER (loc + 1, arg);
2727}
2728
2729
2730/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2731
2732static void
2733store_op2 (op, loc, arg1, arg2)
2734    re_opcode_t op;
2735    unsigned char *loc;
2736    int arg1, arg2;
2737{
2738  *loc = (unsigned char) op;
2739  STORE_NUMBER (loc + 1, arg1);
2740  STORE_NUMBER (loc + 3, arg2);
2741}
2742
2743
2744/* Copy the bytes from LOC to END to open up three bytes of space at LOC
2745   for OP followed by two-byte integer parameter ARG.  */
2746
2747static void
2748insert_op1 (op, loc, arg, end)
2749    re_opcode_t op;
2750    unsigned char *loc;
2751    int arg;
2752    unsigned char *end;
2753{
2754  register unsigned char *pfrom = end;
2755  register unsigned char *pto = end + 3;
2756
2757  while (pfrom != loc)
2758    *--pto = *--pfrom;
2759
2760  store_op1 (op, loc, arg);
2761}
2762
2763
2764/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2765
2766static void
2767insert_op2 (op, loc, arg1, arg2, end)
2768    re_opcode_t op;
2769    unsigned char *loc;
2770    int arg1, arg2;
2771    unsigned char *end;
2772{
2773  register unsigned char *pfrom = end;
2774  register unsigned char *pto = end + 5;
2775
2776  while (pfrom != loc)
2777    *--pto = *--pfrom;
2778
2779  store_op2 (op, loc, arg1, arg2);
2780}
2781
2782
2783/* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2784   after an alternative or a begin-subexpression.  We assume there is at
2785   least one character before the ^.  */
2786
2787static boolean
2788at_begline_loc_p (pattern, p, syntax)
2789    const char *pattern, *p;
2790    reg_syntax_t syntax;
2791{
2792  const char *prev = p - 2;
2793  boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2794
2795  return
2796       /* After a subexpression?  */
2797       (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2798       /* After an alternative?	 */
2799    || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2800}
2801
2802
2803/* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2804   at least one character after the $, i.e., `P < PEND'.  */
2805
2806static boolean
2807at_endline_loc_p (p, pend, syntax)
2808    const char *p, *pend;
2809    int syntax;
2810{
2811  const char *next = p;
2812  boolean next_backslash = *next == '\\';
2813  const char *next_next = p + 1 < pend ? p + 1 : 0;
2814
2815  return
2816       /* Before a subexpression?  */
2817       (syntax & RE_NO_BK_PARENS ? *next == ')'
2818	: next_backslash && next_next && *next_next == ')')
2819       /* Before an alternative?  */
2820    || (syntax & RE_NO_BK_VBAR ? *next == '|'
2821	: next_backslash && next_next && *next_next == '|');
2822}
2823
2824
2825/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2826   false if it's not.  */
2827
2828static boolean
2829group_in_compile_stack (compile_stack, regnum)
2830    compile_stack_type compile_stack;
2831    regnum_t regnum;
2832{
2833  int this_element;
2834
2835  for (this_element = compile_stack.avail - 1;
2836       this_element >= 0;
2837       this_element--)
2838    if (compile_stack.stack[this_element].regnum == regnum)
2839      return true;
2840
2841  return false;
2842}
2843
2844
2845/* Read the ending character of a range (in a bracket expression) from the
2846   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2847   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2848   Then we set the translation of all bits between the starting and
2849   ending characters (inclusive) in the compiled pattern B.
2850
2851   Return an error code.
2852
2853   We use these short variable names so we can use the same macros as
2854   `regex_compile' itself.  */
2855
2856static reg_errcode_t
2857compile_range (p_ptr, pend, translate, syntax, b)
2858    const char **p_ptr, *pend;
2859    RE_TRANSLATE_TYPE translate;
2860    reg_syntax_t syntax;
2861    unsigned char *b;
2862{
2863  unsigned this_char;
2864
2865  const char *p = *p_ptr;
2866  int range_start, range_end;
2867
2868  if (p == pend)
2869    return REG_ERANGE;
2870
2871  /* Even though the pattern is a signed `char *', we need to fetch
2872     with unsigned char *'s; if the high bit of the pattern character
2873     is set, the range endpoints will be negative if we fetch using a
2874     signed char *.
2875
2876     We also want to fetch the endpoints without translating them; the
2877     appropriate translation is done in the bit-setting loop below.  */
2878  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
2879  range_start = ((const unsigned char *) p)[-2];
2880  range_end   = ((const unsigned char *) p)[0];
2881
2882  /* Have to increment the pointer into the pattern string, so the
2883     caller isn't still at the ending character.  */
2884  (*p_ptr)++;
2885
2886  /* If the start is after the end, the range is empty.	 */
2887  if (range_start > range_end)
2888    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2889
2890  /* Here we see why `this_char' has to be larger than an `unsigned
2891     char' -- the range is inclusive, so if `range_end' == 0xff
2892     (assuming 8-bit characters), we would otherwise go into an infinite
2893     loop, since all characters <= 0xff.  */
2894  for (this_char = range_start; this_char <= range_end; this_char++)
2895    {
2896      SET_LIST_BIT (TRANSLATE (this_char));
2897    }
2898
2899  return REG_NOERROR;
2900}
2901
2902/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2903   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2904   characters can start a string that matches the pattern.  This fastmap
2905   is used by re_search to skip quickly over impossible starting points.
2906
2907   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2908   area as BUFP->fastmap.
2909
2910   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2911   the pattern buffer.
2912
2913   Returns 0 if we succeed, -2 if an internal error.   */
2914
2915int
2916re_compile_fastmap (bufp)
2917     struct re_pattern_buffer *bufp;
2918{
2919  int j, k;
2920#ifdef MATCH_MAY_ALLOCATE
2921  fail_stack_type fail_stack;
2922#endif
2923#ifndef REGEX_MALLOC
2924  char *destination;
2925#endif
2926  /* We don't push any register information onto the failure stack.  */
2927  unsigned num_regs = 0;
2928
2929  register char *fastmap = bufp->fastmap;
2930  unsigned char *pattern = bufp->buffer;
2931  unsigned long size = bufp->used;
2932  unsigned char *p = pattern;
2933  register unsigned char *pend = pattern + size;
2934
2935  /* This holds the pointer to the failure stack, when
2936     it is allocated relocatably.  */
2937#ifdef REL_ALLOC
2938  fail_stack_elt_t *failure_stack_ptr;
2939#endif
2940
2941  /* Assume that each path through the pattern can be null until
2942     proven otherwise.	We set this false at the bottom of switch
2943     statement, to which we get only if a particular path doesn't
2944     match the empty string.  */
2945  boolean path_can_be_null = true;
2946
2947  /* We aren't doing a `succeed_n' to begin with.  */
2948  boolean succeed_n_p = false;
2949
2950  assert (fastmap != NULL && p != NULL);
2951
2952  INIT_FAIL_STACK ();
2953  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
2954  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2955  bufp->can_be_null = 0;
2956
2957  while (1)
2958    {
2959      if (p == pend || *p == succeed)
2960	{
2961	  /* We have reached the (effective) end of pattern.  */
2962	  if (!FAIL_STACK_EMPTY ())
2963	    {
2964	      bufp->can_be_null |= path_can_be_null;
2965
2966	      /* Reset for next path.  */
2967	      path_can_be_null = true;
2968
2969	      p = fail_stack.stack[--fail_stack.avail].pointer;
2970
2971	      continue;
2972	    }
2973	  else
2974	    break;
2975	}
2976
2977      /* We should never be about to go beyond the end of the pattern.	*/
2978      assert (p < pend);
2979
2980      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
2981	{
2982
2983	/* I guess the idea here is to simply not bother with a fastmap
2984	   if a backreference is used, since it's too hard to figure out
2985	   the fastmap for the corresponding group.  Setting
2986	   `can_be_null' stops `re_search_2' from using the fastmap, so
2987	   that is all we do.  */
2988	case duplicate:
2989	  bufp->can_be_null = 1;
2990	  goto done;
2991
2992
2993      /* Following are the cases which match a character.  These end
2994	 with `break'.	*/
2995
2996	case exactn:
2997	  fastmap[p[1]] = 1;
2998	  break;
2999
3000
3001	case charset:
3002	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3003	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3004	      fastmap[j] = 1;
3005	  break;
3006
3007
3008	case charset_not:
3009	  /* Chars beyond end of map must be allowed.  */
3010	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3011	    fastmap[j] = 1;
3012
3013	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3014	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3015	      fastmap[j] = 1;
3016	  break;
3017
3018
3019	case wordchar:
3020	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3021	    if (SYNTAX (j) == Sword)
3022	      fastmap[j] = 1;
3023	  break;
3024
3025
3026	case notwordchar:
3027	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3028	    if (SYNTAX (j) != Sword)
3029	      fastmap[j] = 1;
3030	  break;
3031
3032
3033	case anychar:
3034	  {
3035	    int fastmap_newline = fastmap['\n'];
3036
3037	    /* `.' matches anything ...	 */
3038	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3039	      fastmap[j] = 1;
3040
3041	    /* ... except perhaps newline.  */
3042	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3043	      fastmap['\n'] = fastmap_newline;
3044
3045	    /* Return if we have already set `can_be_null'; if we have,
3046	       then the fastmap is irrelevant.	Something's wrong here.	 */
3047	    else if (bufp->can_be_null)
3048	      goto done;
3049
3050	    /* Otherwise, have to check alternative paths.  */
3051	    break;
3052	  }
3053
3054#ifdef emacs
3055	case syntaxspec:
3056	  k = *p++;
3057	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3058	    if (SYNTAX (j) == (enum syntaxcode) k)
3059	      fastmap[j] = 1;
3060	  break;
3061
3062
3063	case notsyntaxspec:
3064	  k = *p++;
3065	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3066	    if (SYNTAX (j) != (enum syntaxcode) k)
3067	      fastmap[j] = 1;
3068	  break;
3069
3070
3071      /* All cases after this match the empty string.  These end with
3072	 `continue'.  */
3073
3074
3075	case before_dot:
3076	case at_dot:
3077	case after_dot:
3078	  continue;
3079#endif /* emacs */
3080
3081
3082	case no_op:
3083	case begline:
3084	case endline:
3085	case begbuf:
3086	case endbuf:
3087	case wordbound:
3088	case notwordbound:
3089	case wordbeg:
3090	case wordend:
3091	case push_dummy_failure:
3092	  continue;
3093
3094
3095	case jump_n:
3096	case pop_failure_jump:
3097	case maybe_pop_jump:
3098	case jump:
3099	case jump_past_alt:
3100	case dummy_failure_jump:
3101	  EXTRACT_NUMBER_AND_INCR (j, p);
3102	  p += j;
3103	  if (j > 0)
3104	    continue;
3105
3106	  /* Jump backward implies we just went through the body of a
3107	     loop and matched nothing.	Opcode jumped to should be
3108	     `on_failure_jump' or `succeed_n'.	Just treat it like an
3109	     ordinary jump.  For a * loop, it has pushed its failure
3110	     point already; if so, discard that as redundant.  */
3111	  if ((re_opcode_t) *p != on_failure_jump
3112	      && (re_opcode_t) *p != succeed_n)
3113	    continue;
3114
3115	  p++;
3116	  EXTRACT_NUMBER_AND_INCR (j, p);
3117	  p += j;
3118
3119	  /* If what's on the stack is where we are now, pop it.  */
3120	  if (!FAIL_STACK_EMPTY ()
3121	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3122	    fail_stack.avail--;
3123
3124	  continue;
3125
3126
3127	case on_failure_jump:
3128	case on_failure_keep_string_jump:
3129	handle_on_failure_jump:
3130	  EXTRACT_NUMBER_AND_INCR (j, p);
3131
3132	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3133	     end of the pattern.  We don't want to push such a point,
3134	     since when we restore it above, entering the switch will
3135	     increment `p' past the end of the pattern.	 We don't need
3136	     to push such a point since we obviously won't find any more
3137	     fastmap entries beyond `pend'.  Such a pattern can match
3138	     the null string, though.  */
3139	  if (p + j < pend)
3140	    {
3141	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
3142		{
3143		  RESET_FAIL_STACK ();
3144		  return -2;
3145		}
3146	    }
3147	  else
3148	    bufp->can_be_null = 1;
3149
3150	  if (succeed_n_p)
3151	    {
3152	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
3153	      succeed_n_p = false;
3154	    }
3155
3156	  continue;
3157
3158
3159	case succeed_n:
3160	  /* Get to the number of times to succeed.  */
3161	  p += 2;
3162
3163	  /* Increment p past the n for when k != 0.  */
3164	  EXTRACT_NUMBER_AND_INCR (k, p);
3165	  if (k == 0)
3166	    {
3167	      p -= 4;
3168	      succeed_n_p = true;  /* Spaghetti code alert.  */
3169	      goto handle_on_failure_jump;
3170	    }
3171	  continue;
3172
3173
3174	case set_number_at:
3175	  p += 4;
3176	  continue;
3177
3178
3179	case start_memory:
3180	case stop_memory:
3181	  p += 2;
3182	  continue;
3183
3184
3185	default:
3186	  abort (); /* We have listed all the cases.  */
3187	} /* switch *p++ */
3188
3189      /* Getting here means we have found the possible starting
3190	 characters for one path of the pattern -- and that the empty
3191	 string does not match.	 We need not follow this path further.
3192	 Instead, look at the next alternative (remembered on the
3193	 stack), or quit if no more.  The test at the top of the loop
3194	 does these things.  */
3195      path_can_be_null = false;
3196      p = pend;
3197    } /* while p */
3198
3199  /* Set `can_be_null' for the last path (also the first path, if the
3200     pattern is empty).	 */
3201  bufp->can_be_null |= path_can_be_null;
3202
3203 done:
3204  RESET_FAIL_STACK ();
3205  return 0;
3206} /* re_compile_fastmap */
3207
3208/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3209   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3210   this memory for recording register information.  STARTS and ENDS
3211   must be allocated using the malloc library routine, and must each
3212   be at least NUM_REGS * sizeof (regoff_t) bytes long.
3213
3214   If NUM_REGS == 0, then subsequent matches should allocate their own
3215   register data.
3216
3217   Unless this function is called, the first search or match using
3218   PATTERN_BUFFER will allocate its own register data, without
3219   freeing the old data.  */
3220
3221void
3222re_set_registers (bufp, regs, num_regs, starts, ends)
3223    struct re_pattern_buffer *bufp;
3224    struct re_registers *regs;
3225    unsigned num_regs;
3226    regoff_t *starts, *ends;
3227{
3228  if (num_regs)
3229    {
3230      bufp->regs_allocated = REGS_REALLOCATE;
3231      regs->num_regs = num_regs;
3232      regs->start = starts;
3233      regs->end = ends;
3234    }
3235  else
3236    {
3237      bufp->regs_allocated = REGS_UNALLOCATED;
3238      regs->num_regs = 0;
3239      regs->start = regs->end = (regoff_t *) 0;
3240    }
3241}
3242
3243/* Searching routines.	*/
3244
3245/* Like re_search_2, below, but only one string is specified, and
3246   doesn't let you say where to stop matching. */
3247
3248int
3249re_search (bufp, string, size, startpos, range, regs)
3250     struct re_pattern_buffer *bufp;
3251     const char *string;
3252     int size, startpos, range;
3253     struct re_registers *regs;
3254{
3255  return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3256		      regs, size);
3257}
3258
3259
3260/* Using the compiled pattern in BUFP->buffer, first tries to match the
3261   virtual concatenation of STRING1 and STRING2, starting first at index
3262   STARTPOS, then at STARTPOS + 1, and so on.
3263
3264   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3265
3266   RANGE is how far to scan while trying to match.  RANGE = 0 means try
3267   only at STARTPOS; in general, the last start tried is STARTPOS +
3268   RANGE.
3269
3270   In REGS, return the indices of the virtual concatenation of STRING1
3271   and STRING2 that matched the entire BUFP->buffer and its contained
3272   subexpressions.
3273
3274   Do not consider matching one past the index STOP in the virtual
3275   concatenation of STRING1 and STRING2.
3276
3277   We return either the position in the strings at which the match was
3278   found, -1 if no match, or -2 if error (such as failure
3279   stack overflow).  */
3280
3281int
3282re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3283     struct re_pattern_buffer *bufp;
3284     const char *string1, *string2;
3285     int size1, size2;
3286     int startpos;
3287     int range;
3288     struct re_registers *regs;
3289     int stop;
3290{
3291  int val;
3292  register char *fastmap = bufp->fastmap;
3293  register RE_TRANSLATE_TYPE translate = bufp->translate;
3294  int total_size = size1 + size2;
3295  int endpos = startpos + range;
3296  int anchored_start = 0;
3297
3298  /* Check for out-of-range STARTPOS.  */
3299  if (startpos < 0 || startpos > total_size)
3300    return -1;
3301
3302  /* Fix up RANGE if it might eventually take us outside
3303     the virtual concatenation of STRING1 and STRING2.
3304     Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3305  if (endpos < 0)
3306    range = 0 - startpos;
3307  else if (endpos > total_size)
3308    range = total_size - startpos;
3309
3310  /* If the search isn't to be a backwards one, don't waste time in a
3311     search for a pattern that must be anchored.  */
3312  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3313    {
3314      if (startpos > 0)
3315	return -1;
3316      else
3317	range = 1;
3318    }
3319
3320#ifdef emacs
3321  /* In a forward search for something that starts with \=.
3322     don't keep searching past point.  */
3323  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3324    {
3325      range = PT - startpos;
3326      if (range <= 0)
3327	return -1;
3328    }
3329#endif /* emacs */
3330
3331  /* Update the fastmap now if not correct already.  */
3332  if (fastmap && !bufp->fastmap_accurate)
3333    if (re_compile_fastmap (bufp) == -2)
3334      return -2;
3335
3336  /* See whether the pattern is anchored.  */
3337  if (bufp->buffer[0] == begline)
3338    anchored_start = 1;
3339
3340  /* Loop through the string, looking for a place to start matching.  */
3341  for (;;)
3342    {
3343      /* If the pattern is anchored,
3344	 skip quickly past places we cannot match.
3345	 We don't bother to treat startpos == 0 specially
3346	 because that case doesn't repeat.  */
3347      if (anchored_start && startpos > 0)
3348	{
3349	  if (! (bufp->newline_anchor
3350		 && ((startpos <= size1 ? string1[startpos - 1]
3351		      : string2[startpos - size1 - 1])
3352		     == '\n')))
3353	    goto advance;
3354	}
3355
3356      /* If a fastmap is supplied, skip quickly over characters that
3357	 cannot be the start of a match.  If the pattern can match the
3358	 null string, however, we don't need to skip characters; we want
3359	 the first null string.	 */
3360      if (fastmap && startpos < total_size && !bufp->can_be_null)
3361	{
3362	  if (range > 0)	/* Searching forwards.	*/
3363	    {
3364	      register const char *d;
3365	      register int lim = 0;
3366	      int irange = range;
3367
3368	      if (startpos < size1 && startpos + range >= size1)
3369		lim = range - (size1 - startpos);
3370
3371	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3372
3373	      /* Written out as an if-else to avoid testing `translate'
3374		 inside the loop.  */
3375	      if (translate)
3376		while (range > lim
3377		       && !fastmap[(unsigned char)
3378				   translate[(unsigned char) *d++]])
3379		  range--;
3380	      else
3381		while (range > lim && !fastmap[(unsigned char) *d++])
3382		  range--;
3383
3384	      startpos += irange - range;
3385	    }
3386	  else				/* Searching backwards.	 */
3387	    {
3388	      register char c = (size1 == 0 || startpos >= size1
3389				 ? string2[startpos - size1]
3390				 : string1[startpos]);
3391
3392	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3393		goto advance;
3394	    }
3395	}
3396
3397      /* If can't match the null string, and that's all we have left, fail.  */
3398      if (range >= 0 && startpos == total_size && fastmap
3399	  && !bufp->can_be_null)
3400	return -1;
3401
3402      val = re_match_2_internal (bufp, string1, size1, string2, size2,
3403				 startpos, regs, stop);
3404#ifndef REGEX_MALLOC
3405#ifdef C_ALLOCA
3406      alloca (0);
3407#endif
3408#endif
3409
3410      if (val >= 0)
3411	return startpos;
3412
3413      if (val == -2)
3414	return -2;
3415
3416    advance:
3417      if (!range)
3418	break;
3419      else if (range > 0)
3420	{
3421	  range--;
3422	  startpos++;
3423	}
3424      else
3425	{
3426	  range++;
3427	  startpos--;
3428	}
3429    }
3430  return -1;
3431} /* re_search_2 */
3432
3433/* Declarations and macros for re_match_2.  */
3434
3435static int bcmp_translate ();
3436static boolean alt_match_null_string_p (),
3437	       common_op_match_null_string_p (),
3438	       group_match_null_string_p ();
3439
3440/* This converts PTR, a pointer into one of the search strings `string1'
3441   and `string2' into an offset from the beginning of that string.  */
3442#define POINTER_TO_OFFSET(ptr)			\
3443  (FIRST_STRING_P (ptr)				\
3444   ? ((regoff_t) ((ptr) - string1))		\
3445   : ((regoff_t) ((ptr) - string2 + size1)))
3446
3447/* Macros for dealing with the split strings in re_match_2.  */
3448
3449#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3450
3451/* Call before fetching a character with *d.  This switches over to
3452   string2 if necessary.  */
3453#define PREFETCH()							\
3454  while (d == dend)							\
3455    {									\
3456      /* End of string2 => fail.  */					\
3457      if (dend == end_match_2)						\
3458	goto fail;							\
3459      /* End of string1 => advance to string2.	*/			\
3460      d = string2;							\
3461      dend = end_match_2;						\
3462    }
3463
3464
3465/* Test if at very beginning or at very end of the virtual concatenation
3466   of `string1' and `string2'.	If only one string, it's `string2'.  */
3467#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3468#define AT_STRINGS_END(d) ((d) == end2)
3469
3470
3471/* Test if D points to a character which is word-constituent.  We have
3472   two special cases to check for: if past the end of string1, look at
3473   the first character in string2; and if before the beginning of
3474   string2, look at the last character in string1.  */
3475#define WORDCHAR_P(d)							\
3476  (SYNTAX ((d) == end1 ? *string2					\
3477	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3478   == Sword)
3479
3480/* Disabled due to a compiler bug -- see comment at case wordbound */
3481#if 0
3482/* Test if the character before D and the one at D differ with respect
3483   to being word-constituent.  */
3484#define AT_WORD_BOUNDARY(d)						\
3485  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3486   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3487#endif
3488
3489/* Free everything we malloc.  */
3490#ifdef MATCH_MAY_ALLOCATE
3491#define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
3492#define FREE_VARIABLES()						\
3493  do {									\
3494    REGEX_FREE_STACK (fail_stack.stack);				\
3495    FREE_VAR (regstart);						\
3496    FREE_VAR (regend);							\
3497    FREE_VAR (old_regstart);						\
3498    FREE_VAR (old_regend);						\
3499    FREE_VAR (best_regstart);						\
3500    FREE_VAR (best_regend);						\
3501    FREE_VAR (reg_info);						\
3502    FREE_VAR (reg_dummy);						\
3503    FREE_VAR (reg_info_dummy);						\
3504  } while (0)
3505#else
3506#define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
3507#endif /* not MATCH_MAY_ALLOCATE */
3508
3509/* These values must meet several constraints.	They must not be valid
3510   register values; since we have a limit of 255 registers (because
3511   we use only one byte in the pattern for the register number), we can
3512   use numbers larger than 255.	 They must differ by 1, because of
3513   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3514   be larger than the value for the highest register, so we do not try
3515   to actually save any registers when none are active.	 */
3516#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3517#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3518
3519/* Matching routines.  */
3520
3521#ifndef emacs	/* Emacs never uses this.  */
3522/* re_match is like re_match_2 except it takes only a single string.  */
3523
3524int
3525re_match (bufp, string, size, pos, regs)
3526     struct re_pattern_buffer *bufp;
3527     const char *string;
3528     int size, pos;
3529     struct re_registers *regs;
3530{
3531  int result = re_match_2_internal (bufp, NULL, 0, string, size,
3532				    pos, regs, size);
3533  alloca (0);
3534  return result;
3535}
3536#endif /* not emacs */
3537
3538
3539/* re_match_2 matches the compiled pattern in BUFP against the
3540   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3541   and SIZE2, respectively).  We start matching at POS, and stop
3542   matching at STOP.
3543
3544   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3545   store offsets for the substring each group matched in REGS.	See the
3546   documentation for exactly how many groups we fill.
3547
3548   We return -1 if no match, -2 if an internal error (such as the
3549   failure stack overflowing).	Otherwise, we return the length of the
3550   matched substring.  */
3551
3552int
3553re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3554     struct re_pattern_buffer *bufp;
3555     const char *string1, *string2;
3556     int size1, size2;
3557     int pos;
3558     struct re_registers *regs;
3559     int stop;
3560{
3561  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3562				    pos, regs, stop);
3563  alloca (0);
3564  return result;
3565}
3566
3567/* This is a separate function so that we can force an alloca cleanup
3568   afterwards.	*/
3569static int
3570re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3571     struct re_pattern_buffer *bufp;
3572     const char *string1, *string2;
3573     int size1, size2;
3574     int pos;
3575     struct re_registers *regs;
3576     int stop;
3577{
3578  /* General temporaries.  */
3579  int mcnt;
3580  unsigned char *p1;
3581
3582  /* Just past the end of the corresponding string.  */
3583  const char *end1, *end2;
3584
3585  /* Pointers into string1 and string2, just past the last characters in
3586     each to consider matching.	 */
3587  const char *end_match_1, *end_match_2;
3588
3589  /* Where we are in the data, and the end of the current string.  */
3590  const char *d, *dend;
3591
3592  /* Where we are in the pattern, and the end of the pattern.  */
3593  unsigned char *p = bufp->buffer;
3594  register unsigned char *pend = p + bufp->used;
3595
3596  /* Mark the opcode just after a start_memory, so we can test for an
3597     empty subpattern when we get to the stop_memory.  */
3598  unsigned char *just_past_start_mem = 0;
3599
3600  /* We use this to map every character in the string.	*/
3601  RE_TRANSLATE_TYPE translate = bufp->translate;
3602
3603  /* Failure point stack.  Each place that can handle a failure further
3604     down the line pushes a failure point on this stack.  It consists of
3605     restart, regend, and reg_info for all registers corresponding to
3606     the subexpressions we're currently inside, plus the number of such
3607     registers, and, finally, two char *'s.  The first char * is where
3608     to resume scanning the pattern; the second one is where to resume
3609     scanning the strings.  If the latter is zero, the failure point is
3610     a ``dummy''; if a failure happens and the failure point is a dummy,
3611     it gets discarded and the next next one is tried.	*/
3612#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
3613  fail_stack_type fail_stack;
3614#endif
3615#ifdef DEBUG
3616  static unsigned failure_id = 0;
3617  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3618#endif
3619
3620  /* This holds the pointer to the failure stack, when
3621     it is allocated relocatably.  */
3622#ifdef REL_ALLOC
3623  fail_stack_elt_t *failure_stack_ptr;
3624#endif
3625
3626  /* We fill all the registers internally, independent of what we
3627     return, for use in backreferences.	 The number here includes
3628     an element for register zero.  */
3629  unsigned num_regs = bufp->re_nsub + 1;
3630
3631  /* The currently active registers.  */
3632  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3633  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3634
3635  /* Information on the contents of registers. These are pointers into
3636     the input strings; they record just what was matched (on this
3637     attempt) by a subexpression part of the pattern, that is, the
3638     regnum-th regstart pointer points to where in the pattern we began
3639     matching and the regnum-th regend points to right after where we
3640     stopped matching the regnum-th subexpression.  (The zeroth register
3641     keeps track of what the whole pattern matches.)  */
3642#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3643  const char **regstart, **regend;
3644#endif
3645
3646  /* If a group that's operated upon by a repetition operator fails to
3647     match anything, then the register for its start will need to be
3648     restored because it will have been set to wherever in the string we
3649     are when we last see its open-group operator.  Similarly for a
3650     register's end.  */
3651#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3652  const char **old_regstart, **old_regend;
3653#endif
3654
3655  /* The is_active field of reg_info helps us keep track of which (possibly
3656     nested) subexpressions we are currently in. The matched_something
3657     field of reg_info[reg_num] helps us tell whether or not we have
3658     matched any of the pattern so far this time through the reg_num-th
3659     subexpression.  These two fields get reset each time through any
3660     loop their register is in.	 */
3661#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
3662  register_info_type *reg_info;
3663#endif
3664
3665  /* The following record the register info as found in the above
3666     variables when we find a match better than any we've seen before.
3667     This happens as we backtrack through the failure points, which in
3668     turn happens only if we have not yet matched the entire string. */
3669  unsigned best_regs_set = false;
3670#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3671  const char **best_regstart, **best_regend;
3672#endif
3673
3674  /* Logically, this is `best_regend[0]'.  But we don't want to have to
3675     allocate space for that if we're not allocating space for anything
3676     else (see below).	Also, we never need info about register 0 for
3677     any of the other register vectors, and it seems rather a kludge to
3678     treat `best_regend' differently than the rest.  So we keep track of
3679     the end of the best match so far in a separate variable.  We
3680     initialize this to NULL so that when we backtrack the first time
3681     and need to test it, it's not garbage.  */
3682  const char *match_end = NULL;
3683
3684  /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3685  int set_regs_matched_done = 0;
3686
3687  /* Used when we pop values we don't care about.  */
3688#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3689  const char **reg_dummy;
3690  register_info_type *reg_info_dummy;
3691#endif
3692
3693#ifdef DEBUG
3694  /* Counts the total number of registers pushed.  */
3695  unsigned num_regs_pushed = 0;
3696#endif
3697
3698  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3699
3700  INIT_FAIL_STACK ();
3701
3702#ifdef MATCH_MAY_ALLOCATE
3703  /* Do not bother to initialize all the register variables if there are
3704     no groups in the pattern, as it takes a fair amount of time.  If
3705     there are groups, we include space for register 0 (the whole
3706     pattern), even though we never use it, since it simplifies the
3707     array indexing.  We should fix this.  */
3708  if (bufp->re_nsub)
3709    {
3710      regstart = REGEX_TALLOC (num_regs, const char *);
3711      regend = REGEX_TALLOC (num_regs, const char *);
3712      old_regstart = REGEX_TALLOC (num_regs, const char *);
3713      old_regend = REGEX_TALLOC (num_regs, const char *);
3714      best_regstart = REGEX_TALLOC (num_regs, const char *);
3715      best_regend = REGEX_TALLOC (num_regs, const char *);
3716      reg_info = REGEX_TALLOC (num_regs, register_info_type);
3717      reg_dummy = REGEX_TALLOC (num_regs, const char *);
3718      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3719
3720      if (!(regstart && regend && old_regstart && old_regend && reg_info
3721	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3722	{
3723	  FREE_VARIABLES ();
3724	  return -2;
3725	}
3726    }
3727  else
3728    {
3729      /* We must initialize all our variables to NULL, so that
3730	 `FREE_VARIABLES' doesn't try to free them.  */
3731      regstart = regend = old_regstart = old_regend = best_regstart
3732	= best_regend = reg_dummy = NULL;
3733      reg_info = reg_info_dummy = (register_info_type *) NULL;
3734    }
3735#endif /* MATCH_MAY_ALLOCATE */
3736
3737  /* The starting position is bogus.  */
3738  if (pos < 0 || pos > size1 + size2)
3739    {
3740      FREE_VARIABLES ();
3741      return -1;
3742    }
3743
3744  /* Initialize subexpression text positions to -1 to mark ones that no
3745     start_memory/stop_memory has been seen for. Also initialize the
3746     register information struct.  */
3747  for (mcnt = 1; mcnt < num_regs; mcnt++)
3748    {
3749      regstart[mcnt] = regend[mcnt]
3750	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3751
3752      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3753      IS_ACTIVE (reg_info[mcnt]) = 0;
3754      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3755      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3756    }
3757
3758  /* We move `string1' into `string2' if the latter's empty -- but not if
3759     `string1' is null.	 */
3760  if (size2 == 0 && string1 != NULL)
3761    {
3762      string2 = string1;
3763      size2 = size1;
3764      string1 = 0;
3765      size1 = 0;
3766    }
3767  end1 = string1 + size1;
3768  end2 = string2 + size2;
3769
3770  /* Compute where to stop matching, within the two strings.  */
3771  if (stop <= size1)
3772    {
3773      end_match_1 = string1 + stop;
3774      end_match_2 = string2;
3775    }
3776  else
3777    {
3778      end_match_1 = end1;
3779      end_match_2 = string2 + stop - size1;
3780    }
3781
3782  /* `p' scans through the pattern as `d' scans through the data.
3783     `dend' is the end of the input string that `d' points within.  `d'
3784     is advanced into the following input string whenever necessary, but
3785     this happens before fetching; therefore, at the beginning of the
3786     loop, `d' can be pointing at the end of a string, but it cannot
3787     equal `string2'.  */
3788  if (size1 > 0 && pos <= size1)
3789    {
3790      d = string1 + pos;
3791      dend = end_match_1;
3792    }
3793  else
3794    {
3795      d = string2 + pos - size1;
3796      dend = end_match_2;
3797    }
3798
3799  DEBUG_PRINT1 ("The compiled pattern is: ");
3800  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3801  DEBUG_PRINT1 ("The string to match is: `");
3802  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3803  DEBUG_PRINT1 ("'\n");
3804
3805  /* This loops over pattern commands.	It exits by returning from the
3806     function if the match is complete, or it drops through if the match
3807     fails at this starting point in the input data.  */
3808  for (;;)
3809    {
3810      DEBUG_PRINT2 ("\n0x%x: ", p);
3811
3812      if (p == pend)
3813	{ /* End of pattern means we might have succeeded.  */
3814	  DEBUG_PRINT1 ("end of pattern ... ");
3815
3816	  /* If we haven't matched the entire string, and we want the
3817	     longest match, try backtracking.  */
3818	  if (d != end_match_2)
3819	    {
3820	      /* 1 if this match ends in the same string (string1 or string2)
3821		 as the best previous match.  */
3822	      boolean same_str_p = (FIRST_STRING_P (match_end)
3823				    == MATCHING_IN_FIRST_STRING);
3824	      /* 1 if this match is the best seen so far.  */
3825	      boolean best_match_p;
3826
3827	      /* AIX compiler got confused when this was combined
3828		 with the previous declaration.	 */
3829	      if (same_str_p)
3830		best_match_p = d > match_end;
3831	      else
3832		best_match_p = !MATCHING_IN_FIRST_STRING;
3833
3834	      DEBUG_PRINT1 ("backtracking.\n");
3835
3836	      if (!FAIL_STACK_EMPTY ())
3837		{ /* More failure points to try.  */
3838
3839		  /* If exceeds best match so far, save it.  */
3840		  if (!best_regs_set || best_match_p)
3841		    {
3842		      best_regs_set = true;
3843		      match_end = d;
3844
3845		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3846
3847		      for (mcnt = 1; mcnt < num_regs; mcnt++)
3848			{
3849			  best_regstart[mcnt] = regstart[mcnt];
3850			  best_regend[mcnt] = regend[mcnt];
3851			}
3852		    }
3853		  goto fail;
3854		}
3855
3856	      /* If no failure points, don't restore garbage.  And if
3857		 last match is real best match, don't restore second
3858		 best one. */
3859	      else if (best_regs_set && !best_match_p)
3860		{
3861		restore_best_regs:
3862		  /* Restore best match.  It may happen that `dend ==
3863		     end_match_1' while the restored d is in string2.
3864		     For example, the pattern `x.*y.*z' against the
3865		     strings `x-' and `y-z-', if the two strings are
3866		     not consecutive in memory.	 */
3867		  DEBUG_PRINT1 ("Restoring best registers.\n");
3868
3869		  d = match_end;
3870		  dend = ((d >= string1 && d <= end1)
3871			   ? end_match_1 : end_match_2);
3872
3873		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3874		    {
3875		      regstart[mcnt] = best_regstart[mcnt];
3876		      regend[mcnt] = best_regend[mcnt];
3877		    }
3878		}
3879	    } /* d != end_match_2 */
3880
3881	succeed_label:
3882	  DEBUG_PRINT1 ("Accepting match.\n");
3883
3884	  /* If caller wants register contents data back, do it.  */
3885	  if (regs && !bufp->no_sub)
3886	    {
3887	      /* Have the register data arrays been allocated?	*/
3888	      if (bufp->regs_allocated == REGS_UNALLOCATED)
3889		{ /* No.  So allocate them with malloc.	 We need one
3890		     extra element beyond `num_regs' for the `-1' marker
3891		     GNU code uses.  */
3892		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3893		  regs->start = TALLOC (regs->num_regs, regoff_t);
3894		  regs->end = TALLOC (regs->num_regs, regoff_t);
3895		  if (regs->start == NULL || regs->end == NULL)
3896		    {
3897		      FREE_VARIABLES ();
3898		      return -2;
3899		    }
3900		  bufp->regs_allocated = REGS_REALLOCATE;
3901		}
3902	      else if (bufp->regs_allocated == REGS_REALLOCATE)
3903		{ /* Yes.  If we need more elements than were already
3904		     allocated, reallocate them.  If we need fewer, just
3905		     leave it alone.  */
3906		  if (regs->num_regs < num_regs + 1)
3907		    {
3908		      regs->num_regs = num_regs + 1;
3909		      RETALLOC (regs->start, regs->num_regs, regoff_t);
3910		      RETALLOC (regs->end, regs->num_regs, regoff_t);
3911		      if (regs->start == NULL || regs->end == NULL)
3912			{
3913			  FREE_VARIABLES ();
3914			  return -2;
3915			}
3916		    }
3917		}
3918	      else
3919		{
3920		  /* These braces fend off a "empty body in an else-statement"
3921		     warning under GCC when assert expands to nothing.	*/
3922		  assert (bufp->regs_allocated == REGS_FIXED);
3923		}
3924
3925	      /* Convert the pointer data in `regstart' and `regend' to
3926		 indices.  Register zero has to be set differently,
3927		 since we haven't kept track of any info for it.  */
3928	      if (regs->num_regs > 0)
3929		{
3930		  regs->start[0] = pos;
3931		  regs->end[0] = (MATCHING_IN_FIRST_STRING
3932				  ? ((regoff_t) (d - string1))
3933				  : ((regoff_t) (d - string2 + size1)));
3934		}
3935
3936	      /* Go through the first `min (num_regs, regs->num_regs)'
3937		 registers, since that is all we initialized.  */
3938	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3939		{
3940		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3941		    regs->start[mcnt] = regs->end[mcnt] = -1;
3942		  else
3943		    {
3944		      regs->start[mcnt]
3945			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
3946		      regs->end[mcnt]
3947			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
3948		    }
3949		}
3950
3951	      /* If the regs structure we return has more elements than
3952		 were in the pattern, set the extra elements to -1.  If
3953		 we (re)allocated the registers, this is the case,
3954		 because we always allocate enough to have at least one
3955		 -1 at the end.	 */
3956	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3957		regs->start[mcnt] = regs->end[mcnt] = -1;
3958	    } /* regs && !bufp->no_sub */
3959
3960	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3961			nfailure_points_pushed, nfailure_points_popped,
3962			nfailure_points_pushed - nfailure_points_popped);
3963	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3964
3965	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3966			    ? string1
3967			    : string2 - size1);
3968
3969	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3970
3971	  FREE_VARIABLES ();
3972	  return mcnt;
3973	}
3974
3975      /* Otherwise match next pattern command.	*/
3976      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3977	{
3978	/* Ignore these.  Used to ignore the n of succeed_n's which
3979	   currently have n == 0.  */
3980	case no_op:
3981	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
3982	  break;
3983
3984	case succeed:
3985	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
3986	  goto succeed_label;
3987
3988	/* Match the next n pattern characters exactly.	 The following
3989	   byte in the pattern defines n, and the n bytes after that
3990	   are the characters to match.	 */
3991	case exactn:
3992	  mcnt = *p++;
3993	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3994
3995	  /* This is written out as an if-else so we don't waste time
3996	     testing `translate' inside the loop.  */
3997	  if (translate)
3998	    {
3999	      do
4000		{
4001		  PREFETCH ();
4002		  if ((unsigned char) translate[(unsigned char) *d++]
4003		      != (unsigned char) *p++)
4004		    goto fail;
4005		}
4006	      while (--mcnt);
4007	    }
4008	  else
4009	    {
4010	      do
4011		{
4012		  PREFETCH ();
4013		  if (*d++ != (char) *p++) goto fail;
4014		}
4015	      while (--mcnt);
4016	    }
4017	  SET_REGS_MATCHED ();
4018	  break;
4019
4020
4021	/* Match any character except possibly a newline or a null.  */
4022	case anychar:
4023	  DEBUG_PRINT1 ("EXECUTING anychar.\n");
4024
4025	  PREFETCH ();
4026
4027	  if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4028	      || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4029	    goto fail;
4030
4031	  SET_REGS_MATCHED ();
4032	  DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4033	  d++;
4034	  break;
4035
4036
4037	case charset:
4038	case charset_not:
4039	  {
4040	    register unsigned char c;
4041	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4042
4043	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4044
4045	    PREFETCH ();
4046	    c = TRANSLATE (*d); /* The character to match.  */
4047
4048	    /* Cast to `unsigned' instead of `unsigned char' in case the
4049	       bit list is a full 32 bytes long.  */
4050	    if (c < (unsigned) (*p * BYTEWIDTH)
4051		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4052	      not = !not;
4053
4054	    p += 1 + *p;
4055
4056	    if (!not) goto fail;
4057
4058	    SET_REGS_MATCHED ();
4059	    d++;
4060	    break;
4061	  }
4062
4063
4064	/* The beginning of a group is represented by start_memory.
4065	   The arguments are the register number in the next byte, and the
4066	   number of groups inner to this one in the next.  The text
4067	   matched within the group is recorded (in the internal
4068	   registers data structure) under the register number.	 */
4069	case start_memory:
4070	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4071
4072	  /* Find out if this group can match the empty string.	 */
4073	  p1 = p;		/* To send to group_match_null_string_p.  */
4074
4075	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4076	    REG_MATCH_NULL_STRING_P (reg_info[*p])
4077	      = group_match_null_string_p (&p1, pend, reg_info);
4078
4079	  /* Save the position in the string where we were the last time
4080	     we were at this open-group operator in case the group is
4081	     operated upon by a repetition operator, e.g., with `(a*)*b'
4082	     against `ab'; then we want to ignore where we are now in
4083	     the string in case this attempt to match fails.  */
4084	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4085			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4086			     : regstart[*p];
4087	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4088			 POINTER_TO_OFFSET (old_regstart[*p]));
4089
4090	  regstart[*p] = d;
4091	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4092
4093	  IS_ACTIVE (reg_info[*p]) = 1;
4094	  MATCHED_SOMETHING (reg_info[*p]) = 0;
4095
4096	  /* Clear this whenever we change the register activity status.  */
4097	  set_regs_matched_done = 0;
4098
4099	  /* This is the new highest active register.  */
4100	  highest_active_reg = *p;
4101
4102	  /* If nothing was active before, this is the new lowest active
4103	     register.	*/
4104	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4105	    lowest_active_reg = *p;
4106
4107	  /* Move past the register number and inner group count.  */
4108	  p += 2;
4109	  just_past_start_mem = p;
4110
4111	  break;
4112
4113
4114	/* The stop_memory opcode represents the end of a group.  Its
4115	   arguments are the same as start_memory's: the register
4116	   number, and the number of inner groups.  */
4117	case stop_memory:
4118	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4119
4120	  /* We need to save the string position the last time we were at
4121	     this close-group operator in case the group is operated
4122	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4123	     against `aba'; then we want to ignore where we are now in
4124	     the string in case this attempt to match fails.  */
4125	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4126			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
4127			   : regend[*p];
4128	  DEBUG_PRINT2 ("      old_regend: %d\n",
4129			 POINTER_TO_OFFSET (old_regend[*p]));
4130
4131	  regend[*p] = d;
4132	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4133
4134	  /* This register isn't active anymore.  */
4135	  IS_ACTIVE (reg_info[*p]) = 0;
4136
4137	  /* Clear this whenever we change the register activity status.  */
4138	  set_regs_matched_done = 0;
4139
4140	  /* If this was the only register active, nothing is active
4141	     anymore.  */
4142	  if (lowest_active_reg == highest_active_reg)
4143	    {
4144	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4145	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4146	    }
4147	  else
4148	    { /* We must scan for the new highest active register, since
4149		 it isn't necessarily one less than now: consider
4150		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4151		 new highest active register is 1.  */
4152	      unsigned char r = *p - 1;
4153	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
4154		r--;
4155
4156	      /* If we end up at register zero, that means that we saved
4157		 the registers as the result of an `on_failure_jump', not
4158		 a `start_memory', and we jumped to past the innermost
4159		 `stop_memory'.	 For example, in ((.)*) we save
4160		 registers 1 and 2 as a result of the *, but when we pop
4161		 back to the second ), we are at the stop_memory 1.
4162		 Thus, nothing is active.  */
4163	      if (r == 0)
4164		{
4165		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4166		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4167		}
4168	      else
4169		highest_active_reg = r;
4170	    }
4171
4172	  /* If just failed to match something this time around with a
4173	     group that's operated on by a repetition operator, try to
4174	     force exit from the ``loop'', and restore the register
4175	     information for this group that we had before trying this
4176	     last match.  */
4177	  if ((!MATCHED_SOMETHING (reg_info[*p])
4178	       || just_past_start_mem == p - 1)
4179	      && (p + 2) < pend)
4180	    {
4181	      boolean is_a_jump_n = false;
4182
4183	      p1 = p + 2;
4184	      mcnt = 0;
4185	      switch ((re_opcode_t) *p1++)
4186		{
4187		  case jump_n:
4188		    is_a_jump_n = true;
4189		  case pop_failure_jump:
4190		  case maybe_pop_jump:
4191		  case jump:
4192		  case dummy_failure_jump:
4193		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4194		    if (is_a_jump_n)
4195		      p1 += 2;
4196		    break;
4197
4198		  default:
4199		    /* do nothing */ ;
4200		}
4201	      p1 += mcnt;
4202
4203	      /* If the next operation is a jump backwards in the pattern
4204		 to an on_failure_jump right before the start_memory
4205		 corresponding to this stop_memory, exit from the loop
4206		 by forcing a failure after pushing on the stack the
4207		 on_failure_jump's jump in the pattern, and d.	*/
4208	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4209		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4210		{
4211		  /* If this group ever matched anything, then restore
4212		     what its registers were before trying this last
4213		     failed match, e.g., with `(a*)*b' against `ab' for
4214		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
4215		     against `aba' for regend[3].
4216
4217		     Also restore the registers for inner groups for,
4218		     e.g., `((a*)(b*))*' against `aba' (register 3 would
4219		     otherwise get trashed).  */
4220
4221		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4222		    {
4223		      unsigned r;
4224
4225		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4226
4227		      /* Restore this and inner groups' (if any) registers.  */
4228		      for (r = *p; r < *p + *(p + 1); r++)
4229			{
4230			  regstart[r] = old_regstart[r];
4231
4232			  /* xx why this test?	*/
4233			  if (old_regend[r] >= regstart[r])
4234			    regend[r] = old_regend[r];
4235			}
4236		    }
4237		  p1++;
4238		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4239		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4240
4241		  goto fail;
4242		}
4243	    }
4244
4245	  /* Move past the register number and the inner group count.  */
4246	  p += 2;
4247	  break;
4248
4249
4250	/* \<digit> has been turned into a `duplicate' command which is
4251	   followed by the numeric value of <digit> as the register number.  */
4252	case duplicate:
4253	  {
4254	    register const char *d2, *dend2;
4255	    int regno = *p++;	/* Get which register to match against.	 */
4256	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4257
4258	    /* Can't back reference a group which we've never matched.	*/
4259	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4260	      goto fail;
4261
4262	    /* Where in input to try to start matching.	 */
4263	    d2 = regstart[regno];
4264
4265	    /* Where to stop matching; if both the place to start and
4266	       the place to stop matching are in the same string, then
4267	       set to the place to stop, otherwise, for now have to use
4268	       the end of the first string.  */
4269
4270	    dend2 = ((FIRST_STRING_P (regstart[regno])
4271		      == FIRST_STRING_P (regend[regno]))
4272		     ? regend[regno] : end_match_1);
4273	    for (;;)
4274	      {
4275		/* If necessary, advance to next segment in register
4276		   contents.  */
4277		while (d2 == dend2)
4278		  {
4279		    if (dend2 == end_match_2) break;
4280		    if (dend2 == regend[regno]) break;
4281
4282		    /* End of string1 => advance to string2. */
4283		    d2 = string2;
4284		    dend2 = regend[regno];
4285		  }
4286		/* At end of register contents => success */
4287		if (d2 == dend2) break;
4288
4289		/* If necessary, advance to next segment in data.  */
4290		PREFETCH ();
4291
4292		/* How many characters left in this segment to match.  */
4293		mcnt = dend - d;
4294
4295		/* Want how many consecutive characters we can match in
4296		   one shot, so, if necessary, adjust the count.  */
4297		if (mcnt > dend2 - d2)
4298		  mcnt = dend2 - d2;
4299
4300		/* Compare that many; failure if mismatch, else move
4301		   past them.  */
4302		if (translate
4303		    ? bcmp_translate (d, d2, mcnt, translate)
4304		    : bcmp (d, d2, mcnt))
4305		  goto fail;
4306		d += mcnt, d2 += mcnt;
4307
4308		/* Do this because we've match some characters.	 */
4309		SET_REGS_MATCHED ();
4310	      }
4311	  }
4312	  break;
4313
4314
4315	/* begline matches the empty string at the beginning of the string
4316	   (unless `not_bol' is set in `bufp'), and, if
4317	   `newline_anchor' is set, after newlines.  */
4318	case begline:
4319	  DEBUG_PRINT1 ("EXECUTING begline.\n");
4320
4321	  if (AT_STRINGS_BEG (d))
4322	    {
4323	      if (!bufp->not_bol) break;
4324	    }
4325	  else if (d[-1] == '\n' && bufp->newline_anchor)
4326	    {
4327	      break;
4328	    }
4329	  /* In all other cases, we fail.  */
4330	  goto fail;
4331
4332
4333	/* endline is the dual of begline.  */
4334	case endline:
4335	  DEBUG_PRINT1 ("EXECUTING endline.\n");
4336
4337	  if (AT_STRINGS_END (d))
4338	    {
4339	      if (!bufp->not_eol) break;
4340	    }
4341
4342	  /* We have to ``prefetch'' the next character.  */
4343	  else if ((d == end1 ? *string2 : *d) == '\n'
4344		   && bufp->newline_anchor)
4345	    {
4346	      break;
4347	    }
4348	  goto fail;
4349
4350
4351	/* Match at the very beginning of the data.  */
4352	case begbuf:
4353	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4354	  if (AT_STRINGS_BEG (d))
4355	    break;
4356	  goto fail;
4357
4358
4359	/* Match at the very end of the data.  */
4360	case endbuf:
4361	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4362	  if (AT_STRINGS_END (d))
4363	    break;
4364	  goto fail;
4365
4366
4367	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4368	   pushes NULL as the value for the string on the stack.  Then
4369	   `pop_failure_point' will keep the current value for the
4370	   string, instead of restoring it.  To see why, consider
4371	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
4372	   then the . fails against the \n.  But the next thing we want
4373	   to do is match the \n against the \n; if we restored the
4374	   string value, we would be back at the foo.
4375
4376	   Because this is used only in specific cases, we don't need to
4377	   check all the things that `on_failure_jump' does, to make
4378	   sure the right things get saved on the stack.  Hence we don't
4379	   share its code.  The only reason to push anything on the
4380	   stack at all is that otherwise we would have to change
4381	   `anychar's code to do something besides goto fail in this
4382	   case; that seems worse than this.  */
4383	case on_failure_keep_string_jump:
4384	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4385
4386	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4387	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4388
4389	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4390	  break;
4391
4392
4393	/* Uses of on_failure_jump:
4394
4395	   Each alternative starts with an on_failure_jump that points
4396	   to the beginning of the next alternative.  Each alternative
4397	   except the last ends with a jump that in effect jumps past
4398	   the rest of the alternatives.  (They really jump to the
4399	   ending jump of the following alternative, because tensioning
4400	   these jumps is a hassle.)
4401
4402	   Repeats start with an on_failure_jump that points past both
4403	   the repetition text and either the following jump or
4404	   pop_failure_jump back to this on_failure_jump.  */
4405	case on_failure_jump:
4406	on_failure:
4407	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4408
4409	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4410	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4411
4412	  /* If this on_failure_jump comes right before a group (i.e.,
4413	     the original * applied to a group), save the information
4414	     for that group and all inner ones, so that if we fail back
4415	     to this point, the group's information will be correct.
4416	     For example, in \(a*\)*\1, we need the preceding group,
4417	     and in \(zz\(a*\)b*\)\2, we need the inner group.	*/
4418
4419	  /* We can't use `p' to check ahead because we push
4420	     a failure point to `p + mcnt' after we do this.  */
4421	  p1 = p;
4422
4423	  /* We need to skip no_op's before we look for the
4424	     start_memory in case this on_failure_jump is happening as
4425	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4426	     against aba.  */
4427	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
4428	    p1++;
4429
4430	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4431	    {
4432	      /* We have a new highest active register now.  This will
4433		 get reset at the start_memory we are about to get to,
4434		 but we will have saved all the registers relevant to
4435		 this repetition op, as described above.  */
4436	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
4437	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4438		lowest_active_reg = *(p1 + 1);
4439	    }
4440
4441	  DEBUG_PRINT1 (":\n");
4442	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
4443	  break;
4444
4445
4446	/* A smart repeat ends with `maybe_pop_jump'.
4447	   We change it to either `pop_failure_jump' or `jump'.	 */
4448	case maybe_pop_jump:
4449	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4450	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4451	  {
4452	    register unsigned char *p2 = p;
4453
4454	    /* Compare the beginning of the repeat with what in the
4455	       pattern follows its end. If we can establish that there
4456	       is nothing that they would both match, i.e., that we
4457	       would have to backtrack because of (as in, e.g., `a*a')
4458	       then we can change to pop_failure_jump, because we'll
4459	       never have to backtrack.
4460
4461	       This is not true in the case of alternatives: in
4462	       `(a|ab)*' we do need to backtrack to the `ab' alternative
4463	       (e.g., if the string was `ab').	But instead of trying to
4464	       detect that here, the alternative has put on a dummy
4465	       failure point which is what we will end up popping.  */
4466
4467	    /* Skip over open/close-group commands.
4468	       If what follows this loop is a ...+ construct,
4469	       look at what begins its body, since we will have to
4470	       match at least one of that.  */
4471	    while (1)
4472	      {
4473		if (p2 + 2 < pend
4474		    && ((re_opcode_t) *p2 == stop_memory
4475			|| (re_opcode_t) *p2 == start_memory))
4476		  p2 += 3;
4477		else if (p2 + 6 < pend
4478			 && (re_opcode_t) *p2 == dummy_failure_jump)
4479		  p2 += 6;
4480		else
4481		  break;
4482	      }
4483
4484	    p1 = p + mcnt;
4485	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4486	       to the `maybe_finalize_jump' of this case.  Examine what
4487	       follows.	 */
4488
4489	    /* If we're at the end of the pattern, we can change.  */
4490	    if (p2 == pend)
4491	      {
4492		/* Consider what happens when matching ":\(.*\)"
4493		   against ":/".  I don't really understand this code
4494		   yet.	 */
4495		p[-3] = (unsigned char) pop_failure_jump;
4496		DEBUG_PRINT1
4497		  ("  End of pattern: change to `pop_failure_jump'.\n");
4498	      }
4499
4500	    else if ((re_opcode_t) *p2 == exactn
4501		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4502	      {
4503		register unsigned char c
4504		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4505
4506		if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4507		  {
4508		    p[-3] = (unsigned char) pop_failure_jump;
4509		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4510				  c, p1[5]);
4511		  }
4512
4513		else if ((re_opcode_t) p1[3] == charset
4514			 || (re_opcode_t) p1[3] == charset_not)
4515		  {
4516		    int not = (re_opcode_t) p1[3] == charset_not;
4517
4518		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4519			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4520		      not = !not;
4521
4522		    /* `not' is equal to 1 if c would match, which means
4523			that we can't change to pop_failure_jump.  */
4524		    if (!not)
4525		      {
4526			p[-3] = (unsigned char) pop_failure_jump;
4527			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
4528		      }
4529		  }
4530	      }
4531	    else if ((re_opcode_t) *p2 == charset)
4532	      {
4533#ifdef DEBUG
4534		register unsigned char c
4535		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
4536#endif
4537
4538		if ((re_opcode_t) p1[3] == exactn
4539		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4540			  && (p2[2 + p1[5] / BYTEWIDTH]
4541			      & (1 << (p1[5] % BYTEWIDTH)))))
4542		  {
4543		    p[-3] = (unsigned char) pop_failure_jump;
4544		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4545				  c, p1[5]);
4546		  }
4547
4548		else if ((re_opcode_t) p1[3] == charset_not)
4549		  {
4550		    int idx;
4551		    /* We win if the charset_not inside the loop
4552		       lists every character listed in the charset after.  */
4553		    for (idx = 0; idx < (int) p2[1]; idx++)
4554		      if (! (p2[2 + idx] == 0
4555			     || (idx < (int) p1[4]
4556				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4557			break;
4558
4559		    if (idx == p2[1])
4560		      {
4561			p[-3] = (unsigned char) pop_failure_jump;
4562			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
4563		      }
4564		  }
4565		else if ((re_opcode_t) p1[3] == charset)
4566		  {
4567		    int idx;
4568		    /* We win if the charset inside the loop
4569		       has no overlap with the one after the loop.  */
4570		    for (idx = 0;
4571			 idx < (int) p2[1] && idx < (int) p1[4];
4572			 idx++)
4573		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4574			break;
4575
4576		    if (idx == p2[1] || idx == p1[4])
4577		      {
4578			p[-3] = (unsigned char) pop_failure_jump;
4579			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
4580		      }
4581		  }
4582	      }
4583	  }
4584	  p -= 2;		/* Point at relative address again.  */
4585	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4586	    {
4587	      p[-1] = (unsigned char) jump;
4588	      DEBUG_PRINT1 ("  Match => jump.\n");
4589	      goto unconditional_jump;
4590	    }
4591	/* Note fall through.  */
4592
4593
4594	/* The end of a simple repeat has a pop_failure_jump back to
4595	   its matching on_failure_jump, where the latter will push a
4596	   failure point.  The pop_failure_jump takes off failure
4597	   points put on by this pop_failure_jump's matching
4598	   on_failure_jump; we got through the pattern to here from the
4599	   matching on_failure_jump, so didn't fail.  */
4600	case pop_failure_jump:
4601	  {
4602	    /* We need to pass separate storage for the lowest and
4603	       highest registers, even though we don't care about the
4604	       actual values.  Otherwise, we will restore only one
4605	       register from the stack, since lowest will == highest in
4606	       `pop_failure_point'.  */
4607	    unsigned dummy_low_reg, dummy_high_reg;
4608	    unsigned char *pdummy;
4609	    const char *sdummy;
4610
4611	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4612	    POP_FAILURE_POINT (sdummy, pdummy,
4613			       dummy_low_reg, dummy_high_reg,
4614			       reg_dummy, reg_dummy, reg_info_dummy);
4615	  }
4616	  /* Note fall through.	 */
4617
4618
4619	/* Unconditionally jump (without popping any failure points).  */
4620	case jump:
4621	unconditional_jump:
4622	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4623	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4624	  p += mcnt;				/* Do the jump.	 */
4625	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
4626	  break;
4627
4628
4629	/* We need this opcode so we can detect where alternatives end
4630	   in `group_match_null_string_p' et al.  */
4631	case jump_past_alt:
4632	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4633	  goto unconditional_jump;
4634
4635
4636	/* Normally, the on_failure_jump pushes a failure point, which
4637	   then gets popped at pop_failure_jump.  We will end up at
4638	   pop_failure_jump, also, and with a pattern of, say, `a+', we
4639	   are skipping over the on_failure_jump, so we have to push
4640	   something meaningless for pop_failure_jump to pop.  */
4641	case dummy_failure_jump:
4642	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4643	  /* It doesn't matter what we push for the string here.  What
4644	     the code at `fail' tests is the value for the pattern.  */
4645	  PUSH_FAILURE_POINT (0, 0, -2);
4646	  goto unconditional_jump;
4647
4648
4649	/* At the end of an alternative, we need to push a dummy failure
4650	   point in case we are followed by a `pop_failure_jump', because
4651	   we don't want the failure point for the alternative to be
4652	   popped.  For example, matching `(a|ab)*' against `aab'
4653	   requires that we match the `ab' alternative.	 */
4654	case push_dummy_failure:
4655	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4656	  /* See comments just above at `dummy_failure_jump' about the
4657	     two zeroes.  */
4658	  PUSH_FAILURE_POINT (0, 0, -2);
4659	  break;
4660
4661	/* Have to succeed matching what follows at least n times.
4662	   After that, handle like `on_failure_jump'.  */
4663	case succeed_n:
4664	  EXTRACT_NUMBER (mcnt, p + 2);
4665	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4666
4667	  assert (mcnt >= 0);
4668	  /* Originally, this is how many times we HAVE to succeed.  */
4669	  if (mcnt > 0)
4670	    {
4671	       mcnt--;
4672	       p += 2;
4673	       STORE_NUMBER_AND_INCR (p, mcnt);
4674	       DEBUG_PRINT3 ("	Setting 0x%x to %d.\n", p, mcnt);
4675	    }
4676	  else if (mcnt == 0)
4677	    {
4678	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4679	      p[2] = (unsigned char) no_op;
4680	      p[3] = (unsigned char) no_op;
4681	      goto on_failure;
4682	    }
4683	  break;
4684
4685	case jump_n:
4686	  EXTRACT_NUMBER (mcnt, p + 2);
4687	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4688
4689	  /* Originally, this is how many times we CAN jump.  */
4690	  if (mcnt)
4691	    {
4692	       mcnt--;
4693	       STORE_NUMBER (p + 2, mcnt);
4694	       goto unconditional_jump;
4695	    }
4696	  /* If don't have to jump any more, skip over the rest of command.  */
4697	  else
4698	    p += 4;
4699	  break;
4700
4701	case set_number_at:
4702	  {
4703	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4704
4705	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
4706	    p1 = p + mcnt;
4707	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
4708	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4709	    STORE_NUMBER (p1, mcnt);
4710	    break;
4711	  }
4712
4713#if 0
4714	/* The DEC Alpha C compiler 3.x generates incorrect code for the
4715	   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4716	   AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
4717	   macro and introducing temporary variables works around the bug.  */
4718
4719	case wordbound:
4720	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4721	  if (AT_WORD_BOUNDARY (d))
4722	    break;
4723	  goto fail;
4724
4725	case notwordbound:
4726	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4727	  if (AT_WORD_BOUNDARY (d))
4728	    goto fail;
4729	  break;
4730#else
4731	case wordbound:
4732	{
4733	  boolean prevchar, thischar;
4734
4735	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4736	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4737	    break;
4738
4739	  prevchar = WORDCHAR_P (d - 1);
4740	  thischar = WORDCHAR_P (d);
4741	  if (prevchar != thischar)
4742	    break;
4743	  goto fail;
4744	}
4745
4746      case notwordbound:
4747	{
4748	  boolean prevchar, thischar;
4749
4750	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4751	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4752	    goto fail;
4753
4754	  prevchar = WORDCHAR_P (d - 1);
4755	  thischar = WORDCHAR_P (d);
4756	  if (prevchar != thischar)
4757	    goto fail;
4758	  break;
4759	}
4760#endif
4761
4762	case wordbeg:
4763	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4764	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4765	    break;
4766	  goto fail;
4767
4768	case wordend:
4769	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
4770	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4771	      && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4772	    break;
4773	  goto fail;
4774
4775#ifdef emacs
4776	case before_dot:
4777	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4778	  if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
4779	    goto fail;
4780	  break;
4781
4782	case at_dot:
4783	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4784	  if (PTR_CHAR_POS ((unsigned char *) d) != PT)
4785	    goto fail;
4786	  break;
4787
4788	case after_dot:
4789	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4790	  if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
4791	    goto fail;
4792	  break;
4793
4794	case syntaxspec:
4795	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4796	  mcnt = *p++;
4797	  goto matchsyntax;
4798
4799	case wordchar:
4800	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4801	  mcnt = (int) Sword;
4802	matchsyntax:
4803	  PREFETCH ();
4804	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4805	  d++;
4806	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
4807	    goto fail;
4808	  SET_REGS_MATCHED ();
4809	  break;
4810
4811	case notsyntaxspec:
4812	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4813	  mcnt = *p++;
4814	  goto matchnotsyntax;
4815
4816	case notwordchar:
4817	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4818	  mcnt = (int) Sword;
4819	matchnotsyntax:
4820	  PREFETCH ();
4821	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4822	  d++;
4823	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
4824	    goto fail;
4825	  SET_REGS_MATCHED ();
4826	  break;
4827
4828#else /* not emacs */
4829	case wordchar:
4830	  DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4831	  PREFETCH ();
4832	  if (!WORDCHAR_P (d))
4833	    goto fail;
4834	  SET_REGS_MATCHED ();
4835	  d++;
4836	  break;
4837
4838	case notwordchar:
4839	  DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4840	  PREFETCH ();
4841	  if (WORDCHAR_P (d))
4842	    goto fail;
4843	  SET_REGS_MATCHED ();
4844	  d++;
4845	  break;
4846#endif /* not emacs */
4847
4848	default:
4849	  abort ();
4850	}
4851      continue;	 /* Successfully executed one pattern command; keep going.  */
4852
4853
4854    /* We goto here if a matching operation fails. */
4855    fail:
4856      if (!FAIL_STACK_EMPTY ())
4857	{ /* A restart point is known.	Restore to that state.	*/
4858	  DEBUG_PRINT1 ("\nFAIL:\n");
4859	  POP_FAILURE_POINT (d, p,
4860			     lowest_active_reg, highest_active_reg,
4861			     regstart, regend, reg_info);
4862
4863	  /* If this failure point is a dummy, try the next one.  */
4864	  if (!p)
4865	    goto fail;
4866
4867	  /* If we failed to the end of the pattern, don't examine *p.	*/
4868	  assert (p <= pend);
4869	  if (p < pend)
4870	    {
4871	      boolean is_a_jump_n = false;
4872
4873	      /* If failed to a backwards jump that's part of a repetition
4874		 loop, need to pop this failure point and use the next one.  */
4875	      switch ((re_opcode_t) *p)
4876		{
4877		case jump_n:
4878		  is_a_jump_n = true;
4879		case maybe_pop_jump:
4880		case pop_failure_jump:
4881		case jump:
4882		  p1 = p + 1;
4883		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4884		  p1 += mcnt;
4885
4886		  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4887		      || (!is_a_jump_n
4888			  && (re_opcode_t) *p1 == on_failure_jump))
4889		    goto fail;
4890		  break;
4891		default:
4892		  /* do nothing */ ;
4893		}
4894	    }
4895
4896	  if (d >= string1 && d <= end1)
4897	    dend = end_match_1;
4898	}
4899      else
4900	break;	 /* Matching at this starting point really fails.  */
4901    } /* for (;;) */
4902
4903  if (best_regs_set)
4904    goto restore_best_regs;
4905
4906  FREE_VARIABLES ();
4907
4908  return -1;				/* Failure to match.  */
4909} /* re_match_2 */
4910
4911/* Subroutine definitions for re_match_2.  */
4912
4913
4914/* We are passed P pointing to a register number after a start_memory.
4915
4916   Return true if the pattern up to the corresponding stop_memory can
4917   match the empty string, and false otherwise.
4918
4919   If we find the matching stop_memory, sets P to point to one past its number.
4920   Otherwise, sets P to an undefined byte less than or equal to END.
4921
4922   We don't handle duplicates properly (yet).  */
4923
4924static boolean
4925group_match_null_string_p (p, end, reg_info)
4926    unsigned char **p, *end;
4927    register_info_type *reg_info;
4928{
4929  int mcnt;
4930  /* Point to after the args to the start_memory.  */
4931  unsigned char *p1 = *p + 2;
4932
4933  while (p1 < end)
4934    {
4935      /* Skip over opcodes that can match nothing, and return true or
4936	 false, as appropriate, when we get to one that can't, or to the
4937	 matching stop_memory.	*/
4938
4939      switch ((re_opcode_t) *p1)
4940	{
4941	/* Could be either a loop or a series of alternatives.	*/
4942	case on_failure_jump:
4943	  p1++;
4944	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4945
4946	  /* If the next operation is not a jump backwards in the
4947	     pattern.  */
4948
4949	  if (mcnt >= 0)
4950	    {
4951	      /* Go through the on_failure_jumps of the alternatives,
4952		 seeing if any of the alternatives cannot match nothing.
4953		 The last alternative starts with only a jump,
4954		 whereas the rest start with on_failure_jump and end
4955		 with a jump, e.g., here is the pattern for `a|b|c':
4956
4957		 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4958		 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4959		 /exactn/1/c
4960
4961		 So, we have to first go through the first (n-1)
4962		 alternatives and then deal with the last one separately.  */
4963
4964
4965	      /* Deal with the first (n-1) alternatives, which start
4966		 with an on_failure_jump (see above) that jumps to right
4967		 past a jump_past_alt.	*/
4968
4969	      while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4970		{
4971		  /* `mcnt' holds how many bytes long the alternative
4972		     is, including the ending `jump_past_alt' and
4973		     its number.  */
4974
4975		  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4976						      reg_info))
4977		    return false;
4978
4979		  /* Move to right after this alternative, including the
4980		     jump_past_alt.  */
4981		  p1 += mcnt;
4982
4983		  /* Break if it's the beginning of an n-th alternative
4984		     that doesn't begin with an on_failure_jump.  */
4985		  if ((re_opcode_t) *p1 != on_failure_jump)
4986		    break;
4987
4988		  /* Still have to check that it's not an n-th
4989		     alternative that starts with an on_failure_jump.  */
4990		  p1++;
4991		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4992		  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4993		    {
4994		      /* Get to the beginning of the n-th alternative.	*/
4995		      p1 -= 3;
4996		      break;
4997		    }
4998		}
4999
5000	      /* Deal with the last alternative: go back and get number
5001		 of the `jump_past_alt' just before it.	 `mcnt' contains
5002		 the length of the alternative.	 */
5003	      EXTRACT_NUMBER (mcnt, p1 - 2);
5004
5005	      if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5006		return false;
5007
5008	      p1 += mcnt;	/* Get past the n-th alternative.  */
5009	    } /* if mcnt > 0 */
5010	  break;
5011
5012
5013	case stop_memory:
5014	  assert (p1[1] == **p);
5015	  *p = p1 + 2;
5016	  return true;
5017
5018
5019	default:
5020	  if (!common_op_match_null_string_p (&p1, end, reg_info))
5021	    return false;
5022	}
5023    } /* while p1 < end */
5024
5025  return false;
5026} /* group_match_null_string_p */
5027
5028
5029/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5030   It expects P to be the first byte of a single alternative and END one
5031   byte past the last. The alternative can contain groups.  */
5032
5033static boolean
5034alt_match_null_string_p (p, end, reg_info)
5035    unsigned char *p, *end;
5036    register_info_type *reg_info;
5037{
5038  int mcnt;
5039  unsigned char *p1 = p;
5040
5041  while (p1 < end)
5042    {
5043      /* Skip over opcodes that can match nothing, and break when we get
5044	 to one that can't.  */
5045
5046      switch ((re_opcode_t) *p1)
5047	{
5048	/* It's a loop.	 */
5049	case on_failure_jump:
5050	  p1++;
5051	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5052	  p1 += mcnt;
5053	  break;
5054
5055	default:
5056	  if (!common_op_match_null_string_p (&p1, end, reg_info))
5057	    return false;
5058	}
5059    }  /* while p1 < end */
5060
5061  return true;
5062} /* alt_match_null_string_p */
5063
5064
5065/* Deals with the ops common to group_match_null_string_p and
5066   alt_match_null_string_p.
5067
5068   Sets P to one after the op and its arguments, if any.  */
5069
5070static boolean
5071common_op_match_null_string_p (p, end, reg_info)
5072    unsigned char **p, *end;
5073    register_info_type *reg_info;
5074{
5075  int mcnt;
5076  boolean ret;
5077  int reg_no;
5078  unsigned char *p1 = *p;
5079
5080  switch ((re_opcode_t) *p1++)
5081    {
5082    case no_op:
5083    case begline:
5084    case endline:
5085    case begbuf:
5086    case endbuf:
5087    case wordbeg:
5088    case wordend:
5089    case wordbound:
5090    case notwordbound:
5091#ifdef emacs
5092    case before_dot:
5093    case at_dot:
5094    case after_dot:
5095#endif
5096      break;
5097
5098    case start_memory:
5099      reg_no = *p1;
5100      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5101      ret = group_match_null_string_p (&p1, end, reg_info);
5102
5103      /* Have to set this here in case we're checking a group which
5104	 contains a group and a back reference to it.  */
5105
5106      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5107	REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5108
5109      if (!ret)
5110	return false;
5111      break;
5112
5113    /* If this is an optimized succeed_n for zero times, make the jump.	 */
5114    case jump:
5115      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5116      if (mcnt >= 0)
5117	p1 += mcnt;
5118      else
5119	return false;
5120      break;
5121
5122    case succeed_n:
5123      /* Get to the number of times to succeed.	 */
5124      p1 += 2;
5125      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5126
5127      if (mcnt == 0)
5128	{
5129	  p1 -= 4;
5130	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5131	  p1 += mcnt;
5132	}
5133      else
5134	return false;
5135      break;
5136
5137    case duplicate:
5138      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5139	return false;
5140      break;
5141
5142    case set_number_at:
5143      p1 += 4;
5144
5145    default:
5146      /* All other opcodes mean we cannot match the empty string.  */
5147      return false;
5148  }
5149
5150  *p = p1;
5151  return true;
5152} /* common_op_match_null_string_p */
5153
5154
5155/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5156   bytes; nonzero otherwise.  */
5157
5158static int
5159bcmp_translate (s1, s2, len, translate)
5160     unsigned char *s1, *s2;
5161     register int len;
5162     RE_TRANSLATE_TYPE translate;
5163{
5164  register unsigned char *p1 = s1, *p2 = s2;
5165  while (len)
5166    {
5167      if (translate[*p1++] != translate[*p2++]) return 1;
5168      len--;
5169    }
5170  return 0;
5171}
5172
5173/* Entry points for GNU code.  */
5174
5175/* re_compile_pattern is the GNU regular expression compiler: it
5176   compiles PATTERN (of length SIZE) and puts the result in BUFP.
5177   Returns 0 if the pattern was valid, otherwise an error string.
5178
5179   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5180   are set in BUFP on entry.
5181
5182   We call regex_compile to do the actual compilation.	*/
5183
5184const char *
5185re_compile_pattern (pattern, length, bufp)
5186     const char *pattern;
5187     int length;
5188     struct re_pattern_buffer *bufp;
5189{
5190  reg_errcode_t ret;
5191
5192  /* GNU code is written to assume at least RE_NREGS registers will be set
5193     (and at least one extra will be -1).  */
5194  bufp->regs_allocated = REGS_UNALLOCATED;
5195
5196  /* And GNU code determines whether or not to get register information
5197     by passing null for the REGS argument to re_match, etc., not by
5198     setting no_sub.  */
5199  bufp->no_sub = 0;
5200
5201  /* Match anchors at newline.	*/
5202  bufp->newline_anchor = 1;
5203
5204  ret = regex_compile (pattern, length, re_syntax_options, bufp);
5205
5206  if (!ret)
5207    return NULL;
5208  return gettext (re_error_msgid[(int) ret]);
5209}
5210
5211/* Entry points compatible with 4.2 BSD regex library.	We don't define
5212   them unless specifically requested.	*/
5213
5214#if defined (_REGEX_RE_COMP) || defined (_LIBC)
5215
5216/* BSD has one and only one pattern buffer.  */
5217static struct re_pattern_buffer re_comp_buf;
5218
5219char *
5220#ifdef _LIBC
5221/* Make these definitions weak in libc, so POSIX programs can redefine
5222   these names if they don't use our functions, and still use
5223   regcomp/regexec below without link errors.  */
5224weak_function
5225#endif
5226re_comp (s)
5227    const char *s;
5228{
5229  reg_errcode_t ret;
5230
5231  if (!s)
5232    {
5233      if (!re_comp_buf.buffer)
5234	return gettext ("No previous regular expression");
5235      return 0;
5236    }
5237
5238  if (!re_comp_buf.buffer)
5239    {
5240      re_comp_buf.buffer = (unsigned char *) malloc (200);
5241      if (re_comp_buf.buffer == NULL)
5242	return gettext (re_error_msgid[(int) REG_ESPACE]);
5243      re_comp_buf.allocated = 200;
5244
5245      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5246      if (re_comp_buf.fastmap == NULL)
5247	return gettext (re_error_msgid[(int) REG_ESPACE]);
5248    }
5249
5250  /* Since `re_exec' always passes NULL for the `regs' argument, we
5251     don't need to initialize the pattern buffer fields which affect it.  */
5252
5253  /* Match anchors at newlines.	 */
5254  re_comp_buf.newline_anchor = 1;
5255
5256  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5257
5258  if (!ret)
5259    return NULL;
5260
5261  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5262  return (char *) gettext (re_error_msgid[(int) ret]);
5263}
5264
5265
5266int
5267#ifdef _LIBC
5268weak_function
5269#endif
5270re_exec (s)
5271    const char *s;
5272{
5273  const int len = strlen (s);
5274  return
5275    0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5276}
5277#endif /* _REGEX_RE_COMP */
5278
5279/* POSIX.2 functions.  Don't define these for Emacs.  */
5280
5281#ifndef emacs
5282
5283/* regcomp takes a regular expression as a string and compiles it.
5284
5285   PREG is a regex_t *.	 We do not expect any fields to be initialized,
5286   since POSIX says we shouldn't.  Thus, we set
5287
5288     `buffer' to the compiled pattern;
5289     `used' to the length of the compiled pattern;
5290     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5291       REG_EXTENDED bit in CFLAGS is set; otherwise, to
5292       RE_SYNTAX_POSIX_BASIC;
5293     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5294     `fastmap' and `fastmap_accurate' to zero;
5295     `re_nsub' to the number of subexpressions in PATTERN.
5296
5297   PATTERN is the address of the pattern string.
5298
5299   CFLAGS is a series of bits which affect compilation.
5300
5301     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5302     use POSIX basic syntax.
5303
5304     If REG_NEWLINE is set, then . and [^...] don't match newline.
5305     Also, regexec will try a match beginning after every newline.
5306
5307     If REG_ICASE is set, then we considers upper- and lowercase
5308     versions of letters to be equivalent when matching.
5309
5310     If REG_NOSUB is set, then when PREG is passed to regexec, that
5311     routine will report only success or failure, and nothing about the
5312     registers.
5313
5314   It returns 0 if it succeeds, nonzero if it doesn't.	(See regex.h for
5315   the return codes and their meanings.)  */
5316
5317int
5318regcomp (preg, pattern, cflags)
5319    regex_t *preg;
5320    const char *pattern;
5321    int cflags;
5322{
5323  reg_errcode_t ret;
5324  unsigned syntax
5325    = (cflags & REG_EXTENDED) ?
5326      RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5327
5328  /* regex_compile will allocate the space for the compiled pattern.  */
5329  preg->buffer = 0;
5330  preg->allocated = 0;
5331  preg->used = 0;
5332
5333  /* Don't bother to use a fastmap when searching.  This simplifies the
5334     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5335     characters after newlines into the fastmap.  This way, we just try
5336     every character.  */
5337  preg->fastmap = 0;
5338
5339  if (cflags & REG_ICASE)
5340    {
5341      unsigned i;
5342
5343      preg->translate
5344	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5345				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5346      if (preg->translate == NULL)
5347	return (int) REG_ESPACE;
5348
5349      /* Map uppercase characters to corresponding lowercase ones.  */
5350      for (i = 0; i < CHAR_SET_SIZE; i++)
5351	preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5352    }
5353  else
5354    preg->translate = NULL;
5355
5356  /* If REG_NEWLINE is set, newlines are treated differently.  */
5357  if (cflags & REG_NEWLINE)
5358    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5359      syntax &= ~RE_DOT_NEWLINE;
5360      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5361      /* It also changes the matching behavior.	 */
5362      preg->newline_anchor = 1;
5363    }
5364  else
5365    preg->newline_anchor = 0;
5366
5367  preg->no_sub = !!(cflags & REG_NOSUB);
5368
5369  /* POSIX says a null character in the pattern terminates it, so we
5370     can use strlen here in compiling the pattern.  */
5371  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5372
5373  /* POSIX doesn't distinguish between an unmatched open-group and an
5374     unmatched close-group: both are REG_EPAREN.  */
5375  if (ret == REG_ERPAREN) ret = REG_EPAREN;
5376
5377  return (int) ret;
5378}
5379
5380
5381/* regexec searches for a given pattern, specified by PREG, in the
5382   string STRING.
5383
5384   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5385   `regcomp', we ignore PMATCH.	 Otherwise, we assume PMATCH has at
5386   least NMATCH elements, and we set them to the offsets of the
5387   corresponding matched substrings.
5388
5389   EFLAGS specifies `execution flags' which affect matching: if
5390   REG_NOTBOL is set, then ^ does not match at the beginning of the
5391   string; if REG_NOTEOL is set, then $ does not match at the end.
5392
5393   We return 0 if we find a match and REG_NOMATCH if not.  */
5394
5395int
5396regexec (preg, string, nmatch, pmatch, eflags)
5397    const regex_t *preg;
5398    const char *string;
5399    size_t nmatch;
5400    regmatch_t pmatch[];
5401    int eflags;
5402{
5403  int ret;
5404  struct re_registers regs;
5405  regex_t private_preg;
5406  int len = strlen (string);
5407  boolean want_reg_info = !preg->no_sub && nmatch > 0;
5408
5409  private_preg = *preg;
5410
5411  private_preg.not_bol = !!(eflags & REG_NOTBOL);
5412  private_preg.not_eol = !!(eflags & REG_NOTEOL);
5413
5414  /* The user has told us exactly how many registers to return
5415     information about, via `nmatch'.  We have to pass that on to the
5416     matching routines.	 */
5417  private_preg.regs_allocated = REGS_FIXED;
5418
5419  if (want_reg_info)
5420    {
5421      regs.num_regs = nmatch;
5422      regs.start = TALLOC (nmatch, regoff_t);
5423      regs.end = TALLOC (nmatch, regoff_t);
5424      if (regs.start == NULL || regs.end == NULL)
5425	return (int) REG_NOMATCH;
5426    }
5427
5428  /* Perform the searching operation.  */
5429  ret = re_search (&private_preg, string, len,
5430		   /* start: */ 0, /* range: */ len,
5431		   want_reg_info ? &regs : (struct re_registers *) 0);
5432
5433  /* Copy the register information to the POSIX structure.  */
5434  if (want_reg_info)
5435    {
5436      if (ret >= 0)
5437	{
5438	  unsigned r;
5439
5440	  for (r = 0; r < nmatch; r++)
5441	    {
5442	      pmatch[r].rm_so = regs.start[r];
5443	      pmatch[r].rm_eo = regs.end[r];
5444	    }
5445	}
5446
5447      /* If we needed the temporary register info, free the space now.	*/
5448      free (regs.start);
5449      free (regs.end);
5450    }
5451
5452  /* We want zero return to mean success, unlike `re_search'.  */
5453  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5454}
5455
5456
5457/* Returns a message corresponding to an error code, ERRCODE, returned
5458   from either regcomp or regexec.   We don't use PREG here.  */
5459
5460size_t
5461regerror (errcode, preg, errbuf, errbuf_size)
5462    int errcode;
5463    const regex_t *preg;
5464    char *errbuf;
5465    size_t errbuf_size;
5466{
5467  const char *msg;
5468  size_t msg_size;
5469
5470  if (errcode < 0
5471      || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
5472    /* Only error codes returned by the rest of the code should be passed
5473       to this routine.	 If we are given anything else, or if other regex
5474       code generates an invalid error code, then the program has a bug.
5475       Dump core so we can fix it.  */
5476    abort ();
5477
5478  msg = gettext (re_error_msgid[errcode]);
5479
5480  msg_size = strlen (msg) + 1; /* Includes the null.  */
5481
5482  if (errbuf_size != 0)
5483    {
5484      if (msg_size > errbuf_size)
5485	{
5486	  strncpy (errbuf, msg, errbuf_size - 1);
5487	  errbuf[errbuf_size - 1] = 0;
5488	}
5489      else
5490	strcpy (errbuf, msg);
5491    }
5492
5493  return msg_size;
5494}
5495
5496
5497/* Free dynamically allocated space used by PREG.  */
5498
5499void
5500regfree (preg)
5501    regex_t *preg;
5502{
5503  if (preg->buffer != NULL)
5504    free (preg->buffer);
5505  preg->buffer = NULL;
5506
5507  preg->allocated = 0;
5508  preg->used = 0;
5509
5510  if (preg->fastmap != NULL)
5511    free (preg->fastmap);
5512  preg->fastmap = NULL;
5513  preg->fastmap_accurate = 0;
5514
5515  if (preg->translate != NULL)
5516    free (preg->translate);
5517  preg->translate = NULL;
5518}
5519
5520#endif /* not emacs  */
5521