165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************
265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*           PCRE DEMONSTRATION PROGRAM           *
365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/
465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is a demonstration program to illustrate the most straightforward ways
665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof calling the PCRE regular expression library from a C program. See the
765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcresample documentation for a short discussion ("man pcresample" if you have
865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe PCRE man pages installed).
965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIn Unix-like environments, if PCRE is installed in your standard system
1165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlibraries, you should be able to compile this program using this command:
1265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgcc -Wall pcredemo.c -lpcre -o pcredemo
1465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIf PCRE is not installed in a standard place, it is likely to be installed with
1665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsupport for the pkg-config mechanism. If you have pkg-config, you can compile
1765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis program using this command:
1865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
1965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
2065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
2165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIf you do not have pkg-config, you may have to use this:
2265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
2365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
2465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  -R/usr/local/lib -lpcre -o pcredemo
2565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
2665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReplace "/usr/local/include" and "/usr/local/lib" with wherever the include and
2765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlibrary files for PCRE are installed on your system. Only some operating
2865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsystems (e.g. Solaris) use the -R option.
2965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBuilding under Windows:
3165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIf you want to statically link this program against a non-dll .a file, you must
3365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdefine PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
3465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_free() exported functions will be declared __declspec(dllimport), with
3565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunwanted results. So in this environment, uncomment the following line. */
3665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* #define PCRE_STATIC */
3865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
3965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <stdio.h>
4065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <string.h>
4165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include <pcre.h>
4265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define OVECCOUNT 30    /* number of ints in ovector; should be a multiple of 3, since OVECCOUNT/3 offset pairs (whole match plus captures) can be reported */
4465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
4665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint main(int argc, char **argv)
4765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{
4865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre *re;
4965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst char *error;
5065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchar *pattern;
5165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchar *subject;
5265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned char *name_table;
5365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned int option_bits;
5465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint erroffset;
5565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint find_all;
5665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint crlf_is_newline;
5765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint namecount;
5865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint name_entry_size;
5965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint ovector[OVECCOUNT];
6065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint subject_length;
6165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint rc, i;
6265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint utf8;
6365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
6465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
6565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/**************************************************************************
6665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* First, sort out the command line. There is only one possible option at  *
6765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* the moment, "-g" to request repeated matching to find all occurrences,  *
6865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* like Perl's /g option. We set the variable find_all to a non-zero value *
6965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* if the -g option is present. Apart from that, there must be exactly two *
7065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* arguments.                                                              *
7165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich**************************************************************************/
7265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
7365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_all = 0;
7465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (i = 1; i < argc; i++)
7565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
7665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (strcmp(argv[i], "-g") == 0) find_all = 1;
7765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else break;
7865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
7965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* After the options, we require exactly two arguments, which are the pattern,
8165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand the subject string. */
8265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (argc - i != 2)
8465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
8565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  printf("Two arguments required: a regex and a subject string\n");
8665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return 1;
8765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
8865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
8965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpattern = argv[i];
9065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubject = argv[i+1];
9165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubject_length = (int)strlen(subject);
9265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
9365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
9465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************************************
9565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Now we are going to compile the regular expression pattern, and handle *
9665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* any errors that are detected.                                          *
9765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************************************/
9865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
9965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre = pcre_compile(
10065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pattern,              /* the pattern */
10165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                    /* default options */
10265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  &error,               /* for error message */
10365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  &erroffset,           /* for error offset */
10465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL);                /* use default character tables */
10565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
10665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Compilation failed: print the error message and exit */
10765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
10865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (re == NULL)
10965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
11065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
11165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return 1;
11265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
11365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
11465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
11565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************************************
11665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* If the compilation succeeded, we call PCRE again, in order to do a     *
11765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* pattern match against the subject string. This does just ONE match. If *
11865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* further matching is needed, it will be done below.                     *
11965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************************************/
12065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
12165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrc = pcre_exec(
12265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  re,                   /* the compiled pattern */
12365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* no extra data - we didn't study the pattern */
12465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  subject,              /* the subject string */
12565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  subject_length,       /* the length of the subject */
12665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                    /* start at offset 0 in the subject */
12765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  0,                    /* default options */
12865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  ovector,              /* output vector for substring information */
12965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  OVECCOUNT);           /* number of elements in the output vector */
13065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
13165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Matching failed: handle error cases */
13265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
13365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (rc < 0)
13465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
13565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  switch(rc)
13665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
13765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
13865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    /*
13965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    Handle other special cases if you like
14065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    */
14165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    default: printf("Matching error %d\n", rc); break;
14265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
14365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_free(re);     /* Release memory used for the compiled pattern */
14465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return 1;
14565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
14665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
14765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Match succeeded */
14865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
14965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprintf("\nMatch succeeded at offset %d\n", ovector[0]);
15065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
15165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
15265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************************************
15365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* We have found the first match within the subject string. If the output *
15465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* vector wasn't big enough, say so. Then output any substrings that were *
15565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* captured.                                                              *
15665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************************************/
15765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
15865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The output vector wasn't big enough */
15965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
16065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (rc == 0)
16165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
16265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  rc = OVECCOUNT/3;
16365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  printf("ovector only has room for %d captured substrings\n", rc - 1);
16465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
16565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
16665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Show substrings stored in the output vector by number. Obviously, in a real
16765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichapplication you might want to do things other than print them. */
16865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
16965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (i = 0; i < rc; i++)
17065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
17165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  char *substring_start = subject + ovector[2*i];
17265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int substring_length = ovector[2*i+1] - ovector[2*i];
17365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  printf("%2d: %.*s\n", i, substring_length, substring_start);
17465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
17565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
17665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
17765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/**************************************************************************
17865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* That concludes the basic part of this demonstration program. We have    *
17965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* compiled a pattern, and performed a single match. The code that follows *
18065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* shows first how to access named substrings, and then how to code for    *
18165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* repeated matches on the same subject.                                   *
18265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich**************************************************************************/
18365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
18465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* See if there are any named substrings, and if so, show them by name. First
18565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwe have to extract the count of named parentheses from the pattern. */
18665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
18765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(void)pcre_fullinfo(
18865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  re,                   /* the compiled pattern */
18965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  NULL,                 /* no extra data - we didn't study the pattern */
19065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
19165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  &namecount);          /* where to put the answer */
19265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
19365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (namecount <= 0) printf("No named substrings\n"); else
19465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
19565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  unsigned char *tabptr;
19665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  printf("Named substrings\n");
19765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
19865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Before we can access the substrings, we must extract the table for
19965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  translating names to numbers, and the size of each entry in the table. */
20065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
20165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (void)pcre_fullinfo(
20265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    re,                       /* the compiled pattern */
20365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    NULL,                     /* no extra data - we didn't study the pattern */
20465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    PCRE_INFO_NAMETABLE,      /* address of the table */
20565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    &name_table);             /* where to put the answer */
20665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
20765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (void)pcre_fullinfo(
20865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    re,                       /* the compiled pattern */
20965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    NULL,                     /* no extra data - we didn't study the pattern */
21065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
21165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    &name_entry_size);        /* where to put the answer */
21265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
21365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Now we can scan the table and, for each entry, print the number, the name,
21465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  and the substring itself. */
21565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
21665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  tabptr = name_table;
21765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (i = 0; i < namecount; i++)
21865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
21965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int n = (tabptr[0] << 8) | tabptr[1];
22065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
22165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
22265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    tabptr += name_entry_size;
22365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
22465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
22565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
22665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
22765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/*************************************************************************
22865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* If the "-g" option was given on the command line, we want to continue  *
22965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* to search for additional matches in the subject string, in a similar   *
23065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* way to the /g option in Perl. This turns out to be trickier than you   *
23165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* might think because of the possibility of matching an empty string.    *
23265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* What happens is as follows:                                            *
23365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*                                                                        *
23465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* If the previous match was NOT for an empty string, we can just start   *
23565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* the next match at the end of the previous one.                         *
23665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*                                                                        *
23765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* If the previous match WAS for an empty string, we can't do that, as it *
23865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* would lead to an infinite loop. Instead, a special call of pcre_exec() *
23965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
24065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* The first of these tells PCRE that an empty string at the start of the *
24165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* subject is not a valid match; other possibilities must be tried. The   *
24265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* second flag restricts PCRE to one match attempt at the initial string  *
24365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* position. If this match succeeds, an alternative to the empty string   *
24465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* match has been found, and we can print it and proceed round the loop,  *
24565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* advancing by the length of whatever was found. If this match does not  *
24665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* succeed, we still stay in the loop, advancing by just one character.   *
24765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
24865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* more than one byte.                                                    *
24965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*                                                                        *
25065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* However, there is a complication concerned with newlines. When the     *
25165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* newline convention is such that CRLF is a valid newline, we must       *
25265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* advance by two characters rather than one. The newline convention can  *
25365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* be set in the regex by (*CR), etc.; if not, we must find the default.  *
25465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************************************/
25565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
25665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (!find_all)     /* Check for -g */
25765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
25865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  pcre_free(re);   /* Release the memory used for the compiled pattern */
25965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  return 0;        /* Finish unless -g was given */
26065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
26165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
26265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
26365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsequence. First, find the options with which the regex was compiled; extract
26465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe UTF-8 state, and mask off all but the newline options. */
26565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
26665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
26765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichutf8 = option_bits & PCRE_UTF8;
26865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoption_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
26965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich               PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
27065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
27165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If no newline options were set, find the default newline convention from the
27265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbuild configuration. */
27365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
27465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (option_bits == 0)
27565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
27665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int d;
27765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
27865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Note that these values are always the ASCII ones, even in
27965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  EBCDIC environments. CR = 13, NL = 10. */
28065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  option_bits = (d == 13)? PCRE_NEWLINE_CR :
28165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (d == 10)? PCRE_NEWLINE_LF :
28265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
28365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (d == -2)? PCRE_NEWLINE_ANYCRLF :
28465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich          (d == -1)? PCRE_NEWLINE_ANY : 0;
28565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }
28665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
28765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* See if CRLF is a valid newline sequence. */
28865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
28965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcrlf_is_newline =
29065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     option_bits == PCRE_NEWLINE_ANY ||
29165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     option_bits == PCRE_NEWLINE_CRLF ||
29265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich     option_bits == PCRE_NEWLINE_ANYCRLF;
29365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
29465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Loop for second and subsequent matches */
29565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
29665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;)
29765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  {
29865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int options = 0;                 /* Normally no options */
29965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  int start_offset = ovector[1];   /* Start at end of previous match */
30065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
30165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* If the previous match was for an empty string, we are finished if we are
30265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  at the end of the subject. Otherwise, arrange to run another match at the
30365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  same point to see if a non-empty match can be found. */
30465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
30565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (ovector[0] == ovector[1])
30665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
30765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (ovector[0] == subject_length) break;
30865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
30965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
31065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
31165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Run the next matching operation */
31265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
31365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  rc = pcre_exec(
31465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    re,                   /* the compiled pattern */
31565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    NULL,                 /* no extra data - we didn't study the pattern */
31665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    subject,              /* the subject string */
31765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    subject_length,       /* the length of the subject */
31865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    start_offset,         /* starting offset in the subject */
31965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    options,              /* options */
32065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ovector,              /* output vector for substring information */
32165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    OVECCOUNT);           /* number of elements in the output vector */
32265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
32365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* This time, a result of NOMATCH isn't an error. If the value in "options"
32465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  is zero, it just means we have found all possible matches, so the loop ends.
32565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  Otherwise, it means we have failed to find a non-empty-string match at a
32665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  point where there was a previous empty-string match. In this case, we do what
32765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  Perl does: advance the matching position by one character, and continue. We
32865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  do this by setting the "end of previous match" offset, because that is picked
32965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  up at the top of the loop as the point at which to start again.
33065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
33165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  There are two complications: (a) When CRLF is a valid newline sequence, and
33265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  the current position is just before it, advance by an extra byte. (b)
33365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  Otherwise we must ensure that we skip an entire UTF-8 character if we are in
33465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  UTF-8 mode. */
33565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
33665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (rc == PCRE_ERROR_NOMATCH)
33765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
33865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (options == 0) break;                    /* All matches found */
33965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    ovector[1] = start_offset + 1;              /* Advance one byte */
34065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    if (crlf_is_newline &&                      /* If CRLF is newline & */
34165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        start_offset < subject_length - 1 &&    /* we are at CRLF, */
34265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        subject[start_offset] == '\r' &&
34365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        subject[start_offset + 1] == '\n')
34465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      ovector[1] += 1;                          /* Advance by one more. */
34565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    else if (utf8)                              /* Otherwise, ensure we */
34665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {                                         /* advance a whole UTF-8 */
34765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      while (ovector[1] < subject_length)       /* character. */
34865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        {
34965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        if ((subject[ovector[1]] & 0xc0) != 0x80) break;
35065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ovector[1] += 1;
35165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        }
35265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
35365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    continue;    /* Go round the loop again */
35465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
35565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
35665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Other matching errors are not recoverable. */
35765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
35865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (rc < 0)
35965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
36065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    printf("Matching error %d\n", rc);
36165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    pcre_free(re);    /* Release memory used for the compiled pattern */
36265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    return 1;
36365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
36465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
36565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* Match succeeded */
36665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
36765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  printf("\nMatch succeeded again at offset %d\n", ovector[0]);
36865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
36965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* The match succeeded, but the output vector wasn't big enough. */
37065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
37165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (rc == 0)
37265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
37365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    rc = OVECCOUNT/3;
37465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    printf("ovector only has room for %d captured substrings\n", rc - 1);
37565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
37665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
37765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  /* As before, show substrings stored in the output vector by number, and then
37865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  also any named substrings. */
37965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
38065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  for (i = 0; i < rc; i++)
38165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
38265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    char *substring_start = subject + ovector[2*i];
38365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    int substring_length = ovector[2*i+1] - ovector[2*i];
38465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    printf("%2d: %.*s\n", i, substring_length, substring_start);
38565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
38665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
38765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  if (namecount <= 0) printf("No named substrings\n"); else
38865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    {
38965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    unsigned char *tabptr = name_table;
39065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    printf("Named substrings\n");
39165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    for (i = 0; i < namecount; i++)
39265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      {
39365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      int n = (tabptr[0] << 8) | tabptr[1];
39465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
39565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
39665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      tabptr += name_entry_size;
39765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich      }
39865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich    }
39965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich  }      /* End of loop to find second and subsequent matches */
40065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
40165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprintf("\n");
40265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_free(re);       /* Release memory used for the compiled pattern */
40365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn 0;
40465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}
40565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich
40665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* End of pcredemo.c */
407