150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#ifndef SRC_REGEX_H_
250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#define SRC_REGEX_H_
350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#include <stdio.h>
550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#ifdef USE_PCRE2
750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#include <pcre2.h>
850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#else
950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#include <pcre.h>
1050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#endif
1150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
1250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#include "dso.h"
1350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
/*
 * Result codes of regex_match (see its @retval list further down).
 * The first three enumerators take the implicit values 0, 1 and 2;
 * REGEX_ERROR is explicitly -1.
 */
1450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisenum { REGEX_MATCH,
1550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis       REGEX_MATCH_PARTIAL,
1650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis       REGEX_NO_MATCH,
1750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis       REGEX_ERROR = -1,
1850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis};
1950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
/*
 * Forward declaration only; the layout is not visible in this header.
 * Instances are obtained from regex_data_create, regex_prepare_data or
 * regex_load_mmap and released with regex_data_free.
 */
2050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisstruct regex_data;
2150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
/*
 * Error details filled in by regex_prepare_data and turned into a human
 * readable message by regex_format_error. The layout depends on the
 * selected back-end (USE_PCRE2 defined: PCRE2, otherwise PCRE).
 * NOTE(review): error_offset is presumably the position inside the pattern
 * at which compilation failed -- confirm against the implementation.
 */
2250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#ifdef USE_PCRE2
2350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisstruct regex_error_data {
2450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis	int error_code;
2550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis	PCRE2_SIZE error_offset;
2650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis};
2750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#else
2850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisstruct regex_error_data {
2950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis	char const *error_buffer;
3050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis	int error_offset;
3150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis};
3250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#endif
3350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
/*
 * Forward declaration only; the layout is not visible in this header.
 * Used by regex_load_mmap to describe the memory region that holds a
 * serialized precompiled pattern.
 */
3450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisstruct mmap_area;
3550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis
3650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
373b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * regex_arch_string returns a string that represents the pointer width, the
383b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * width of what the backend considers a size type, and the endianness of the
393b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * system that this library was built for (e.g. for x86_64: "8-8-el").
403b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * This is required when loading stored regular expressions. PCRE2 regular
413b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * expressions are not portable across architectures that do not have a
423b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * matching arch-string.
433b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis */
443b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskischar const *regex_arch_string(void) hidden;
453b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis
463b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis/**
4750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * regex_version returns the version string of the underlying regular
4850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * expressions library. In the case of PCRE it just returns the
4950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * result of pcre_version(). In the case of PCRE2, the very first time this
5050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * function is called it allocates a buffer large enough to hold the version
5150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
5250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * The allocated buffer will linger in memory until the calling process is
5350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * reaped, so the caller must not free the returned string.
5450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
5550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * It may return NULL on error.
5650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
5750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskischar const *regex_version(void) hidden;
5850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
5950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This constructor function allocates a buffer for a regex_data structure.
6050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * The buffer is initialized with zeroes and must eventually be released
 * with regex_data_free.
 *
 * NOTE(review): the behavior on allocation failure is not documented here;
 * presumably NULL is returned -- confirm against the implementation.
6150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
6250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisstruct regex_data *regex_data_create(void) hidden;
6350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
6450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This complementary destructor function frees a given regex_data buffer.
6550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * It also frees any non-NULL member pointers with the appropriate pcreX_X_free
6650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * function. For PCRE this function respects the extra_owned field and frees
6750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * the pcre_extra data conditionally. Calling this function on a NULL pointer is
6850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * safe.
6950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
7050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisvoid regex_data_free(struct regex_data *regex) hidden;
7150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
7250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This function compiles the regular expression. Additionally, it prepares
7350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * data structures required by the different underlying engines. For PCRE
7450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * it calls pcre_study to generate optional data required for optimized
7550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * execution of the compiled pattern. In the case of PCRE2, it allocates
7650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * a pcre2_match_data structure of appropriate size to hold all possible
7750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * matches created by the pattern.
7850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
7950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg regex If successful, the structure returned through *regex was allocated
8050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *            with regex_data_create and must be freed with regex_data_free.
8150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg pattern_string The pattern string that is to be compiled.
8250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg errordata A pointer to a regex_error_data structure must be passed
8350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *                to this function. This structure depends on the underlying
8450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *                implementation. It can be passed to regex_format_error
8550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *                to generate a human readable error message.
 *                NOTE(review): presumably only meaningful when -1 is
 *                returned -- confirm against the implementation.
8650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval 0 on success
8750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval -1 on error
8850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
8950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisint regex_prepare_data(struct regex_data **regex, char const *pattern_string,
9050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis		       struct regex_error_data *errordata) hidden;
9150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
9250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This function loads a serialized precompiled pattern from a contiguous
9350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * data region given by map_area.
 * Stored PCRE2 patterns are only usable on a system with a matching
 * arch-string; see regex_arch_string.
9450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
9550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg map_area Description of the memory region holding a serialized
9650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *               representation of the precompiled pattern.
9750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg regex If successful, the structure returned through *regex was allocated
9850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *            with regex_data_create and must be freed with regex_data_free.
993b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis * @arg do_load_precompregex If non-zero, precompiled patterns get loaded from
1003b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis *			     the mmap region (ignored by PCRE1 back-end).
10150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
10250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval 0 on success
10350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval -1 on error
10450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
10550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisint regex_load_mmap(struct mmap_area *map_area,
1063b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis		    struct regex_data **regex,
1073b68c6f9e981e3665ae8f80e6ca16be59a6a91c8Janis Danisevskis		    int do_load_precompregex) hidden;
10850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
10950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This function stores a precompiled regular expression to a file.
11050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * In the case of PCRE, it just dumps the binary representation of the
11150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * precompiled pattern into a file. In the case of PCRE2, it uses the
11250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * serialization function provided by the library.
11350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
11450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg regex The precompiled regular expression data.
11550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg fp A file stream specifying the output file.
11650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg do_write_precompregex If non-zero, precompiled patterns are written to
11750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *			      the output file (ignored by PCRE1 back-end).
 *
 * NOTE(review): the return value is not documented here; presumably 0 on
 * success and -1 on error like regex_prepare_data and regex_load_mmap --
 * confirm against the implementation.
11850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
11950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisint regex_writef(struct regex_data *regex, FILE *fp,
12050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis		 int do_write_precompregex) hidden;
12150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
12250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This function applies a precompiled pattern to a subject string and
12350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * returns whether or not a match was found.
12450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
12550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg regex The precompiled pattern.
12650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg subject The subject string.
12750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg partial Boolean indicating if partial matches are wanted. A nonzero
12850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
12950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *              option to pcre_exec or pcre2_match.
13050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval REGEX_MATCH if a match was found
13150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval REGEX_MATCH_PARTIAL if a partial match was found
13250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval REGEX_NO_MATCH if no match was found
13350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval REGEX_ERROR if an error was encountered during the execution of the
13450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *                     regular expression
13550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
13650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisint regex_match(struct regex_data *regex, char const *subject,
13750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis		int partial) hidden;
13850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
13950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This function compares two compiled regular expressions (regex1 and regex2).
14050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * It compares the binary representations of the compiled patterns. It is a very
14150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * crude approximation because the binary representation holds data, such as
14250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * reference counters, that have nothing to do with the actual state machine.
14350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
 * The SELABEL_* constants below are not defined in this header.
 *
14450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval SELABEL_EQUAL if the patterns' binary representations are exactly
14550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *                       the same
14650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @retval SELABEL_INCOMPARABLE otherwise
14750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
14850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisint regex_cmp(struct regex_data *regex1, struct regex_data *regex2) hidden;
14950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis/**
15050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * This function takes the error data returned by regex_prepare_data and turns
15150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * it into a human readable error message.
15250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * If the buffer given to hold the error message is too small it truncates the
15350f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * message and indicates the truncation with an ellipsis ("...") at the end of
15450f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * the buffer.
15550f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis *
15650f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg error_data Error data as returned by regex_prepare_data.
15750f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg buffer String buffer to hold the formatted error string.
15850f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis * @arg buf_size Total size of the given buffer in bytes.
15950f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis */
16050f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskisvoid regex_format_error(struct regex_error_data const *error_data, char *buffer,
16150f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis			size_t buf_size) hidden;
16250f0910cf05bdc1d10710c7c3fb748a178473387Janis Danisevskis#endif /* SRC_REGEX_H_ */
163