32 # define regfree(preg) __regfree (preg) 33 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) 34 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) 35 # define regerror(errcode, preg, errbuf, errbuf_size) \ 36 __regerror(errcode, preg, errbuf, errbuf_size) 37 # define re_set_registers(bu, re, nu, st, en) \ 38 __re_set_registers (bu, re, nu, st, en) 39 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ 40 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 41 # define re_match(bufp, string, size, pos, regs) \ 42 __re_match (bufp, string, size, pos, regs) 43 # define re_search(bufp, string, size, startpos, range, regs) \ 44 __re_search (bufp, string, size, startpos, range, regs) 45 # define re_compile_pattern(pattern, length, bufp) \ 46 __re_compile_pattern (pattern, length, bufp) 47 # define re_set_syntax(syntax) __re_set_syntax (syntax) 48 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ 49 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) 50 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) 52 # include "../locale/localeinfo.h" 85 #ifndef _REGEX_INTERNAL_H 86 #define _REGEX_INTERNAL_H 1 94 #if defined(__MINGW32_VERSION) || defined(_MSC_VER) 95 #define strcasecmp stricmp 98 #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC 99 # include <langinfo.h> 101 #if defined HAVE_LOCALE_H || defined _LIBC 104 #if defined HAVE_WCHAR_H || defined _LIBC 107 #if defined HAVE_WCTYPE_H || defined _LIBC 110 #if defined HAVE_STDBOOL_H || defined _LIBC 111 # include <stdbool.h> 113 typedef enum {
false,
true } bool;
115 #if defined HAVE_STDINT_H || defined _LIBC 119 # include <bits/libc-lock.h> 121 # define __libc_lock_define(CLASS,NAME) 122 # define __libc_lock_init(NAME) do { } while (0) 123 # define __libc_lock_lock(NAME) do { } while (0) 124 # define __libc_lock_unlock(NAME) do { } while (0) 128 #if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank 129 # define isblank(ch) ((ch) == ' ' || (ch) == '\t') 133 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS 134 # define _RE_DEFINE_LOCALE_FUNCTIONS 1 135 # include <locale/localeinfo.h> 136 # include <locale/elem-hash.h> 137 # include <locale/coll-lookup.h> 142 #if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC 143 # include <libintl.h> 146 # define gettext(msgid) \ 147 INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) 150 # define gettext(msgid) (msgid) 156 # define gettext_noop(String) String 161 # define SIZE_MAX ((size_t) -1) 164 #if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC 165 # define RE_ENABLE_I18N 169 # define BE(expr, val) __builtin_expect (expr, val) 171 # define BE(expr, val) (expr) 178 #define COLL_ELEM_LEN_MAX 8 181 #define NEWLINE_CHAR '\n' 182 #define WIDE_NEWLINE_CHAR L'\n' 186 # define __wctype wctype 187 # define __iswctype iswctype 188 # define __btowc btowc 189 # define __mempcpy mempcpy 190 # define __wcrtomb wcrtomb 191 # define __regfree regfree 192 # define attribute_hidden 196 # define __attribute(arg) __attribute__ (arg) 198 # define __attribute(arg) 201 extern const char __re_error_msgid[] attribute_hidden;
202 extern const size_t __re_error_msgid_idx[] attribute_hidden;
206 typedef unsigned long int bitset_word_t;
208 #define BITSET_WORD_MAX ULONG_MAX 210 #define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) 212 #define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) 213 typedef bitset_word_t bitset_t[BITSET_WORDS];
214 typedef bitset_word_t *re_bitset_ptr_t;
215 typedef const bitset_word_t *re_const_bitset_ptr_t;
217 #define bitset_set(set,i) \ 218 (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) 219 #define bitset_clear(set,i) \ 220 (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) 221 #define bitset_contain(set,i) \ 222 (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) 223 #define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) 224 #define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) 225 #define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) 227 #define PREV_WORD_CONSTRAINT 0x0001 228 #define PREV_NOTWORD_CONSTRAINT 0x0002 229 #define NEXT_WORD_CONSTRAINT 0x0004 230 #define NEXT_NOTWORD_CONSTRAINT 0x0008 231 #define PREV_NEWLINE_CONSTRAINT 0x0010 232 #define NEXT_NEWLINE_CONSTRAINT 0x0020 233 #define PREV_BEGBUF_CONSTRAINT 0x0040 234 #define NEXT_ENDBUF_CONSTRAINT 0x0080 235 #define WORD_DELIM_CONSTRAINT 0x0100 236 #define NOT_WORD_DELIM_CONSTRAINT 0x0200 240 INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
241 WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
242 WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
243 INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
244 LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
245 LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
246 BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
247 BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
248 WORD_DELIM = WORD_DELIM_CONSTRAINT,
249 NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
269 #ifdef RE_ENABLE_I18N 276 #define EPSILON_BIT 8 277 OP_OPEN_SUBEXP = EPSILON_BIT | 0,
278 OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
279 OP_ALT = EPSILON_BIT | 2,
280 OP_DUP_ASTERISK = EPSILON_BIT | 3,
281 ANCHOR = EPSILON_BIT | 4,
299 OP_CLOSE_EQUIV_CLASS,
310 #ifdef RE_ENABLE_I18N 331 wchar_t *range_starts;
336 wctype_t *char_classes;
339 unsigned int non_match : 1;
363 re_bitset_ptr_t sbcset;
364 #ifdef RE_ENABLE_I18N 365 re_charset_t *mbcset;
368 re_context_type ctx_type;
371 re_token_type_t
type : 8;
373 re_token_type_t
type;
375 unsigned int constraint : 10;
376 unsigned int duplicated : 1;
377 unsigned int opt_subexp : 1;
378 #ifdef RE_ENABLE_I18N 379 unsigned int accept_mb : 1;
382 unsigned int mb_partial : 1;
384 unsigned int word_char : 1;
387 #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) 393 const unsigned char *raw_mbs;
398 #ifdef RE_ENABLE_I18N 429 unsigned int tip_context;
433 re_const_bitset_ptr_t word_char;
436 unsigned char is_utf8;
437 unsigned char map_notascii;
438 unsigned char mbs_allocated;
439 unsigned char offsets_needed;
440 unsigned char newline_anchor;
441 unsigned char word_ops_used;
444 typedef struct re_string_t re_string_t;
448 typedef struct re_dfa_t re_dfa_t;
452 # define internal_function __attribute ((regparm (3), stdcall)) 454 # define internal_function 458 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
461 #ifdef RE_ENABLE_I18N 462 static void build_wcs_buffer (re_string_t *pstr) internal_function;
463 static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
465 static void build_upper_buffer (re_string_t *pstr) internal_function;
466 static void re_string_translate_buffer (re_string_t *pstr) internal_function;
467 static unsigned int re_string_context_at (
const re_string_t *input,
int idx,
469 internal_function __attribute ((pure));
470 #define re_string_peek_byte(pstr, offset) \ 471 ((pstr)->mbs[(pstr)->cur_idx + offset]) 472 #define re_string_fetch_byte(pstr) \ 473 ((pstr)->mbs[(pstr)->cur_idx++]) 474 #define re_string_first_byte(pstr, idx) \ 475 ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) 476 #define re_string_is_single_byte_char(pstr, idx) \ 477 ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ 478 || (pstr)->wcs[(idx) + 1] != WEOF)) 479 #define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) 480 #define re_string_cur_idx(pstr) ((pstr)->cur_idx) 481 #define re_string_get_buffer(pstr) ((pstr)->mbs) 482 #define re_string_length(pstr) ((pstr)->len) 483 #define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) 484 #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) 485 #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) 488 # define alloca(size) __builtin_alloca (size) 489 # define HAVE_ALLOCA 1 490 #elif defined(_MSC_VER) 492 # define alloca _alloca 493 # define HAVE_ALLOCA 1 504 # define __libc_use_alloca(n) ((n) < 4032) 507 # define __libc_use_alloca(n) 0 511 #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) 512 #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) 513 #define re_free(p) free (p) 517 struct bin_tree_t *parent;
518 struct bin_tree_t *
left;
519 struct bin_tree_t *
right;
520 struct bin_tree_t *
first;
521 struct bin_tree_t *next;
529 typedef struct bin_tree_t bin_tree_t;
531 #define BIN_TREE_STORAGE_SIZE \ 532 ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) 534 struct bin_tree_storage_t
536 struct bin_tree_storage_t *next;
537 bin_tree_t
data[BIN_TREE_STORAGE_SIZE];
539 typedef struct bin_tree_storage_t bin_tree_storage_t;
541 #define CONTEXT_WORD 1 542 #define CONTEXT_NEWLINE (CONTEXT_WORD << 1) 543 #define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) 544 #define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) 546 #define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) 547 #define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) 548 #define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) 549 #define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) 550 #define IS_ORDINARY_CONTEXT(c) ((c) == 0) 552 #define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') 553 #define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) 554 #define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') 555 #define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) 557 #define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ 558 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ 559 || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ 560 || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ 561 || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) 563 #define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ 564 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ 565 || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ 566 || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ 567 || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) 573 re_node_set non_eps_nodes;
574 re_node_set inveclosure;
575 re_node_set *entrance_nodes;
576 struct re_dfastate_t **trtable, **word_trtable;
577 unsigned int context : 4;
578 unsigned int halt : 1;
582 unsigned int accept_mb : 1;
584 unsigned int has_backref : 1;
585 unsigned int has_constraint : 1;
587 typedef struct re_dfastate_t re_dfastate_t;
589 struct re_state_table_entry
593 re_dfastate_t **
array;
602 re_dfastate_t **
array;
612 } re_sub_match_last_t;
625 re_sub_match_last_t **lasts;
626 } re_sub_match_top_t;
628 struct re_backref_cache_entry
636 unsigned short int eps_reachable_subexps_map;
643 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) 644 const re_dfa_t *
const dfa;
654 re_dfastate_t **state_log;
659 struct re_backref_cache_entry *bkref_ents;
663 re_sub_match_top_t **sub_tops;
664 } re_match_context_t;
668 re_dfastate_t **sifted_states;
669 re_dfastate_t **limited_states;
675 struct re_fail_stack_ent_t
680 re_node_set eps_via_nodes;
683 struct re_fail_stack_t
687 struct re_fail_stack_ent_t *
stack;
698 re_node_set *eclosures;
699 re_node_set *inveclosures;
700 struct re_state_table_entry *state_table;
701 re_dfastate_t *init_state;
702 re_dfastate_t *init_state_word;
703 re_dfastate_t *init_state_nl;
704 re_dfastate_t *init_state_begbuf;
705 bin_tree_t *str_tree;
706 bin_tree_storage_t *str_tree_storage;
707 re_bitset_ptr_t sb_char;
708 int str_tree_storage_idx;
711 unsigned int state_hash_mask;
716 bitset_word_t used_bkref_map;
717 bitset_word_t completed_bkref_map;
719 unsigned int has_plural_match : 1;
723 unsigned int has_mb_node : 1;
724 unsigned int is_utf8 : 1;
725 unsigned int map_notascii : 1;
726 unsigned int word_ops_used : 1;
734 __libc_lock_define (, lock)
737 #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) 738 #define re_node_set_remove(set,id) \ 739 (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) 740 #define re_node_set_empty(p) ((p)->nelem = 0) 741 #define re_node_set_free(set) re_free ((set)->elems) 755 bracket_elem_type
type;
767 bitset_not (bitset_t
set)
770 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
771 set[bitset_i] = ~
set[bitset_i];
775 bitset_merge (bitset_t dest,
const bitset_t src)
778 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
779 dest[bitset_i] |= src[bitset_i];
783 bitset_mask (bitset_t dest,
const bitset_t src)
786 for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
787 dest[bitset_i] &= src[bitset_i];
790 #ifdef RE_ENABLE_I18N 793 internal_function __attribute ((pure))
794 re_string_char_size_at (
const re_string_t *pstr,
int idx)
797 if (pstr->mb_cur_max == 1)
799 for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
800 if (pstr->wcs[idx + byte_idx] != WEOF)
806 internal_function __attribute ((pure))
807 re_string_wchar_at (
const re_string_t *pstr,
int idx)
809 if (pstr->mb_cur_max == 1)
810 return (wint_t) pstr->mbs[idx];
811 return (wint_t) pstr->wcs[idx];
815 internal_function __attribute ((pure))
816 re_string_elem_size_at (
const re_string_t *pstr,
int idx)
819 const unsigned char *
p, *extra;
822 # include <locale/weight.h> 823 uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
827 table = (
const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
828 extra = (
const unsigned char *)
829 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
830 indirect = (
const int32_t *) _NL_CURRENT (LC_COLLATE,
831 _NL_COLLATE_INDIRECTMB);
834 return p - pstr->mbs - idx;
871 static void re_string_construct_common (
const char *
str,
int len,
874 const re_dfa_t *dfa) internal_function;
875 static re_dfastate_t *create_ci_newstate (
const re_dfa_t *dfa,
876 const re_node_set *
nodes,
877 unsigned int hash) internal_function;
878 static re_dfastate_t *create_cd_newstate (
const re_dfa_t *dfa,
879 const re_node_set *
nodes,
880 unsigned int context,
881 unsigned int hash) internal_function;
890 re_string_allocate (re_string_t *pstr,
const char *
str,
int len,
int init_len,
897 if (init_len < dfa->mb_cur_max)
898 init_len = dfa->mb_cur_max;
899 init_buf_len = (
len + 1 < init_len) ?
len + 1: init_len;
900 re_string_construct_common (
str,
len, pstr,
trans, icase, dfa);
902 ret = re_string_realloc_buffers (pstr, init_buf_len);
906 pstr->word_char = dfa->word_char;
907 pstr->word_ops_used = dfa->word_ops_used;
908 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (
unsigned char *)
str;
909 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 :
len;
910 pstr->valid_raw_len = pstr->valid_len;
918 re_string_construct (re_string_t *pstr,
const char *
str,
int len,
922 memset (pstr,
'\0',
sizeof (re_string_t));
923 re_string_construct_common (
str,
len, pstr,
trans, icase, dfa);
927 ret = re_string_realloc_buffers (pstr,
len + 1);
931 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (
unsigned char *)
str;
935 #ifdef RE_ENABLE_I18N 936 if (dfa->mb_cur_max > 1)
940 ret = build_wcs_upper_buffer (pstr);
943 if (pstr->valid_raw_len >=
len)
945 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
947 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
954 build_upper_buffer (pstr);
958 #ifdef RE_ENABLE_I18N 959 if (dfa->mb_cur_max > 1)
960 build_wcs_buffer (pstr);
965 re_string_translate_buffer (pstr);
968 pstr->valid_len = pstr->bufs_len;
969 pstr->valid_raw_len = pstr->bufs_len;
981 re_string_realloc_buffers (re_string_t *pstr,
int new_buf_len)
983 #ifdef RE_ENABLE_I18N 984 if (pstr->mb_cur_max > 1)
986 wint_t *new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
987 if (BE (new_wcs ==
NULL, 0))
990 if (pstr->offsets !=
NULL)
992 int *new_offsets = re_realloc (pstr->offsets,
int, new_buf_len);
993 if (BE (new_offsets ==
NULL, 0))
995 pstr->offsets = new_offsets;
999 if (pstr->mbs_allocated)
1001 unsigned char *new_mbs = re_realloc (pstr->mbs,
unsigned char,
1003 if (BE (new_mbs ==
NULL, 0))
1005 pstr->mbs = new_mbs;
1007 pstr->bufs_len = new_buf_len;
1014 re_string_construct_common (
const char *
str,
int len, re_string_t *pstr,
1016 const re_dfa_t *dfa)
1018 pstr->raw_mbs = (
const unsigned char *)
str;
1020 pstr->raw_len =
len;
1021 pstr->trans =
trans;
1022 pstr->icase = icase ? 1 : 0;
1023 pstr->mbs_allocated = (
trans !=
NULL || icase);
1024 pstr->mb_cur_max = dfa->mb_cur_max;
1025 pstr->is_utf8 = dfa->is_utf8;
1026 pstr->map_notascii = dfa->map_notascii;
1027 pstr->stop = pstr->len;
1028 pstr->raw_stop = pstr->stop;
1031 #ifdef RE_ENABLE_I18N 1046 build_wcs_buffer (re_string_t *pstr)
1049 unsigned char buf[MB_LEN_MAX];
1050 assert (MB_LEN_MAX >= pstr->mb_cur_max);
1052 unsigned char buf[64];
1055 int byte_idx, end_idx, remain_len;
1060 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
1061 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
1066 remain_len = end_idx - byte_idx;
1067 prev_st = pstr->cur_state;
1069 if (BE (pstr->trans !=
NULL, 0))
1073 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++
i)
1075 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx +
i];
1076 buf[
i] = pstr->mbs[byte_idx +
i] = pstr->trans[ch];
1078 p = (
const char *) buf;
1081 p = (
const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
1082 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
1083 if (BE (mbclen == (
size_t) -2, 0))
1086 pstr->cur_state = prev_st;
1089 else if (BE (mbclen == (
size_t) -1 || mbclen == 0, 0))
1093 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
1094 if (BE (pstr->trans !=
NULL, 0))
1095 wc = pstr->trans[wc];
1096 pstr->cur_state = prev_st;
1100 pstr->wcs[byte_idx++] = wc;
1102 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1103 pstr->wcs[byte_idx++] = WEOF;
1105 pstr->valid_len = byte_idx;
1106 pstr->valid_raw_len = byte_idx;
1114 build_wcs_upper_buffer (re_string_t *pstr)
1117 int src_idx, byte_idx, end_idx, remain_len;
1120 char buf[MB_LEN_MAX];
1121 assert (MB_LEN_MAX >= pstr->mb_cur_max);
1126 byte_idx = pstr->valid_len;
1127 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
1131 if (! pstr->map_notascii && pstr->trans ==
NULL && !pstr->offsets_needed)
1133 while (byte_idx < end_idx)
1137 if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
1138 && mbsinit (&pstr->cur_state))
1142 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
1145 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
1150 remain_len = end_idx - byte_idx;
1151 prev_st = pstr->cur_state;
1152 mbclen = mbrtowc (&wc,
1153 ((
const char *) pstr->raw_mbs + pstr->raw_mbs_idx
1154 + byte_idx), remain_len, &pstr->cur_state);
1155 if (BE (mbclen + 2 > 2, 1))
1162 wcu = towupper (wc);
1163 mbcdlen = wcrtomb (buf, wcu, &prev_st);
1164 if (BE (mbclen == mbcdlen, 1))
1165 memcpy (pstr->mbs + byte_idx, buf, mbclen);
1169 goto offsets_needed;
1173 memcpy (pstr->mbs + byte_idx,
1174 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
1175 pstr->wcs[byte_idx++] = wcu;
1177 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1178 pstr->wcs[byte_idx++] = WEOF;
1180 else if (mbclen == (
size_t) -1 || mbclen == 0)
1183 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
1184 pstr->mbs[byte_idx] = ch;
1186 pstr->wcs[byte_idx++] = (wchar_t) ch;
1187 if (BE (mbclen == (
size_t) -1, 0))
1188 pstr->cur_state = prev_st;
1193 pstr->cur_state = prev_st;
1197 pstr->valid_len = byte_idx;
1198 pstr->valid_raw_len = byte_idx;
1202 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
1207 remain_len = end_idx - byte_idx;
1208 prev_st = pstr->cur_state;
1209 if (BE (pstr->trans !=
NULL, 0))
1213 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++
i)
1215 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx +
i];
1216 buf[
i] = pstr->trans[ch];
1218 p = (
const char *) buf;
1221 p = (
const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
1222 mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
1223 if (BE (mbclen + 2 > 2, 1))
1230 wcu = towupper (wc);
1231 mbcdlen = wcrtomb ((
char *) buf, wcu, &prev_st);
1232 if (BE (mbclen == mbcdlen, 1))
1233 memcpy (pstr->mbs + byte_idx, buf, mbclen);
1234 else if (mbcdlen != (
size_t) -1)
1238 if (byte_idx + mbcdlen > pstr->bufs_len)
1240 pstr->cur_state = prev_st;
1244 if (pstr->offsets ==
NULL)
1246 pstr->offsets = re_malloc (
int, pstr->bufs_len);
1248 if (pstr->offsets ==
NULL)
1251 if (!pstr->offsets_needed)
1253 for (i = 0; i < (
size_t) byte_idx; ++
i)
1254 pstr->offsets[i] = i;
1255 pstr->offsets_needed = 1;
1258 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
1259 pstr->wcs[byte_idx] = wcu;
1260 pstr->offsets[byte_idx] = src_idx;
1261 for (i = 1; i < mbcdlen; ++
i)
1263 pstr->offsets[byte_idx +
i]
1264 = src_idx + (i < mbclen ?
i : mbclen - 1);
1265 pstr->wcs[byte_idx +
i] = WEOF;
1267 pstr->len += mbcdlen - mbclen;
1268 if (pstr->raw_stop > src_idx)
1269 pstr->stop += mbcdlen - mbclen;
1270 end_idx = (pstr->bufs_len > pstr->len)
1271 ? pstr->len : pstr->bufs_len;
1272 byte_idx += mbcdlen;
1277 memcpy (pstr->mbs + byte_idx, p, mbclen);
1280 memcpy (pstr->mbs + byte_idx, p, mbclen);
1282 if (BE (pstr->offsets_needed != 0, 0))
1285 for (i = 0; i < mbclen; ++
i)
1286 pstr->offsets[byte_idx + i] = src_idx + i;
1290 pstr->wcs[byte_idx++] = wcu;
1292 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
1293 pstr->wcs[byte_idx++] = WEOF;
1295 else if (mbclen == (
size_t) -1 || mbclen == 0)
1298 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
1300 if (BE (pstr->trans !=
NULL, 0))
1301 ch = pstr->trans [ch];
1302 pstr->mbs[byte_idx] = ch;
1304 if (BE (pstr->offsets_needed != 0, 0))
1305 pstr->offsets[byte_idx] = src_idx;
1309 pstr->wcs[byte_idx++] = (wchar_t) ch;
1310 if (BE (mbclen == (
size_t) -1, 0))
1311 pstr->cur_state = prev_st;
1316 pstr->cur_state = prev_st;
1320 pstr->valid_len = byte_idx;
1321 pstr->valid_raw_len = src_idx;
1330 re_string_skip_chars (re_string_t *pstr,
int new_raw_idx, wint_t *last_wc)
1338 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
1339 rawbuf_idx < new_raw_idx;)
1342 remain_len = pstr->len - rawbuf_idx;
1343 prev_st = pstr->cur_state;
1344 mbclen = mbrtowc (&wc, (
const char *) pstr->raw_mbs + rawbuf_idx,
1345 remain_len, &pstr->cur_state);
1346 if (BE (mbclen == (
size_t) -2 || mbclen == (
size_t) -1 || mbclen == 0, 0))
1349 if (mbclen == 0 || remain_len == 0)
1352 wc = *(
unsigned char *) (pstr->raw_mbs + rawbuf_idx);
1354 pstr->cur_state = prev_st;
1357 rawbuf_idx += mbclen;
1359 *last_wc = (wint_t) wc;
1369 build_upper_buffer (re_string_t *pstr)
1371 int char_idx, end_idx;
1372 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
1374 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
1376 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
1377 if (BE (pstr->trans !=
NULL, 0))
1378 ch = pstr->trans[ch];
1380 pstr->mbs[char_idx] = toupper (ch);
1382 pstr->mbs[char_idx] = ch;
1384 pstr->valid_len = char_idx;
1385 pstr->valid_raw_len = char_idx;
1392 re_string_translate_buffer (re_string_t *pstr)
1394 int buf_idx, end_idx;
1395 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
1397 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
1399 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
1400 pstr->mbs[buf_idx] = pstr->trans[ch];
1403 pstr->valid_len = buf_idx;
1404 pstr->valid_raw_len = buf_idx;
1413 re_string_reconstruct (re_string_t *pstr,
int idx,
int eflags)
1415 int offset = idx - pstr->raw_mbs_idx;
1416 if (BE (offset < 0, 0))
1419 #ifdef RE_ENABLE_I18N 1420 if (pstr->mb_cur_max > 1)
1421 memset (&pstr->cur_state,
'\0', sizeof (mbstate_t));
1423 pstr->len = pstr->raw_len;
1424 pstr->stop = pstr->raw_stop;
1425 pstr->valid_len = 0;
1426 pstr->raw_mbs_idx = 0;
1427 pstr->valid_raw_len = 0;
1428 pstr->offsets_needed = 0;
1429 pstr->tip_context = ((eflags &
REG_NOTBOL) ? CONTEXT_BEGBUF
1430 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
1431 if (!pstr->mbs_allocated)
1432 pstr->mbs = (
unsigned char *) pstr->raw_mbs;
1436 if (BE (offset != 0, 1))
1439 if (BE (offset < pstr->valid_raw_len, 1))
1442 #ifdef RE_ENABLE_I18N 1443 if (BE (pstr->offsets_needed, 0))
1445 int low = 0, high = pstr->valid_len, mid;
1448 mid = (high + low) / 2;
1449 if (pstr->offsets[mid] > offset)
1451 else if (pstr->offsets[mid] < offset)
1457 if (pstr->offsets[mid] < offset)
1459 pstr->tip_context = re_string_context_at (pstr, mid - 1,
1465 if (pstr->valid_len > offset
1466 && mid == offset && pstr->offsets[mid] == offset)
1468 memmove (pstr->wcs, pstr->wcs + offset,
1469 (pstr->valid_len - offset) * sizeof (wint_t));
1470 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
1471 pstr->valid_len -=
offset;
1472 pstr->valid_raw_len -=
offset;
1473 for (low = 0; low < pstr->valid_len; low++)
1474 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
1480 pstr->len = pstr->raw_len - idx +
offset;
1481 pstr->stop = pstr->raw_stop - idx +
offset;
1482 pstr->offsets_needed = 0;
1483 while (mid > 0 && pstr->offsets[mid - 1] == offset)
1485 while (mid < pstr->valid_len)
1486 if (pstr->wcs[mid] != WEOF)
1490 if (mid == pstr->valid_len)
1491 pstr->valid_len = 0;
1494 pstr->valid_len = pstr->offsets[mid] -
offset;
1495 if (pstr->valid_len)
1497 for (low = 0; low < pstr->valid_len; ++low)
1498 pstr->wcs[low] = WEOF;
1499 memset (pstr->mbs, 255, pstr->valid_len);
1502 pstr->valid_raw_len = pstr->valid_len;
1508 pstr->tip_context = re_string_context_at (pstr, offset - 1,
1510 #ifdef RE_ENABLE_I18N 1511 if (pstr->mb_cur_max > 1)
1512 memmove (pstr->wcs, pstr->wcs + offset,
1513 (pstr->valid_len - offset) * sizeof (wint_t));
1515 if (BE (pstr->mbs_allocated, 0))
1516 memmove (pstr->mbs, pstr->mbs + offset,
1517 pstr->valid_len - offset);
1518 pstr->valid_len -=
offset;
1519 pstr->valid_raw_len -=
offset;
1521 assert (pstr->valid_len > 0);
1528 int prev_valid_len = pstr->valid_len;
1530 #ifdef RE_ENABLE_I18N 1531 if (BE (pstr->offsets_needed, 0))
1533 pstr->len = pstr->raw_len - idx +
offset;
1534 pstr->stop = pstr->raw_stop - idx +
offset;
1535 pstr->offsets_needed = 0;
1538 pstr->valid_len = 0;
1539 #ifdef RE_ENABLE_I18N 1540 if (pstr->mb_cur_max > 1)
1547 const unsigned char *raw, *
p, *
q, *
end;
1551 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
1552 end = raw + (offset - pstr->mb_cur_max);
1553 if (end < pstr->raw_mbs)
1554 end = pstr->raw_mbs;
1555 p = raw + offset - 1;
1559 if (isascii (*p) && BE (pstr->trans ==
NULL, 1))
1561 memset (&pstr->cur_state,
'\0', sizeof (mbstate_t));
1567 for (; p >=
end; --
p)
1568 if ((*p & 0xc0) != 0x80)
1570 mbstate_t cur_state;
1572 int mlen = raw + pstr->len -
p;
1573 unsigned char buf[6];
1577 if (BE (pstr->trans !=
NULL, 0))
1579 int i = mlen < 6 ? mlen : 6;
1581 buf[
i] = pstr->trans[p[
i]];
1586 memset (&cur_state, 0,
sizeof (cur_state));
1587 mbclen = mbrtowc (&wc2, (
const char *) p, mlen,
1589 if (raw + offset - p <= mbclen
1590 && mbclen < (
size_t) -2)
1592 memset (&pstr->cur_state,
'\0',
1593 sizeof (mbstate_t));
1594 pstr->valid_len = mbclen - (raw + offset -
p);
1602 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
1605 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
1607 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
1608 && IS_WIDE_WORD_CHAR (wc))
1610 : ((IS_WIDE_NEWLINE (wc)
1611 && pstr->newline_anchor)
1612 ? CONTEXT_NEWLINE : 0));
1613 if (BE (pstr->valid_len, 0))
1615 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
1616 pstr->wcs[wcs_idx] = WEOF;
1617 if (pstr->mbs_allocated)
1618 memset (pstr->mbs, 255, pstr->valid_len);
1620 pstr->valid_raw_len = pstr->valid_len;
1625 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
1626 pstr->valid_raw_len = 0;
1629 pstr->tip_context = (bitset_contain (pstr->word_char, c)
1631 : ((IS_NEWLINE (c) && pstr->newline_anchor)
1632 ? CONTEXT_NEWLINE : 0));
1635 if (!BE (pstr->mbs_allocated, 0))
1636 pstr->mbs += offset;
1638 pstr->raw_mbs_idx = idx;
1643 #ifdef RE_ENABLE_I18N 1644 if (pstr->mb_cur_max > 1)
1653 build_wcs_buffer (pstr);
1657 if (BE (pstr->mbs_allocated, 0))
1660 build_upper_buffer (pstr);
1661 else if (pstr->trans !=
NULL)
1662 re_string_translate_buffer (pstr);
1665 pstr->valid_len = pstr->len;
1671 static unsigned char 1672 internal_function __attribute ((pure))
1673 re_string_peek_byte_case (
const re_string_t *pstr,
int idx)
1678 if (BE (!pstr->mbs_allocated, 1))
1679 return re_string_peek_byte (pstr, idx);
1681 #ifdef RE_ENABLE_I18N 1682 if (pstr->mb_cur_max > 1
1683 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
1684 return re_string_peek_byte (pstr, idx);
1687 off = pstr->cur_idx + idx;
1688 #ifdef RE_ENABLE_I18N 1689 if (pstr->offsets_needed)
1690 off = pstr->offsets[off];
1693 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
1695 #ifdef RE_ENABLE_I18N 1700 if (pstr->offsets_needed && !isascii (ch))
1701 return re_string_peek_byte (pstr, idx);
1707 static unsigned char 1708 internal_function __attribute ((pure))
1709 re_string_fetch_byte_case (re_string_t *pstr)
1711 if (BE (!pstr->mbs_allocated, 1))
1712 return re_string_fetch_byte (pstr);
1714 #ifdef RE_ENABLE_I18N 1715 if (pstr->offsets_needed)
1726 if (!re_string_first_byte (pstr, pstr->cur_idx))
1727 return re_string_fetch_byte (pstr);
1729 off = pstr->offsets[pstr->cur_idx];
1730 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
1733 return re_string_fetch_byte (pstr);
1735 re_string_skip_bytes (pstr,
1736 re_string_char_size_at (pstr, pstr->cur_idx));
1741 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
1746 re_string_destruct (re_string_t *pstr)
1748 #ifdef RE_ENABLE_I18N 1749 re_free (pstr->wcs);
1750 re_free (pstr->offsets);
1752 if (pstr->mbs_allocated)
1753 re_free (pstr->mbs);
1760 re_string_context_at (
const re_string_t *input,
int idx,
int eflags)
1763 if (BE (idx < 0, 0))
1766 return input->tip_context;
1767 if (BE (idx == input->len, 0))
1768 return ((eflags &
REG_NOTEOL) ? CONTEXT_ENDBUF
1769 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
1770 #ifdef RE_ENABLE_I18N 1771 if (input->mb_cur_max > 1)
1775 while(input->wcs[wc_idx] == WEOF)
1779 assert (wc_idx >= 0);
1783 return input->tip_context;
1785 wc = input->wcs[wc_idx];
1786 if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
1787 return CONTEXT_WORD;
1788 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
1789 ? CONTEXT_NEWLINE : 0);
1794 c = re_string_byte_at (input, idx);
1795 if (bitset_contain (input->word_char, c))
1796 return CONTEXT_WORD;
1797 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
1805 re_node_set_alloc (re_node_set *
set,
int size)
1809 set->elems = re_malloc (
int, size);
1810 if (BE (
set->elems ==
NULL, 0))
1817 re_node_set_init_1 (re_node_set *
set,
int elem)
1821 set->elems = re_malloc (
int, 1);
1822 if (BE (
set->elems ==
NULL, 0))
1824 set->alloc =
set->nelem = 0;
1827 set->elems[0] = elem;
1833 re_node_set_init_2 (re_node_set *
set,
int elem1,
int elem2)
1836 set->elems = re_malloc (
int, 2);
1837 if (BE (
set->elems ==
NULL, 0))
1842 set->elems[0] = elem1;
1849 set->elems[0] = elem1;
1850 set->elems[1] = elem2;
1854 set->elems[0] = elem2;
1855 set->elems[1] = elem1;
1863 re_node_set_init_copy (re_node_set *dest,
const re_node_set *src)
1865 dest->nelem = src->nelem;
1868 dest->alloc = dest->nelem;
1869 dest->elems = re_malloc (
int, dest->alloc);
1870 if (BE (dest->elems ==
NULL, 0))
1872 dest->alloc = dest->nelem = 0;
1875 memcpy (dest->elems, src->elems, src->nelem * sizeof (
int));
1878 re_node_set_init_empty (dest);
1888 re_node_set_add_intersect (re_node_set *dest,
const re_node_set *src1,
1889 const re_node_set *src2)
1891 int i1, i2, is,
id,
delta, sbase;
1892 if (src1->nelem == 0 || src2->nelem == 0)
1897 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1899 int new_alloc = src1->nelem + src2->nelem + dest->alloc;
1900 int *new_elems = re_realloc (dest->elems,
int, new_alloc);
1901 if (BE (new_elems ==
NULL, 0))
1903 dest->elems = new_elems;
1904 dest->alloc = new_alloc;
1909 sbase = dest->nelem + src1->nelem + src2->nelem;
1910 i1 = src1->nelem - 1;
1911 i2 = src2->nelem - 1;
1912 id = dest->nelem - 1;
1915 if (src1->elems[i1] == src2->elems[i2])
1918 while (
id >= 0 && dest->elems[
id] > src1->elems[i1])
1921 if (id < 0 || dest->elems[
id] != src1->elems[i1])
1922 dest->elems[--sbase] = src1->elems[i1];
1924 if (--i1 < 0 || --i2 < 0)
1929 else if (src1->elems[i1] < src2->elems[i2])
1941 id = dest->nelem - 1;
1942 is = dest->nelem + src1->nelem + src2->nelem - 1;
1943 delta = is - sbase + 1;
1948 dest->nelem +=
delta;
1949 if (delta > 0 &&
id >= 0)
1952 if (dest->elems[is] > dest->elems[
id])
1955 dest->elems[
id + delta--] = dest->elems[is--];
1962 dest->elems[
id +
delta] = dest->elems[
id];
1969 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (
int));
1979 re_node_set_init_union (re_node_set *dest,
const re_node_set *src1,
1980 const re_node_set *src2)
1983 if (src1 !=
NULL && src1->nelem > 0 && src2 !=
NULL && src2->nelem > 0)
1985 dest->alloc = src1->nelem + src2->nelem;
1986 dest->elems = re_malloc (
int, dest->alloc);
1987 if (BE (dest->elems ==
NULL, 0))
1992 if (src1 !=
NULL && src1->nelem > 0)
1993 return re_node_set_init_copy (dest, src1);
1994 else if (src2 !=
NULL && src2->nelem > 0)
1995 return re_node_set_init_copy (dest, src2);
1997 re_node_set_init_empty (dest);
2000 for (i1 = i2 =
id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
2002 if (src1->elems[i1] > src2->elems[i2])
2004 dest->elems[
id++] = src2->elems[i2++];
2007 if (src1->elems[i1] == src2->elems[i2])
2009 dest->elems[
id++] = src1->elems[i1++];
2011 if (i1 < src1->nelem)
2013 memcpy (dest->elems +
id, src1->elems + i1,
2014 (src1->nelem - i1) * sizeof (
int));
2015 id += src1->nelem - i1;
2017 else if (i2 < src2->nelem)
2019 memcpy (dest->elems +
id, src2->elems + i2,
2020 (src2->nelem - i2) * sizeof (
int));
2021 id += src2->nelem - i2;
2032 re_node_set_merge (re_node_set *dest,
const re_node_set *src)
2035 if (src ==
NULL || src->nelem == 0)
2037 if (dest->alloc < 2 * src->nelem + dest->nelem)
2039 int new_alloc = 2 * (src->nelem + dest->alloc);
2040 int *new_buffer = re_realloc (dest->elems,
int, new_alloc);
2041 if (BE (new_buffer ==
NULL, 0))
2043 dest->elems = new_buffer;
2044 dest->alloc = new_alloc;
2047 if (BE (dest->nelem == 0, 0))
2049 dest->nelem = src->nelem;
2050 memcpy (dest->elems, src->elems, src->nelem * sizeof (
int));
2056 for (sbase = dest->nelem + 2 * src->nelem,
2057 is = src->nelem - 1,
id = dest->nelem - 1; is >= 0 &&
id >= 0; )
2059 if (dest->elems[
id] == src->elems[is])
2061 else if (dest->elems[
id] < src->elems[is])
2062 dest->elems[--sbase] = src->elems[is--];
2071 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (
int));
2074 id = dest->nelem - 1;
2075 is = dest->nelem + 2 * src->nelem - 1;
2076 delta = is - sbase + 1;
2082 dest->nelem +=
delta;
2085 if (dest->elems[is] > dest->elems[
id])
2088 dest->elems[
id + delta--] = dest->elems[is--];
2095 dest->elems[
id +
delta] = dest->elems[
id];
2099 memcpy (dest->elems, dest->elems + sbase,
2100 delta * sizeof (
int));
2115 re_node_set_insert (re_node_set *
set,
int elem)
2119 if (
set->alloc == 0)
2121 if (BE (re_node_set_init_1 (
set, elem) ==
REG_NOERROR, 1))
2127 if (BE (
set->nelem, 0) == 0)
2130 set->elems[0] = elem;
2136 if (
set->alloc ==
set->nelem)
2139 set->alloc =
set->alloc * 2;
2140 new_elems = re_realloc (
set->elems,
int,
set->alloc);
2141 if (BE (new_elems ==
NULL, 0))
2143 set->elems = new_elems;
2148 if (elem < set->elems[0])
2151 for (idx =
set->nelem; idx > 0; idx--)
2152 set->elems[idx] =
set->elems[idx - 1];
2156 for (idx =
set->nelem;
set->elems[idx - 1] > elem; idx--)
2157 set->elems[idx] =
set->elems[idx - 1];
2161 set->elems[idx] = elem;
2172 re_node_set_insert_last (re_node_set *
set,
int elem)
2175 if (
set->alloc ==
set->nelem)
2178 set->alloc = (
set->alloc + 1) * 2;
2179 new_elems = re_realloc (
set->elems,
int,
set->alloc);
2180 if (BE (new_elems ==
NULL, 0))
2182 set->elems = new_elems;
2186 set->elems[
set->nelem++] = elem;
2194 internal_function __attribute ((pure))
2195 re_node_set_compare (
const re_node_set *set1,
const re_node_set *set2)
2198 if (set1 ==
NULL || set2 ==
NULL || set1->nelem != set2->nelem)
2200 for (i = set1->nelem ; --i >= 0 ; )
2201 if (set1->elems[i] != set2->elems[i])
2209 internal_function __attribute ((pure))
2210 re_node_set_contains (
const re_node_set *
set,
int elem)
2212 unsigned int idx,
right, mid;
2213 if (
set->nelem <= 0)
2218 right =
set->nelem - 1;
2221 mid = (idx +
right) / 2;
2222 if (
set->elems[mid] < elem)
2227 return set->elems[idx] == elem ? idx + 1 : 0;
2232 re_node_set_remove_at (re_node_set *
set,
int idx)
2234 if (idx < 0 || idx >=
set->nelem)
2237 for (; idx <
set->nelem; idx++)
2238 set->elems[idx] =
set->elems[idx + 1];
2247 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
2249 int type = token.type;
2250 if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
2252 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
2253 int *new_nexts, *new_indices;
2254 re_node_set *new_edests, *new_eclosures;
2255 re_token_t *new_nodes;
2258 if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
2261 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
2262 if (BE (new_nodes ==
NULL, 0))
2264 dfa->nodes = new_nodes;
2265 new_nexts = re_realloc (dfa->nexts,
int, new_nodes_alloc);
2266 new_indices = re_realloc (dfa->org_indices,
int, new_nodes_alloc);
2267 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
2268 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
2269 if (BE (new_nexts ==
NULL || new_indices ==
NULL 2270 || new_edests ==
NULL || new_eclosures ==
NULL, 0))
2272 dfa->nexts = new_nexts;
2273 dfa->org_indices = new_indices;
2274 dfa->edests = new_edests;
2275 dfa->eclosures = new_eclosures;
2276 dfa->nodes_alloc = new_nodes_alloc;
2278 dfa->nodes[dfa->nodes_len] = token;
2279 dfa->nodes[dfa->nodes_len].constraint = 0;
2280 #ifdef RE_ENABLE_I18N 2281 dfa->nodes[dfa->nodes_len].accept_mb =
2282 (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
2284 dfa->nexts[dfa->nodes_len] = -1;
2285 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
2286 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
2287 return dfa->nodes_len++;
2290 static inline unsigned int 2292 calc_state_hash (
const re_node_set *
nodes,
unsigned int context)
2294 unsigned int hash =
nodes->nelem + context;
2296 for (i = 0 ; i <
nodes->nelem ; i++)
2297 hash +=
nodes->elems[i];
2310 static re_dfastate_t *
2313 const re_node_set *
nodes)
2316 re_dfastate_t *new_state;
2317 struct re_state_table_entry *spot;
2319 if (BE (nodes->nelem == 0, 0))
2324 hash = calc_state_hash (nodes, 0);
2325 spot = dfa->state_table + (hash & dfa->state_hash_mask);
2327 for (i = 0 ; i < spot->num ; i++)
2329 re_dfastate_t *
state = spot->array[
i];
2330 if (hash != state->hash)
2332 if (re_node_set_compare (&state->nodes, nodes))
2337 new_state = create_ci_newstate (dfa, nodes, hash);
2338 if (BE (new_state ==
NULL, 0))
2354 static re_dfastate_t *
2356 re_acquire_state_context (
reg_errcode_t *err,
const re_dfa_t *dfa,
2357 const re_node_set *
nodes,
unsigned int context)
2360 re_dfastate_t *new_state;
2361 struct re_state_table_entry *spot;
2363 if (
nodes->nelem == 0)
2368 hash = calc_state_hash (
nodes, context);
2369 spot = dfa->state_table + (hash & dfa->state_hash_mask);
2371 for (i = 0 ; i < spot->num ; i++)
2373 re_dfastate_t *
state = spot->array[
i];
2374 if (state->hash == hash
2375 && state->context == context
2376 && re_node_set_compare (state->entrance_nodes,
nodes))
2380 new_state = create_cd_newstate (dfa,
nodes, context, hash);
2381 if (BE (new_state ==
NULL, 0))
2392 register_state (
const re_dfa_t *dfa, re_dfastate_t *newstate,
2395 struct re_state_table_entry *spot;
2399 newstate->hash =
hash;
2400 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
2403 for (i = 0; i < newstate->nodes.nelem; i++)
2405 int elem = newstate->nodes.elems[
i];
2406 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
2407 re_node_set_insert_last (&newstate->non_eps_nodes, elem);
2410 spot = dfa->state_table + (hash & dfa->state_hash_mask);
2411 if (BE (spot->alloc <= spot->num, 0))
2413 int new_alloc = 2 * spot->num + 2;
2414 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
2416 if (BE (new_array ==
NULL, 0))
2418 spot->array = new_array;
2419 spot->alloc = new_alloc;
2421 spot->array[spot->num++] = newstate;
2426 free_state (re_dfastate_t *
state)
2428 re_node_set_free (&state->non_eps_nodes);
2429 re_node_set_free (&state->inveclosure);
2430 if (state->entrance_nodes != &state->nodes)
2432 re_node_set_free (state->entrance_nodes);
2433 re_free (state->entrance_nodes);
2435 re_node_set_free (&state->nodes);
2436 re_free (state->word_trtable);
2437 re_free (state->trtable);
2444 static re_dfastate_t *
2446 create_ci_newstate (
const re_dfa_t *dfa,
const re_node_set *
nodes,
2451 re_dfastate_t *newstate;
2453 newstate = (re_dfastate_t *) calloc (
sizeof (re_dfastate_t), 1);
2454 if (BE (newstate ==
NULL, 0))
2456 err = re_node_set_init_copy (&newstate->nodes,
nodes);
2463 newstate->entrance_nodes = &newstate->nodes;
2464 for (i = 0 ; i <
nodes->nelem ; i++)
2466 re_token_t *node = dfa->nodes +
nodes->elems[
i];
2467 re_token_type_t
type = node->type;
2468 if (type == CHARACTER && !node->constraint)
2470 #ifdef RE_ENABLE_I18N 2471 newstate->accept_mb |= node->accept_mb;
2475 if (type == END_OF_RE)
2477 else if (type == OP_BACK_REF)
2478 newstate->has_backref = 1;
2479 else if (type == ANCHOR || node->constraint)
2480 newstate->has_constraint = 1;
2482 err = register_state (dfa, newstate, hash);
2485 free_state (newstate);
2494 static re_dfastate_t *
2496 create_cd_newstate (
const re_dfa_t *dfa,
const re_node_set *
nodes,
2497 unsigned int context,
unsigned int hash)
2499 int i, nctx_nodes = 0;
2501 re_dfastate_t *newstate;
2503 newstate = (re_dfastate_t *) calloc (
sizeof (re_dfastate_t), 1);
2504 if (BE (newstate ==
NULL, 0))
2506 err = re_node_set_init_copy (&newstate->nodes,
nodes);
2513 newstate->context = context;
2514 newstate->entrance_nodes = &newstate->nodes;
2516 for (i = 0 ; i <
nodes->nelem ; i++)
2518 unsigned int constraint = 0;
2519 re_token_t *node = dfa->nodes +
nodes->elems[
i];
2520 re_token_type_t
type = node->type;
2521 if (node->constraint)
2522 constraint = node->constraint;
2524 if (type == CHARACTER && !constraint)
2526 #ifdef RE_ENABLE_I18N 2527 newstate->accept_mb |= node->accept_mb;
2531 if (type == END_OF_RE)
2533 else if (type == OP_BACK_REF)
2534 newstate->has_backref = 1;
2535 else if (type == ANCHOR)
2536 constraint = node->opr.ctx_type;
2540 if (newstate->entrance_nodes == &newstate->nodes)
2542 newstate->entrance_nodes = re_malloc (re_node_set, 1);
2543 if (BE (newstate->entrance_nodes ==
NULL, 0))
2545 free_state (newstate);
2548 re_node_set_init_copy (newstate->entrance_nodes,
nodes);
2550 newstate->has_constraint = 1;
2553 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
2555 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
2560 err = register_state (dfa, newstate, hash);
2563 free_state (newstate);
2598 static void re_compile_fastmap_iter (
regex_t *bufp,
2599 const re_dfastate_t *init_state,
2601 static reg_errcode_t init_dfa (re_dfa_t *dfa,
size_t pat_len);
2602 #ifdef RE_ENABLE_I18N 2603 static void free_charset (re_charset_t *cset);
2605 static void free_workarea_compile (
regex_t *preg);
2607 #ifdef RE_ENABLE_I18N 2608 static void optimize_utf8 (re_dfa_t *dfa);
2617 static reg_errcode_t optimize_subexps (
void *extra, bin_tree_t *node);
2618 static reg_errcode_t lower_subexps (
void *extra, bin_tree_t *node);
2621 static reg_errcode_t calc_first (
void *extra, bin_tree_t *node);
2622 static reg_errcode_t calc_next (
void *extra, bin_tree_t *node);
2623 static reg_errcode_t link_nfa_nodes (
void *extra, bin_tree_t *node);
2624 static int duplicate_node (re_dfa_t *dfa,
int org_idx,
unsigned int constraint);
2625 static int search_duplicated_node (
const re_dfa_t *dfa,
int org_node,
2626 unsigned int constraint);
2628 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
2629 int node,
int root);
2631 static int fetch_number (re_string_t *input, re_token_t *token,
2633 static int peek_token (re_token_t *token, re_string_t *input,
2635 static bin_tree_t *
parse (re_string_t *regexp,
regex_t *preg,
2637 static bin_tree_t *parse_reg_exp (re_string_t *regexp,
regex_t *preg,
2640 static bin_tree_t *parse_branch (re_string_t *regexp,
regex_t *preg,
2643 static bin_tree_t *parse_expression (re_string_t *regexp,
regex_t *preg,
2646 static bin_tree_t *parse_sub_exp (re_string_t *regexp,
regex_t *preg,
2649 static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
2650 re_dfa_t *dfa, re_token_t *token,
2652 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
2655 static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
2656 re_string_t *regexp,
2657 re_token_t *token,
int token_len,
2661 static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
2662 re_string_t *regexp,
2664 #ifdef RE_ENABLE_I18N 2666 re_charset_t *mbcset,
2667 int *equiv_class_alloc,
2668 const unsigned char *
name);
2671 re_charset_t *mbcset,
2672 int *char_class_alloc,
2673 const unsigned char *class_name,
2677 const unsigned char *
name);
2680 const unsigned char *class_name,
2683 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
2685 const unsigned char *class_name,
2686 const unsigned char *extra,
2688 static bin_tree_t *create_tree (re_dfa_t *dfa,
2690 re_token_type_t
type);
2691 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
2692 bin_tree_t *left, bin_tree_t *right,
2693 const re_token_t *token);
2694 static bin_tree_t *duplicate_tree (
const bin_tree_t *src, re_dfa_t *dfa);
2695 static void free_token (re_token_t *node);
2696 static reg_errcode_t free_tree (
void *extra, bin_tree_t *node);
2697 static reg_errcode_t mark_opt_subexp (
void *extra, bin_tree_t *node);
2704 const char __re_error_msgid[] attribute_hidden =
2706 #define REG_NOERROR_IDX 0 2707 gettext_noop (
"Success")
2709 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") 2710 gettext_noop (
"No match")
2712 #define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") 2713 gettext_noop (
"Invalid regular expression")
2715 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") 2716 gettext_noop (
"Invalid collation character")
2718 #define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") 2719 gettext_noop (
"Invalid character class name")
2721 #define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") 2722 gettext_noop (
"Trailing backslash")
2724 #define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") 2725 gettext_noop (
"Invalid back reference")
2727 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") 2728 gettext_noop (
"Unmatched [ or [^")
2730 #define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") 2731 gettext_noop (
"Unmatched ( or \\(")
2733 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") 2734 gettext_noop (
"Unmatched \\{")
2736 #define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") 2737 gettext_noop (
"Invalid content of \\{\\}")
2739 #define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") 2740 gettext_noop (
"Invalid range end")
2742 #define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") 2743 gettext_noop (
"Memory exhausted")
2745 #define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") 2746 gettext_noop (
"Invalid preceding regular expression")
2748 #define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") 2749 gettext_noop (
"Premature end of regular expression")
2751 #define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") 2752 gettext_noop (
"Regular expression too big")
2754 #define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") 2755 gettext_noop (
"Unmatched ) or \\)")
2758 const size_t __re_error_msgid_idx[] attribute_hidden =
2790 const char *pattern;
2808 return gettext (__re_error_msgid + __re_error_msgid_idx[(
int) ret]);
2846 re_dfa_t *dfa = (re_dfa_t *) bufp->
buffer;
2849 memset (
fastmap,
'\0',
sizeof (
char) * SBC_MAX);
2850 re_compile_fastmap_iter (bufp, dfa->init_state,
fastmap);
2851 if (dfa->init_state != dfa->init_state_word)
2852 re_compile_fastmap_iter (bufp, dfa->init_state_word,
fastmap);
2853 if (dfa->init_state != dfa->init_state_nl)
2854 re_compile_fastmap_iter (bufp, dfa->init_state_nl,
fastmap);
2855 if (dfa->init_state != dfa->init_state_begbuf)
2856 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf,
fastmap);
2865 __attribute ((always_inline))
2866 re_set_fastmap (
char *
fastmap,
int icase,
int ch)
2877 re_compile_fastmap_iter (
regex_t *bufp,
const re_dfastate_t *init_state,
2880 re_dfa_t *dfa = (re_dfa_t *) bufp->
buffer;
2883 for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
2885 int node = init_state->nodes.elems[node_cnt];
2886 re_token_type_t
type = dfa->nodes[node].type;
2888 if (type == CHARACTER)
2890 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
2891 #ifdef RE_ENABLE_I18N 2894 unsigned char *buf = alloca (dfa->mb_cur_max), *
p;
2899 *
p++ = dfa->nodes[node].opr.c;
2900 while (++node < dfa->nodes_len
2901 && dfa->nodes[node].type == CHARACTER
2902 && dfa->nodes[node].mb_partial)
2903 *
p++ = dfa->nodes[node].opr.c;
2904 memset (&state,
'\0',
sizeof (state));
2905 if (mbrtowc (&wc, (
const char *) buf,
p - buf,
2907 && (__wcrtomb ((
char *) buf, towlower (wc), &state)
2909 re_set_fastmap (fastmap, 0, buf[0]);
2913 else if (type == SIMPLE_BRACKET)
2916 for (i = 0, ch = 0; i < BITSET_WORDS; ++
i)
2919 bitset_word_t
w = dfa->nodes[node].opr.sbcset[
i];
2920 for (j = 0; j < BITSET_WORD_BITS; ++
j, ++ch)
2921 if (w & ((bitset_word_t) 1 << j))
2922 re_set_fastmap (fastmap, icase, ch);
2925 #ifdef RE_ENABLE_I18N 2926 else if (type == COMPLEX_BRACKET)
2929 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
2930 if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
2931 || cset->nranges || cset->nchar_classes)
2934 if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
2943 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
2944 for (i = 0; i < SBC_MAX; ++
i)
2946 re_set_fastmap (fastmap, icase, i);
2949 if (dfa->mb_cur_max > 1)
2950 for (i = 0; i < SBC_MAX; ++
i)
2951 if (__btowc (i) == WEOF)
2952 re_set_fastmap (fastmap, icase, i);
2955 for (i = 0; i < cset->nmbchars; ++
i)
2959 memset (&state,
'\0',
sizeof (state));
2960 if (__wcrtomb (buf, cset->mbchars[i], &state) != (
size_t) -1)
2961 re_set_fastmap (fastmap, icase, *(
unsigned char *) buf);
2964 if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
2966 re_set_fastmap (fastmap, 0, *(
unsigned char *) buf);
2971 else if (type == OP_PERIOD
2972 #ifdef RE_ENABLE_I18N
2973 || type == OP_UTF8_PERIOD
2975 || type == END_OF_RE)
2977 memset (fastmap,
'\1',
sizeof (
char) * SBC_MAX);
2978 if (type == END_OF_RE)
3022 regcomp (preg, pattern, cflags)
3036 preg->
fastmap = re_malloc (
char, SBC_MAX);
3055 ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
3077 weak_alias (__regcomp,
regcomp)
3095 || errcode >= (
int) (
sizeof (__re_error_msgid_idx)
3096 /
sizeof (__re_error_msgid_idx[0])), 0))
3103 msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
3105 msg_size = strlen (msg) + 1;
3107 if (BE (errbuf_size != 0, 1))
3109 if (BE (msg_size > errbuf_size, 0))
3111 #if defined HAVE_MEMPCPY || defined _LIBC 3112 *((
char *) __mempcpy (errbuf, msg, errbuf_size - 1)) =
'\0';
3114 memcpy (errbuf, msg, errbuf_size - 1);
3115 errbuf[errbuf_size - 1] = 0;
3119 memcpy (errbuf, msg, msg_size);
3129 #ifdef RE_ENABLE_I18N 3134 static const bitset_t utf8_sb_map =
3137 [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
3143 free_dfa_content (re_dfa_t *dfa)
3148 for (i = 0; i < dfa->nodes_len; ++
i)
3149 free_token (dfa->nodes + i);
3150 re_free (dfa->nexts);
3151 for (i = 0; i < dfa->nodes_len; ++
i)
3153 if (dfa->eclosures !=
NULL)
3154 re_node_set_free (dfa->eclosures + i);
3155 if (dfa->inveclosures !=
NULL)
3156 re_node_set_free (dfa->inveclosures + i);
3157 if (dfa->edests !=
NULL)
3158 re_node_set_free (dfa->edests + i);
3160 re_free (dfa->edests);
3161 re_free (dfa->eclosures);
3162 re_free (dfa->inveclosures);
3163 re_free (dfa->nodes);
3165 if (dfa->state_table)
3166 for (i = 0; i <= dfa->state_hash_mask; ++
i)
3168 struct re_state_table_entry *entry = dfa->state_table +
i;
3169 for (j = 0; j < entry->num; ++
j)
3171 re_dfastate_t *state = entry->array[
j];
3174 re_free (entry->array);
3176 re_free (dfa->state_table);
3177 #ifdef RE_ENABLE_I18N 3178 if (dfa->sb_char != utf8_sb_map)
3179 re_free (dfa->sb_char);
3181 re_free (dfa->subexp_map);
3183 re_free (dfa->re_str);
3196 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
3198 free_dfa_content (dfa);
3209 weak_alias (__regfree,
regfree)
3215 #if defined _REGEX_RE_COMP || defined _LIBC 3236 return gettext (
"No previous regular expression");
3242 fastmap = re_comp_buf.
fastmap;
3244 __regfree (&re_comp_buf);
3245 memset (&re_comp_buf,
'\0',
sizeof (re_comp_buf));
3251 re_comp_buf.
fastmap = (
char *) malloc (SBC_MAX);
3253 return (
char *) gettext (__re_error_msgid
3269 return (
char *) gettext (__re_error_msgid + __re_error_msgid_idx[(
int) ret]);
3273 libc_freeres_fn (free_mem)
3275 __regfree (&re_comp_buf);
3286 re_compile_internal (
regex_t *preg,
const char * pattern,
size_t length,
3303 dfa = (re_dfa_t *) preg->
buffer;
3310 dfa = re_realloc (preg->
buffer, re_dfa_t, 1);
3314 preg->
buffer = (
unsigned char *) dfa;
3316 preg->
used =
sizeof (re_dfa_t);
3318 err = init_dfa (dfa, length);
3321 free_dfa_content (dfa);
3328 dfa->re_str = re_malloc (
char, length + 1);
3329 strncpy (dfa->re_str, pattern, length + 1);
3332 __libc_lock_init (dfa->lock);
3334 err = re_string_construct (®exp, pattern, length, preg->
translate,
3338 re_compile_internal_free_return:
3339 free_workarea_compile (preg);
3340 re_string_destruct (®exp);
3341 free_dfa_content (dfa);
3349 dfa->str_tree =
parse (®exp, preg, syntax, &err);
3350 if (BE (dfa->str_tree ==
NULL, 0))
3351 goto re_compile_internal_free_return;
3354 err = analyze (preg);
3356 goto re_compile_internal_free_return;
3358 #ifdef RE_ENABLE_I18N 3360 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->
translate ==
NULL)
3361 optimize_utf8 (dfa);
3365 err = create_initial_state (dfa);
3368 free_workarea_compile (preg);
3369 re_string_destruct (®exp);
3373 free_dfa_content (dfa);
3385 init_dfa (re_dfa_t *dfa,
size_t pat_len)
3387 unsigned int table_size;
3392 memset (dfa,
'\0',
sizeof (re_dfa_t));
3395 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
3401 dfa->nodes_alloc = pat_len + 1;
3402 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
3405 for (table_size = 1; ; table_size <<= 1)
3406 if (table_size > pat_len)
3409 dfa->state_table = calloc (
sizeof (
struct re_state_table_entry), table_size);
3410 dfa->state_hash_mask = table_size - 1;
3412 dfa->mb_cur_max = MB_CUR_MAX;
3414 if (dfa->mb_cur_max == 6
3415 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME),
"UTF-8") == 0)
3417 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
3420 # ifdef HAVE_LANGINFO_CODESET 3421 codeset_name = nl_langinfo (CODESET);
3423 codeset_name = getenv (
"LC_ALL");
3424 if (codeset_name ==
NULL || codeset_name[0] ==
'\0')
3425 codeset_name = getenv (
"LC_CTYPE");
3426 if (codeset_name ==
NULL || codeset_name[0] ==
'\0')
3427 codeset_name = getenv (
"LANG");
3428 if (codeset_name ==
NULL)
3430 else if (strchr (codeset_name,
'.') !=
NULL)
3431 codeset_name = strchr (codeset_name,
'.') + 1;
3434 if (strcasecmp (codeset_name,
"UTF-8") == 0
3435 || strcasecmp (codeset_name,
"UTF8") == 0)
3440 dfa->map_notascii = 0;
3443 #ifdef RE_ENABLE_I18N 3444 if (dfa->mb_cur_max > 1)
3447 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
3452 dfa->sb_char = (re_bitset_ptr_t) calloc (
sizeof (bitset_t), 1);
3453 if (BE (dfa->sb_char ==
NULL, 0))
3457 for (i = 0, ch = 0; i < BITSET_WORDS; ++
i)
3458 for (j = 0; j < BITSET_WORD_BITS; ++
j, ++ch)
3460 wint_t wch = __btowc (ch);
3462 dfa->sb_char[
i] |= (bitset_word_t) 1 << j;
3464 if (isascii (ch) && wch != ch)
3465 dfa->map_notascii = 1;
3472 if (BE (dfa->nodes ==
NULL || dfa->state_table ==
NULL, 0))
3483 init_word_char (re_dfa_t *dfa)
3486 dfa->word_ops_used = 1;
3487 for (i = 0, ch = 0; i < BITSET_WORDS; ++
i)
3488 for (j = 0; j < BITSET_WORD_BITS; ++
j, ++ch)
3489 if (isalnum (ch) || ch ==
'_')
3490 dfa->word_char[
i] |= (bitset_word_t) 1 << j;
3496 free_workarea_compile (
regex_t *preg)
3498 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
3499 bin_tree_storage_t *storage, *next;
3500 for (storage = dfa->str_tree_storage; storage; storage = next)
3502 next = storage->next;
3505 dfa->str_tree_storage =
NULL;
3506 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
3507 dfa->str_tree =
NULL;
3508 re_free (dfa->org_indices);
3509 dfa->org_indices =
NULL;
3515 create_initial_state (re_dfa_t *dfa)
3519 re_node_set init_nodes;
3523 first = dfa->str_tree->first->node_idx;
3524 dfa->init_node =
first;
3525 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
3533 if (dfa->nbackref > 0)
3534 for (i = 0; i < init_nodes.nelem; ++
i)
3536 int node_idx = init_nodes.elems[
i];
3537 re_token_type_t
type = dfa->nodes[node_idx].type;
3540 if (type != OP_BACK_REF)
3542 for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
3544 re_token_t *clexp_node;
3545 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
3546 if (clexp_node->type == OP_CLOSE_SUBEXP
3547 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
3550 if (clexp_idx == init_nodes.nelem)
3553 if (type == OP_BACK_REF)
3555 int dest_idx = dfa->edests[node_idx].elems[0];
3556 if (!re_node_set_contains (&init_nodes, dest_idx))
3558 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
3565 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
3567 if (BE (dfa->init_state ==
NULL, 0))
3569 if (dfa->init_state->has_constraint)
3571 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
3573 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
3575 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
3579 if (BE (dfa->init_state_word ==
NULL || dfa->init_state_nl ==
NULL 3580 || dfa->init_state_begbuf ==
NULL, 0))
3584 dfa->init_state_word = dfa->init_state_nl
3585 = dfa->init_state_begbuf = dfa->init_state;
3587 re_node_set_free (&init_nodes);
3591 #ifdef RE_ENABLE_I18N 3597 optimize_utf8 (re_dfa_t *dfa)
3599 int node,
i, mb_chars = 0, has_period = 0;
3601 for (node = 0; node < dfa->nodes_len; ++node)
3602 switch (dfa->nodes[node].type)
3605 if (dfa->nodes[node].opr.c >= 0x80)
3609 switch (dfa->nodes[node].opr.idx)
3627 case OP_DUP_ASTERISK:
3628 case OP_OPEN_SUBEXP:
3629 case OP_CLOSE_SUBEXP:
3631 case COMPLEX_BRACKET:
3633 case SIMPLE_BRACKET:
3635 assert (0x80 % BITSET_WORD_BITS == 0);
3636 for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++
i)
3637 if (dfa->nodes[node].opr.sbcset[i])
3644 if (mb_chars || has_period)
3645 for (node = 0; node < dfa->nodes_len; ++node)
3647 if (dfa->nodes[node].type == CHARACTER
3648 && dfa->nodes[node].opr.c >= 0x80)
3649 dfa->nodes[node].mb_partial = 0;
3650 else if (dfa->nodes[node].type == OP_PERIOD)
3651 dfa->nodes[node].type = OP_UTF8_PERIOD;
3655 dfa->mb_cur_max = 1;
3657 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
3667 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
3671 dfa->nexts = re_malloc (
int, dfa->nodes_alloc);
3672 dfa->org_indices = re_malloc (
int, dfa->nodes_alloc);
3673 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
3674 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
3675 if (BE (dfa->nexts ==
NULL || dfa->org_indices ==
NULL || dfa->edests ==
NULL 3676 || dfa->eclosures ==
NULL, 0))
3679 dfa->subexp_map = re_malloc (
int, preg->
re_nsub);
3680 if (dfa->subexp_map !=
NULL)
3683 for (i = 0; i < preg->
re_nsub; i++)
3684 dfa->subexp_map[i] = i;
3685 preorder (dfa->str_tree, optimize_subexps, dfa);
3686 for (i = 0; i < preg->
re_nsub; i++)
3687 if (dfa->subexp_map[i] != i)
3691 free (dfa->subexp_map);
3692 dfa->subexp_map =
NULL;
3696 ret = postorder (dfa->str_tree, lower_subexps, preg);
3699 ret = postorder (dfa->str_tree, calc_first, dfa);
3702 preorder (dfa->str_tree, calc_next, dfa);
3703 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
3706 ret = calc_eclosure (dfa);
3712 if ((!preg->
no_sub && preg->
re_nsub > 0 && dfa->has_plural_match)
3715 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
3716 if (BE (dfa->inveclosures ==
NULL, 0))
3718 ret = calc_inveclosure (dfa);
3728 postorder (bin_tree_t *root,
reg_errcode_t (fn (
void *, bin_tree_t *)),
3731 bin_tree_t *node, *prev;
3733 for (node = root; ; )
3737 while (node->left || node->right)
3748 if (node->parent ==
NULL)
3751 node = node->parent;
3754 while (node->right == prev || node->right ==
NULL);
3760 preorder (bin_tree_t *root,
reg_errcode_t (fn (
void *, bin_tree_t *)),
3765 for (node = root; ; )
3776 bin_tree_t *prev =
NULL;
3777 while (node->right == prev || node->right ==
NULL)
3780 node = node->parent;
3793 optimize_subexps (
void *extra, bin_tree_t *node)
3795 re_dfa_t *dfa = (re_dfa_t *) extra;
3797 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
3799 int idx = node->token.opr.idx;
3800 node->token.opr.idx = dfa->subexp_map[idx];
3801 dfa->used_bkref_map |= 1 << node->token.opr.idx;
3804 else if (node->token.type == SUBEXP
3805 && node->left && node->left->token.type == SUBEXP)
3807 int other_idx = node->left->token.opr.idx;
3809 node->left = node->left->left;
3811 node->left->parent = node;
3813 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
3814 if (other_idx < BITSET_WORD_BITS)
3815 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
3824 lower_subexps (
void *extra, bin_tree_t *node)
3829 if (node->left && node->left->token.type == SUBEXP)
3831 node->left = lower_subexp (&err, preg, node->left);
3833 node->left->parent = node;
3835 if (node->right && node->right->token.type == SUBEXP)
3837 node->right = lower_subexp (&err, preg, node->right);
3839 node->right->parent = node;
3848 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
3849 bin_tree_t *body = node->left;
3850 bin_tree_t *op, *cls, *tree1, *
tree;
3857 && node->left !=
NULL 3858 && (node->token.opr.idx >= BITSET_WORD_BITS
3859 || !(dfa->used_bkref_map
3860 & ((bitset_word_t) 1 << node->token.opr.idx))))
3865 op = create_tree (dfa,
NULL,
NULL, OP_OPEN_SUBEXP);
3866 cls = create_tree (dfa,
NULL,
NULL, OP_CLOSE_SUBEXP);
3867 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
3868 tree = create_tree (dfa, op, tree1, CONCAT);
3875 op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
3876 op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
3883 calc_first (
void *extra, bin_tree_t *node)
3885 re_dfa_t *dfa = (re_dfa_t *) extra;
3886 if (node->token.type == CONCAT)
3888 node->first = node->left->first;
3889 node->node_idx = node->left->node_idx;
3894 node->node_idx = re_dfa_add_node (dfa, node->token);
3895 if (BE (node->node_idx == -1, 0))
3903 calc_next (
void *extra, bin_tree_t *node)
3905 switch (node->token.type)
3907 case OP_DUP_ASTERISK:
3908 node->left->next = node;
3911 node->left->next = node->right->first;
3912 node->right->next = node->next;
3916 node->left->next = node->next;
3918 node->right->next = node->next;
3926 link_nfa_nodes (
void *extra, bin_tree_t *node)
3928 re_dfa_t *dfa = (re_dfa_t *) extra;
3929 int idx = node->node_idx;
3932 switch (node->token.type)
3938 assert (node->next ==
NULL);
3941 case OP_DUP_ASTERISK:
3945 dfa->has_plural_match = 1;
3946 if (node->left !=
NULL)
3947 left = node->left->first->node_idx;
3949 left = node->next->node_idx;
3950 if (node->right !=
NULL)
3951 right = node->right->first->node_idx;
3953 right = node->next->node_idx;
3955 assert (right > -1);
3956 err = re_node_set_init_2 (dfa->edests + idx, left, right);
3961 case OP_OPEN_SUBEXP:
3962 case OP_CLOSE_SUBEXP:
3963 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
3967 dfa->nexts[idx] = node->next->node_idx;
3968 if (node->token.type == OP_BACK_REF)
3969 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
3973 assert (!IS_EPSILON_NODE (node->token.type));
3974 dfa->nexts[idx] = node->next->node_idx;
3987 duplicate_node_closure (re_dfa_t *dfa,
int top_org_node,
int top_clone_node,
3988 int root_node,
unsigned int init_constraint)
3990 int org_node, clone_node,
ret;
3991 unsigned int constraint = init_constraint;
3992 for (org_node = top_org_node, clone_node = top_clone_node;;)
3994 int org_dest, clone_dest;
3995 if (dfa->nodes[org_node].type == OP_BACK_REF)
4001 org_dest = dfa->nexts[org_node];
4002 re_node_set_empty (dfa->edests + clone_node);
4003 clone_dest = duplicate_node (dfa, org_dest, constraint);
4004 if (BE (clone_dest == -1, 0))
4006 dfa->nexts[clone_node] = dfa->nexts[org_node];
4007 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
4008 if (BE (ret < 0, 0))
4011 else if (dfa->edests[org_node].nelem == 0)
4016 dfa->nexts[clone_node] = dfa->nexts[org_node];
4019 else if (dfa->edests[org_node].nelem == 1)
4023 org_dest = dfa->edests[org_node].elems[0];
4024 re_node_set_empty (dfa->edests + clone_node);
4025 if (dfa->nodes[org_node].type == ANCHOR)
4028 if (org_node == root_node && clone_node != org_node)
4033 ret = re_node_set_insert (dfa->edests + clone_node,
4035 if (BE (ret < 0, 0))
4039 constraint |= dfa->nodes[org_node].opr.ctx_type;
4041 clone_dest = duplicate_node (dfa, org_dest, constraint);
4042 if (BE (clone_dest == -1, 0))
4044 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
4045 if (BE (ret < 0, 0))
4052 org_dest = dfa->edests[org_node].elems[0];
4053 re_node_set_empty (dfa->edests + clone_node);
4055 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
4056 if (clone_dest == -1)
4060 clone_dest = duplicate_node (dfa, org_dest, constraint);
4061 if (BE (clone_dest == -1, 0))
4063 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
4064 if (BE (ret < 0, 0))
4066 err = duplicate_node_closure (dfa, org_dest, clone_dest,
4067 root_node, constraint);
4075 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
4076 if (BE (ret < 0, 0))
4080 org_dest = dfa->edests[org_node].elems[1];
4081 clone_dest = duplicate_node (dfa, org_dest, constraint);
4082 if (BE (clone_dest == -1, 0))
4084 ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
4085 if (BE (ret < 0, 0))
4088 org_node = org_dest;
4089 clone_node = clone_dest;
4098 search_duplicated_node (
const re_dfa_t *dfa,
int org_node,
4099 unsigned int constraint)
4102 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
4104 if (org_node == dfa->org_indices[idx]
4105 && constraint == dfa->nodes[idx].constraint)
4116 duplicate_node (re_dfa_t *dfa,
int org_idx,
unsigned int constraint)
4118 int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
4119 if (BE (dup_idx != -1, 1))
4121 dfa->nodes[dup_idx].constraint = constraint;
4122 if (dfa->nodes[org_idx].type == ANCHOR)
4123 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
4124 dfa->nodes[dup_idx].duplicated = 1;
4127 dfa->org_indices[dup_idx] = org_idx;
4133 calc_inveclosure (re_dfa_t *dfa)
4136 for (idx = 0; idx < dfa->nodes_len; ++idx)
4137 re_node_set_init_empty (dfa->inveclosures + idx);
4139 for (src = 0; src < dfa->nodes_len; ++src)
4141 int *elems = dfa->eclosures[src].elems;
4142 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
4144 ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
4145 if (BE (ret == -1, 0))
4156 calc_eclosure (re_dfa_t *dfa)
4158 int node_idx, incomplete;
4160 assert (dfa->nodes_len > 0);
4164 for (node_idx = 0; ; ++node_idx)
4167 re_node_set eclosure_elem;
4168 if (node_idx == dfa->nodes_len)
4177 assert (dfa->eclosures[node_idx].nelem != -1);
4181 if (dfa->eclosures[node_idx].nelem != 0)
4184 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
4188 if (dfa->eclosures[node_idx].nelem == 0)
4191 re_node_set_free (&eclosure_elem);
4200 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
int node,
int root)
4203 unsigned int constraint;
4205 re_node_set eclosure;
4207 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
4213 dfa->eclosures[node].nelem = -1;
4215 constraint = ((dfa->nodes[node].type == ANCHOR)
4216 ? dfa->nodes[node].opr.ctx_type : 0);
4220 && dfa->edests[node].nelem
4221 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
4223 err = duplicate_node_closure (dfa, node, node, node, constraint);
4229 if (IS_EPSILON_NODE(dfa->nodes[node].type))
4230 for (i = 0; i < dfa->edests[node].nelem; ++
i)
4232 re_node_set eclosure_elem;
4233 int edest = dfa->edests[node].elems[
i];
4236 if (dfa->eclosures[edest].nelem == -1)
4243 if (dfa->eclosures[edest].nelem == 0)
4245 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
4250 eclosure_elem = dfa->eclosures[edest];
4252 re_node_set_merge (&eclosure, &eclosure_elem);
4255 if (dfa->eclosures[edest].nelem == 0)
4258 re_node_set_free (&eclosure_elem);
4263 re_node_set_insert (&eclosure, node);
4264 if (incomplete && !root)
4265 dfa->eclosures[node].nelem = 0;
4267 dfa->eclosures[node] = eclosure;
4268 *new_set = eclosure;
4281 re_string_skip_bytes (input, peek_token (
result, input, syntax));
4293 if (re_string_eoi (input))
4295 token->type = END_OF_RE;
4299 c = re_string_peek_byte (input, 0);
4302 token->word_char = 0;
4303 #ifdef RE_ENABLE_I18N 4304 token->mb_partial = 0;
4305 if (input->mb_cur_max > 1 &&
4306 !re_string_first_byte (input, re_string_cur_idx (input)))
4308 token->type = CHARACTER;
4309 token->mb_partial = 1;
4316 if (re_string_cur_idx (input) + 1 >= re_string_length (input))
4318 token->type = BACK_SLASH;
4322 c2 = re_string_peek_byte_case (input, 1);
4324 token->type = CHARACTER;
4325 #ifdef RE_ENABLE_I18N 4326 if (input->mb_cur_max > 1)
4328 wint_t wc = re_string_wchar_at (input,
4329 re_string_cur_idx (input) + 1);
4330 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
4334 token->word_char = IS_WORD_CHAR (c2) != 0;
4340 token->type = OP_ALT;
4342 case '1':
case '2':
case '3':
case '4':
case '5':
4343 case '6':
case '7':
case '8':
case '9':
4346 token->type = OP_BACK_REF;
4347 token->opr.idx = c2 -
'1';
4353 token->type = ANCHOR;
4354 token->opr.ctx_type = WORD_FIRST;
4358 if (!(syntax & RE_NO_GNU_OPS))
4360 token->type = ANCHOR;
4361 token->opr.ctx_type = WORD_LAST;
4365 if (!(syntax & RE_NO_GNU_OPS))
4367 token->type = ANCHOR;
4368 token->opr.ctx_type = WORD_DELIM;
4372 if (!(syntax & RE_NO_GNU_OPS))
4374 token->type = ANCHOR;
4375 token->opr.ctx_type = NOT_WORD_DELIM;
4379 if (!(syntax & RE_NO_GNU_OPS))
4380 token->type = OP_WORD;
4383 if (!(syntax & RE_NO_GNU_OPS))
4384 token->type = OP_NOTWORD;
4387 if (!(syntax & RE_NO_GNU_OPS))
4388 token->type = OP_SPACE;
4391 if (!(syntax & RE_NO_GNU_OPS))
4392 token->type = OP_NOTSPACE;
4395 if (!(syntax & RE_NO_GNU_OPS))
4397 token->type = ANCHOR;
4398 token->opr.ctx_type = BUF_FIRST;
4402 if (!(syntax & RE_NO_GNU_OPS))
4404 token->type = ANCHOR;
4405 token->opr.ctx_type = BUF_LAST;
4410 token->type = OP_OPEN_SUBEXP;
4413 if (!(syntax & RE_NO_BK_PARENS))
4414 token->type = OP_CLOSE_SUBEXP;
4418 token->type = OP_DUP_PLUS;
4421 if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
4422 token->type = OP_DUP_QUESTION;
4426 token->type = OP_OPEN_DUP_NUM;
4429 if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
4430 token->type = OP_CLOSE_DUP_NUM;
4438 token->type = CHARACTER;
4439 #ifdef RE_ENABLE_I18N 4440 if (input->mb_cur_max > 1)
4442 wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
4443 token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
4447 token->word_char = IS_WORD_CHAR (token->opr.c);
4453 token->type = OP_ALT;
4457 token->type = OP_ALT;
4460 token->type = OP_DUP_ASTERISK;
4463 if (!(syntax & RE_LIMITED_OPS) && !(syntax &
RE_BK_PLUS_QM))
4464 token->type = OP_DUP_PLUS;
4467 if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
4468 token->type = OP_DUP_QUESTION;
4472 token->type = OP_OPEN_DUP_NUM;
4475 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
4476 token->type = OP_CLOSE_DUP_NUM;
4480 token->type = OP_OPEN_SUBEXP;
4483 if (syntax & RE_NO_BK_PARENS)
4484 token->type = OP_CLOSE_SUBEXP;
4487 token->type = OP_OPEN_BRACKET;
4490 token->type = OP_PERIOD;
4494 re_string_cur_idx (input) != 0)
4496 char prev = re_string_peek_byte (input, -1);
4497 if (!(syntax & RE_NEWLINE_ALT) || prev !=
'\n')
4500 token->type = ANCHOR;
4501 token->opr.ctx_type = LINE_FIRST;
4505 re_string_cur_idx (input) + 1 != re_string_length (input))
4508 re_string_skip_bytes (input, 1);
4509 peek_token (&next, input, syntax);
4510 re_string_skip_bytes (input, -1);
4511 if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
4514 token->type = ANCHOR;
4515 token->opr.ctx_type = LINE_LAST;
4531 if (re_string_eoi (input))
4533 token->type = END_OF_RE;
4536 c = re_string_peek_byte (input, 0);
4539 #ifdef RE_ENABLE_I18N 4540 if (input->mb_cur_max > 1 &&
4541 !re_string_first_byte (input, re_string_cur_idx (input)))
4543 token->type = CHARACTER;
4549 && re_string_cur_idx (input) + 1 < re_string_length (input))
4553 re_string_skip_bytes (input, 1);
4554 c2 = re_string_peek_byte (input, 0);
4556 token->type = CHARACTER;
4563 if (re_string_cur_idx (input) + 1 < re_string_length (input))
4564 c2 = re_string_peek_byte (input, 1);
4572 token->type = OP_OPEN_COLL_ELEM;
4575 token->type = OP_OPEN_EQUIV_CLASS;
4580 token->type = OP_OPEN_CHAR_CLASS;
4585 token->type = CHARACTER;
4595 token->type = OP_CHARSET_RANGE;
4598 token->type = OP_CLOSE_BRACKET;
4601 token->type = OP_NON_MATCH_LIST;
4604 token->type = CHARACTER;
4627 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
4628 bin_tree_t *
tree, *eor, *root;
4629 re_token_t current_token;
4632 tree = parse_reg_exp (regexp, preg, ¤t_token,
syntax, 0, err);
4635 eor = create_tree (dfa,
NULL,
NULL, END_OF_RE);
4637 root = create_tree (dfa, tree, eor, CONCAT);
4640 if (BE (eor ==
NULL || root ==
NULL, 0))
4658 parse_reg_exp (re_string_t *regexp,
regex_t *preg, re_token_t *token,
4661 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
4663 tree = parse_branch (regexp, preg, token,
syntax, nest, err);
4667 while (token->type == OP_ALT)
4670 if (token->type != OP_ALT && token->type != END_OF_RE
4671 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
4673 branch = parse_branch (regexp, preg, token,
syntax, nest, err);
4679 tree = create_tree (dfa, tree, branch, OP_ALT);
4680 if (BE (tree ==
NULL, 0))
4699 parse_branch (re_string_t *regexp,
regex_t *preg, re_token_t *token,
4703 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
4704 tree = parse_expression (regexp, preg, token,
syntax, nest, err);
4708 while (token->type != OP_ALT && token->type != END_OF_RE
4709 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
4711 exp = parse_expression (regexp, preg, token,
syntax, nest, err);
4718 tree = create_tree (dfa, tree, exp, CONCAT);
4725 else if (tree ==
NULL)
4739 parse_expression (re_string_t *regexp,
regex_t *preg, re_token_t *token,
4742 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
4744 switch (token->type)
4753 #ifdef RE_ENABLE_I18N 4754 if (dfa->mb_cur_max > 1)
4756 while (!re_string_eoi (regexp)
4757 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
4759 bin_tree_t *mbc_remain;
4760 fetch_token (token, regexp,
syntax);
4761 mbc_remain = create_token_tree (dfa,
NULL,
NULL, token);
4762 tree = create_tree (dfa,
tree, mbc_remain, CONCAT);
4772 case OP_OPEN_SUBEXP:
4773 tree = parse_sub_exp (regexp, preg, token,
syntax, nest + 1, err);
4777 case OP_OPEN_BRACKET:
4778 tree = parse_bracket_exp (regexp, dfa, token,
syntax, err);
4783 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
4788 dfa->used_bkref_map |= 1 << token->opr.idx;
4796 dfa->has_mb_node = 1;
4798 case OP_OPEN_DUP_NUM:
4805 case OP_DUP_ASTERISK:
4807 case OP_DUP_QUESTION:
4815 fetch_token (token, regexp,
syntax);
4816 return parse_expression (regexp, preg, token,
syntax, nest, err);
4819 case OP_CLOSE_SUBEXP:
4820 if ((token->type == OP_CLOSE_SUBEXP) &&
4827 case OP_CLOSE_DUP_NUM:
4831 token->type = CHARACTER;
4842 if ((token->opr.ctx_type
4843 & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
4844 && dfa->word_ops_used == 0)
4845 init_word_char (dfa);
4846 if (token->opr.ctx_type == WORD_DELIM
4847 || token->opr.ctx_type == NOT_WORD_DELIM)
4849 bin_tree_t *tree_first, *tree_last;
4850 if (token->opr.ctx_type == WORD_DELIM)
4852 token->opr.ctx_type = WORD_FIRST;
4853 tree_first = create_token_tree (dfa,
NULL,
NULL, token);
4854 token->opr.ctx_type = WORD_LAST;
4858 token->opr.ctx_type = INSIDE_WORD;
4859 tree_first = create_token_tree (dfa,
NULL,
NULL, token);
4860 token->opr.ctx_type = INSIDE_NOTWORD;
4862 tree_last = create_token_tree (dfa,
NULL,
NULL, token);
4863 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
4883 fetch_token (token, regexp,
syntax);
4892 if (dfa->mb_cur_max > 1)
4893 dfa->has_mb_node = 1;
4897 tree = build_charclass_op (dfa, regexp->trans,
4898 (
const unsigned char *)
"alnum",
4899 (
const unsigned char *)
"_",
4900 token->type == OP_NOTWORD, err);
4906 tree = build_charclass_op (dfa, regexp->trans,
4907 (
const unsigned char *)
"space",
4908 (
const unsigned char *)
"",
4909 token->type == OP_NOTSPACE, err);
4926 fetch_token (token, regexp,
syntax);
4928 while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
4929 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
4936 && (token->type == OP_DUP_ASTERISK
4937 || token->type == OP_OPEN_DUP_NUM))
4955 parse_sub_exp (re_string_t *regexp,
regex_t *preg, re_token_t *token,
4958 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
4966 if (token->type == OP_CLOSE_SUBEXP)
4970 tree = parse_reg_exp (regexp, preg, token,
syntax, nest, err);
4971 if (BE (*err ==
REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
4977 if (cur_nsub <=
'9' -
'1')
4978 dfa->completed_bkref_map |= 1 << cur_nsub;
4986 tree->token.opr.idx = cur_nsub;
4993 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
4997 int i, start,
end, start_idx = re_string_cur_idx (regexp);
4998 re_token_t start_token = *token;
5000 if (token->type == OP_OPEN_DUP_NUM)
5003 start = fetch_number (regexp, token,
syntax);
5006 if (token->type == CHARACTER && token->opr.c ==
',')
5014 if (BE (start != -2, 1))
5017 end = ((token->type == OP_CLOSE_DUP_NUM) ? start
5018 : ((token->type == CHARACTER && token->opr.c ==
',')
5019 ? fetch_number (regexp, token,
syntax) : -2));
5021 if (BE (start == -2 || end == -2, 0))
5026 if (token->type == END_OF_RE)
5035 re_string_set_index (regexp, start_idx);
5036 *token = start_token;
5037 token->type = CHARACTER;
5043 if (BE (end != -1 && start > end, 0))
5052 start = (token->type == OP_DUP_PLUS) ? 1 : 0;
5053 end = (token->type == OP_DUP_QUESTION) ? 1 : -1;
5056 fetch_token (token, regexp,
syntax);
5058 if (BE (elem ==
NULL, 0))
5060 if (BE (start == 0 && end == 0, 0))
5062 postorder (elem, free_tree,
NULL);
5067 if (BE (start > 0, 0))
5070 for (i = 2; i <= start; ++
i)
5072 elem = duplicate_tree (elem, dfa);
5073 tree = create_tree (dfa, tree, elem, CONCAT);
5074 if (BE (elem ==
NULL || tree ==
NULL, 0))
5075 goto parse_dup_op_espace;
5082 elem = duplicate_tree (elem, dfa);
5088 if (elem->token.type == SUBEXP)
5089 postorder (elem, mark_opt_subexp, (
void *) (
long) elem->token.opr.idx);
5091 tree = create_tree (dfa, elem,
NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
5092 if (BE (tree ==
NULL, 0))
5093 goto parse_dup_op_espace;
5098 for (i = start + 2; i <=
end; ++
i)
5100 elem = duplicate_tree (elem, dfa);
5101 tree = create_tree (dfa, tree, elem, CONCAT);
5102 if (BE (elem ==
NULL || tree ==
NULL, 0))
5103 goto parse_dup_op_espace;
5105 tree = create_tree (dfa, tree,
NULL, OP_ALT);
5106 if (BE (tree ==
NULL, 0))
5107 goto parse_dup_op_espace;
5111 tree = create_tree (dfa, old_tree, tree, CONCAT);
5115 parse_dup_op_espace:
5122 #define BRACKET_NAME_BUF_SIZE 32 5134 # ifdef RE_ENABLE_I18N 5135 build_range_exp (bitset_t sbcset, re_charset_t *mbcset,
int *range_alloc,
5136 bracket_elem_t *start_elem, bracket_elem_t *end_elem)
5138 build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
5139 bracket_elem_t *end_elem)
5142 unsigned int start_ch, end_ch;
5144 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
5145 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
5151 if (BE ((start_elem->type == COLL_SYM
5152 && strlen ((
char *) start_elem->opr.name) > 1)
5153 || (end_elem->type == COLL_SYM
5154 && strlen ((
char *) end_elem->opr.name) > 1), 0))
5157 # ifdef RE_ENABLE_I18N 5162 wchar_t cmp_buf[6] = {
L'\0',
L'\0', L
'\0', L
'\0', L
'\0', L
'\0'};
5164 start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
5165 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
5167 end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
5168 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
5170 start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
5171 ? __btowc (start_ch) : start_elem->opr.wch);
5172 end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
5173 ? __btowc (end_ch) : end_elem->opr.wch);
5174 if (start_wc == WEOF || end_wc == WEOF)
5176 cmp_buf[0] = start_wc;
5177 cmp_buf[4] = end_wc;
5178 if (wcscoll (cmp_buf, cmp_buf + 4) > 0)
5189 if (BE (*range_alloc == mbcset->nranges, 0))
5192 wchar_t *new_array_start, *new_array_end;
5196 new_nranges = 2 * mbcset->nranges + 1;
5199 new_array_start = re_realloc (mbcset->range_starts,
wchar_t,
5201 new_array_end = re_realloc (mbcset->range_ends,
wchar_t,
5204 if (BE (new_array_start ==
NULL || new_array_end ==
NULL, 0))
5207 mbcset->range_starts = new_array_start;
5208 mbcset->range_ends = new_array_end;
5209 *range_alloc = new_nranges;
5212 mbcset->range_starts[mbcset->nranges] = start_wc;
5213 mbcset->range_ends[mbcset->nranges++] = end_wc;
5217 for (wc = 0; wc < SBC_MAX; ++wc)
5220 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
5221 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
5222 bitset_set (sbcset, wc);
5228 start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
5229 : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
5231 end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
5232 : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
5234 if (start_ch > end_ch)
5237 for (ch = 0; ch < SBC_MAX; ++ch)
5238 if (start_ch <= ch && ch <= end_ch)
5239 bitset_set (sbcset, ch);
5255 # ifdef RE_ENABLE_I18N 5256 build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
5257 int *coll_sym_alloc,
const unsigned char *
name)
5259 build_collating_symbol (bitset_t sbcset,
const unsigned char *
name)
5262 size_t name_len = strlen ((
const char *)
name);
5263 if (BE (name_len != 1, 0))
5267 bitset_set (sbcset, name[0]);
5277 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
5281 const unsigned char *collseqmb;
5282 const char *collseqwc;
5286 const unsigned char *extra;
5293 __attribute ((always_inline))
5294 seek_collating_symbol_entry (
name, name_len)
5295 const unsigned char *
name;
5298 int32_t hash = elem_hash ((
const char *) name, name_len);
5299 int32_t elem = hash % table_size;
5300 if (symb_table[2 * elem] != 0)
5302 int32_t second = hash % (table_size - 2) + 1;
5307 if (symb_table[2 * elem] == hash
5309 && name_len == extra[symb_table[2 * elem + 1]]
5311 && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
5321 while (symb_table[2 * elem] != 0);
5330 auto inline unsigned int 5331 __attribute ((always_inline))
5332 lookup_collation_sequence_value (br_elem)
5333 bracket_elem_t *br_elem;
5335 if (br_elem->type == SB_CHAR)
5341 return collseqmb[br_elem->opr.ch];
5344 wint_t wc = __btowc (br_elem->opr.ch);
5345 return __collseq_table_lookup (collseqwc, wc);
5348 else if (br_elem->type == MB_CHAR)
5350 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
5352 else if (br_elem->type == COLL_SYM)
5354 size_t sym_name_len = strlen ((
char *) br_elem->opr.name);
5358 elem = seek_collating_symbol_entry (br_elem->opr.name,
5360 if (symb_table[2 * elem] != 0)
5363 idx = symb_table[2 * elem + 1];
5365 idx += 1 + extra[idx];
5367 idx += 1 + extra[idx];
5369 idx = (idx + 3) & ~3;
5371 idx +=
sizeof (
unsigned int);
5373 idx +=
sizeof (
unsigned int) *
5374 (1 + *(
unsigned int *) (extra + idx));
5376 return *(
unsigned int *) (extra + idx);
5378 else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
5382 return collseqmb[br_elem->opr.name[0]];
5385 else if (sym_name_len == 1)
5386 return collseqmb[br_elem->opr.name[0]];
5399 __attribute ((always_inline))
5400 build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
5401 re_charset_t *mbcset;
5404 bracket_elem_t *start_elem, *end_elem;
5412 if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS
5413 || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,
5417 start_collseq = lookup_collation_sequence_value (start_elem);
5418 end_collseq = lookup_collation_sequence_value (end_elem);
5420 if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))
5429 if (nrules > 0 || dfa->mb_cur_max > 1)
5432 if (BE (*range_alloc == mbcset->nranges, 0))
5440 new_nranges = 2 * mbcset->nranges + 1;
5441 new_array_start = re_realloc (mbcset->range_starts,
uint32_t,
5443 new_array_end = re_realloc (mbcset->range_ends,
uint32_t,
5446 if (BE (new_array_start ==
NULL || new_array_end ==
NULL, 0))
5449 mbcset->range_starts = new_array_start;
5450 mbcset->range_ends = new_array_end;
5451 *range_alloc = new_nranges;
5454 mbcset->range_starts[mbcset->nranges] = start_collseq;
5455 mbcset->range_ends[mbcset->nranges++] = end_collseq;
5459 for (ch = 0; ch < SBC_MAX; ch++)
5466 ch_collseq = collseqmb[ch];
5468 ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
5469 if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
5470 bitset_set (sbcset, ch);
5482 __attribute ((always_inline))
5483 build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
5484 re_charset_t *mbcset;
5485 int *coll_sym_alloc;
5487 const unsigned char *
name;
5490 size_t name_len = strlen ((
const char *) name);
5493 elem = seek_collating_symbol_entry (name, name_len);
5494 if (symb_table[2 * elem] != 0)
5497 idx = symb_table[2 * elem + 1];
5499 idx += 1 + extra[idx];
5501 else if (symb_table[2 * elem] == 0 && name_len == 1)
5505 bitset_set (sbcset, name[0]);
5513 if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
5517 int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
5521 new_coll_sym_alloc);
5522 if (BE (new_coll_syms ==
NULL, 0))
5524 mbcset->coll_syms = new_coll_syms;
5525 *coll_sym_alloc = new_coll_sym_alloc;
5527 mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
5532 if (BE (name_len != 1, 0))
5536 bitset_set (sbcset, name[0]);
5543 re_token_t br_token;
5544 re_bitset_ptr_t sbcset;
5545 #ifdef RE_ENABLE_I18N 5546 re_charset_t *mbcset;
5547 int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
5548 int equiv_class_alloc = 0, char_class_alloc = 0;
5551 bin_tree_t *work_tree;
5553 int first_round = 1;
5555 collseqmb = (
const unsigned char *)
5556 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
5557 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
5563 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
5564 table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
5565 symb_table = (
const int32_t *) _NL_CURRENT (LC_COLLATE,
5566 _NL_COLLATE_SYMB_TABLEMB);
5567 extra = (
const unsigned char *) _NL_CURRENT (LC_COLLATE,
5568 _NL_COLLATE_SYMB_EXTRAMB);
5571 sbcset = (re_bitset_ptr_t) calloc (
sizeof (bitset_t), 1);
5572 #ifdef RE_ENABLE_I18N 5573 mbcset = (re_charset_t *) calloc (
sizeof (re_charset_t), 1);
5575 #ifdef RE_ENABLE_I18N 5576 if (BE (sbcset ==
NULL || mbcset ==
NULL, 0))
5578 if (BE (sbcset ==
NULL, 0))
5585 token_len = peek_token_bracket (token, regexp,
syntax);
5586 if (BE (token->type == END_OF_RE, 0))
5589 goto parse_bracket_exp_free_return;
5591 if (token->type == OP_NON_MATCH_LIST)
5593 #ifdef RE_ENABLE_I18N 5594 mbcset->non_match = 1;
5598 bitset_set (sbcset,
'\0');
5599 re_string_skip_bytes (regexp, token_len);
5600 token_len = peek_token_bracket (token, regexp,
syntax);
5601 if (BE (token->type == END_OF_RE, 0))
5604 goto parse_bracket_exp_free_return;
5609 if (token->type == OP_CLOSE_BRACKET)
5610 token->type = CHARACTER;
5614 bracket_elem_t start_elem, end_elem;
5615 unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
5616 unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
5618 int token_len2 = 0, is_range_exp = 0;
5621 start_elem.opr.name = start_name_buf;
5622 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
5627 goto parse_bracket_exp_free_return;
5632 token_len = peek_token_bracket (token, regexp,
syntax);
5635 if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
5637 if (BE (token->type == END_OF_RE, 0))
5640 goto parse_bracket_exp_free_return;
5642 if (token->type == OP_CHARSET_RANGE)
5644 re_string_skip_bytes (regexp, token_len);
5645 token_len2 = peek_token_bracket (&token2, regexp,
syntax);
5646 if (BE (token2.type == END_OF_RE, 0))
5649 goto parse_bracket_exp_free_return;
5651 if (token2.type == OP_CLOSE_BRACKET)
5654 re_string_skip_bytes (regexp, -token_len);
5655 token->type = CHARACTER;
5662 if (is_range_exp == 1)
5664 end_elem.opr.name = end_name_buf;
5665 ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
5670 goto parse_bracket_exp_free_return;
5673 token_len = peek_token_bracket (token, regexp,
syntax);
5676 *err = build_range_exp (sbcset, mbcset, &range_alloc,
5677 &start_elem, &end_elem);
5679 # ifdef RE_ENABLE_I18N 5680 *err = build_range_exp (sbcset,
5681 dfa->mb_cur_max > 1 ? mbcset :
NULL,
5682 &range_alloc, &start_elem, &end_elem);
5684 *err = build_range_exp (sbcset, &start_elem, &end_elem);
5688 goto parse_bracket_exp_free_return;
5692 switch (start_elem.type)
5695 bitset_set (sbcset, start_elem.opr.ch);
5697 #ifdef RE_ENABLE_I18N 5700 if (BE (mbchar_alloc == mbcset->nmbchars, 0))
5702 wchar_t *new_mbchars;
5705 mbchar_alloc = 2 * mbcset->nmbchars + 1;
5707 new_mbchars = re_realloc (mbcset->mbchars,
wchar_t,
5709 if (BE (new_mbchars ==
NULL, 0))
5710 goto parse_bracket_exp_espace;
5711 mbcset->mbchars = new_mbchars;
5713 mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
5717 *err = build_equiv_class (sbcset,
5718 #ifdef RE_ENABLE_I18N
5719 mbcset, &equiv_class_alloc,
5721 start_elem.opr.name);
5723 goto parse_bracket_exp_free_return;
5726 *err = build_collating_symbol (sbcset,
5727 #ifdef RE_ENABLE_I18N
5728 mbcset, &coll_sym_alloc,
5730 start_elem.opr.name);
5732 goto parse_bracket_exp_free_return;
5735 *err = build_charclass (regexp->trans, sbcset,
5736 #ifdef RE_ENABLE_I18N
5737 mbcset, &char_class_alloc,
5739 start_elem.opr.name,
syntax);
5741 goto parse_bracket_exp_free_return;
5748 if (BE (token->type == END_OF_RE, 0))
5751 goto parse_bracket_exp_free_return;
5753 if (token->type == OP_CLOSE_BRACKET)
5757 re_string_skip_bytes (regexp, token_len);
5761 bitset_not (sbcset);
5763 #ifdef RE_ENABLE_I18N 5765 if (dfa->mb_cur_max > 1)
5766 bitset_mask (sbcset, dfa->sb_char);
5768 if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
5769 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
5770 || mbcset->non_match)))
5772 bin_tree_t *mbc_tree;
5775 dfa->has_mb_node = 1;
5776 br_token.type = COMPLEX_BRACKET;
5777 br_token.opr.mbcset = mbcset;
5778 mbc_tree = create_token_tree (dfa,
NULL,
NULL, &br_token);
5779 if (BE (mbc_tree ==
NULL, 0))
5780 goto parse_bracket_exp_espace;
5781 for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
5782 if (sbcset[sbc_idx])
5786 if (sbc_idx < BITSET_WORDS)
5789 br_token.type = SIMPLE_BRACKET;
5790 br_token.opr.sbcset = sbcset;
5791 work_tree = create_token_tree (dfa,
NULL,
NULL, &br_token);
5792 if (BE (work_tree ==
NULL, 0))
5793 goto parse_bracket_exp_espace;
5796 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
5797 if (BE (work_tree ==
NULL, 0))
5798 goto parse_bracket_exp_espace;
5803 work_tree = mbc_tree;
5809 #ifdef RE_ENABLE_I18N 5810 free_charset (mbcset);
5813 br_token.type = SIMPLE_BRACKET;
5814 br_token.opr.sbcset = sbcset;
5815 work_tree = create_token_tree (dfa,
NULL,
NULL, &br_token);
5816 if (BE (work_tree ==
NULL, 0))
5817 goto parse_bracket_exp_espace;
5821 parse_bracket_exp_espace:
5823 parse_bracket_exp_free_return:
5825 #ifdef RE_ENABLE_I18N 5826 free_charset (mbcset);
5834 parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
5835 re_token_t *token,
int token_len, re_dfa_t *dfa,
5838 #ifdef RE_ENABLE_I18N 5840 cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
5841 if (cur_char_size > 1)
5843 elem->type = MB_CHAR;
5844 elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
5845 re_string_skip_bytes (regexp, cur_char_size);
5849 re_string_skip_bytes (regexp, token_len);
5850 if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
5851 || token->type == OP_OPEN_EQUIV_CLASS)
5852 return parse_bracket_symbol (elem, regexp, token);
5853 if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
5858 (void) peek_token_bracket (&token2, regexp,
syntax);
5859 if (token2.type != OP_CLOSE_BRACKET)
5864 elem->type = SB_CHAR;
5865 elem->opr.ch = token->opr.c;
5874 parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
5877 unsigned char ch, delim = token->opr.c;
5879 if (re_string_eoi(regexp))
5883 if (i >= BRACKET_NAME_BUF_SIZE)
5885 if (token->type == OP_OPEN_CHAR_CLASS)
5886 ch = re_string_fetch_byte_case (regexp);
5888 ch = re_string_fetch_byte (regexp);
5889 if (re_string_eoi(regexp))
5891 if (ch == delim && re_string_peek_byte (regexp, 0) ==
']')
5893 elem->opr.name[
i] = ch;
5895 re_string_skip_bytes (regexp, 1);
5896 elem->opr.name[
i] =
'\0';
5897 switch (token->type)
5899 case OP_OPEN_COLL_ELEM:
5900 elem->type = COLL_SYM;
5902 case OP_OPEN_EQUIV_CLASS:
5903 elem->type = EQUIV_CLASS;
5905 case OP_OPEN_CHAR_CLASS:
5906 elem->type = CHAR_CLASS;
5921 #ifdef RE_ENABLE_I18N 5922 build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
5923 int *equiv_class_alloc,
const unsigned char *
name)
5925 build_equiv_class (bitset_t sbcset,
const unsigned char *
name)
5929 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
5933 const unsigned char *weights, *extra, *cp;
5934 unsigned char char_buf[2];
5939 # include <locale/weight.h> 5942 table = (
const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
5943 weights = (
const unsigned char *) _NL_CURRENT (LC_COLLATE,
5944 _NL_COLLATE_WEIGHTMB);
5945 extra = (
const unsigned char *) _NL_CURRENT (LC_COLLATE,
5946 _NL_COLLATE_EXTRAMB);
5947 indirect = (
const int32_t *) _NL_CURRENT (LC_COLLATE,
5948 _NL_COLLATE_INDIRECTMB);
5949 idx1 = findidx (&cp);
5950 if (BE (idx1 == 0 || cp <
name + strlen ((
const char *)
name), 0))
5955 char_buf[1] = (
unsigned char)
'\0';
5956 len = weights[idx1];
5957 for (ch = 0; ch < SBC_MAX; ++ch)
5961 idx2 = findidx (&cp);
5968 if (len == weights[idx2])
5971 while (cnt <= len &&
5972 weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt])
5976 bitset_set (sbcset, ch);
5980 if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
5984 int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
5986 int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
5988 new_equiv_class_alloc);
5989 if (BE (new_equiv_classes ==
NULL, 0))
5991 mbcset->equiv_classes = new_equiv_classes;
5992 *equiv_class_alloc = new_equiv_class_alloc;
5994 mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
5999 if (BE (strlen ((
const char *)
name) != 1, 0))
6001 bitset_set (sbcset, *name);
6013 #ifdef RE_ENABLE_I18N 6015 re_charset_t *mbcset,
int *char_class_alloc,
6023 const char *
name = (
const char *) class_name;
6028 && (strcmp (name,
"upper") == 0 || strcmp (name,
"lower") == 0))
6031 #ifdef RE_ENABLE_I18N 6033 if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
6037 int new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
6039 wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
6040 new_char_class_alloc);
6041 if (BE (new_char_classes ==
NULL, 0))
6043 mbcset->char_classes = new_char_classes;
6044 *char_class_alloc = new_char_class_alloc;
6046 mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
6049 #define BUILD_CHARCLASS_LOOP(ctype_func) \ 6051 if (BE (trans != NULL, 0)) \ 6053 for (i = 0; i < SBC_MAX; ++i) \ 6054 if (ctype_func (i)) \ 6055 bitset_set (sbcset, trans[i]); \ 6059 for (i = 0; i < SBC_MAX; ++i) \ 6060 if (ctype_func (i)) \ 6061 bitset_set (sbcset, i); \ 6065 if (strcmp (name,
"alnum") == 0)
6066 BUILD_CHARCLASS_LOOP (isalnum);
6067 else if (strcmp (name,
"cntrl") == 0)
6068 BUILD_CHARCLASS_LOOP (iscntrl);
6069 else if (strcmp (name,
"lower") == 0)
6070 BUILD_CHARCLASS_LOOP (islower);
6071 else if (strcmp (name,
"space") == 0)
6072 BUILD_CHARCLASS_LOOP (isspace);
6073 else if (strcmp (name,
"alpha") == 0)
6074 BUILD_CHARCLASS_LOOP (isalpha);
6075 else if (strcmp (name,
"digit") == 0)
6076 BUILD_CHARCLASS_LOOP (isdigit);
6077 else if (strcmp (name,
"print") == 0)
6078 BUILD_CHARCLASS_LOOP (isprint);
6079 else if (strcmp (name,
"upper") == 0)
6080 BUILD_CHARCLASS_LOOP (isupper);
6081 else if (strcmp (name,
"blank") == 0)
6082 BUILD_CHARCLASS_LOOP (isblank);
6083 else if (strcmp (name,
"graph") == 0)
6084 BUILD_CHARCLASS_LOOP (isgraph);
6085 else if (strcmp (name,
"punct") == 0)
6086 BUILD_CHARCLASS_LOOP (ispunct);
6087 else if (strcmp (name,
"xdigit") == 0)
6088 BUILD_CHARCLASS_LOOP (isxdigit);
6097 const unsigned char *class_name,
6098 const unsigned char *extra,
int non_match,
6101 re_bitset_ptr_t sbcset;
6102 #ifdef RE_ENABLE_I18N 6103 re_charset_t *mbcset;
6107 re_token_t br_token;
6110 sbcset = (re_bitset_ptr_t) calloc (
sizeof (bitset_t), 1);
6111 #ifdef RE_ENABLE_I18N 6112 mbcset = (re_charset_t *) calloc (
sizeof (re_charset_t), 1);
6115 #ifdef RE_ENABLE_I18N 6116 if (BE (sbcset ==
NULL || mbcset ==
NULL, 0))
6118 if (BE (sbcset ==
NULL, 0))
6127 #ifdef RE_ENABLE_I18N 6132 mbcset->non_match = 1;
6137 ret = build_charclass (
trans, sbcset,
6138 #ifdef RE_ENABLE_I18N
6146 #ifdef RE_ENABLE_I18N 6147 free_charset (mbcset);
6153 for (; *extra; extra++)
6154 bitset_set (sbcset, *extra);
6158 bitset_not (sbcset);
6160 #ifdef RE_ENABLE_I18N 6162 if (dfa->mb_cur_max > 1)
6163 bitset_mask (sbcset, dfa->sb_char);
6167 br_token.type = SIMPLE_BRACKET;
6168 br_token.opr.sbcset = sbcset;
6169 tree = create_token_tree (dfa,
NULL,
NULL, &br_token);
6170 if (BE (tree ==
NULL, 0))
6171 goto build_word_op_espace;
6173 #ifdef RE_ENABLE_I18N 6174 if (dfa->mb_cur_max > 1)
6176 bin_tree_t *mbc_tree;
6178 br_token.type = COMPLEX_BRACKET;
6179 br_token.opr.mbcset = mbcset;
6180 dfa->has_mb_node = 1;
6181 mbc_tree = create_token_tree (dfa,
NULL,
NULL, &br_token);
6182 if (BE (mbc_tree ==
NULL, 0))
6183 goto build_word_op_espace;
6185 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
6186 if (BE (mbc_tree !=
NULL, 1))
6191 free_charset (mbcset);
6198 build_word_op_espace:
6200 #ifdef RE_ENABLE_I18N 6201 free_charset (mbcset);
6219 fetch_token (token, input, syntax);
6221 if (BE (token->type == END_OF_RE, 0))
6223 if (token->type == OP_CLOSE_DUP_NUM || c ==
',')
6225 num = ((token->type != CHARACTER || c <
'0' ||
'9' < c || num == -2)
6226 ? -2 : ((num == -1) ? c -
'0' : num * 10 + c -
'0'));
6232 #ifdef RE_ENABLE_I18N 6234 free_charset (re_charset_t *cset)
6236 re_free (cset->mbchars);
6238 re_free (cset->coll_syms);
6239 re_free (cset->equiv_classes);
6240 re_free (cset->range_starts);
6241 re_free (cset->range_ends);
6243 re_free (cset->char_classes);
6253 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
6254 re_token_type_t
type)
6258 return create_token_tree (dfa, left, right, &t);
6262 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
6263 const re_token_t *token)
6266 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
6268 bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
6270 if (storage ==
NULL)
6272 storage->next = dfa->str_tree_storage;
6273 dfa->str_tree_storage = storage;
6274 dfa->str_tree_storage_idx = 0;
6276 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
6278 tree->parent =
NULL;
6280 tree->right =
right;
6281 tree->token = *token;
6282 tree->token.duplicated = 0;
6283 tree->token.opt_subexp = 0;
6286 tree->node_idx = -1;
6289 left->parent = tree;
6291 right->parent = tree;
6299 mark_opt_subexp (
void *extra, bin_tree_t *node)
6301 int idx = (
int) (
long) extra;
6302 if (node->token.type == SUBEXP && node->token.opr.idx == idx)
6303 node->token.opt_subexp = 1;
6311 free_token (re_token_t *node)
6313 #ifdef RE_ENABLE_I18N 6314 if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
6315 free_charset (node->opr.mbcset);
6318 if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
6319 re_free (node->opr.sbcset);
6326 free_tree (
void *extra, bin_tree_t *node)
6328 free_token (&node->token);
6339 duplicate_tree (
const bin_tree_t *root, re_dfa_t *dfa)
6341 const bin_tree_t *node;
6342 bin_tree_t *dup_root;
6343 bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
6345 for (node = root; ; )
6348 *p_new = create_token_tree (dfa,
NULL,
NULL, &node->token);
6351 (*p_new)->parent = dup_node;
6352 (*p_new)->token.duplicated = 1;
6359 p_new = &dup_node->left;
6363 const bin_tree_t *prev =
NULL;
6364 while (node->right == prev || node->right ==
NULL)
6367 node = node->parent;
6368 dup_node = dup_node->parent;
6373 p_new = &dup_node->right;
6385 static reg_errcode_t match_ctx_init (re_match_context_t *cache,
int eflags,
6386 int n) internal_function;
6387 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
6388 static void match_ctx_free (re_match_context_t *cache) internal_function;
6389 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache,
int node,
6390 int str_idx,
int from,
int to)
6392 static int search_cur_bkref_entry (
const re_match_context_t *mctx,
int str_idx)
6394 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx,
int node,
6395 int str_idx) internal_function;
6396 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
6397 int node,
int str_idx)
6399 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
6400 re_dfastate_t **limited_sts,
int last_node,
6404 const char *
string,
int length,
6405 int start,
int range,
int stop,
6407 int eflags) internal_function;
6409 const char *string1,
int length1,
6410 const char *string2,
int length2,
6412 int stop,
int ret_len) internal_function;
6414 const char *
string,
int length,
int start,
6416 int ret_len) internal_function;
6419 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
6421 static int check_matching (re_match_context_t *mctx,
int fl_longest_match,
6422 int *p_match_first) internal_function;
6423 static int check_halt_state_context (
const re_match_context_t *mctx,
6424 const re_dfastate_t *state,
int idx)
6426 static void update_regs (
const re_dfa_t *dfa,
regmatch_t *pmatch,
6428 int cur_idx,
int nmatch) internal_function;
6429 static reg_errcode_t push_fail_stack (
struct re_fail_stack_t *fs,
6430 int str_idx,
int dest_node,
int nregs,
6432 re_node_set *eps_via_nodes)
6435 const re_match_context_t *mctx,
6437 int fl_backtrack) internal_function;
6438 static reg_errcode_t free_fail_stack_return (
struct re_fail_stack_t *fs)
6441 #ifdef RE_ENABLE_I18N 6442 static int sift_states_iter_mb (
const re_match_context_t *mctx,
6443 re_sift_context_t *sctx,
6444 int node_idx,
int str_idx,
int max_str_idx)
6447 static reg_errcode_t sift_states_backward (
const re_match_context_t *mctx,
6448 re_sift_context_t *sctx)
6450 static reg_errcode_t build_sifted_states (
const re_match_context_t *mctx,
6451 re_sift_context_t *sctx,
int str_idx,
6452 re_node_set *cur_dest)
6454 static reg_errcode_t update_cur_sifted_state (
const re_match_context_t *mctx,
6455 re_sift_context_t *sctx,
6457 re_node_set *dest_nodes)
6459 static reg_errcode_t add_epsilon_src_nodes (
const re_dfa_t *dfa,
6460 re_node_set *dest_nodes,
6461 const re_node_set *candidates)
6463 static int check_dst_limits (
const re_match_context_t *mctx,
6464 re_node_set *limits,
6465 int dst_node,
int dst_idx,
int src_node,
6466 int src_idx) internal_function;
6467 static int check_dst_limits_calc_pos_1 (
const re_match_context_t *mctx,
6468 int boundaries,
int subexp_idx,
6469 int from_node,
int bkref_idx)
6471 static int check_dst_limits_calc_pos (
const re_match_context_t *mctx,
6472 int limit,
int subexp_idx,
6473 int node,
int str_idx,
6474 int bkref_idx) internal_function;
6475 static reg_errcode_t check_subexp_limits (
const re_dfa_t *dfa,
6476 re_node_set *dest_nodes,
6477 const re_node_set *candidates,
6478 re_node_set *limits,
6479 struct re_backref_cache_entry *bkref_ents,
6480 int str_idx) internal_function;
6481 static reg_errcode_t sift_states_bkref (
const re_match_context_t *mctx,
6482 re_sift_context_t *sctx,
6483 int str_idx,
const re_node_set *candidates)
6485 static reg_errcode_t merge_state_array (
const re_dfa_t *dfa,
6486 re_dfastate_t **dst,
6487 re_dfastate_t **src,
int num)
6489 static re_dfastate_t *find_recover_state (
reg_errcode_t *err,
6490 re_match_context_t *mctx) internal_function;
6492 re_match_context_t *mctx,
6493 re_dfastate_t *state) internal_function;
6494 static re_dfastate_t *merge_state_with_log (
reg_errcode_t *err,
6495 re_match_context_t *mctx,
6496 re_dfastate_t *next_state)
6498 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
6499 re_node_set *cur_nodes,
6500 int str_idx) internal_function;
6503 re_match_context_t *mctx,
6504 re_dfastate_t *pstate)
6507 #ifdef RE_ENABLE_I18N 6508 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
6509 re_dfastate_t *pstate)
6512 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
6513 const re_node_set *
nodes)
6516 int bkref_node,
int bkref_str_idx)
6518 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
6519 const re_sub_match_top_t *sub_top,
6520 re_sub_match_last_t *sub_last,
6521 int bkref_node,
int bkref_str)
6523 static int find_subexp_node (
const re_dfa_t *dfa,
const re_node_set *
nodes,
6524 int subexp_idx,
int type) internal_function;
6525 static reg_errcode_t check_arrival (re_match_context_t *mctx,
6526 state_array_t *
path,
int top_node,
6527 int top_str,
int last_node,
int last_str,
6528 int type) internal_function;
6529 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
6531 re_node_set *cur_nodes,
6532 re_node_set *next_nodes)
6534 static reg_errcode_t check_arrival_expand_ecl (
const re_dfa_t *dfa,
6535 re_node_set *cur_nodes,
6536 int ex_subexp,
int type)
6538 static reg_errcode_t check_arrival_expand_ecl_sub (
const re_dfa_t *dfa,
6539 re_node_set *dst_nodes,
6540 int target,
int ex_subexp,
6541 int type) internal_function;
6542 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
6543 re_node_set *cur_nodes,
int cur_str,
6544 int subexp_num,
int type)
6546 static int build_trtable (
const re_dfa_t *dfa,
6547 re_dfastate_t *state) internal_function;
6548 #ifdef RE_ENABLE_I18N 6549 static int check_node_accept_bytes (
const re_dfa_t *dfa,
int node_idx,
6550 const re_string_t *input,
int idx)
6553 static unsigned int find_collation_sequence_value (
const unsigned char *mbs,
6558 static int group_nodes_into_DFAstates (
const re_dfa_t *dfa,
6559 const re_dfastate_t *state,
6560 re_node_set *states_node,
6561 bitset_t *states_ch) internal_function;
6562 static int check_node_accept (
const re_match_context_t *mctx,
6563 const re_token_t *node,
int idx)
6565 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
6585 regexec (preg,
string, nmatch, pmatch, eflags)
6594 re_dfa_t *dfa = (re_dfa_t *) preg->
buffer;
6601 start = pmatch[0].
rm_so;
6602 length = pmatch[0].
rm_eo;
6607 length = strlen (
string);
6610 __libc_lock_lock (dfa->lock);
6612 err = re_search_internal (preg,
string, length, start, length - start,
6613 length, 0,
NULL, eflags);
6615 err = re_search_internal (preg,
string, length, start, length - start,
6616 length, nmatch, pmatch, eflags);
6617 __libc_lock_unlock (dfa->lock);
6622 # include <shlib-compat.h> 6623 versioned_symbol (libc, __regexec,
regexec, GLIBC_2_3_4);
6625 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) 6626 __typeof__ (__regexec) __compat_regexec;
6629 attribute_compat_text_section
6631 const char *
__restrict string,
size_t nmatch,
6634 return regexec (preg,
string, nmatch, pmatch,
6637 compat_symbol (libc, __compat_regexec,
regexec, GLIBC_2_0);
6671 re_match (bufp,
string, length, start, regs)
6677 return re_search_stub (bufp,
string, length,
start, 0, length, regs, 1);
6687 int length, start,
range;
6690 return re_search_stub (bufp,
string, length,
start,
range, length, regs, 0);
6697 re_match_2 (bufp, string1, length1, string2, length2,
start, regs, stop)
6699 const char *string1, *string2;
6700 int length1, length2, start, stop;
6703 return re_search_2_stub (bufp, string1, length1, string2, length2,
6704 start, 0, regs, stop, 1);
6713 const char *string1, *string2;
6714 int length1, length2, start,
range, stop;
6717 return re_search_2_stub (bufp, string1, length1, string2, length2,
6725 re_search_2_stub (bufp, string1, length1, string2, length2,
start,
range, regs,
6728 const char *string1, *string2;
6729 int length1, length2, start,
range, stop, ret_len;
6734 int len = length1 + length2;
6737 if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
6744 char *
s = re_malloc (
char, len);
6746 if (BE (s ==
NULL, 0))
6749 memcpy (__mempcpy (s, string1, length1), string2, length2);
6751 memcpy (s, string1, length1);
6752 memcpy (s + length1, string2, length2);
6762 rval = re_search_stub (bufp, str, len,
start,
range, stop, regs,
6765 re_free ((
char *) str);
6775 re_search_stub (bufp,
string, length,
start,
range, stop, regs, ret_len)
6778 int length, start,
range, stop, ret_len;
6785 re_dfa_t *dfa = (re_dfa_t *) bufp->
buffer;
6788 if (BE (start < 0 || start > length, 0))
6795 __libc_lock_lock (dfa->lock);
6804 if (BE (bufp->
no_sub, 0))
6814 if (BE (nregs < 1, 0))
6824 if (BE (pmatch ==
NULL, 0))
6830 result = re_search_internal (bufp,
string, length,
start,
range, stop,
6831 nregs, pmatch, eflags);
6838 else if (regs !=
NULL)
6847 if (BE (rval == 0, 1))
6851 assert (pmatch[0].rm_so ==
start);
6855 rval = pmatch[0].
rm_so;
6859 __libc_lock_unlock (dfa->lock);
6864 re_copy_regs (regs, pmatch, nregs, regs_allocated)
6867 int nregs, regs_allocated;
6871 int need_regs = nregs + 1;
6888 if (BE (need_regs > regs->
num_regs, 0))
6892 if (BE (new_start ==
NULL, 0) || BE (new_end ==
NULL, 0))
6894 regs->
start = new_start;
6895 regs->
end = new_end;
6908 for (i = 0; i < nregs; ++
i)
6914 regs->
start[i] = regs->
end[i] = -1;
6943 regs->
start = starts;
6960 #if defined _REGEX_RE_COMP || defined _LIBC 6984 re_search_internal (preg,
string, length,
start,
range, stop, nmatch, pmatch,
6993 const re_dfa_t *dfa = (
const re_dfa_t *) preg->
buffer;
6994 int left_lim, right_lim, incr;
6995 int fl_longest_match, match_first, match_kind, match_last = -1;
6998 #
if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901
L)
6999 re_match_context_t mctx = { .dfa = dfa };
7001 re_match_context_t mctx;
7007 #
if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901
L))
7008 memset (&mctx,
'\0',
sizeof (re_match_context_t));
7012 extra_nmatch = (nmatch > preg->
re_nsub) ? nmatch - (preg->
re_nsub + 1) : 0;
7013 nmatch -= extra_nmatch;
7016 if (BE (preg->
used == 0 || dfa->init_state ==
NULL 7017 || dfa->init_state_word ==
NULL || dfa->init_state_nl ==
NULL 7018 || dfa->init_state_begbuf ==
NULL, 0))
7029 if (dfa->init_state->nodes.nelem == 0
7030 && dfa->init_state_word->nodes.nelem == 0
7031 && (dfa->init_state_nl->nodes.nelem == 0
7040 fl_longest_match = (nmatch != 0 || dfa->nbackref);
7042 err = re_string_allocate (&mctx.input,
string, length, dfa->nodes_len + 1,
7046 mctx.input.stop = stop;
7047 mctx.input.raw_stop = stop;
7050 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
7058 if (nmatch > 1 || dfa->has_mb_node)
7060 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
7061 if (BE (mctx.state_log ==
NULL, 0))
7068 mctx.state_log =
NULL;
7070 match_first =
start;
7071 mctx.input.tip_context = (eflags &
REG_NOTBOL) ? CONTEXT_BEGBUF
7072 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
7075 incr = (
range < 0) ? -1 : 1;
7078 sb = dfa->mb_cur_max == 1;
7082 | (
range >= 0 ? 2 : 0)
7083 | (
t !=
NULL ? 1 : 0))
7086 for (;; match_first += incr)
7089 if (match_first < left_lim || right_lim < match_first)
7105 while (BE (match_first < right_lim, 1)
7106 && !fastmap[
t[(
unsigned char)
string[match_first]]])
7108 goto forward_match_found_start_or_reached_end;
7112 while (BE (match_first < right_lim, 1)
7113 && !fastmap[(
unsigned char)
string[match_first]])
7116 forward_match_found_start_or_reached_end:
7117 if (BE (match_first == right_lim, 0))
7119 ch = match_first >= length
7120 ? 0 : (
unsigned char)
string[match_first];
7121 if (!fastmap[
t ?
t[ch] : ch])
7129 while (match_first >= left_lim)
7131 ch = match_first >= length
7132 ? 0 : (
unsigned char)
string[match_first];
7133 if (fastmap[
t ?
t[ch] : ch])
7137 if (match_first < left_lim)
7149 unsigned int offset = match_first - mctx.input.raw_mbs_idx;
7150 if (BE (offset >= (
unsigned int) mctx.input.valid_raw_len, 0))
7152 err = re_string_reconstruct (&mctx.input, match_first,
7157 offset = match_first - mctx.input.raw_mbs_idx;
7161 ch = (match_first >= length
7162 ? 0 : re_string_byte_at (&mctx.input, offset));
7165 match_first += incr;
7166 if (match_first < left_lim || match_first > right_lim)
7177 err = re_string_reconstruct (&mctx.input, match_first, eflags);
7181 #ifdef RE_ENABLE_I18N 7184 if (!sb && !re_string_first_byte (&mctx.input, 0))
7190 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
7191 match_last = check_matching (&mctx, fl_longest_match,
7193 if (match_last != -1)
7195 if (BE (match_last == -2, 0))
7202 mctx.match_last = match_last;
7203 if ((!preg->
no_sub && nmatch > 1) || dfa->nbackref)
7205 re_dfastate_t *pstate = mctx.state_log[match_last];
7206 mctx.last_node = check_halt_state_context (&mctx, pstate,
7209 if ((!preg->
no_sub && nmatch > 1 && dfa->has_plural_match)
7212 err = prune_impossible_nodes (&mctx);
7224 match_ctx_clean (&mctx);
7228 assert (match_last != -1);
7238 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
7239 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
7242 pmatch[0].
rm_so = 0;
7243 pmatch[0].
rm_eo = mctx.match_last;
7245 if (!preg->
no_sub && nmatch > 1)
7247 err = set_regs (preg, &mctx, nmatch, pmatch,
7248 dfa->has_plural_match && dfa->nbackref > 0);
7256 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
7257 if (pmatch[reg_idx].rm_so != -1)
7259 #ifdef RE_ENABLE_I18N 7260 if (BE (mctx.input.offsets_needed != 0, 0))
7262 pmatch[reg_idx].
rm_so =
7263 (pmatch[reg_idx].
rm_so == mctx.input.valid_len
7264 ? mctx.input.valid_raw_len
7265 : mctx.input.offsets[pmatch[reg_idx].
rm_so]);
7266 pmatch[reg_idx].
rm_eo =
7267 (pmatch[reg_idx].
rm_eo == mctx.input.valid_len
7268 ? mctx.input.valid_raw_len
7269 : mctx.input.offsets[pmatch[reg_idx].
rm_eo]);
7272 assert (mctx.input.offsets_needed == 0);
7274 pmatch[reg_idx].
rm_so += match_first;
7275 pmatch[reg_idx].
rm_eo += match_first;
7277 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
7279 pmatch[nmatch + reg_idx].
rm_so = -1;
7280 pmatch[nmatch + reg_idx].
rm_eo = -1;
7283 if (dfa->subexp_map)
7284 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
7285 if (dfa->subexp_map[reg_idx] != reg_idx)
7287 pmatch[reg_idx + 1].
rm_so 7288 = pmatch[dfa->subexp_map[reg_idx] + 1].
rm_so;
7289 pmatch[reg_idx + 1].
rm_eo 7290 = pmatch[dfa->subexp_map[reg_idx] + 1].
rm_eo;
7295 re_free (mctx.state_log);
7297 match_ctx_free (&mctx);
7298 re_string_destruct (&mctx.input);
7303 prune_impossible_nodes (mctx)
7304 re_match_context_t *mctx;
7306 const re_dfa_t *
const dfa = mctx->dfa;
7307 int halt_node, match_last;
7309 re_dfastate_t **sifted_states;
7310 re_dfastate_t **lim_states =
NULL;
7311 re_sift_context_t sctx;
7313 assert (mctx->state_log !=
NULL);
7315 match_last = mctx->match_last;
7316 halt_node = mctx->last_node;
7317 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
7318 if (BE (sifted_states ==
NULL, 0))
7325 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
7326 if (BE (lim_states ==
NULL, 0))
7333 memset (lim_states,
'\0',
7334 sizeof (re_dfastate_t *) * (match_last + 1));
7335 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
7337 ret = sift_states_backward (mctx, &sctx);
7338 re_node_set_free (&sctx.limits);
7341 if (sifted_states[0] !=
NULL || lim_states[0] !=
NULL)
7351 }
while (mctx->state_log[match_last] ==
NULL 7352 || !mctx->state_log[match_last]->halt);
7353 halt_node = check_halt_state_context (mctx,
7354 mctx->state_log[match_last],
7357 ret = merge_state_array (dfa, sifted_states, lim_states,
7359 re_free (lim_states);
7366 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
7367 ret = sift_states_backward (mctx, &sctx);
7368 re_node_set_free (&sctx.limits);
7372 re_free (mctx->state_log);
7373 mctx->state_log = sifted_states;
7374 sifted_states =
NULL;
7375 mctx->last_node = halt_node;
7376 mctx->match_last = match_last;
7379 re_free (sifted_states);
7380 re_free (lim_states);
7388 static inline re_dfastate_t *
7389 __attribute ((always_inline)) internal_function
7390 acquire_init_state_context (
reg_errcode_t *err,
const re_match_context_t *mctx,
7393 const re_dfa_t *
const dfa = mctx->dfa;
7394 if (dfa->init_state->has_constraint)
7396 unsigned int context;
7397 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
7398 if (IS_WORD_CONTEXT (context))
7399 return dfa->init_state_word;
7400 else if (IS_ORDINARY_CONTEXT (context))
7401 return dfa->init_state;
7402 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
7403 return dfa->init_state_begbuf;
7404 else if (IS_NEWLINE_CONTEXT (context))
7405 return dfa->init_state_nl;
7406 else if (IS_BEGBUF_CONTEXT (context))
7409 return re_acquire_state_context (err, dfa,
7410 dfa->init_state->entrance_nodes,
7415 return dfa->init_state;
7418 return dfa->init_state;
7432 check_matching (re_match_context_t *mctx,
int fl_longest_match,
7435 const re_dfa_t *
const dfa = mctx->dfa;
7438 int match_last = -1;
7439 int cur_str_idx = re_string_cur_idx (&mctx->input);
7440 re_dfastate_t *cur_state;
7441 int at_init_state = p_match_first !=
NULL;
7442 int next_start_idx = cur_str_idx;
7445 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
7447 if (BE (cur_state ==
NULL, 0))
7453 if (mctx->state_log !=
NULL)
7455 mctx->state_log[cur_str_idx] = cur_state;
7459 if (BE (dfa->nbackref, 0))
7462 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
7466 if (cur_state->has_backref)
7468 err = transit_state_bkref (mctx, &cur_state->nodes);
7476 if (BE (cur_state->halt, 0))
7478 if (!cur_state->has_constraint
7479 || check_halt_state_context (mctx, cur_state, cur_str_idx))
7481 if (!fl_longest_match)
7485 match_last = cur_str_idx;
7491 while (!re_string_eoi (&mctx->input))
7493 re_dfastate_t *old_state = cur_state;
7494 int next_char_idx = re_string_cur_idx (&mctx->input) + 1;
7496 if (BE (next_char_idx >= mctx->input.bufs_len, 0)
7497 || (BE (next_char_idx >= mctx->input.valid_len, 0)
7498 && mctx->input.valid_len < mctx->input.len))
7500 err = extend_buffers (mctx);
7508 cur_state = transit_state (&err, mctx, cur_state);
7509 if (mctx->state_log !=
NULL)
7510 cur_state = merge_state_with_log (&err, mctx, cur_state);
7512 if (cur_state ==
NULL)
7520 if (mctx->state_log ==
NULL 7521 || (match && !fl_longest_match)
7522 || (cur_state = find_recover_state (&err, mctx)) ==
NULL)
7526 if (BE (at_init_state, 0))
7528 if (old_state == cur_state)
7529 next_start_idx = next_char_idx;
7534 if (cur_state->halt)
7538 if (!cur_state->has_constraint
7539 || check_halt_state_context (mctx, cur_state,
7540 re_string_cur_idx (&mctx->input)))
7543 match_last = re_string_cur_idx (&mctx->input);
7547 p_match_first =
NULL;
7548 if (!fl_longest_match)
7555 *p_match_first += next_start_idx;
7564 check_halt_node_context (
const re_dfa_t *dfa,
int node,
unsigned int context)
7566 re_token_type_t
type = dfa->nodes[node].type;
7567 unsigned int constraint = dfa->nodes[node].constraint;
7568 if (type != END_OF_RE)
7572 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
7583 check_halt_state_context (
const re_match_context_t *mctx,
7584 const re_dfastate_t *state,
int idx)
7587 unsigned int context;
7589 assert (state->halt);
7591 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
7592 for (i = 0; i < state->nodes.nelem; ++
i)
7593 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
7594 return state->nodes.elems[
i];
7605 proceed_next_node (
const re_match_context_t *mctx,
int nregs,
regmatch_t *regs,
7606 int *pidx,
int node, re_node_set *eps_via_nodes,
7607 struct re_fail_stack_t *fs)
7609 const re_dfa_t *
const dfa = mctx->dfa;
7611 if (IS_EPSILON_NODE (dfa->nodes[node].type))
7613 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
7614 re_node_set *edests = &dfa->edests[node];
7616 err = re_node_set_insert (eps_via_nodes, node);
7617 if (BE (err < 0, 0))
7620 for (dest_node = -1, i = 0; i < edests->nelem; ++
i)
7622 int candidate = edests->elems[
i];
7623 if (!re_node_set_contains (cur_nodes, candidate))
7625 if (dest_node == -1)
7626 dest_node = candidate;
7632 if (re_node_set_contains (eps_via_nodes, dest_node))
7637 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
7650 re_token_type_t
type = dfa->nodes[node].type;
7652 #ifdef RE_ENABLE_I18N 7653 if (dfa->nodes[node].accept_mb)
7654 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
7657 if (type == OP_BACK_REF)
7659 int subexp_idx = dfa->nodes[node].opr.idx + 1;
7660 naccepted = regs[subexp_idx].
rm_eo - regs[subexp_idx].
rm_so;
7663 if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
7667 char *buf = (
char *) re_string_get_buffer (&mctx->input);
7668 if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
7677 err = re_node_set_insert (eps_via_nodes, node);
7678 if (BE (err < 0, 0))
7680 dest_node = dfa->edests[node].elems[0];
7681 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
7688 || check_node_accept (mctx, dfa->nodes + node, *pidx))
7690 int dest_node = dfa->nexts[node];
7691 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
7692 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] ==
NULL 7693 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
7696 re_node_set_empty (eps_via_nodes);
7705 push_fail_stack (
struct re_fail_stack_t *fs,
int str_idx,
int dest_node,
7706 int nregs,
regmatch_t *regs, re_node_set *eps_via_nodes)
7709 int num = fs->num++;
7710 if (fs->num == fs->alloc)
7712 struct re_fail_stack_ent_t *new_array;
7713 new_array = realloc (fs->stack, (sizeof (
struct re_fail_stack_ent_t)
7715 if (new_array ==
NULL)
7718 fs->stack = new_array;
7720 fs->stack[num].idx = str_idx;
7721 fs->stack[num].node = dest_node;
7722 fs->stack[num].regs = re_malloc (
regmatch_t, nregs);
7723 if (fs->stack[num].regs ==
NULL)
7725 memcpy (fs->stack[num].regs, regs, sizeof (
regmatch_t) * nregs);
7726 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
7732 pop_fail_stack (
struct re_fail_stack_t *fs,
int *pidx,
int nregs,
7733 regmatch_t *regs, re_node_set *eps_via_nodes)
7735 int num = --fs->num;
7737 *pidx = fs->stack[num].idx;
7738 memcpy (regs, fs->stack[num].regs, sizeof (
regmatch_t) * nregs);
7739 re_node_set_free (eps_via_nodes);
7740 re_free (fs->stack[num].regs);
7741 *eps_via_nodes = fs->stack[num].eps_via_nodes;
7742 return fs->stack[num].node;
7752 set_regs (
const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch,
7755 const re_dfa_t *dfa = (
const re_dfa_t *) preg->
buffer;
7757 re_node_set eps_via_nodes;
7758 struct re_fail_stack_t *fs;
7759 struct re_fail_stack_t fs_body = { 0, 2,
NULL };
7761 int prev_idx_match_malloced = 0;
7764 assert (nmatch > 1);
7765 assert (mctx->state_log !=
NULL);
7770 fs->stack = re_malloc (
struct re_fail_stack_ent_t, fs->alloc);
7771 if (fs->stack ==
NULL)
7777 cur_node = dfa->init_node;
7778 re_node_set_init_empty (&eps_via_nodes);
7780 if (__libc_use_alloca (nmatch *
sizeof (
regmatch_t)))
7784 prev_idx_match = re_malloc (
regmatch_t, nmatch);
7785 if (prev_idx_match ==
NULL)
7787 free_fail_stack_return (fs);
7790 prev_idx_match_malloced = 1;
7792 memcpy (prev_idx_match, pmatch,
sizeof (
regmatch_t) * nmatch);
7794 for (idx = pmatch[0].rm_so; idx <= pmatch[0].
rm_eo ;)
7796 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
7798 if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
7803 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
7804 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
7806 if (reg_idx == nmatch)
7808 re_node_set_free (&eps_via_nodes);
7809 if (prev_idx_match_malloced)
7810 re_free (prev_idx_match);
7811 return free_fail_stack_return (fs);
7813 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
7818 re_node_set_free (&eps_via_nodes);
7819 if (prev_idx_match_malloced)
7820 re_free (prev_idx_match);
7826 cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
7827 &eps_via_nodes, fs);
7829 if (BE (cur_node < 0, 0))
7831 if (BE (cur_node == -2, 0))
7833 re_node_set_free (&eps_via_nodes);
7834 if (prev_idx_match_malloced)
7835 re_free (prev_idx_match);
7836 free_fail_stack_return (fs);
7840 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
7844 re_node_set_free (&eps_via_nodes);
7845 if (prev_idx_match_malloced)
7846 re_free (prev_idx_match);
7851 re_node_set_free (&eps_via_nodes);
7852 if (prev_idx_match_malloced)
7853 re_free (prev_idx_match);
7854 return free_fail_stack_return (fs);
7859 free_fail_stack_return (
struct re_fail_stack_t *fs)
7864 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
7866 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
7867 re_free (fs->stack[fs_idx].regs);
7869 re_free (fs->stack);
7876 update_regs (
const re_dfa_t *dfa,
regmatch_t *pmatch,
7877 regmatch_t *prev_idx_match,
int cur_node,
int cur_idx,
int nmatch)
7879 int type = dfa->nodes[cur_node].type;
7880 if (type == OP_OPEN_SUBEXP)
7882 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
7885 if (reg_num < nmatch)
7887 pmatch[reg_num].
rm_so = cur_idx;
7888 pmatch[reg_num].
rm_eo = -1;
7891 else if (type == OP_CLOSE_SUBEXP)
7893 int reg_num = dfa->nodes[cur_node].opr.idx + 1;
7894 if (reg_num < nmatch)
7897 if (pmatch[reg_num].rm_so < cur_idx)
7899 pmatch[reg_num].
rm_eo = cur_idx;
7902 memcpy (prev_idx_match, pmatch,
sizeof (
regmatch_t) * nmatch);
7906 if (dfa->nodes[cur_node].opt_subexp
7907 && prev_idx_match[reg_num].
rm_so != -1)
7913 memcpy (pmatch, prev_idx_match,
sizeof (
regmatch_t) * nmatch);
7917 pmatch[reg_num].
rm_eo = cur_idx;
7943 #define STATE_NODE_CONTAINS(state,node) \ 7944 ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) 7948 sift_states_backward (
const re_match_context_t *mctx, re_sift_context_t *sctx)
7952 int str_idx = sctx->last_str_idx;
7953 re_node_set cur_dest;
7956 assert (mctx->state_log !=
NULL && mctx->state_log[str_idx] !=
NULL);
7961 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
7964 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
7972 null_cnt = (sctx->sifted_states[str_idx] ==
NULL) ? null_cnt + 1 : 0;
7973 if (null_cnt > mctx->max_mb_elem_len)
7975 memset (sctx->sifted_states,
'\0',
7976 sizeof (re_dfastate_t *) * str_idx);
7977 re_node_set_free (&cur_dest);
7980 re_node_set_empty (&cur_dest);
7983 if (mctx->state_log[str_idx])
7985 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
7994 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
8000 re_node_set_free (&cur_dest);
8006 build_sifted_states (
const re_match_context_t *mctx, re_sift_context_t *sctx,
8007 int str_idx, re_node_set *cur_dest)
8009 const re_dfa_t *
const dfa = mctx->dfa;
8010 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
8020 for (i = 0; i < cur_src->nelem; i++)
8022 int prev_node = cur_src->elems[
i];
8027 re_token_type_t
type = dfa->nodes[prev_node].type;
8028 assert (!IS_EPSILON_NODE (type));
8030 #ifdef RE_ENABLE_I18N 8032 if (dfa->nodes[prev_node].accept_mb)
8033 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
8034 str_idx, sctx->last_str_idx);
8040 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
8041 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
8042 dfa->nexts[prev_node]))
8048 if (sctx->limits.nelem)
8050 int to_idx = str_idx + naccepted;
8051 if (check_dst_limits (mctx, &sctx->limits,
8052 dfa->nexts[prev_node], to_idx,
8053 prev_node, str_idx))
8056 ret = re_node_set_insert (cur_dest, prev_node);
8057 if (BE (ret == -1, 0))
8068 clean_state_log_if_needed (re_match_context_t *mctx,
int next_state_log_idx)
8070 int top = mctx->state_log_top;
8072 if (next_state_log_idx >= mctx->input.bufs_len
8073 || (next_state_log_idx >= mctx->input.valid_len
8074 && mctx->input.valid_len < mctx->input.len))
8077 err = extend_buffers (mctx);
8082 if (top < next_state_log_idx)
8084 memset (mctx->state_log + top + 1,
'\0',
8085 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
8086 mctx->state_log_top = next_state_log_idx;
8093 merge_state_array (
const re_dfa_t *dfa, re_dfastate_t **dst,
8094 re_dfastate_t **src,
int num)
8098 for (st_idx = 0; st_idx < num; ++st_idx)
8100 if (dst[st_idx] ==
NULL)
8101 dst[st_idx] = src[st_idx];
8102 else if (src[st_idx] !=
NULL)
8104 re_node_set merged_set;
8105 err = re_node_set_init_union (&merged_set, &dst[st_idx]->
nodes,
8106 &src[st_idx]->
nodes);
8109 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
8110 re_node_set_free (&merged_set);
8120 update_cur_sifted_state (
const re_match_context_t *mctx,
8121 re_sift_context_t *sctx,
int str_idx,
8122 re_node_set *dest_nodes)
8124 const re_dfa_t *
const dfa = mctx->dfa;
8126 const re_node_set *candidates;
8127 candidates = ((mctx->state_log[str_idx] ==
NULL) ?
NULL 8128 : &mctx->state_log[str_idx]->nodes);
8130 if (dest_nodes->nelem == 0)
8131 sctx->sifted_states[str_idx] =
NULL;
8138 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
8143 if (sctx->limits.nelem)
8145 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
8146 mctx->bkref_ents, str_idx);
8152 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
8157 if (candidates && mctx->state_log[str_idx]->has_backref)
8159 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
8168 add_epsilon_src_nodes (
const re_dfa_t *dfa, re_node_set *dest_nodes,
8169 const re_node_set *candidates)
8174 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
8178 if (!state->inveclosure.alloc)
8180 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
8183 for (i = 0; i < dest_nodes->nelem; i++)
8184 re_node_set_merge (&state->inveclosure,
8185 dfa->inveclosures + dest_nodes->elems[i]);
8187 return re_node_set_add_intersect (dest_nodes, candidates,
8188 &state->inveclosure);
8193 sub_epsilon_src_nodes (
const re_dfa_t *dfa,
int node, re_node_set *dest_nodes,
8194 const re_node_set *candidates)
8198 re_node_set *inv_eclosure = dfa->inveclosures + node;
8199 re_node_set except_nodes;
8200 re_node_set_init_empty (&except_nodes);
8201 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
8203 int cur_node = inv_eclosure->elems[ecl_idx];
8204 if (cur_node == node)
8206 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
8208 int edst1 = dfa->edests[cur_node].elems[0];
8209 int edst2 = ((dfa->edests[cur_node].nelem > 1)
8210 ? dfa->edests[cur_node].elems[1] : -1);
8211 if ((!re_node_set_contains (inv_eclosure, edst1)
8212 && re_node_set_contains (dest_nodes, edst1))
8214 && !re_node_set_contains (inv_eclosure, edst2)
8215 && re_node_set_contains (dest_nodes, edst2)))
8217 err = re_node_set_add_intersect (&except_nodes, candidates,
8218 dfa->inveclosures + cur_node);
8221 re_node_set_free (&except_nodes);
8227 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
8229 int cur_node = inv_eclosure->elems[ecl_idx];
8230 if (!re_node_set_contains (&except_nodes, cur_node))
8232 int idx = re_node_set_contains (dest_nodes, cur_node) - 1;
8233 re_node_set_remove_at (dest_nodes, idx);
8236 re_node_set_free (&except_nodes);
8242 check_dst_limits (
const re_match_context_t *mctx, re_node_set *limits,
8243 int dst_node,
int dst_idx,
int src_node,
int src_idx)
8245 const re_dfa_t *
const dfa = mctx->dfa;
8246 int lim_idx, src_pos, dst_pos;
8248 int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
8249 int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
8250 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
8253 struct re_backref_cache_entry *ent;
8254 ent = mctx->bkref_ents + limits->elems[lim_idx];
8255 subexp_idx = dfa->nodes[ent->node].opr.idx;
8257 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
8258 subexp_idx, dst_node, dst_idx,
8260 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
8261 subexp_idx, src_node, src_idx,
8268 if (src_pos == dst_pos)
8278 check_dst_limits_calc_pos_1 (
const re_match_context_t *mctx,
int boundaries,
8279 int subexp_idx,
int from_node,
int bkref_idx)
8281 const re_dfa_t *
const dfa = mctx->dfa;
8282 const re_node_set *eclosures = dfa->eclosures + from_node;
8287 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
8289 int node = eclosures->elems[node_idx];
8290 switch (dfa->nodes[node].type)
8293 if (bkref_idx != -1)
8295 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
8300 if (ent->node != node)
8303 if (subexp_idx < BITSET_WORD_BITS
8304 && !(ent->eps_reachable_subexps_map
8305 & ((bitset_word_t) 1 << subexp_idx)))
8314 dst = dfa->edests[node].elems[0];
8315 if (dst == from_node)
8324 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
8328 if (cpos == 0 && (boundaries & 2))
8331 if (subexp_idx < BITSET_WORD_BITS)
8332 ent->eps_reachable_subexps_map
8333 &= ~((bitset_word_t) 1 << subexp_idx);
8335 while (ent++->more);
8339 case OP_OPEN_SUBEXP:
8340 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
8344 case OP_CLOSE_SUBEXP:
8345 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
8354 return (boundaries & 2) ? 1 : 0;
8359 check_dst_limits_calc_pos (
const re_match_context_t *mctx,
int limit,
8360 int subexp_idx,
int from_node,
int str_idx,
8363 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
8367 if (str_idx < lim->subexp_from)
8370 if (lim->subexp_to < str_idx)
8374 boundaries = (str_idx == lim->subexp_from);
8375 boundaries |= (str_idx == lim->subexp_to) << 1;
8376 if (boundaries == 0)
8380 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
8381 from_node, bkref_idx);
8389 check_subexp_limits (
const re_dfa_t *dfa, re_node_set *dest_nodes,
8390 const re_node_set *candidates, re_node_set *limits,
8391 struct re_backref_cache_entry *bkref_ents,
int str_idx)
8394 int node_idx, lim_idx;
8396 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
8399 struct re_backref_cache_entry *ent;
8400 ent = bkref_ents + limits->elems[lim_idx];
8402 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
8405 subexp_idx = dfa->nodes[ent->node].opr.idx;
8406 if (ent->subexp_to == str_idx)
8410 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
8412 int node = dest_nodes->elems[node_idx];
8413 re_token_type_t
type = dfa->nodes[node].type;
8414 if (type == OP_OPEN_SUBEXP
8415 && subexp_idx == dfa->nodes[node].opr.idx)
8417 else if (type == OP_CLOSE_SUBEXP
8418 && subexp_idx == dfa->nodes[node].opr.idx)
8426 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
8434 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
8436 int node = dest_nodes->elems[node_idx];
8437 if (!re_node_set_contains (dfa->inveclosures + node,
8439 && !re_node_set_contains (dfa->eclosures + node,
8444 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
8454 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
8456 int node = dest_nodes->elems[node_idx];
8457 re_token_type_t
type = dfa->nodes[node].type;
8458 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
8460 if (subexp_idx != dfa->nodes[node].opr.idx)
8464 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
8477 sift_states_bkref (
const re_match_context_t *mctx, re_sift_context_t *sctx,
8478 int str_idx,
const re_node_set *candidates)
8480 const re_dfa_t *
const dfa = mctx->dfa;
8483 re_sift_context_t local_sctx;
8484 int first_idx = search_cur_bkref_entry (mctx, str_idx);
8486 if (first_idx == -1)
8489 local_sctx.sifted_states =
NULL;
8491 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
8494 re_token_type_t
type;
8495 struct re_backref_cache_entry *entry;
8496 node = candidates->elems[node_idx];
8497 type = dfa->nodes[node].type;
8499 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
8501 if (type != OP_BACK_REF)
8504 entry = mctx->bkref_ents + first_idx;
8505 enabled_idx = first_idx;
8512 re_dfastate_t *cur_state;
8514 if (entry->node != node)
8516 subexp_len = entry->subexp_to - entry->subexp_from;
8517 to_idx = str_idx + subexp_len;
8518 dst_node = (subexp_len ? dfa->nexts[node]
8519 : dfa->edests[node].elems[0]);
8521 if (to_idx > sctx->last_str_idx
8522 || sctx->sifted_states[to_idx] ==
NULL 8523 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
8524 || check_dst_limits (mctx, &sctx->limits, node,
8525 str_idx, dst_node, to_idx))
8528 if (local_sctx.sifted_states ==
NULL)
8531 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
8535 local_sctx.last_node = node;
8536 local_sctx.last_str_idx = str_idx;
8537 ret = re_node_set_insert (&local_sctx.limits, enabled_idx);
8538 if (BE (ret < 0, 0))
8543 cur_state = local_sctx.sifted_states[str_idx];
8544 err = sift_states_backward (mctx, &local_sctx);
8547 if (sctx->limited_states !=
NULL)
8549 err = merge_state_array (dfa, sctx->limited_states,
8550 local_sctx.sifted_states,
8555 local_sctx.sifted_states[str_idx] = cur_state;
8556 re_node_set_remove (&local_sctx.limits, enabled_idx);
8559 entry = mctx->bkref_ents + enabled_idx;
8561 while (enabled_idx++, entry++->more);
8565 if (local_sctx.sifted_states !=
NULL)
8567 re_node_set_free (&local_sctx.limits);
8574 #ifdef RE_ENABLE_I18N 8577 sift_states_iter_mb (
const re_match_context_t *mctx, re_sift_context_t *sctx,
8578 int node_idx,
int str_idx,
int max_str_idx)
8580 const re_dfa_t *
const dfa = mctx->dfa;
8583 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
8584 if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
8585 !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
8586 dfa->nexts[node_idx]))
8605 static re_dfastate_t *
8607 transit_state (
reg_errcode_t *err, re_match_context_t *mctx,
8608 re_dfastate_t *state)
8610 re_dfastate_t **trtable;
8613 #ifdef RE_ENABLE_I18N 8615 if (BE (state->accept_mb, 0))
8617 *err = transit_state_mb (mctx, state);
8627 return transit_state_sb (err, mctx, state);
8631 ch = re_string_fetch_byte (&mctx->input);
8634 trtable = state->trtable;
8635 if (BE (trtable !=
NULL, 1))
8638 trtable = state->word_trtable;
8639 if (BE (trtable !=
NULL, 1))
8641 unsigned int context;
8643 = re_string_context_at (&mctx->input,
8644 re_string_cur_idx (&mctx->input) - 1,
8646 if (IS_WORD_CONTEXT (context))
8647 return trtable[ch + SBC_MAX];
8652 if (!build_trtable (mctx->dfa, state))
8665 merge_state_with_log (
reg_errcode_t *err, re_match_context_t *mctx,
8666 re_dfastate_t *next_state)
8668 const re_dfa_t *
const dfa = mctx->dfa;
8669 int cur_idx = re_string_cur_idx (&mctx->input);
8671 if (cur_idx > mctx->state_log_top)
8673 mctx->state_log[cur_idx] = next_state;
8674 mctx->state_log_top = cur_idx;
8676 else if (mctx->state_log[cur_idx] == 0)
8678 mctx->state_log[cur_idx] = next_state;
8682 re_dfastate_t *pstate;
8683 unsigned int context;
8684 re_node_set next_nodes, *log_nodes, *table_nodes =
NULL;
8689 pstate = mctx->state_log[cur_idx];
8690 log_nodes = pstate->entrance_nodes;
8691 if (next_state !=
NULL)
8693 table_nodes = next_state->entrance_nodes;
8694 *err = re_node_set_init_union (&next_nodes, table_nodes,
8700 next_nodes = *log_nodes;
8704 context = re_string_context_at (&mctx->input,
8705 re_string_cur_idx (&mctx->input) - 1,
8707 next_state = mctx->state_log[cur_idx]
8708 = re_acquire_state_context (err, dfa, &next_nodes, context);
8712 if (table_nodes !=
NULL)
8713 re_node_set_free (&next_nodes);
8716 if (BE (dfa->nbackref, 0) && next_state !=
NULL)
8721 *err = check_subexp_matching_top (mctx, &next_state->nodes,
8727 if (next_state->has_backref)
8729 *err = transit_state_bkref (mctx, &next_state->nodes);
8732 next_state = mctx->state_log[cur_idx];
8744 find_recover_state (
reg_errcode_t *err, re_match_context_t *mctx)
8746 re_dfastate_t *cur_state;
8749 int max = mctx->state_log_top;
8750 int cur_str_idx = re_string_cur_idx (&mctx->input);
8754 if (++cur_str_idx > max)
8756 re_string_skip_bytes (&mctx->input, 1);
8758 while (mctx->state_log[cur_str_idx] ==
NULL);
8760 cur_state = merge_state_with_log (err, mctx,
NULL);
8775 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
8778 const re_dfa_t *
const dfa = mctx->dfa;
8787 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
8789 int node = cur_nodes->elems[node_idx];
8790 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
8791 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
8792 && (dfa->used_bkref_map
8793 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
8795 err = match_ctx_add_subtop (mctx, node, str_idx);
8807 static re_dfastate_t *
8808 transit_state_sb (
reg_errcode_t *err, re_match_context_t *mctx,
8809 re_dfastate_t *state)
8811 const re_dfa_t *
const dfa = mctx->dfa;
8812 re_node_set next_nodes;
8813 re_dfastate_t *next_state;
8814 int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
8815 unsigned int context;
8817 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
8820 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
8822 int cur_node = state->nodes.elems[node_cnt];
8823 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
8825 *err = re_node_set_merge (&next_nodes,
8826 dfa->eclosures + dfa->nexts[cur_node]);
8829 re_node_set_free (&next_nodes);
8834 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
8835 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
8839 re_node_set_free (&next_nodes);
8840 re_string_skip_bytes (&mctx->input, 1);
8845 #ifdef RE_ENABLE_I18N 8848 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
8850 const re_dfa_t *
const dfa = mctx->dfa;
8854 for (i = 0; i < pstate->nodes.nelem; ++
i)
8856 re_node_set dest_nodes, *new_nodes;
8857 int cur_node_idx = pstate->nodes.elems[
i];
8858 int naccepted, dest_idx;
8859 unsigned int context;
8860 re_dfastate_t *dest_state;
8862 if (!dfa->nodes[cur_node_idx].accept_mb)
8865 if (dfa->nodes[cur_node_idx].constraint)
8867 context = re_string_context_at (&mctx->input,
8868 re_string_cur_idx (&mctx->input),
8870 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
8876 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
8877 re_string_cur_idx (&mctx->input));
8882 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
8883 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
8884 : mctx->max_mb_elem_len);
8885 err = clean_state_log_if_needed (mctx, dest_idx);
8889 assert (dfa->nexts[cur_node_idx] != -1);
8891 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
8893 dest_state = mctx->state_log[dest_idx];
8894 if (dest_state ==
NULL)
8895 dest_nodes = *new_nodes;
8898 err = re_node_set_init_union (&dest_nodes,
8899 dest_state->entrance_nodes, new_nodes);
8903 context = re_string_context_at (&mctx->input, dest_idx - 1,
8905 mctx->state_log[dest_idx]
8906 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
8907 if (dest_state !=
NULL)
8908 re_node_set_free (&dest_nodes);
8918 transit_state_bkref (re_match_context_t *mctx,
const re_node_set *
nodes)
8920 const re_dfa_t *
const dfa = mctx->dfa;
8923 int cur_str_idx = re_string_cur_idx (&mctx->input);
8925 for (i = 0; i < nodes->nelem; ++
i)
8927 int dest_str_idx, prev_nelem, bkc_idx;
8928 int node_idx = nodes->elems[
i];
8929 unsigned int context;
8930 const re_token_t *node = dfa->nodes + node_idx;
8931 re_node_set *new_dest_nodes;
8934 if (node->type != OP_BACK_REF)
8937 if (node->constraint)
8939 context = re_string_context_at (&mctx->input, cur_str_idx,
8941 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
8947 bkc_idx = mctx->nbkref_ents;
8948 err = get_subexp (mctx, node_idx, cur_str_idx);
8955 assert (dfa->nexts[node_idx] != -1);
8957 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
8960 re_dfastate_t *dest_state;
8961 struct re_backref_cache_entry *bkref_ent;
8962 bkref_ent = mctx->bkref_ents + bkc_idx;
8963 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
8965 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
8966 new_dest_nodes = (subexp_len == 0
8967 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
8968 : dfa->eclosures + dfa->nexts[node_idx]);
8969 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
8970 - bkref_ent->subexp_from);
8971 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
8973 dest_state = mctx->state_log[dest_str_idx];
8974 prev_nelem = ((mctx->state_log[cur_str_idx] ==
NULL) ? 0
8975 : mctx->state_log[cur_str_idx]->nodes.nelem);
8977 if (dest_state ==
NULL)
8979 mctx->state_log[dest_str_idx]
8980 = re_acquire_state_context (&err, dfa, new_dest_nodes,
8982 if (BE (mctx->state_log[dest_str_idx] ==
NULL 8988 re_node_set dest_nodes;
8989 err = re_node_set_init_union (&dest_nodes,
8990 dest_state->entrance_nodes,
8994 re_node_set_free (&dest_nodes);
8997 mctx->state_log[dest_str_idx]
8998 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
8999 re_node_set_free (&dest_nodes);
9000 if (BE (mctx->state_log[dest_str_idx] ==
NULL 9007 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
9009 err = check_subexp_matching_top (mctx, new_dest_nodes,
9013 err = transit_state_bkref (mctx, new_dest_nodes);
9032 get_subexp (re_match_context_t *mctx,
int bkref_node,
int bkref_str_idx)
9034 const re_dfa_t *
const dfa = mctx->dfa;
9035 int subexp_num, sub_top_idx;
9036 const char *buf = (
const char *) re_string_get_buffer (&mctx->input);
9038 int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
9039 if (cache_idx != -1)
9041 const struct re_backref_cache_entry *entry
9042 = mctx->bkref_ents + cache_idx;
9044 if (entry->node == bkref_node)
9046 while (entry++->more);
9049 subexp_num = dfa->nodes[bkref_node].opr.idx;
9052 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
9055 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
9056 re_sub_match_last_t *sub_last;
9057 int sub_last_idx, sl_str, bkref_str_off;
9059 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
9062 sl_str = sub_top->str_idx;
9063 bkref_str_off = bkref_str_idx;
9066 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
9069 sub_last = sub_top->lasts[sub_last_idx];
9070 sl_str_diff = sub_last->str_idx - sl_str;
9073 if (sl_str_diff > 0)
9075 if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
9078 if (bkref_str_off + sl_str_diff > mctx->input.len)
9081 err = clean_state_log_if_needed (mctx,
9086 buf = (
const char *) re_string_get_buffer (&mctx->input);
9088 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
9092 bkref_str_off += sl_str_diff;
9093 sl_str += sl_str_diff;
9094 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
9099 buf = (
const char *) re_string_get_buffer (&mctx->input);
9107 if (sub_last_idx < sub_top->nlasts)
9109 if (sub_last_idx > 0)
9112 for (; sl_str <= bkref_str_idx; ++sl_str)
9114 int cls_node, sl_str_off;
9115 const re_node_set *
nodes;
9116 sl_str_off = sl_str - sub_top->str_idx;
9121 if (BE (bkref_str_off >= mctx->input.valid_len, 0))
9124 if (bkref_str_off >= mctx->input.len)
9127 err = extend_buffers (mctx);
9131 buf = (
const char *) re_string_get_buffer (&mctx->input);
9133 if (buf [bkref_str_off++] != buf[sl_str - 1])
9137 if (mctx->state_log[sl_str] ==
NULL)
9140 nodes = &mctx->state_log[sl_str]->nodes;
9141 cls_node = find_subexp_node (dfa, nodes, subexp_num,
9145 if (sub_top->path ==
NULL)
9147 sub_top->path = calloc (
sizeof (state_array_t),
9148 sl_str - sub_top->str_idx + 1);
9149 if (sub_top->path ==
NULL)
9154 err = check_arrival (mctx, sub_top->path, sub_top->node,
9155 sub_top->str_idx, cls_node, sl_str,
9161 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
9162 if (BE (sub_last ==
NULL, 0))
9164 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
9181 get_subexp_sub (re_match_context_t *mctx,
const re_sub_match_top_t *sub_top,
9182 re_sub_match_last_t *sub_last,
int bkref_node,
int bkref_str)
9187 err = check_arrival (mctx, &sub_last->path, sub_last->node,
9188 sub_last->str_idx, bkref_node, bkref_str,
9192 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
9196 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
9197 return clean_state_log_if_needed (mctx, to_idx);
9210 find_subexp_node (
const re_dfa_t *dfa,
const re_node_set *
nodes,
9211 int subexp_idx,
int type)
9214 for (cls_idx = 0; cls_idx <
nodes->nelem; ++cls_idx)
9216 int cls_node =
nodes->elems[cls_idx];
9217 const re_token_t *node = dfa->nodes + cls_node;
9218 if (node->type == type
9219 && node->opr.idx == subexp_idx)
9232 check_arrival (re_match_context_t *mctx, state_array_t *
path,
int top_node,
9233 int top_str,
int last_node,
int last_str,
int type)
9235 const re_dfa_t *
const dfa = mctx->dfa;
9237 int subexp_num, backup_cur_idx, str_idx, null_cnt;
9238 re_dfastate_t *cur_state =
NULL;
9239 re_node_set *cur_nodes, next_nodes;
9240 re_dfastate_t **backup_state_log;
9241 unsigned int context;
9243 subexp_num = dfa->nodes[top_node].opr.idx;
9245 if (BE (
path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
9247 re_dfastate_t **new_array;
9248 int old_alloc =
path->alloc;
9249 path->alloc += last_str + mctx->max_mb_elem_len + 1;
9250 new_array = re_realloc (
path->array, re_dfastate_t *,
path->alloc);
9251 if (BE (new_array ==
NULL, 0))
9253 path->alloc = old_alloc;
9256 path->array = new_array;
9257 memset (new_array + old_alloc,
'\0',
9258 sizeof (re_dfastate_t *) * (
path->alloc - old_alloc));
9261 str_idx =
path->next_idx ?
path->next_idx : top_str;
9264 backup_state_log = mctx->state_log;
9265 backup_cur_idx = mctx->input.cur_idx;
9266 mctx->state_log =
path->array;
9267 mctx->input.cur_idx = str_idx;
9270 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
9271 if (str_idx == top_str)
9273 err = re_node_set_init_1 (&next_nodes, top_node);
9276 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
9279 re_node_set_free (&next_nodes);
9285 cur_state = mctx->state_log[str_idx];
9286 if (cur_state && cur_state->has_backref)
9288 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
9293 re_node_set_init_empty (&next_nodes);
9295 if (str_idx == top_str || (cur_state && cur_state->has_backref))
9297 if (next_nodes.nelem)
9299 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
9303 re_node_set_free (&next_nodes);
9307 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
9310 re_node_set_free (&next_nodes);
9313 mctx->state_log[str_idx] = cur_state;
9316 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
9318 re_node_set_empty (&next_nodes);
9319 if (mctx->state_log[str_idx + 1])
9321 err = re_node_set_merge (&next_nodes,
9322 &mctx->state_log[str_idx + 1]->nodes);
9325 re_node_set_free (&next_nodes);
9331 err = check_arrival_add_next_nodes (mctx, str_idx,
9332 &cur_state->non_eps_nodes,
9336 re_node_set_free (&next_nodes);
9341 if (next_nodes.nelem)
9343 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
9346 re_node_set_free (&next_nodes);
9349 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
9353 re_node_set_free (&next_nodes);
9357 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
9358 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
9361 re_node_set_free (&next_nodes);
9364 mctx->state_log[str_idx] = cur_state;
9365 null_cnt = cur_state ==
NULL ? null_cnt + 1 : 0;
9367 re_node_set_free (&next_nodes);
9368 cur_nodes = (mctx->state_log[last_str] ==
NULL ?
NULL 9369 : &mctx->state_log[last_str]->nodes);
9370 path->next_idx = str_idx;
9373 mctx->state_log = backup_state_log;
9374 mctx->input.cur_idx = backup_cur_idx;
9377 if (cur_nodes !=
NULL && re_node_set_contains (cur_nodes, last_node))
9393 check_arrival_add_next_nodes (re_match_context_t *mctx,
int str_idx,
9394 re_node_set *cur_nodes, re_node_set *next_nodes)
9396 const re_dfa_t *
const dfa = mctx->dfa;
9400 re_node_set union_set;
9401 re_node_set_init_empty (&union_set);
9402 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
9405 int cur_node = cur_nodes->elems[cur_idx];
9407 re_token_type_t
type = dfa->nodes[cur_node].type;
9408 assert (!IS_EPSILON_NODE (type));
9410 #ifdef RE_ENABLE_I18N 9412 if (dfa->nodes[cur_node].accept_mb)
9414 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
9418 re_dfastate_t *dest_state;
9419 int next_node = dfa->nexts[cur_node];
9420 int next_idx = str_idx + naccepted;
9421 dest_state = mctx->state_log[next_idx];
9422 re_node_set_empty (&union_set);
9425 err = re_node_set_merge (&union_set, &dest_state->nodes);
9428 re_node_set_free (&union_set);
9432 result = re_node_set_insert (&union_set, next_node);
9433 if (BE (result < 0, 0))
9435 re_node_set_free (&union_set);
9438 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
9440 if (BE (mctx->state_log[next_idx] ==
NULL 9443 re_node_set_free (&union_set);
9450 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
9452 result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
9453 if (BE (result < 0, 0))
9455 re_node_set_free (&union_set);
9460 re_node_set_free (&union_set);
9472 check_arrival_expand_ecl (
const re_dfa_t *dfa, re_node_set *cur_nodes,
9473 int ex_subexp,
int type)
9476 int idx, outside_node;
9477 re_node_set new_nodes;
9479 assert (cur_nodes->nelem);
9481 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
9487 for (idx = 0; idx < cur_nodes->nelem; ++idx)
9489 int cur_node = cur_nodes->elems[idx];
9490 const re_node_set *eclosure = dfa->eclosures + cur_node;
9491 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
9492 if (outside_node == -1)
9495 err = re_node_set_merge (&new_nodes, eclosure);
9498 re_node_set_free (&new_nodes);
9505 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
9509 re_node_set_free (&new_nodes);
9514 re_node_set_free (cur_nodes);
9515 *cur_nodes = new_nodes;
9525 check_arrival_expand_ecl_sub (
const re_dfa_t *dfa, re_node_set *dst_nodes,
9526 int target,
int ex_subexp,
int type)
9529 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
9533 if (dfa->nodes[cur_node].type == type
9534 && dfa->nodes[cur_node].opr.idx == ex_subexp)
9536 if (type == OP_CLOSE_SUBEXP)
9538 err = re_node_set_insert (dst_nodes, cur_node);
9539 if (BE (err == -1, 0))
9544 err = re_node_set_insert (dst_nodes, cur_node);
9545 if (BE (err == -1, 0))
9547 if (dfa->edests[cur_node].nelem == 0)
9549 if (dfa->edests[cur_node].nelem == 2)
9551 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
9552 dfa->edests[cur_node].elems[1],
9557 cur_node = dfa->edests[cur_node].elems[0];
9569 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
9570 int cur_str,
int subexp_num,
int type)
9572 const re_dfa_t *
const dfa = mctx->dfa;
9574 int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
9575 struct re_backref_cache_entry *ent;
9577 if (cache_idx_start == -1)
9581 ent = mctx->bkref_ents + cache_idx_start;
9584 int to_idx, next_node;
9587 if (!re_node_set_contains (cur_nodes, ent->node))
9590 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
9593 if (to_idx == cur_str)
9597 re_node_set new_dests;
9599 next_node = dfa->edests[ent->node].elems[0];
9600 if (re_node_set_contains (cur_nodes, next_node))
9602 err = re_node_set_init_1 (&new_dests, next_node);
9603 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
9604 err3 = re_node_set_merge (cur_nodes, &new_dests);
9605 re_node_set_free (&new_dests);
9618 re_node_set union_set;
9619 next_node = dfa->nexts[ent->node];
9620 if (mctx->state_log[to_idx])
9623 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
9626 err = re_node_set_init_copy (&union_set,
9627 &mctx->state_log[to_idx]->nodes);
9628 ret = re_node_set_insert (&union_set, next_node);
9631 re_node_set_free (&union_set);
9638 err = re_node_set_init_1 (&union_set, next_node);
9642 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
9643 re_node_set_free (&union_set);
9644 if (BE (mctx->state_log[to_idx] ==
NULL 9649 while (ent++->more);
9658 build_trtable (
const re_dfa_t *dfa, re_dfastate_t *state)
9661 int i,
j, ch, need_word_trtable = 0;
9662 bitset_word_t elem, mask;
9663 bool dests_node_malloced =
false;
9664 bool dest_states_malloced =
false;
9666 re_dfastate_t **trtable;
9667 re_dfastate_t **dest_states =
NULL, **dest_states_word, **dest_states_nl;
9668 re_node_set follows, *dests_node;
9670 bitset_t acceptable;
9674 re_node_set dests_node[SBC_MAX];
9675 bitset_t dests_ch[SBC_MAX];
9682 if (__libc_use_alloca (
sizeof (
struct dests_alloc)))
9683 dests_alloc = (
struct dests_alloc *) alloca (
sizeof (
struct dests_alloc));
9686 dests_alloc = re_malloc (
struct dests_alloc, 1);
9687 if (BE (dests_alloc ==
NULL, 0))
9689 dests_node_malloced =
true;
9691 dests_node = dests_alloc->dests_node;
9692 dests_ch = dests_alloc->dests_ch;
9695 state->word_trtable = state->trtable =
NULL;
9699 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
9700 if (BE (ndests <= 0, 0))
9702 if (dests_node_malloced)
9707 state->trtable = (re_dfastate_t **)
9708 calloc (
sizeof (re_dfastate_t *), SBC_MAX);
9714 err = re_node_set_alloc (&follows, ndests + 1);
9718 if (__libc_use_alloca ((
sizeof (re_node_set) +
sizeof (bitset_t)) * SBC_MAX
9719 + ndests * 3 *
sizeof (re_dfastate_t *)))
9720 dest_states = (re_dfastate_t **)
9721 alloca (ndests * 3 *
sizeof (re_dfastate_t *));
9724 dest_states = (re_dfastate_t **)
9725 malloc (ndests * 3 *
sizeof (re_dfastate_t *));
9726 if (BE (dest_states ==
NULL, 0))
9729 if (dest_states_malloced)
9731 re_node_set_free (&follows);
9732 for (i = 0; i < ndests; ++
i)
9733 re_node_set_free (dests_node + i);
9734 if (dests_node_malloced)
9738 dest_states_malloced =
true;
9740 dest_states_word = dest_states + ndests;
9741 dest_states_nl = dest_states_word + ndests;
9742 bitset_empty (acceptable);
9745 for (i = 0; i < ndests; ++
i)
9748 re_node_set_empty (&follows);
9750 for (j = 0; j < dests_node[
i].nelem; ++
j)
9752 next_node = dfa->nexts[dests_node[
i].elems[
j]];
9753 if (next_node != -1)
9755 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
9760 dest_states[
i] = re_acquire_state_context (&err, dfa, &follows, 0);
9765 if (dest_states[i]->has_constraint)
9767 dest_states_word[
i] = re_acquire_state_context (&err, dfa, &follows,
9772 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
9773 need_word_trtable = 1;
9775 dest_states_nl[
i] = re_acquire_state_context (&err, dfa, &follows,
9782 dest_states_word[
i] = dest_states[
i];
9783 dest_states_nl[
i] = dest_states[
i];
9785 bitset_merge (acceptable, dests_ch[i]);
9788 if (!BE (need_word_trtable, 0))
9794 trtable = state->trtable =
9795 (re_dfastate_t **) calloc (
sizeof (re_dfastate_t *), SBC_MAX);
9796 if (BE (trtable ==
NULL, 0))
9800 for (i = 0; i < BITSET_WORDS; ++
i)
9801 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
9803 mask <<= 1, elem >>= 1, ++ch)
9804 if (BE (elem & 1, 0))
9808 for (j = 0; (dests_ch[
j][
i] & mask) == 0; ++
j)
9812 if (dfa->word_char[i] & mask)
9813 trtable[ch] = dest_states_word[
j];
9815 trtable[ch] = dest_states[
j];
9825 trtable = state->word_trtable =
9826 (re_dfastate_t **) calloc (
sizeof (re_dfastate_t *), 2 * SBC_MAX);
9827 if (BE (trtable ==
NULL, 0))
9831 for (i = 0; i < BITSET_WORDS; ++
i)
9832 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
9834 mask <<= 1, elem >>= 1, ++ch)
9835 if (BE (elem & 1, 0))
9839 for (j = 0; (dests_ch[
j][
i] & mask) == 0; ++
j)
9843 trtable[ch] = dest_states[
j];
9844 trtable[ch + SBC_MAX] = dest_states_word[
j];
9849 if (bitset_contain (acceptable, NEWLINE_CHAR))
9852 for (j = 0; j < ndests; ++
j)
9853 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
9856 trtable[NEWLINE_CHAR] = dest_states_nl[
j];
9857 if (need_word_trtable)
9858 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[
j];
9865 if (dest_states_malloced)
9868 re_node_set_free (&follows);
9869 for (i = 0; i < ndests; ++
i)
9870 re_node_set_free (dests_node + i);
9872 if (dests_node_malloced)
9885 group_nodes_into_DFAstates (
const re_dfa_t *dfa,
const re_dfastate_t *state,
9886 re_node_set *dests_node, bitset_t *dests_ch)
9893 const re_node_set *cur_nodes = &state->nodes;
9894 bitset_empty (accepts);
9898 for (i = 0; i < cur_nodes->nelem; ++
i)
9900 re_token_t *node = &dfa->nodes[cur_nodes->elems[
i]];
9901 re_token_type_t
type = node->type;
9902 unsigned int constraint = node->constraint;
9905 if (type == CHARACTER)
9906 bitset_set (accepts, node->opr.c);
9907 else if (type == SIMPLE_BRACKET)
9909 bitset_merge (accepts, node->opr.sbcset);
9911 else if (type == OP_PERIOD)
9913 #ifdef RE_ENABLE_I18N 9914 if (dfa->mb_cur_max > 1)
9915 bitset_merge (accepts, dfa->sb_char);
9918 bitset_set_all (accepts);
9920 bitset_clear (accepts,
'\n');
9922 bitset_clear (accepts,
'\0');
9924 #ifdef RE_ENABLE_I18N 9925 else if (type == OP_UTF8_PERIOD)
9927 memset (accepts,
'\xff',
sizeof (bitset_t) / 2);
9929 bitset_clear (accepts,
'\n');
9931 bitset_clear (accepts,
'\0');
9941 if (constraint & NEXT_NEWLINE_CONSTRAINT)
9943 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
9944 bitset_empty (accepts);
9945 if (accepts_newline)
9946 bitset_set (accepts, NEWLINE_CHAR);
9950 if (constraint & NEXT_ENDBUF_CONSTRAINT)
9952 bitset_empty (accepts);
9956 if (constraint & NEXT_WORD_CONSTRAINT)
9958 bitset_word_t any_set = 0;
9959 if (type == CHARACTER && !node->word_char)
9961 bitset_empty (accepts);
9964 #ifdef RE_ENABLE_I18N 9965 if (dfa->mb_cur_max > 1)
9966 for (j = 0; j < BITSET_WORDS; ++
j)
9967 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
9970 for (j = 0; j < BITSET_WORDS; ++
j)
9971 any_set |= (accepts[j] &= dfa->word_char[j]);
9975 if (constraint & NEXT_NOTWORD_CONSTRAINT)
9977 bitset_word_t any_set = 0;
9978 if (type == CHARACTER && node->word_char)
9980 bitset_empty (accepts);
9983 #ifdef RE_ENABLE_I18N 9984 if (dfa->mb_cur_max > 1)
9985 for (j = 0; j < BITSET_WORDS; ++
j)
9986 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
9989 for (j = 0; j < BITSET_WORDS; ++
j)
9990 any_set |= (accepts[j] &= ~dfa->word_char[j]);
9998 for (j = 0; j < ndests; ++
j)
10003 bitset_word_t has_intersec, not_subset, not_consumed;
10006 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
10011 for (k = 0; k < BITSET_WORDS; ++k)
10012 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
10018 not_subset = not_consumed = 0;
10019 for (k = 0; k < BITSET_WORDS; ++k)
10021 not_subset |= remains[k] = ~accepts[k] & dests_ch[
j][k];
10022 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[
j][k];
10029 bitset_copy (dests_ch[ndests], remains);
10030 bitset_copy (dests_ch[j], intersec);
10031 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
10038 result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
10039 if (BE (result < 0, 0))
10049 bitset_copy (dests_ch[ndests], accepts);
10050 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
10054 bitset_empty (accepts);
10059 for (j = 0; j < ndests; ++
j)
10060 re_node_set_free (dests_node + j);
10064 #ifdef RE_ENABLE_I18N 10075 check_node_accept_bytes (
const re_dfa_t *dfa,
int node_idx,
10076 const re_string_t *input,
int str_idx)
10078 const re_token_t *node = dfa->nodes + node_idx;
10079 int char_len, elem_len;
10082 if (BE (node->type == OP_UTF8_PERIOD, 0))
10084 unsigned char c = re_string_byte_at (input, str_idx),
d;
10085 if (BE (c < 0xc2, 1))
10088 if (str_idx + 2 > input->len)
10091 d = re_string_byte_at (input, str_idx + 1);
10093 return (d < 0x80 || d > 0xbf) ? 0 : 2;
10097 if (c == 0xe0 &&
d < 0xa0)
10103 if (c == 0xf0 &&
d < 0x90)
10109 if (c == 0xf8 &&
d < 0x88)
10115 if (c == 0xfc &&
d < 0x84)
10121 if (str_idx + char_len > input->len)
10124 for (i = 1; i < char_len; ++
i)
10126 d = re_string_byte_at (input, str_idx + i);
10127 if (d < 0x80 || d > 0xbf)
10133 char_len = re_string_char_size_at (input, str_idx);
10134 if (node->type == OP_PERIOD)
10142 re_string_byte_at (input, str_idx) ==
'\n') ||
10144 re_string_byte_at (input, str_idx) ==
'\0'))
10149 elem_len = re_string_elem_size_at (input, str_idx);
10150 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
10153 if (node->type == COMPLEX_BRACKET)
10155 const re_charset_t *cset = node->opr.mbcset;
10157 const unsigned char *pin
10158 = ((
const unsigned char *) re_string_get_buffer (input) + str_idx);
10163 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
10164 ? re_string_wchar_at (input, str_idx) : 0);
10167 for (i = 0; i < cset->nmbchars; ++
i)
10168 if (wc == cset->mbchars[i])
10170 match_len = char_len;
10171 goto check_node_accept_bytes_match;
10174 for (i = 0; i < cset->nchar_classes; ++
i)
10176 wctype_t wt = cset->char_classes[
i];
10177 if (__iswctype (wc, wt))
10179 match_len = char_len;
10180 goto check_node_accept_bytes_match;
10185 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
10188 unsigned int in_collseq = 0;
10190 const unsigned char *weights, *extra;
10191 const char *collseqwc;
10194 # include <locale/weight.h> 10197 if (cset->ncoll_syms)
10198 extra = (
const unsigned char *)
10199 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
10200 for (i = 0; i < cset->ncoll_syms; ++
i)
10202 const unsigned char *coll_sym = extra + cset->coll_syms[
i];
10205 if (*coll_sym != elem_len)
10208 for (j = 0; j < *coll_sym; j++)
10209 if (pin[j] != coll_sym[1 + j])
10211 if (j == *coll_sym)
10215 goto check_node_accept_bytes_match;
10221 if (elem_len <= char_len)
10223 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
10224 in_collseq = __collseq_table_lookup (collseqwc, wc);
10227 in_collseq = find_collation_sequence_value (pin, elem_len);
10230 for (i = 0; i < cset->nranges; ++
i)
10231 if (cset->range_starts[i] <= in_collseq
10232 && in_collseq <= cset->range_ends[i])
10234 match_len = elem_len;
10235 goto check_node_accept_bytes_match;
10239 if (cset->nequiv_classes)
10241 const unsigned char *cp = pin;
10243 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
10244 weights = (
const unsigned char *)
10245 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
10246 extra = (
const unsigned char *)
10247 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
10249 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
10250 idx = findidx (&cp);
10252 for (i = 0; i < cset->nequiv_classes; ++
i)
10254 int32_t equiv_class_idx = cset->equiv_classes[
i];
10255 size_t weight_len = weights[idx];
10256 if (weight_len == weights[equiv_class_idx])
10259 while (cnt <= weight_len
10260 && (weights[equiv_class_idx + 1 + cnt]
10261 == weights[idx + 1 + cnt]))
10263 if (cnt > weight_len)
10265 match_len = elem_len;
10266 goto check_node_accept_bytes_match;
10277 wchar_t cmp_buf[] = {
L'\0',
L'\0', wc, L
'\0', L
'\0', L
'\0'};
10279 wchar_t cmp_buf[] = {L
'\0', L
'\0', L
'\0', L
'\0', L
'\0', L
'\0'};
10282 for (i = 0; i < cset->nranges; ++
i)
10284 cmp_buf[0] = cset->range_starts[
i];
10285 cmp_buf[4] = cset->range_ends[
i];
10286 if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
10287 && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
10289 match_len = char_len;
10290 goto check_node_accept_bytes_match;
10294 check_node_accept_bytes_match:
10295 if (!cset->non_match)
10302 return (elem_len > char_len) ? elem_len : char_len;
10309 static unsigned int 10311 find_collation_sequence_value (
const unsigned char *mbs,
size_t mbs_len)
10313 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
10319 const unsigned char *collseq = (
const unsigned char *)
10320 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
10321 return collseq[mbs[0]];
10328 const unsigned char *extra = (
const unsigned char *)
10329 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
10330 int32_t extrasize = (
const unsigned char *)
10331 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
10333 for (idx = 0; idx < extrasize;)
10335 int mbs_cnt, found = 0;
10338 idx = idx + extra[idx] + 1;
10339 elem_mbs_len = extra[idx++];
10340 if (mbs_len == elem_mbs_len)
10342 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
10343 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
10345 if (mbs_cnt == elem_mbs_len)
10350 idx += elem_mbs_len;
10352 idx = (idx + 3) & ~3;
10356 idx = idx +
sizeof (
uint32_t) * (extra[idx] + 1);
10359 return *(
uint32_t *) (extra + idx);
10374 check_node_accept (
const re_match_context_t *mctx,
const re_token_t *node,
10378 ch = re_string_byte_at (&mctx->input, idx);
10379 switch (node->type)
10382 if (node->opr.c != ch)
10386 case SIMPLE_BRACKET:
10387 if (!bitset_contain (node->opr.sbcset, ch))
10391 #ifdef RE_ENABLE_I18N 10392 case OP_UTF8_PERIOD:
10407 if (node->constraint)
10411 unsigned int context = re_string_context_at (&mctx->input, idx,
10413 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
10424 extend_buffers (re_match_context_t *mctx)
10427 re_string_t *pstr = &mctx->input;
10430 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
10434 if (mctx->state_log !=
NULL)
10440 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
10441 pstr->bufs_len + 1);
10442 if (BE (new_array ==
NULL, 0))
10444 mctx->state_log = new_array;
10450 #ifdef RE_ENABLE_I18N 10451 if (pstr->mb_cur_max > 1)
10453 ret = build_wcs_upper_buffer (pstr);
10459 build_upper_buffer (pstr);
10463 #ifdef RE_ENABLE_I18N 10464 if (pstr->mb_cur_max > 1)
10465 build_wcs_buffer (pstr);
10469 if (pstr->trans !=
NULL)
10470 re_string_translate_buffer (pstr);
10483 match_ctx_init (re_match_context_t *mctx,
int eflags,
int n)
10485 mctx->eflags = eflags;
10486 mctx->match_last = -1;
10489 mctx->bkref_ents = re_malloc (
struct re_backref_cache_entry, n);
10490 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
10491 if (BE (mctx->bkref_ents ==
NULL || mctx->sub_tops ==
NULL, 0))
10499 mctx->abkref_ents =
n;
10500 mctx->max_mb_elem_len = 1;
10501 mctx->asub_tops =
n;
10511 match_ctx_clean (re_match_context_t *mctx)
10514 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
10517 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
10518 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
10520 re_sub_match_last_t *
last = top->lasts[sl_idx];
10521 re_free (last->path.array);
10524 re_free (top->lasts);
10527 re_free (top->path->array);
10528 re_free (top->path);
10533 mctx->nsub_tops = 0;
10534 mctx->nbkref_ents = 0;
10541 match_ctx_free (re_match_context_t *mctx)
10544 match_ctx_clean (mctx);
10545 re_free (mctx->sub_tops);
10546 re_free (mctx->bkref_ents);
10556 match_ctx_add_entry (re_match_context_t *mctx,
int node,
int str_idx,
int from,
10559 if (mctx->nbkref_ents >= mctx->abkref_ents)
10561 struct re_backref_cache_entry* new_entry;
10562 new_entry = re_realloc (mctx->bkref_ents,
struct re_backref_cache_entry,
10563 mctx->abkref_ents * 2);
10564 if (BE (new_entry ==
NULL, 0))
10566 re_free (mctx->bkref_ents);
10569 mctx->bkref_ents = new_entry;
10570 memset (mctx->bkref_ents + mctx->nbkref_ents,
'\0',
10571 sizeof (
struct re_backref_cache_entry) * mctx->abkref_ents);
10572 mctx->abkref_ents *= 2;
10574 if (mctx->nbkref_ents > 0
10575 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
10576 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
10578 mctx->bkref_ents[mctx->nbkref_ents].node = node;
10579 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
10580 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
10581 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
10591 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
10592 = (from == to ? ~0 : 0);
10594 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
10595 if (mctx->max_mb_elem_len < to - from)
10596 mctx->max_mb_elem_len = to - from;
10605 search_cur_bkref_entry (
const re_match_context_t *mctx,
int str_idx)
10608 last = right = mctx->nbkref_ents;
10609 for (left = 0; left <
right;)
10611 mid = (left +
right) / 2;
10612 if (mctx->bkref_ents[mid].str_idx < str_idx)
10617 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
10628 match_ctx_add_subtop (re_match_context_t *mctx,
int node,
int str_idx)
10631 assert (mctx->sub_tops !=
NULL);
10632 assert (mctx->asub_tops > 0);
10634 if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
10636 int new_asub_tops = mctx->asub_tops * 2;
10637 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
10638 re_sub_match_top_t *,
10640 if (BE (new_array ==
NULL, 0))
10642 mctx->sub_tops = new_array;
10643 mctx->asub_tops = new_asub_tops;
10645 mctx->sub_tops[mctx->nsub_tops] = calloc (1,
sizeof (re_sub_match_top_t));
10646 if (BE (mctx->sub_tops[mctx->nsub_tops] ==
NULL, 0))
10648 mctx->sub_tops[mctx->nsub_tops]->node = node;
10649 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
10656 static re_sub_match_last_t *
10658 match_ctx_add_sublast (re_sub_match_top_t *subtop,
int node,
int str_idx)
10660 re_sub_match_last_t *new_entry;
10661 if (BE (subtop->nlasts == subtop->alasts, 0))
10663 int new_alasts = 2 * subtop->alasts + 1;
10664 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
10665 re_sub_match_last_t *,
10667 if (BE (new_array ==
NULL, 0))
10669 subtop->lasts = new_array;
10670 subtop->alasts = new_alasts;
10672 new_entry = calloc (1,
sizeof (re_sub_match_last_t));
10673 if (BE (new_entry !=
NULL, 1))
10675 subtop->lasts[subtop->nlasts] = new_entry;
10676 new_entry->node = node;
10677 new_entry->str_idx = str_idx;
10685 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
10686 re_dfastate_t **limited_sts,
int last_node,
int last_str_idx)
10688 sctx->sifted_states = sifted_sts;
10689 sctx->limited_states = limited_sts;
10690 sctx->last_node = last_node;
10691 sctx->last_str_idx = last_str_idx;
10692 re_node_set_init_empty (&sctx->limits);
10698 # include <shlib-compat.h> 10699 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) 10700 link_warning (re_max_failures,
"the 're_max_failures' variable is obsolete and will go away.")
10701 int re_max_failures = 2000;
void regfree(regex_t *__preg)
reg_syntax_t re_set_syntax(reg_syntax_t __syntax)
bool match(const T &xpr, std::string ref, std::string str_xpr="")
for(size_t i=1;i< poses.size();++i)
std::ofstream out("Result.txt")
#define RE_SYNTAX_POSIX_BASIC
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics set mxtics default set mytics default set mx2tics default set my2tics default set xtics border mirror norotate autofreq set ytics border mirror norotate autofreq set ztics border nomirror norotate autofreq set nox2tics set noy2tics set timestamp bottom norotate offset
#define RE_NO_EMPTY_RANGES
size_t regerror(int __errcode, const regex_t *__restrict __preg, char *__restrict __errbuf, size_t __errbuf_size)
#define RE_CONTEXT_INDEP_OPS
EIGEN_CONSTEXPR Index first(const T &x) EIGEN_NOEXCEPT
static const symbolic::SymbolExpr< internal::symbolic_last_tag > last
#define RE_CONTEXT_INVALID_OPS
static const Similarity3 id
#define RE_UNMATCHED_RIGHT_PAREN_ORD
#define RE_TRANSLATE_TYPE
int re_match(struct re_pattern_buffer *__buffer, const char *__string, int __length, int __start, struct re_registers *__regs)
EIGEN_DEVICE_FUNC const ExpReturnType exp() const
#define RE_HAT_LISTS_NOT_NEWLINE
void re_set_registers(struct re_pattern_buffer *__buffer, struct re_registers *__regs, unsigned int __num_regs, regoff_t *__starts, regoff_t *__ends)
int re_match_2(struct re_pattern_buffer *__buffer, const char *__string1, int __length1, const char *__string2, int __length2, int __start, struct re_registers *__regs, int __stop)
int re_search_2(struct re_pattern_buffer *__buffer, const char *__string1, int __length1, const char *__string2, int __length2, int __start, int __range, struct re_registers *__regs, int __stop)
Matrix stack(size_t nrMatrices,...)
int regexec(const regex_t *__restrict __preg, const char *__restrict __string, size_t __nmatch, regmatch_t __pmatch[__restrict_arr], int __eflags)
#define RE_BACKSLASH_ESCAPE_IN_LISTS
int re_search(struct re_pattern_buffer *__buffer, const char *__string, int __length, int __start, int __range, struct re_registers *__regs)
unsigned fastmap_accurate
const char * re_compile_pattern(const char *__pattern, size_t __length, struct re_pattern_buffer *__buffer)
#define RE_INVALID_INTERVAL_ORD
int regcomp(regex_t *__restrict __preg, const char *__restrict __pattern, int __cflags)
EIGEN_DEVICE_FUNC const Scalar & q
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set offsets
#define RE_CONTEXT_INVALID_DUP
unsigned long int allocated
unsigned long int reg_syntax_t
Double_ range(const Point2_ &p, const Point2_ &q)
static EIGEN_DEPRECATED const end_t end
#define RE_SYNTAX_POSIX_EXTENDED
Annotation for function names.
reg_syntax_t re_syntax_options
size_t len(handle h)
Get the length of a Python object.
def parse(input_path, output_path, quiet=False, generate_xml_flag=True)
#define RE_CARET_ANCHORS_HERE
#define RE_CONTEXT_INDEP_ANCHORS
int re_compile_fastmap(struct re_pattern_buffer *__buffer)
RE_TRANSLATE_TYPE translate