mirror of https://github.com/acidanthera/audk.git
MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3
BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=2066 Update Oniguruma to the latest version, v6.9.3. Oniguruma: https://github.com/kkos/oniguruma This release is a security fix release. It includes the following changes: Fixed CVE-2019-13224; Fixed CVE-2019-13225; Fixed many problems (found by libfuzzer programs). Verify VS2015 and GCC5 builds. Verify the RegularExpressionProtocol GetInfo() and Match() functions. Cc: Jian J Wang <jian.j.wang@intel.com> Cc: Hao A Wu <hao.a.wu@intel.com> Cc: Cinnamon Shia <cinnamon.shia@hpe.com> Signed-off-by: Liming Gao <liming.gao@intel.com> Reviewed-by: Hao A Wu <hao.a.wu@intel.com>
This commit is contained in:
parent
ecc32c90ee
commit
b26691c471
|
@ -113,6 +113,6 @@ OnigEncodingType OnigEncodingASCII = {
|
|||
init,
|
||||
0, /* is_initialized */
|
||||
onigenc_always_true_is_valid_mbc_string,
|
||||
ENC_FLAG_ASCII_COMPATIBLE,
|
||||
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
|
||||
0, 0
|
||||
};
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
oniguruma.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -38,9 +38,9 @@ extern "C" {
|
|||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 6
|
||||
#define ONIGURUMA_VERSION_MINOR 9
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
#define ONIGURUMA_VERSION_TEENY 3
|
||||
|
||||
#define ONIGURUMA_VERSION_INT 60900
|
||||
#define ONIGURUMA_VERSION_INT 60903
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
|
@ -54,6 +54,7 @@ extern "C" {
|
|||
# define PV_(args) args
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_STATIC
|
||||
#ifndef ONIG_EXTERN
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#if defined(ONIGURUMA_EXPORT)
|
||||
|
@ -67,6 +68,9 @@ extern "C" {
|
|||
#ifndef ONIG_EXTERN
|
||||
#define ONIG_EXTERN extern
|
||||
#endif
|
||||
#else
|
||||
#define ONIG_EXTERN extern
|
||||
#endif
|
||||
|
||||
/* PART: character encoding */
|
||||
|
||||
|
@ -387,8 +391,10 @@ typedef unsigned int OnigOptionType;
|
|||
#define ONIG_OPTION_DIGIT_IS_ASCII (ONIG_OPTION_WORD_IS_ASCII << 1)
|
||||
#define ONIG_OPTION_SPACE_IS_ASCII (ONIG_OPTION_DIGIT_IS_ASCII << 1)
|
||||
#define ONIG_OPTION_POSIX_IS_ASCII (ONIG_OPTION_SPACE_IS_ASCII << 1)
|
||||
#define ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER (ONIG_OPTION_POSIX_IS_ASCII << 1)
|
||||
#define ONIG_OPTION_TEXT_SEGMENT_WORD (ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER << 1)
|
||||
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_IS_ASCII /* limit */
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_TEXT_SEGMENT_WORD /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
|
@ -492,10 +498,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (1U<<23) /* \R \r\n else [\x0a-\x0d] */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (1U<<24) /* \N (?-m:.), \O (?m:.) */
|
||||
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */
|
||||
#define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */
|
||||
#define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* obsoleted: use next */
|
||||
#define ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (1U<<26) /* \X \y \Y */
|
||||
#define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... */
|
||||
#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */
|
||||
#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (1U<<29) /* (*name) (*name{a,..}) */
|
||||
#define ONIG_SYN_OP2_OPTION_ONIGURUMA (1U<<30) /* (?imxWDSPy) */
|
||||
|
||||
/* syntax (behavior) */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
|
||||
|
@ -515,6 +523,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
|
||||
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
|
||||
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
|
||||
#define ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (1U<<26)
|
||||
/* syntax (behavior) warning */
|
||||
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
|
||||
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
|
||||
|
@ -764,6 +773,8 @@ int onig_init P_((void));
|
|||
ONIG_EXTERN
|
||||
int EFIAPI onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
|
||||
ONIG_EXTERN
|
||||
int onig_is_error_code_needs_param PV_((int code));
|
||||
ONIG_EXTERN
|
||||
void onig_set_warn_func P_((OnigWarnFunc f));
|
||||
ONIG_EXTERN
|
||||
void onig_set_verb_warn_func P_((OnigWarnFunc f));
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
regenc.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -77,6 +77,17 @@ enc_is_inited(OnigEncoding enc)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int OnigEncInited;
|
||||
|
||||
extern int
|
||||
onigenc_init(void)
|
||||
{
|
||||
if (OnigEncInited != 0) return 0;
|
||||
|
||||
OnigEncInited = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_end(void)
|
||||
{
|
||||
|
@ -86,15 +97,10 @@ onigenc_end(void)
|
|||
InitedList[i].enc = 0;
|
||||
InitedList[i].inited = 0;
|
||||
}
|
||||
|
||||
InitedListNum = 0;
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_init(void)
|
||||
{
|
||||
return 0;
|
||||
OnigEncInited = 0;
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
extern int
|
||||
|
@ -167,7 +173,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
|
|||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
|
||||
const UChar* start, const UChar* s, const UChar** prev)
|
||||
const UChar* start, const UChar* s, const UChar** prev)
|
||||
{
|
||||
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
|
||||
|
||||
|
@ -231,7 +237,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
|
|||
{
|
||||
int n = 0;
|
||||
UChar* q = (UChar* )p;
|
||||
|
||||
|
||||
while (q < end) {
|
||||
q += ONIGENC_MBC_ENC_LEN(enc, q);
|
||||
n++;
|
||||
|
@ -244,7 +250,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
|
|||
{
|
||||
int n = 0;
|
||||
UChar* p = (UChar* )s;
|
||||
|
||||
|
||||
while (1) {
|
||||
if (*p == '\0') {
|
||||
UChar* q;
|
||||
|
@ -511,7 +517,7 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
|
|||
|
||||
extern int
|
||||
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
int i, r;
|
||||
|
@ -533,8 +539,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
|||
|
||||
extern int
|
||||
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
|
||||
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
if (0x41 <= *p && *p <= 0x5a) {
|
||||
items[0].byte_len = 1;
|
||||
|
@ -554,7 +560,7 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
|
|||
|
||||
static int
|
||||
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
static OnigCodePoint ss[] = { 0x73, 0x73 };
|
||||
|
||||
|
@ -600,7 +606,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
|||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
|
||||
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
|
||||
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
|
||||
/* SS */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
|
@ -615,7 +621,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
|||
items[0].code_len = 1;
|
||||
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
|
||||
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
|
||||
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
|
||||
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
|
||||
/* ss */
|
||||
items[1].byte_len = 2;
|
||||
items[1].code_len = 1;
|
||||
|
@ -653,16 +659,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
|||
|
||||
for (i = 0; i < map_size; i++) {
|
||||
if (*p == map[i].from) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = map[i].to;
|
||||
return 1;
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = map[i].to;
|
||||
return 1;
|
||||
}
|
||||
else if (*p == map[i].to) {
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = map[i].from;
|
||||
return 1;
|
||||
items[0].byte_len = 1;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = map[i].from;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -673,8 +679,8 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
|
|||
|
||||
extern int
|
||||
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
|
||||
OnigCodePoint* sb_out ARG_UNUSED,
|
||||
const OnigCodePoint* ranges[] ARG_UNUSED)
|
||||
OnigCodePoint* sb_out ARG_UNUSED,
|
||||
const OnigCodePoint* ranges[] ARG_UNUSED)
|
||||
{
|
||||
return ONIG_NO_SUPPORT_CONFIG;
|
||||
}
|
||||
|
@ -691,7 +697,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
|
|||
/* for single byte encodings */
|
||||
extern int
|
||||
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
|
||||
const UChar*end ARG_UNUSED, UChar* lower)
|
||||
const UChar*end ARG_UNUSED, UChar* lower)
|
||||
{
|
||||
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
|
||||
|
||||
|
@ -702,7 +708,7 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
|
|||
#if 0
|
||||
extern int
|
||||
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end)
|
||||
const UChar** pp, const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
|
@ -738,35 +744,35 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
|
|||
|
||||
extern UChar*
|
||||
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
|
||||
const UChar* s)
|
||||
const UChar* s)
|
||||
{
|
||||
return (UChar* )s;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
|
||||
const UChar* p, const UChar* end)
|
||||
const UChar* p, const UChar* end)
|
||||
{
|
||||
while (p < end) {
|
||||
p += enclen(enc, p);
|
||||
|
@ -805,7 +811,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
|
|||
extern int
|
||||
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
|
||||
const UChar** pp, const UChar* end ARG_UNUSED,
|
||||
UChar* lower)
|
||||
UChar* lower)
|
||||
{
|
||||
int len;
|
||||
const UChar *p = *pp;
|
||||
|
@ -847,6 +853,8 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
|
|||
extern int
|
||||
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
|
||||
if ((code & 0xff00) != 0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
@ -946,7 +954,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
|
|||
|
||||
extern int
|
||||
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
|
@ -961,7 +969,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
|
|||
|
||||
extern int
|
||||
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
regenc.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -114,6 +114,7 @@ struct PropertyNameCtype {
|
|||
/* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
#define USE_UNICODE_WORD_BREAK
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
|
||||
|
@ -121,8 +122,20 @@ struct PropertyNameCtype {
|
|||
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
|
||||
|
||||
#define ENC_SKIP_OFFSET_1_OR_0 7
|
||||
|
||||
#define ENC_FLAG_ASCII_COMPATIBLE (1<<0)
|
||||
#define ENC_FLAG_UNICODE (1<<1)
|
||||
#define ENC_FLAG_SKIP_OFFSET_MASK (7<<2)
|
||||
#define ENC_FLAG_SKIP_OFFSET_0 0
|
||||
#define ENC_FLAG_SKIP_OFFSET_1 (1<<2)
|
||||
#define ENC_FLAG_SKIP_OFFSET_2 (2<<2)
|
||||
#define ENC_FLAG_SKIP_OFFSET_3 (3<<2)
|
||||
#define ENC_FLAG_SKIP_OFFSET_4 (4<<2)
|
||||
#define ENC_FLAG_SKIP_OFFSET_1_OR_0 (ENC_SKIP_OFFSET_1_OR_0<<2)
|
||||
|
||||
#define ENC_GET_SKIP_OFFSET(enc) \
|
||||
(((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK)>>2)
|
||||
|
||||
|
||||
/* for encoding system implementation (internal) */
|
||||
|
@ -162,15 +175,19 @@ extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, u
|
|||
extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len));
|
||||
extern struct PropertyNameCtype* onigenc_sjis_lookup_property_name P_((register const char *str, register size_t len));
|
||||
|
||||
/* in enc/unicode.c */
|
||||
/* in unicode.c */
|
||||
extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
|
||||
extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));
|
||||
extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
|
||||
extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
|
||||
extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
|
||||
|
||||
#ifdef USE_UNICODE_WORD_BREAK
|
||||
extern int onigenc_wb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
|
||||
#endif
|
||||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
|
@ -252,7 +269,7 @@ extern const unsigned short OnigEncAsciiCtypeTable[];
|
|||
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
|
||||
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
|
||||
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
|
||||
|
||||
|
||||
#define ONIGENC_IS_UNICODE_ENCODING(enc) \
|
||||
(((enc)->flag & ENC_FLAG_UNICODE) != 0)
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
regerror.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -31,13 +31,7 @@
|
|||
#if 0
|
||||
#include <stdio.h> /* for vsnprintf() */
|
||||
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
#include <stdarg.h>
|
||||
#define va_init_list(a,b) va_start(a,b)
|
||||
#else
|
||||
#include <varargs.h>
|
||||
#define va_init_list(a,b) va_start(a)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern UChar*
|
||||
|
@ -213,13 +207,17 @@ static void sprint_byte_with_x(char* s, unsigned int v)
|
|||
}
|
||||
|
||||
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
|
||||
UChar buf[], int buf_size, int *is_over)
|
||||
UChar buf[], int buf_size, int *is_over)
|
||||
{
|
||||
int len;
|
||||
UChar *p;
|
||||
OnigCodePoint code;
|
||||
|
||||
if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
||||
if (!s) {
|
||||
len = 0;
|
||||
*is_over = 0;
|
||||
}
|
||||
else if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
||||
p = s;
|
||||
len = 0;
|
||||
while (p < end) {
|
||||
|
@ -249,7 +247,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
|
|||
if (len >= buf_size) break;
|
||||
}
|
||||
|
||||
*is_over = ((p < end) ? 1 : 0);
|
||||
*is_over = p < end;
|
||||
}
|
||||
else {
|
||||
len = MIN((int )(end - s), buf_size);
|
||||
|
@ -261,19 +259,27 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
|
|||
}
|
||||
|
||||
|
||||
extern int
|
||||
onig_is_error_code_needs_param(int code)
|
||||
{
|
||||
switch (code) {
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
|
||||
case ONIGERR_MULTIPLEX_DEFINED_NAME:
|
||||
case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
|
||||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
|
||||
#define MAX_ERROR_PAR_LEN 30
|
||||
|
||||
extern int
|
||||
EFIAPI
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_error_code_to_str(UChar* s, int code, ...)
|
||||
#else
|
||||
onig_error_code_to_str(s, code, va_alist)
|
||||
UChar* s;
|
||||
int code;
|
||||
va_dcl
|
||||
#endif
|
||||
extern int EFIAPI onig_error_code_to_str(UChar* s, int code, ...)
|
||||
{
|
||||
UChar *p, *q;
|
||||
OnigErrorInfo* einfo;
|
||||
|
@ -333,21 +339,8 @@ onig_error_code_to_str(s, code, va_alist)
|
|||
}
|
||||
|
||||
|
||||
void
|
||||
EFIAPI
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
||||
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
|
||||
#else
|
||||
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
||||
UChar buf[];
|
||||
int bufsize;
|
||||
OnigEncoding enc;
|
||||
UChar* pat;
|
||||
UChar* pat_end;
|
||||
const UChar *fmt;
|
||||
va_dcl
|
||||
#endif
|
||||
void EFIAPI onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
|
||||
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
|
||||
{
|
||||
int n, need, len;
|
||||
UChar *p, *s, *bp;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
reggnu.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -39,7 +39,7 @@ re_free_registers(OnigRegion* r)
|
|||
|
||||
extern int
|
||||
re_adjust_startpos(regex_t* reg, const char* string, int size,
|
||||
int startpos, int range)
|
||||
int startpos, int range)
|
||||
{
|
||||
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
|
||||
UChar *p;
|
||||
|
@ -59,20 +59,20 @@ re_adjust_startpos(regex_t* reg, const char* string, int size,
|
|||
|
||||
extern int
|
||||
re_match(regex_t* reg, const char* str, int size, int pos,
|
||||
struct re_registers* regs)
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
|
||||
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
||||
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
|
||||
struct re_registers* regs)
|
||||
struct re_registers* regs)
|
||||
{
|
||||
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
|
||||
(UChar* )(string + startpos),
|
||||
(UChar* )(string + startpos + range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
(UChar* )(string + startpos),
|
||||
(UChar* )(string + startpos + range),
|
||||
regs, ONIG_OPTION_NONE);
|
||||
}
|
||||
|
||||
extern int
|
||||
|
@ -103,9 +103,9 @@ re_alloc_pattern(regex_t** reg)
|
|||
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
||||
|
||||
return onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
|
||||
ONIGENC_CASE_FOLD_DEFAULT,
|
||||
OnigEncDefaultCharEncoding,
|
||||
OnigDefaultSyntax);
|
||||
ONIGENC_CASE_FOLD_DEFAULT,
|
||||
OnigEncDefaultCharEncoding,
|
||||
OnigDefaultSyntax);
|
||||
}
|
||||
|
||||
extern void
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
regint.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -54,44 +54,37 @@
|
|||
#define PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define USE_GOTO_LABELS_AS_VALUES
|
||||
#endif
|
||||
|
||||
/* config */
|
||||
/* spec. config */
|
||||
#define USE_CALL
|
||||
#define USE_CALLOUT
|
||||
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
|
||||
#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
|
||||
#define USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
|
||||
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
|
||||
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
|
||||
|
||||
#define USE_RETRY_LIMIT_IN_MATCH
|
||||
#ifdef USE_GOTO_LABELS_AS_VALUES
|
||||
#define USE_THREADED_CODE
|
||||
#define USE_DIRECT_THREADED_CODE
|
||||
#endif
|
||||
|
||||
/* internal config */
|
||||
#define USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define USE_QUANT_PEEK_NEXT
|
||||
#define USE_ST_LIBRARY
|
||||
|
||||
#define USE_WORD_BEGIN_END /* "\<", "\>" */
|
||||
#define USE_CAPTURE_HISTORY
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_POSIX_API_REGION_OPTION
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
|
||||
#include "regenc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
|
||||
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDARG_H
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#define INIT_MATCH_STACK_SIZE 160
|
||||
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
|
||||
#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
|
||||
|
@ -103,12 +96,6 @@
|
|||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
||||
#define USE_WORD_BEGIN_END /* "\<", "\>" */
|
||||
#define USE_CAPTURE_HISTORY
|
||||
#define USE_VARIABLE_META_CHARS
|
||||
#define USE_POSIX_API_REGION_OPTION
|
||||
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
|
||||
|
||||
#define xmalloc malloc
|
||||
#define xrealloc realloc
|
||||
#define xcalloc calloc
|
||||
|
@ -150,17 +137,10 @@
|
|||
#define xstrcat(dest,src,size) strcat(dest,src)
|
||||
#endif
|
||||
|
||||
|
||||
// #include <stddef.h>
|
||||
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
|
@ -170,11 +150,7 @@
|
|||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
# include <string.h>
|
||||
#else
|
||||
# include <strings.h>
|
||||
#endif
|
||||
#include <string.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
|
@ -226,6 +202,7 @@ typedef UINTN uintptr_t;
|
|||
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
|
||||
#define NULL_UCHARP ((UChar* )0)
|
||||
|
||||
#define CHAR_MAP_SIZE 256
|
||||
#define INFINITE_LEN ONIG_INFINITE_DISTANCE
|
||||
|
||||
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
||||
|
@ -290,64 +267,6 @@ typedef struct {
|
|||
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
const UChar* pattern;
|
||||
const UChar* pattern_end;
|
||||
#ifdef USE_CALLOUT
|
||||
void* tag_table;
|
||||
int callout_num;
|
||||
int callout_list_alloc;
|
||||
CalloutListEntry* callout_list; /* index: callout num */
|
||||
#endif
|
||||
} RegexExt;
|
||||
|
||||
#define REG_EXTP(reg) ((RegexExt* )((reg)->chain))
|
||||
#define REG_EXTPL(reg) ((reg)->chain)
|
||||
|
||||
struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
unsigned char* p; /* compiled pattern */
|
||||
unsigned int used; /* used space for p */
|
||||
unsigned int alloc; /* allocated space for p */
|
||||
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
|
||||
int num_comb_exp_check; /* no longer used (combination explosion check) */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
OnigRepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
void* name_table;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact;
|
||||
unsigned char *exact_end;
|
||||
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
|
||||
int *int_map; /* BM skip for exact_len > 255 */
|
||||
int *int_map_backward; /* BM skip for backward search */
|
||||
OnigLen dmin; /* min-distance of exact or map */
|
||||
OnigLen dmax; /* max-distance of exact or map */
|
||||
|
||||
/* regex_t link chain */
|
||||
struct re_pattern_buffer* chain; /* escape compile-conflict */
|
||||
};
|
||||
|
||||
|
||||
/* stack pop level */
|
||||
enum StackPopLevel {
|
||||
STACK_POP_LEVEL_FREE = 0,
|
||||
|
@ -357,12 +276,13 @@ enum StackPopLevel {
|
|||
|
||||
/* optimize flags */
|
||||
enum OptimizeType {
|
||||
OPTIMIZE_NONE = 0,
|
||||
OPTIMIZE_EXACT = 1, /* Slow Search */
|
||||
OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */
|
||||
OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */
|
||||
OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */
|
||||
OPTIMIZE_MAP = 5 /* char map */
|
||||
OPTIMIZE_NONE = 0,
|
||||
OPTIMIZE_STR, /* Slow Search */
|
||||
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
|
||||
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
|
||||
OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */
|
||||
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
|
||||
OPTIMIZE_MAP /* char map */
|
||||
};
|
||||
|
||||
/* bit status */
|
||||
|
@ -436,8 +356,8 @@ typedef unsigned int MemStatusType;
|
|||
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
|
||||
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
|
||||
|
||||
#define REPEAT_INFINITE -1
|
||||
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
|
||||
#define INFINITE_REPEAT -1
|
||||
#define IS_INFINITE_REPEAT(n) ((n) == INFINITE_REPEAT)
|
||||
|
||||
/* bitset */
|
||||
#define BITS_PER_BYTE 8
|
||||
|
@ -475,7 +395,7 @@ typedef struct _BBuf {
|
|||
unsigned int alloc;
|
||||
} BBuf;
|
||||
|
||||
#define BB_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
|
||||
#define BB_INIT(buf,size) bbuf_init((BBuf* )(buf), (size))
|
||||
|
||||
#define BB_SIZE_INC(buf,inc) do{\
|
||||
(buf)->alloc += (inc);\
|
||||
|
@ -551,32 +471,32 @@ typedef struct _BBuf {
|
|||
|
||||
|
||||
/* has body */
|
||||
#define ANCHOR_PREC_READ (1<<0)
|
||||
#define ANCHOR_PREC_READ_NOT (1<<1)
|
||||
#define ANCHOR_LOOK_BEHIND (1<<2)
|
||||
#define ANCHOR_LOOK_BEHIND_NOT (1<<3)
|
||||
#define ANCR_PREC_READ (1<<0)
|
||||
#define ANCR_PREC_READ_NOT (1<<1)
|
||||
#define ANCR_LOOK_BEHIND (1<<2)
|
||||
#define ANCR_LOOK_BEHIND_NOT (1<<3)
|
||||
/* no body */
|
||||
#define ANCHOR_BEGIN_BUF (1<<4)
|
||||
#define ANCHOR_BEGIN_LINE (1<<5)
|
||||
#define ANCHOR_BEGIN_POSITION (1<<6)
|
||||
#define ANCHOR_END_BUF (1<<7)
|
||||
#define ANCHOR_SEMI_END_BUF (1<<8)
|
||||
#define ANCHOR_END_LINE (1<<9)
|
||||
#define ANCHOR_WORD_BOUNDARY (1<<10)
|
||||
#define ANCHOR_NO_WORD_BOUNDARY (1<<11)
|
||||
#define ANCHOR_WORD_BEGIN (1<<12)
|
||||
#define ANCHOR_WORD_END (1<<13)
|
||||
#define ANCHOR_ANYCHAR_INF (1<<14)
|
||||
#define ANCHOR_ANYCHAR_INF_ML (1<<15)
|
||||
#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16)
|
||||
#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17)
|
||||
#define ANCR_BEGIN_BUF (1<<4)
|
||||
#define ANCR_BEGIN_LINE (1<<5)
|
||||
#define ANCR_BEGIN_POSITION (1<<6)
|
||||
#define ANCR_END_BUF (1<<7)
|
||||
#define ANCR_SEMI_END_BUF (1<<8)
|
||||
#define ANCR_END_LINE (1<<9)
|
||||
#define ANCR_WORD_BOUNDARY (1<<10)
|
||||
#define ANCR_NO_WORD_BOUNDARY (1<<11)
|
||||
#define ANCR_WORD_BEGIN (1<<12)
|
||||
#define ANCR_WORD_END (1<<13)
|
||||
#define ANCR_ANYCHAR_INF (1<<14)
|
||||
#define ANCR_ANYCHAR_INF_ML (1<<15)
|
||||
#define ANCR_TEXT_SEGMENT_BOUNDARY (1<<16)
|
||||
#define ANCR_NO_TEXT_SEGMENT_BOUNDARY (1<<17)
|
||||
|
||||
|
||||
#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF)
|
||||
#define ANCHOR_HAS_BODY(a) ((a)->type < ANCR_BEGIN_BUF)
|
||||
|
||||
#define IS_WORD_ANCHOR_TYPE(type) \
|
||||
((type) == ANCHOR_WORD_BOUNDARY || (type) == ANCHOR_NO_WORD_BOUNDARY || \
|
||||
(type) == ANCHOR_WORD_BEGIN || (type) == ANCHOR_WORD_END)
|
||||
((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \
|
||||
(type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END)
|
||||
|
||||
/* operation code */
|
||||
enum OpCode {
|
||||
|
@ -605,9 +525,6 @@ enum OpCode {
|
|||
OP_CCLASS_NOT,
|
||||
OP_CCLASS_MB_NOT,
|
||||
OP_CCLASS_MIX_NOT,
|
||||
#ifdef USE_OP_CCLASS_NODE
|
||||
OP_CCLASS_NODE, /* pointer to CClassNode node */
|
||||
#endif
|
||||
|
||||
OP_ANYCHAR, /* "." */
|
||||
OP_ANYCHAR_ML, /* "." multi-line */
|
||||
|
@ -625,8 +542,7 @@ enum OpCode {
|
|||
OP_WORD_BEGIN,
|
||||
OP_WORD_END,
|
||||
|
||||
OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
|
||||
OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
|
||||
OP_TEXT_SEGMENT_BOUNDARY,
|
||||
|
||||
OP_BEGIN_BUF,
|
||||
OP_END_BUF,
|
||||
|
@ -642,6 +558,7 @@ enum OpCode {
|
|||
OP_BACKREF_MULTI,
|
||||
OP_BACKREF_MULTI_IC,
|
||||
OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
|
||||
OP_BACKREF_WITH_LEVEL_IC, /* \k<xxx+n>, \k<xxx-n> */
|
||||
OP_BACKREF_CHECK, /* (?(n)), (?('name')) */
|
||||
OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */
|
||||
|
||||
|
@ -657,7 +574,9 @@ enum OpCode {
|
|||
OP_PUSH,
|
||||
OP_PUSH_SUPER,
|
||||
OP_POP_OUT,
|
||||
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
||||
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
|
||||
#endif
|
||||
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
|
||||
OP_REPEAT, /* {n,m} */
|
||||
OP_REPEAT_NG, /* {n,m}? (non greedy) */
|
||||
|
@ -704,6 +623,11 @@ enum UpdateVarType {
|
|||
UPDATE_VAR_RIGHT_RANGE_INIT = 4,
|
||||
};
|
||||
|
||||
enum TextSegmentBoundaryType {
|
||||
EXTENDED_GRAPHEME_CLUSTER_BOUNDARY = 0,
|
||||
WORD_BOUNDARY = 1,
|
||||
};
|
||||
|
||||
typedef int RelAddrType;
|
||||
typedef int AbsAddrType;
|
||||
typedef int LengthType;
|
||||
|
@ -747,13 +671,16 @@ typedef int ModeType;
|
|||
|
||||
|
||||
/* op-code + arg size */
|
||||
#if 0
|
||||
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
|
||||
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
|
||||
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR)
|
||||
#define SIZE_OP_POP_OUT SIZE_OPCODE
|
||||
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
|
||||
#endif
|
||||
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
|
||||
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
|
||||
|
@ -786,6 +713,56 @@ typedef int ModeType;
|
|||
#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM)
|
||||
#endif
|
||||
|
||||
#else /* if 0 */
|
||||
|
||||
/* for relative address increment to go next op. */
|
||||
#define SIZE_INC_OP 1
|
||||
|
||||
#define SIZE_OP_ANYCHAR_STAR 1
|
||||
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT 1
|
||||
#define SIZE_OP_JUMP 1
|
||||
#define SIZE_OP_PUSH 1
|
||||
#define SIZE_OP_PUSH_SUPER 1
|
||||
#define SIZE_OP_POP_OUT 1
|
||||
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
||||
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 1
|
||||
#endif
|
||||
#define SIZE_OP_PUSH_IF_PEEK_NEXT 1
|
||||
#define SIZE_OP_REPEAT 1
|
||||
#define SIZE_OP_REPEAT_INC 1
|
||||
#define SIZE_OP_REPEAT_INC_NG 1
|
||||
#define SIZE_OP_WORD_BOUNDARY 1
|
||||
#define SIZE_OP_PREC_READ_START 1
|
||||
#define SIZE_OP_PREC_READ_NOT_START 1
|
||||
#define SIZE_OP_PREC_READ_END 1
|
||||
#define SIZE_OP_PREC_READ_NOT_END 1
|
||||
#define SIZE_OP_BACKREF 1
|
||||
#define SIZE_OP_FAIL 1
|
||||
#define SIZE_OP_MEMORY_START 1
|
||||
#define SIZE_OP_MEMORY_START_PUSH 1
|
||||
#define SIZE_OP_MEMORY_END_PUSH 1
|
||||
#define SIZE_OP_MEMORY_END_PUSH_REC 1
|
||||
#define SIZE_OP_MEMORY_END 1
|
||||
#define SIZE_OP_MEMORY_END_REC 1
|
||||
#define SIZE_OP_ATOMIC_START 1
|
||||
#define SIZE_OP_ATOMIC_END 1
|
||||
#define SIZE_OP_EMPTY_CHECK_START 1
|
||||
#define SIZE_OP_EMPTY_CHECK_END 1
|
||||
#define SIZE_OP_LOOK_BEHIND 1
|
||||
#define SIZE_OP_LOOK_BEHIND_NOT_START 1
|
||||
#define SIZE_OP_LOOK_BEHIND_NOT_END 1
|
||||
#define SIZE_OP_CALL 1
|
||||
#define SIZE_OP_RETURN 1
|
||||
#define SIZE_OP_PUSH_SAVE_VAL 1
|
||||
#define SIZE_OP_UPDATE_VAR 1
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
#define SIZE_OP_CALLOUT_CONTENTS 1
|
||||
#define SIZE_OP_CALLOUT_NAME 1
|
||||
#endif
|
||||
#endif /* if 0 */
|
||||
|
||||
|
||||
#define MC_ESC(syn) (syn)->meta_char_table.esc
|
||||
#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
|
||||
#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
|
||||
|
@ -837,8 +814,186 @@ typedef int ModeType;
|
|||
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
|
||||
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
|
||||
|
||||
extern void onig_add_end_call(void (*func)(void));
|
||||
|
||||
typedef struct {
|
||||
#ifdef USE_DIRECT_THREADED_CODE
|
||||
const void* opaddr;
|
||||
#else
|
||||
enum OpCode opcode;
|
||||
#endif
|
||||
union {
|
||||
struct {
|
||||
UChar s[16]; /* Now used first 7 bytes only. */
|
||||
} exact;
|
||||
struct {
|
||||
UChar* s;
|
||||
LengthType n; /* number of chars */
|
||||
} exact_n; /* EXACTN, EXACTN_IC, EXACTMB2N, EXACTMB3N */
|
||||
struct {
|
||||
UChar* s;
|
||||
LengthType n; /* number of chars */
|
||||
LengthType len; /* char byte length */
|
||||
} exact_len_n; /* EXACTMBN */
|
||||
struct {
|
||||
BitSetRef bsp;
|
||||
} cclass;
|
||||
struct {
|
||||
void* mb;
|
||||
} cclass_mb;
|
||||
struct {
|
||||
void* mb; /* mb must be same position with cclass_mb for match_at(). */
|
||||
BitSetRef bsp;
|
||||
} cclass_mix;
|
||||
struct {
|
||||
UChar c;
|
||||
} anychar_star_peek_next;
|
||||
struct {
|
||||
ModeType mode;
|
||||
} word_boundary; /* OP_WORD_BOUNDARY, OP_NO_WORD_BOUNDARY, OP_WORD_BEGIN, OP_WORD_END */
|
||||
struct {
|
||||
enum TextSegmentBoundaryType type;
|
||||
int not;
|
||||
} text_segment_boundary;
|
||||
struct {
|
||||
union {
|
||||
MemNumType n1; /* num == 1 */
|
||||
MemNumType* ns; /* num > 1 */
|
||||
};
|
||||
int num;
|
||||
int nest_level;
|
||||
} backref_general; /* BACKREF_MULTI, BACKREF_MULTI_IC, BACKREF_WITH_LEVEL, BACKREF_CHECK, BACKREF_CHECK_WITH_LEVEL, */
|
||||
struct {
|
||||
MemNumType n1;
|
||||
} backref_n; /* BACKREF_N, BACKREF_N_IC */
|
||||
struct {
|
||||
MemNumType num;
|
||||
} memory_start; /* MEMORY_START, MEMORY_START_PUSH */
|
||||
struct {
|
||||
MemNumType num;
|
||||
} memory_end; /* MEMORY_END, MEMORY_END_REC, MEMORY_END_PUSH, MEMORY_END_PUSH_REC */
|
||||
struct {
|
||||
RelAddrType addr;
|
||||
} jump;
|
||||
struct {
|
||||
RelAddrType addr;
|
||||
} push;
|
||||
struct {
|
||||
RelAddrType addr;
|
||||
UChar c;
|
||||
} push_or_jump_exact1;
|
||||
struct {
|
||||
RelAddrType addr;
|
||||
UChar c;
|
||||
} push_if_peek_next;
|
||||
struct {
|
||||
MemNumType id;
|
||||
RelAddrType addr;
|
||||
} repeat; /* REPEAT, REPEAT_NG */
|
||||
struct {
|
||||
MemNumType id;
|
||||
} repeat_inc; /* REPEAT_INC, REPEAT_INC_SG, REPEAT_INC_NG, REPEAT_INC_NG_SG */
|
||||
struct {
|
||||
MemNumType mem;
|
||||
} empty_check_start;
|
||||
struct {
|
||||
MemNumType mem;
|
||||
} empty_check_end; /* EMPTY_CHECK_END, EMPTY_CHECK_END_MEMST, EMPTY_CHECK_END_MEMST_PUSH */
|
||||
struct {
|
||||
RelAddrType addr;
|
||||
} prec_read_not_start;
|
||||
struct {
|
||||
LengthType len;
|
||||
} look_behind;
|
||||
struct {
|
||||
LengthType len;
|
||||
RelAddrType addr;
|
||||
} look_behind_not_start;
|
||||
struct {
|
||||
AbsAddrType addr;
|
||||
} call;
|
||||
struct {
|
||||
SaveType type;
|
||||
MemNumType id;
|
||||
} push_save_val;
|
||||
struct {
|
||||
UpdateVarType type;
|
||||
MemNumType id;
|
||||
} update_var;
|
||||
#ifdef USE_CALLOUT
|
||||
struct {
|
||||
MemNumType num;
|
||||
} callout_contents;
|
||||
struct {
|
||||
MemNumType num;
|
||||
MemNumType id;
|
||||
} callout_name;
|
||||
#endif
|
||||
};
|
||||
} Operation;
|
||||
|
||||
typedef struct {
|
||||
const UChar* pattern;
|
||||
const UChar* pattern_end;
|
||||
#ifdef USE_CALLOUT
|
||||
void* tag_table;
|
||||
int callout_num;
|
||||
int callout_list_alloc;
|
||||
CalloutListEntry* callout_list; /* index: callout num */
|
||||
#endif
|
||||
} RegexExt;
|
||||
|
||||
struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
Operation* ops;
|
||||
#ifdef USE_DIRECT_THREADED_CODE
|
||||
enum OpCode* ocs;
|
||||
#endif
|
||||
Operation* ops_curr;
|
||||
unsigned int ops_used; /* used space for ops */
|
||||
unsigned int ops_alloc; /* allocated space for ops */
|
||||
unsigned char* string_pool;
|
||||
unsigned char* string_pool_end;
|
||||
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
OnigRepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
void* name_table;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact;
|
||||
unsigned char *exact_end;
|
||||
unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */
|
||||
int map_offset;
|
||||
OnigLen dmin; /* min-distance of exact or map */
|
||||
OnigLen dmax; /* max-distance of exact or map */
|
||||
RegexExt* extp;
|
||||
};
|
||||
|
||||
#define COP(reg) ((reg)->ops_curr)
|
||||
#define COP_CURR_OFFSET(reg) ((reg)->ops_used - 1)
|
||||
#define COP_CURR_OFFSET_BYTES(reg, p) \
|
||||
((int )((char* )(&((reg)->ops_curr->p)) - (char* )((reg)->ops)))
|
||||
|
||||
|
||||
extern void onig_add_end_call(void (*func)(void));
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
|
||||
|
@ -854,13 +1009,12 @@ extern int onig_print_statistics P_((FILE* f));
|
|||
|
||||
extern void onig_warning(const char* s);
|
||||
extern UChar* onig_error_code_to_format P_((int code));
|
||||
extern void EFIAPI onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
|
||||
extern int onig_bbuf_init P_((BBuf* buf, int size));
|
||||
extern void EFIAPI onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
|
||||
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
|
||||
extern void onig_transfer P_((regex_t* to, regex_t* from));
|
||||
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
|
||||
extern RegexExt* onig_get_regex_ext(regex_t* reg);
|
||||
extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end);
|
||||
extern int onig_positive_int_multiply(int x, int y);
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4,7 +4,7 @@
|
|||
regparse.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -31,6 +31,10 @@
|
|||
|
||||
#include "regint.h"
|
||||
|
||||
#define NODE_STRING_MARGIN 16
|
||||
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 6
|
||||
|
||||
/* node type */
|
||||
typedef enum {
|
||||
NODE_STRING = 0,
|
||||
|
@ -38,7 +42,7 @@ typedef enum {
|
|||
NODE_CTYPE = 2,
|
||||
NODE_BACKREF = 3,
|
||||
NODE_QUANT = 4,
|
||||
NODE_ENCLOSURE = 5,
|
||||
NODE_BAG = 5,
|
||||
NODE_ANCHOR = 6,
|
||||
NODE_LIST = 7,
|
||||
NODE_ALT = 8,
|
||||
|
@ -46,161 +50,29 @@ typedef enum {
|
|||
NODE_GIMMICK = 10
|
||||
} NodeType;
|
||||
|
||||
enum BagType {
|
||||
BAG_MEMORY = 0,
|
||||
BAG_OPTION = 1,
|
||||
BAG_STOP_BACKTRACK = 2,
|
||||
BAG_IF_ELSE = 3,
|
||||
};
|
||||
|
||||
enum GimmickType {
|
||||
GIMMICK_FAIL = 0,
|
||||
GIMMICK_KEEP = 1,
|
||||
GIMMICK_SAVE = 2,
|
||||
GIMMICK_UPDATE_VAR = 3,
|
||||
GIMMICK_FAIL = 0,
|
||||
GIMMICK_SAVE = 1,
|
||||
GIMMICK_UPDATE_VAR = 2,
|
||||
#ifdef USE_CALLOUT
|
||||
GIMMICK_CALLOUT = 4,
|
||||
GIMMICK_CALLOUT = 3,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/* node type bit */
|
||||
#define NODE_TYPE2BIT(type) (1<<(type))
|
||||
|
||||
#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
|
||||
#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
|
||||
#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
|
||||
#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
|
||||
#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
|
||||
#define NODE_BIT_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
|
||||
#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
|
||||
#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
|
||||
#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
|
||||
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
|
||||
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
|
||||
|
||||
#define NODE_IS_SIMPLE_TYPE(node) \
|
||||
((NODE_TYPE2BIT(NODE_TYPE(node)) & \
|
||||
(NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
|
||||
|
||||
#define NODE_TYPE(node) ((node)->u.base.node_type)
|
||||
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
|
||||
|
||||
#define STR_(node) (&((node)->u.str))
|
||||
#define CCLASS_(node) (&((node)->u.cclass))
|
||||
#define CTYPE_(node) (&((node)->u.ctype))
|
||||
#define BACKREF_(node) (&((node)->u.backref))
|
||||
#define QUANT_(node) (&((node)->u.quant))
|
||||
#define ENCLOSURE_(node) (&((node)->u.enclosure))
|
||||
#define ANCHOR_(node) (&((node)->u.anchor))
|
||||
#define CONS_(node) (&((node)->u.cons))
|
||||
#define CALL_(node) (&((node)->u.call))
|
||||
#define GIMMICK_(node) (&((node)->u.gimmick))
|
||||
|
||||
#define NODE_CAR(node) (CONS_(node)->car)
|
||||
#define NODE_CDR(node) (CONS_(node)->cdr)
|
||||
|
||||
#define CTYPE_ANYCHAR -1
|
||||
#define NODE_IS_ANYCHAR(node) \
|
||||
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
||||
|
||||
#define CTYPE_OPTION(node, reg) \
|
||||
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
|
||||
|
||||
|
||||
#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)
|
||||
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
|
||||
|
||||
enum EnclosureType {
|
||||
ENCLOSURE_MEMORY = 0,
|
||||
ENCLOSURE_OPTION = 1,
|
||||
ENCLOSURE_STOP_BACKTRACK = 2,
|
||||
ENCLOSURE_IF_ELSE = 3,
|
||||
enum BodyEmptyType {
|
||||
BODY_IS_NOT_EMPTY = 0,
|
||||
BODY_IS_EMPTY_POSSIBILITY = 1,
|
||||
BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
|
||||
BODY_IS_EMPTY_POSSIBILITY_REC = 3
|
||||
};
|
||||
|
||||
#define NODE_STRING_MARGIN 16
|
||||
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 6
|
||||
|
||||
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
|
||||
#define NODE_STRING_AMBIG (1<<1)
|
||||
#define NODE_STRING_DONT_GET_OPT_INFO (1<<2)
|
||||
|
||||
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
|
||||
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
|
||||
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
|
||||
#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
|
||||
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
|
||||
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
|
||||
#define NODE_STRING_IS_RAW(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
|
||||
#define NODE_STRING_IS_AMBIG(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
|
||||
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
|
||||
|
||||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
|
||||
enum QuantBodyEmpty {
|
||||
QUANT_BODY_IS_NOT_EMPTY = 0,
|
||||
QUANT_BODY_IS_EMPTY = 1,
|
||||
QUANT_BODY_IS_EMPTY_MEM = 2,
|
||||
QUANT_BODY_IS_EMPTY_REC = 3
|
||||
};
|
||||
|
||||
/* node status bits */
|
||||
#define NODE_ST_MIN_FIXED (1<<0)
|
||||
#define NODE_ST_MAX_FIXED (1<<1)
|
||||
#define NODE_ST_CLEN_FIXED (1<<2)
|
||||
#define NODE_ST_MARK1 (1<<3)
|
||||
#define NODE_ST_MARK2 (1<<4)
|
||||
#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
|
||||
#define NODE_ST_RECURSION (1<<6)
|
||||
#define NODE_ST_CALLED (1<<7)
|
||||
#define NODE_ST_ADDR_FIXED (1<<8)
|
||||
#define NODE_ST_NAMED_GROUP (1<<9)
|
||||
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
|
||||
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
|
||||
#define NODE_ST_IN_MULTI_ENTRY (1<<12)
|
||||
#define NODE_ST_NEST_LEVEL (1<<13)
|
||||
#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
|
||||
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
|
||||
#define NODE_ST_BACKREF (1<<16)
|
||||
#define NODE_ST_CHECKER (1<<17)
|
||||
#define NODE_ST_FIXED_OPTION (1<<18)
|
||||
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
|
||||
#define NODE_ST_SUPER (1<<20)
|
||||
|
||||
|
||||
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
|
||||
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
|
||||
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
|
||||
|
||||
#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
|
||||
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
|
||||
#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
|
||||
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
|
||||
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
|
||||
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
|
||||
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
|
||||
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
|
||||
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
|
||||
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
|
||||
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
|
||||
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
|
||||
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
|
||||
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
|
||||
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
|
||||
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
|
||||
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
|
||||
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
|
||||
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
|
||||
#define NODE_IS_PROHIBIT_RECURSION(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
|
||||
#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
|
||||
|
||||
#define NODE_BODY(node) ((node)->u.base.body)
|
||||
#define NODE_QUANT_BODY(node) ((node)->body)
|
||||
#define NODE_ENCLOSURE_BODY(node) ((node)->body)
|
||||
#define NODE_CALL_BODY(node) ((node)->body)
|
||||
#define NODE_ANCHOR_BODY(node) ((node)->body)
|
||||
|
||||
|
||||
typedef struct {
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
@ -208,7 +80,7 @@ typedef struct {
|
|||
UChar* s;
|
||||
UChar* end;
|
||||
unsigned int flag;
|
||||
int capa; /* (allocated size - 1) or 0: use buf[] */
|
||||
int capacity; /* (allocated size - 1) or 0: use buf[] */
|
||||
UChar buf[NODE_STRING_BUF_SIZE];
|
||||
} StrNode;
|
||||
|
||||
|
@ -229,7 +101,7 @@ typedef struct {
|
|||
int lower;
|
||||
int upper;
|
||||
int greedy;
|
||||
enum QuantBodyEmpty body_empty_info;
|
||||
enum BodyEmptyType emptiness;
|
||||
struct _Node* head_exact;
|
||||
struct _Node* next_head_exact;
|
||||
int is_refered; /* include called node. don't eliminate even if {0} */
|
||||
|
@ -240,7 +112,7 @@ typedef struct {
|
|||
int status;
|
||||
struct _Node* body;
|
||||
|
||||
enum EnclosureType type;
|
||||
enum BagType type;
|
||||
union {
|
||||
struct {
|
||||
int regnum;
|
||||
|
@ -262,7 +134,7 @@ typedef struct {
|
|||
OnigLen max_len; /* max length (byte) */
|
||||
int char_len; /* character length */
|
||||
int opt_count; /* referenced count in optimize_nodes() */
|
||||
} EnclosureNode;
|
||||
} BagNode;
|
||||
|
||||
#ifdef USE_CALL
|
||||
|
||||
|
@ -280,7 +152,7 @@ typedef struct {
|
|||
typedef struct {
|
||||
NodeType node_type;
|
||||
int status;
|
||||
struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
|
||||
struct _Node* body; /* to BagNode : BAG_MEMORY */
|
||||
|
||||
int by_number;
|
||||
int group_num;
|
||||
|
@ -350,7 +222,7 @@ typedef struct _Node {
|
|||
StrNode str;
|
||||
CClassNode cclass;
|
||||
QuantNode quant;
|
||||
EnclosureNode enclosure;
|
||||
BagNode bag;
|
||||
BackRefNode backref;
|
||||
AnchorNode anchor;
|
||||
ConsAltNode cons;
|
||||
|
@ -362,9 +234,134 @@ typedef struct _Node {
|
|||
} u;
|
||||
} Node;
|
||||
|
||||
|
||||
#define NULL_NODE ((Node* )0)
|
||||
|
||||
|
||||
/* node type bit */
|
||||
#define NODE_TYPE2BIT(type) (1<<(type))
|
||||
|
||||
#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
|
||||
#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
|
||||
#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
|
||||
#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
|
||||
#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
|
||||
#define NODE_BIT_BAG NODE_TYPE2BIT(NODE_BAG)
|
||||
#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
|
||||
#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
|
||||
#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
|
||||
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
|
||||
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
|
||||
|
||||
#define NODE_TYPE(node) ((node)->u.base.node_type)
|
||||
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
|
||||
|
||||
#define STR_(node) (&((node)->u.str))
|
||||
#define CCLASS_(node) (&((node)->u.cclass))
|
||||
#define CTYPE_(node) (&((node)->u.ctype))
|
||||
#define BACKREF_(node) (&((node)->u.backref))
|
||||
#define QUANT_(node) (&((node)->u.quant))
|
||||
#define BAG_(node) (&((node)->u.bag))
|
||||
#define ANCHOR_(node) (&((node)->u.anchor))
|
||||
#define CONS_(node) (&((node)->u.cons))
|
||||
#define CALL_(node) (&((node)->u.call))
|
||||
#define GIMMICK_(node) (&((node)->u.gimmick))
|
||||
|
||||
#define NODE_CAR(node) (CONS_(node)->car)
|
||||
#define NODE_CDR(node) (CONS_(node)->cdr)
|
||||
|
||||
#define CTYPE_ANYCHAR -1
|
||||
#define NODE_IS_ANYCHAR(node) \
|
||||
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
||||
|
||||
#define CTYPE_OPTION(node, reg) \
|
||||
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
|
||||
|
||||
|
||||
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
|
||||
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
|
||||
|
||||
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
|
||||
#define NODE_STRING_AMBIG (1<<1)
|
||||
#define NODE_STRING_GOOD_AMBIG (1<<2)
|
||||
#define NODE_STRING_DONT_GET_OPT_INFO (1<<3)
|
||||
|
||||
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
|
||||
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
|
||||
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
|
||||
#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
|
||||
#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
|
||||
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
|
||||
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
|
||||
#define NODE_STRING_IS_RAW(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
|
||||
#define NODE_STRING_IS_AMBIG(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
|
||||
#define NODE_STRING_IS_GOOD_AMBIG(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
|
||||
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
|
||||
|
||||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
|
||||
/* node status bits */
|
||||
#define NODE_ST_MIN_FIXED (1<<0)
|
||||
#define NODE_ST_MAX_FIXED (1<<1)
|
||||
#define NODE_ST_CLEN_FIXED (1<<2)
|
||||
#define NODE_ST_MARK1 (1<<3)
|
||||
#define NODE_ST_MARK2 (1<<4)
|
||||
#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
|
||||
#define NODE_ST_RECURSION (1<<6)
|
||||
#define NODE_ST_CALLED (1<<7)
|
||||
#define NODE_ST_ADDR_FIXED (1<<8)
|
||||
#define NODE_ST_NAMED_GROUP (1<<9)
|
||||
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
|
||||
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
|
||||
#define NODE_ST_IN_MULTI_ENTRY (1<<12)
|
||||
#define NODE_ST_NEST_LEVEL (1<<13)
|
||||
#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
|
||||
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
|
||||
#define NODE_ST_BACKREF (1<<16)
|
||||
#define NODE_ST_CHECKER (1<<17)
|
||||
#define NODE_ST_FIXED_OPTION (1<<18)
|
||||
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
|
||||
#define NODE_ST_SUPER (1<<20)
|
||||
|
||||
|
||||
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
|
||||
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
|
||||
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
|
||||
|
||||
#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
|
||||
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
|
||||
#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
|
||||
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
|
||||
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
|
||||
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
|
||||
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
|
||||
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
|
||||
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
|
||||
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
|
||||
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
|
||||
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
|
||||
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
|
||||
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
|
||||
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
|
||||
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
|
||||
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
|
||||
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
|
||||
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
|
||||
#define NODE_IS_PROHIBIT_RECURSION(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
|
||||
#define NODE_IS_STRICT_REAL_REPEAT(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
|
||||
|
||||
#define NODE_BODY(node) ((node)->u.base.body)
|
||||
#define NODE_QUANT_BODY(node) ((node)->body)
|
||||
#define NODE_BAG_BODY(node) ((node)->body)
|
||||
#define NODE_CALL_BODY(node) ((node)->body)
|
||||
#define NODE_ANCHOR_BODY(node) ((node)->body)
|
||||
|
||||
#define SCANENV_MEMENV_SIZE 8
|
||||
#define SCANENV_MEMENV(senv) \
|
||||
(IS_NOT_NULL((senv)->mem_env_dynamic) ? \
|
||||
|
@ -434,7 +431,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
|||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_enclosure P_((int type));
|
||||
extern Node* onig_node_new_bag P_((enum BagType type));
|
||||
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
|
||||
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
regposerr.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -37,13 +37,7 @@
|
|||
//#include "config.h"
|
||||
#include "onigposix.h"
|
||||
|
||||
#if 0
|
||||
#ifdef HAVE_STRING_H
|
||||
# include <string.h>
|
||||
#else
|
||||
# include <strings.h>
|
||||
#endif
|
||||
#endif
|
||||
//#include <string.h>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define ARG_UNUSED __attribute__ ((unused))
|
||||
|
@ -86,7 +80,7 @@ static char* ESTRING[] = {
|
|||
|
||||
extern size_t
|
||||
regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
|
||||
size_t size)
|
||||
size_t size)
|
||||
{
|
||||
char* s;
|
||||
char tbuf[35];
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
regposix.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -30,6 +30,7 @@
|
|||
#define regex_t onig_regex_t
|
||||
#include "regint.h"
|
||||
#undef regex_t
|
||||
|
||||
#include "onigposix.h"
|
||||
|
||||
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
|
||||
|
@ -148,6 +149,8 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
|
|||
OnigSyntaxType* syntax = OnigDefaultSyntax;
|
||||
OnigOptionType options;
|
||||
|
||||
reg->onig = (void* )0;
|
||||
|
||||
if ((posix_options & REG_EXTENDED) == 0)
|
||||
syntax = ONIG_SYNTAX_POSIX_BASIC;
|
||||
|
||||
|
@ -163,8 +166,8 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
|
|||
|
||||
ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
|
||||
r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
|
||||
options, OnigEncDefaultCharEncoding, syntax,
|
||||
(OnigErrorInfo* )NULL);
|
||||
options, OnigEncDefaultCharEncoding, syntax,
|
||||
(OnigErrorInfo* )NULL);
|
||||
if (r != ONIG_NORMAL) {
|
||||
return onig2posix_error_code(r);
|
||||
}
|
||||
|
@ -175,7 +178,7 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
|
|||
|
||||
extern int
|
||||
regexec(regex_t* reg, const char* str, size_t nmatch,
|
||||
regmatch_t pmatch[], int posix_options)
|
||||
regmatch_t pmatch[], int posix_options)
|
||||
{
|
||||
int r, i, len;
|
||||
UChar* end;
|
||||
|
@ -203,7 +206,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
|
|||
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
|
||||
end = (UChar* )(str + len);
|
||||
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
|
||||
(OnigRegion* )pm, options);
|
||||
(OnigRegion* )pm, options);
|
||||
|
||||
if (r >= 0) {
|
||||
r = 0; /* Match */
|
||||
|
@ -235,6 +238,7 @@ extern void
|
|||
/*
 * POSIX-style release of a compiled pattern.
 *
 * reg: handle whose `onig` member holds the compiled Oniguruma regex
 *      (accessed through the ONIG_C() cast macro).
 *
 * The compiled object is handed to onig_free() and the `onig` member is
 * then reset to a null pointer.
 */
regfree(regex_t* reg)
|
||||
{
|
||||
onig_free(ONIG_C(reg));
|
||||
/* Clear the member so the handle no longer refers to the object just freed. */
reg->onig = (void* )0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -272,7 +276,7 @@ typedef struct {
|
|||
|
||||
static int
|
||||
i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
|
||||
onig_regex_t* reg ARG_UNUSED, void* arg)
|
||||
onig_regex_t* reg ARG_UNUSED, void* arg)
|
||||
{
|
||||
i_wrap* warg = (i_wrap* )arg;
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
regsyntax.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -67,8 +67,8 @@ OnigSyntaxType OnigSyntaxPosixExtended = {
|
|||
ONIG_SYN_OP_BRACE_INTERVAL |
|
||||
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
|
||||
, 0
|
||||
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
|
||||
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
|
||||
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
|
||||
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
|
||||
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
|
||||
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
|
||||
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
|
||||
|
@ -174,11 +174,12 @@ OnigSyntaxType OnigSyntaxPerl = {
|
|||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
|
||||
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
|
||||
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
|
||||
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
|
||||
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
|
||||
|
@ -207,11 +208,12 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
|
|||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
|
||||
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
|
||||
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
|
||||
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
|
||||
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
unicode.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -504,6 +504,281 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef USE_UNICODE_WORD_BREAK
|
||||
|
||||
/*
 * Word-break classes used by the word-boundary code in this file.
 * WB_Any is explicitly 0; wb_get_type() returns it for any code point
 * that is not covered by an entry of WB_RANGES.
 * NOTE(review): the member names look like the Word_Break property values
 * of Unicode UAX #29 -- confirm against unicode_wb_data.c.
 */
enum WB_TYPE {
|
||||
WB_Any = 0,
|
||||
WB_ALetter,
|
||||
WB_CR,
|
||||
WB_Double_Quote,
|
||||
WB_Extend,
|
||||
WB_ExtendNumLet,
|
||||
WB_Format,
|
||||
WB_Hebrew_Letter,
|
||||
WB_Katakana,
|
||||
WB_LF,
|
||||
WB_MidLetter,
|
||||
WB_MidNum,
|
||||
WB_MidNumLet,
|
||||
WB_Newline,
|
||||
WB_Numeric,
|
||||
WB_Regional_Indicator,
|
||||
WB_Single_Quote,
|
||||
WB_WSegSpace,
|
||||
WB_ZWJ,
|
||||
};
|
||||
|
||||
/*
 * One entry of the WB_RANGES table: a code point range and the
 * word-break class assigned to it.  wb_get_type() compares a code point
 * against both `start` and `end`, so both bounds are inclusive.
 */
typedef struct {
|
||||
OnigCodePoint start;
|
||||
OnigCodePoint end;
|
||||
enum WB_TYPE type;
|
||||
} WB_RANGE_TYPE;
|
||||
|
||||
#include "unicode_wb_data.c"
|
||||
|
||||
/*
 * Look up the word-break class of a code point.
 *
 * code: code point to classify.
 *
 * Returns the `type` of the WB_RANGES entry containing `code`, or WB_Any
 * when no entry contains it.
 *
 * NOTE(review): the binary search assumes WB_RANGES (from
 * unicode_wb_data.c, not visible here) is sorted by ascending code point
 * with non-overlapping ranges, and that WB_RANGE_NUM is its entry count --
 * confirm.
 */
static enum WB_TYPE
|
||||
wb_get_type(OnigCodePoint code)
|
||||
{
|
||||
OnigCodePoint low, high, x;
|
||||
enum WB_TYPE type;
|
||||
|
||||
/* Narrow [low, high) down to the first entry whose `end` is >= code. */
for (low = 0, high = (OnigCodePoint )WB_RANGE_NUM; low < high; ) {
|
||||
x = (low + high) >> 1;
|
||||
if (code > WB_RANGES[x].end)
|
||||
low = x + 1;
|
||||
else
|
||||
high = x;
|
||||
}
|
||||
|
||||
/* That entry only matches if it also starts at or before `code`;
   low == WB_RANGE_NUM means every entry ends below `code`. */
type = (low < (OnigCodePoint )WB_RANGE_NUM &&
|
||||
code >= WB_RANGES[low].start) ?
|
||||
WB_RANGES[low].type : WB_Any;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
/* Class predicates over enum WB_TYPE values.  Each macro evaluates its
   argument more than once, so pass side-effect-free expressions only. */
/* True for Extend, Format and ZWJ: the classes skipped over by the WB4
   handling and by wb_get_next_main_code(). */
#define IS_WB_IGNORE_TAIL(t) ((t) == WB_Extend || (t) == WB_Format || (t) == WB_ZWJ)
|
||||
/* True for ALetter or Hebrew_Letter ("AHLetter" in the rule comments). */
#define IS_WB_AHLetter(t) ((t) == WB_ALetter || (t) == WB_Hebrew_Letter)
|
||||
/* True for MidNumLet or Single_Quote ("MidNumLetQ" in the rule comments). */
#define IS_WB_MidNumLetQ(t) ((t) == WB_MidNumLet || (t) == WB_Single_Quote)
|
||||
|
||||
/*
 * Find the next character after `p` whose word-break class is not one of
 * the ignorable tail classes (Extend / Format / ZWJ).
 *
 * enc:   encoding used to measure and decode characters.
 * p:     position of the current character; scanning starts at the
 *        character following it.
 * end:   end of the text; scanning stops once the position reaches it.
 * rcode: receives the code point that was found (written only on success).
 * rtype: receives its word-break class (written only on success).
 *
 * Returns 1 when such a character was found, 0 when `end` was reached
 * first (in which case *rcode and *rtype are left untouched).
 */
static int
|
||||
wb_get_next_main_code(OnigEncoding enc, UChar* p, const UChar* end,
|
||||
OnigCodePoint* rcode, enum WB_TYPE* rtype)
|
||||
{
|
||||
OnigCodePoint code;
|
||||
enum WB_TYPE type;
|
||||
|
||||
while (TRUE) {
|
||||
/* Step over the current character before testing the bound, so the
   character at the incoming `p` itself is never examined. */
p += enclen(enc, p);
|
||||
if (p >= end) break;
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
type = wb_get_type(code);
|
||||
if (! IS_WB_IGNORE_TAIL(type)) {
|
||||
*rcode = code;
|
||||
*rtype = type;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Decide whether position `p` is a word-break position.
 *
 * enc:   encoding used to decode characters and to step backwards.
 * p:     position being tested; the character starting here is the "to"
 *        side of the candidate boundary.
 * prev:  head of the character before `p` (the "from" side), or NULL to
 *        have it computed with onigenc_get_prev_char_head().
 * start: start of the text.
 * end:   end of the text.
 *
 * Returns TRUE when a break is allowed at `p`, FALSE when the two sides
 * must stay together.  The rules are tried in the order written; the
 * first one that applies decides, and the final default is TRUE.
 *
 * `prev` is a by-value parameter that the function moves backwards while
 * evaluating the look-behind rules, so later rules see the rewound
 * position.
 *
 * NOTE(review): the WBn labels in the comments appear to refer to the
 * word boundary rules of Unicode UAX #29 -- confirm the rule set against
 * the Unicode version of unicode_wb_data.c.
 */
extern int
|
||||
onigenc_wb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
const UChar* start, const UChar* end)
|
||||
{
|
||||
int r;
|
||||
UChar* pp;
|
||||
OnigCodePoint cfrom;
|
||||
OnigCodePoint cfrom2;
|
||||
OnigCodePoint cto;
|
||||
OnigCodePoint cto2;
|
||||
enum WB_TYPE from;
|
||||
enum WB_TYPE from2;
|
||||
enum WB_TYPE to;
|
||||
enum WB_TYPE to2;
|
||||
|
||||
/* WB1: sot / Any */
|
||||
if (p == start) return TRUE;
|
||||
/* WB2: Any / eot */
|
||||
if (p == end) return TRUE;
|
||||
|
||||
/* Caller did not supply the previous character: locate it here; if
   there is none, treat the position as a break. */
if (IS_NULL(prev)) {
|
||||
prev = onigenc_get_prev_char_head(enc, start, p);
|
||||
if (IS_NULL(prev)) return TRUE;
|
||||
}
|
||||
|
||||
cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
cto = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
|
||||
from = wb_get_type(cfrom);
|
||||
to = wb_get_type(cto);
|
||||
|
||||
/* short cut */
|
||||
/* 0 is WB_Any: when neither side has a specific class, no rule below
   can apply, so jump straight to the default. */
if (from == 0 && to == 0) goto WB999;
|
||||
|
||||
/* WB3: CR + LF */
|
||||
if (from == WB_CR && to == WB_LF) return FALSE;
|
||||
|
||||
/* WB3a: (Newline|CR|LF) / */
|
||||
if (from == WB_Newline || from == WB_CR || from == WB_LF) return TRUE;
|
||||
/* WB3b: / (Newline|CR|LF) */
|
||||
if (to == WB_Newline || to == WB_CR || to == WB_LF) return TRUE;
|
||||
|
||||
/* WB3c: ZWJ + {Extended_Pictographic} */
|
||||
if (from == WB_ZWJ) {
|
||||
if (onigenc_unicode_is_code_ctype(cto, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* WB3d: WSegSpace + WSegSpace */
|
||||
if (from == WB_WSegSpace && to == WB_WSegSpace) return FALSE;
|
||||
|
||||
/* WB4: X (Extend|Format|ZWJ)* -> X */
|
||||
if (IS_WB_IGNORE_TAIL(to)) return FALSE;
|
||||
/* The "from" side is an ignorable character: rewind `prev` to the
   nearest earlier character that is not ignorable (or to the first
   character of the text) and use its class as `from` from here on. */
if (IS_WB_IGNORE_TAIL(from)) {
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from = wb_get_type(cfrom);
|
||||
if (! IS_WB_IGNORE_TAIL(from))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_WB_AHLetter(from)) {
|
||||
/* WB5: AHLetter + AHLetter */
|
||||
if (IS_WB_AHLetter(to)) return FALSE;
|
||||
|
||||
/* WB6: AHLetter + (MidLetter | MidNumLetQ) AHLetter */
|
||||
if (to == WB_MidLetter || IS_WB_MidNumLetQ(to)) {
|
||||
/* Look ahead past `p` (skipping ignorable characters) for the
   character that follows the mid-letter. */
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
|
||||
if (r == 1) {
|
||||
if (IS_WB_AHLetter(to2)) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WB7: AHLetter (MidLetter | MidNumLetQ) + AHLetter */
|
||||
if (from == WB_MidLetter || IS_WB_MidNumLetQ(from)) {
|
||||
if (IS_WB_AHLetter(to)) {
|
||||
/* Look behind `prev` for the nearest non-ignorable character;
   from2 stays WB_Any when there is no earlier character. */
from2 = WB_Any;
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (! IS_WB_IGNORE_TAIL(from2))
|
||||
break;
|
||||
}
|
||||
|
||||
if (IS_WB_AHLetter(from2)) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (from == WB_Hebrew_Letter) {
|
||||
/* WB7a: Hebrew_Letter + Single_Quote */
|
||||
if (to == WB_Single_Quote) return FALSE;
|
||||
|
||||
/* WB7b: Hebrew_Letter + Double_Quote Hebrew_Letter */
|
||||
if (to == WB_Double_Quote) {
|
||||
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
|
||||
if (r == 1) {
|
||||
if (to2 == WB_Hebrew_Letter) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WB7c: Hebrew_Letter Double_Quote + Hebrew_Letter */
|
||||
if (from == WB_Double_Quote) {
|
||||
if (to == WB_Hebrew_Letter) {
|
||||
/* Same look-behind as in WB7: nearest non-ignorable character
   before `prev`. */
from2 = WB_Any;
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (! IS_WB_IGNORE_TAIL(from2))
|
||||
break;
|
||||
}
|
||||
|
||||
if (from2 == WB_Hebrew_Letter) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (to == WB_Numeric) {
|
||||
/* WB8: Numeric + Numeric */
|
||||
if (from == WB_Numeric) return FALSE;
|
||||
|
||||
/* WB9: AHLetter + Numeric */
|
||||
if (IS_WB_AHLetter(from)) return FALSE;
|
||||
|
||||
/* WB11: Numeric (MidNum | MidNumLetQ) + Numeric */
|
||||
if (from == WB_MidNum || IS_WB_MidNumLetQ(from)) {
|
||||
/* Look behind `prev` for the nearest non-ignorable character. */
from2 = WB_Any;
|
||||
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
prev = pp;
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (! IS_WB_IGNORE_TAIL(from2))
|
||||
break;
|
||||
}
|
||||
|
||||
if (from2 == WB_Numeric) return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (from == WB_Numeric) {
|
||||
/* WB10: Numeric + AHLetter */
|
||||
if (IS_WB_AHLetter(to)) return FALSE;
|
||||
|
||||
/* WB12: Numeric + (MidNum | MidNumLetQ) Numeric */
|
||||
if (to == WB_MidNum || IS_WB_MidNumLetQ(to)) {
|
||||
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
|
||||
if (r == 1) {
|
||||
if (to2 == WB_Numeric) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* WB13: Katakana + Katakana */
|
||||
if (from == WB_Katakana && to == WB_Katakana) return FALSE;
|
||||
|
||||
/* WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) + ExtendNumLet */
|
||||
if (IS_WB_AHLetter(from) || from == WB_Numeric || from == WB_Katakana
|
||||
|| from == WB_ExtendNumLet) {
|
||||
if (to == WB_ExtendNumLet) return FALSE;
|
||||
}
|
||||
|
||||
/* WB13b: ExtendNumLet + (AHLetter | Numeric | Katakana) */
|
||||
if (from == WB_ExtendNumLet) {
|
||||
if (IS_WB_AHLetter(to) || to == WB_Numeric || to == WB_Katakana)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/* WB15: sot (RI RI)* RI + RI */
|
||||
/* WB16: [^RI] (RI RI)* RI + RI */
|
||||
if (from == WB_Regional_Indicator && to == WB_Regional_Indicator) {
|
||||
/* n counts the Regional_Indicator characters that directly precede
   `prev`.  An even count means `prev` is the first of a pair, so
   `prev` and `p` are kept together.  Unlike the look-behind loops
   above, this walk does not skip ignorable characters: any other
   class ends the count. */
int n = 0;
|
||||
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
from2 = wb_get_type(cfrom2);
|
||||
if (from2 != WB_Regional_Indicator)
|
||||
break;
|
||||
|
||||
n++;
|
||||
}
|
||||
if ((n % 2) == 0) return FALSE;
|
||||
}
|
||||
|
||||
WB999:
|
||||
/* WB999: Any / Any */
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#endif /* USE_UNICODE_WORD_BREAK */
|
||||
|
||||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
|
||||
enum EGCB_BREAK_TYPE {
|
||||
|
@ -657,8 +932,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
|||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
|
||||
if (from == 0x000d && to == 0x000a) return 0;
|
||||
else return 1;
|
||||
return from != 0x000d || to != 0x000a;
|
||||
}
|
||||
|
||||
btype = unicode_egcb_is_break_2code(from, to);
|
||||
|
@ -701,8 +975,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
|||
return 1;
|
||||
|
||||
#else
|
||||
if (from == 0x000d && to == 0x000a) return 0;
|
||||
else return 1;
|
||||
return from != 0x000d || to != 0x000a;
|
||||
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
||||
}
|
||||
|
||||
|
@ -729,6 +1002,7 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
|
|||
int len;
|
||||
int c;
|
||||
char* s;
|
||||
UChar* uname;
|
||||
|
||||
if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
|
||||
return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
|
||||
|
@ -741,10 +1015,11 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
|
|||
if (s == 0)
|
||||
return ONIGERR_MEMORY;
|
||||
|
||||
uname = (UChar* )name;
|
||||
n = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
c = name[i];
|
||||
if (c <= 0 || c >= 0x80) {
|
||||
c = uname[i];
|
||||
if (c < 0x20 || c >= 0x80) {
|
||||
xfree(s);
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define GRAPHEME_BREAK_PROPERTY_VERSION 11_0_0
|
||||
#define GRAPHEME_BREAK_PROPERTY_VERSION 12_1_0
|
||||
|
||||
/*
|
||||
CR
|
||||
|
@ -43,7 +43,7 @@ V
|
|||
ZWJ
|
||||
*/
|
||||
|
||||
static int EGCB_RANGE_NUM = 1321;
|
||||
static int EGCB_RANGE_NUM = 1326;
|
||||
static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
||||
{0x000000, 0x000009, EGCB_Control },
|
||||
{0x00000a, 0x00000a, EGCB_LF },
|
||||
|
@ -197,8 +197,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x000e47, 0x000e4e, EGCB_Extend },
|
||||
{0x000eb1, 0x000eb1, EGCB_Extend },
|
||||
{0x000eb3, 0x000eb3, EGCB_SpacingMark },
|
||||
{0x000eb4, 0x000eb9, EGCB_Extend },
|
||||
{0x000ebb, 0x000ebc, EGCB_Extend },
|
||||
{0x000eb4, 0x000ebc, EGCB_Extend },
|
||||
{0x000ec8, 0x000ecd, EGCB_Extend },
|
||||
{0x000f18, 0x000f19, EGCB_Extend },
|
||||
{0x000f35, 0x000f35, EGCB_Extend },
|
||||
|
@ -271,9 +270,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x001ab0, 0x001abe, EGCB_Extend },
|
||||
{0x001b00, 0x001b03, EGCB_Extend },
|
||||
{0x001b04, 0x001b04, EGCB_SpacingMark },
|
||||
{0x001b34, 0x001b34, EGCB_Extend },
|
||||
{0x001b35, 0x001b35, EGCB_SpacingMark },
|
||||
{0x001b36, 0x001b3a, EGCB_Extend },
|
||||
{0x001b34, 0x001b3a, EGCB_Extend },
|
||||
{0x001b3b, 0x001b3b, EGCB_SpacingMark },
|
||||
{0x001b3c, 0x001b3c, EGCB_Extend },
|
||||
{0x001b3d, 0x001b41, EGCB_SpacingMark },
|
||||
|
@ -305,7 +302,6 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x001ce1, 0x001ce1, EGCB_SpacingMark },
|
||||
{0x001ce2, 0x001ce8, EGCB_Extend },
|
||||
{0x001ced, 0x001ced, EGCB_Extend },
|
||||
{0x001cf2, 0x001cf3, EGCB_SpacingMark },
|
||||
{0x001cf4, 0x001cf4, EGCB_Extend },
|
||||
{0x001cf7, 0x001cf7, EGCB_SpacingMark },
|
||||
{0x001cf8, 0x001cf9, EGCB_Extend },
|
||||
|
@ -348,8 +344,8 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x00a9b4, 0x00a9b5, EGCB_SpacingMark },
|
||||
{0x00a9b6, 0x00a9b9, EGCB_Extend },
|
||||
{0x00a9ba, 0x00a9bb, EGCB_SpacingMark },
|
||||
{0x00a9bc, 0x00a9bc, EGCB_Extend },
|
||||
{0x00a9bd, 0x00a9c0, EGCB_SpacingMark },
|
||||
{0x00a9bc, 0x00a9bd, EGCB_Extend },
|
||||
{0x00a9be, 0x00a9c0, EGCB_SpacingMark },
|
||||
{0x00a9e5, 0x00a9e5, EGCB_Extend },
|
||||
{0x00aa29, 0x00aa2e, EGCB_Extend },
|
||||
{0x00aa2f, 0x00aa30, EGCB_SpacingMark },
|
||||
|
@ -1177,7 +1173,6 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x00d789, 0x00d7a3, EGCB_LVT },
|
||||
{0x00d7b0, 0x00d7c6, EGCB_V },
|
||||
{0x00d7cb, 0x00d7fb, EGCB_T },
|
||||
{0x00d800, 0x00dfff, EGCB_Control },
|
||||
{0x00fb1e, 0x00fb1e, EGCB_Extend },
|
||||
{0x00fe00, 0x00fe0f, EGCB_Extend },
|
||||
{0x00fe20, 0x00fe2f, EGCB_Extend },
|
||||
|
@ -1291,6 +1286,12 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x01182f, 0x011837, EGCB_Extend },
|
||||
{0x011838, 0x011838, EGCB_SpacingMark },
|
||||
{0x011839, 0x01183a, EGCB_Extend },
|
||||
{0x0119d1, 0x0119d3, EGCB_SpacingMark },
|
||||
{0x0119d4, 0x0119d7, EGCB_Extend },
|
||||
{0x0119da, 0x0119db, EGCB_Extend },
|
||||
{0x0119dc, 0x0119df, EGCB_SpacingMark },
|
||||
{0x0119e0, 0x0119e0, EGCB_Extend },
|
||||
{0x0119e4, 0x0119e4, EGCB_SpacingMark },
|
||||
{0x011a01, 0x011a0a, EGCB_Extend },
|
||||
{0x011a33, 0x011a38, EGCB_Extend },
|
||||
{0x011a39, 0x011a39, EGCB_SpacingMark },
|
||||
|
@ -1300,7 +1301,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x011a51, 0x011a56, EGCB_Extend },
|
||||
{0x011a57, 0x011a58, EGCB_SpacingMark },
|
||||
{0x011a59, 0x011a5b, EGCB_Extend },
|
||||
{0x011a86, 0x011a89, EGCB_Prepend },
|
||||
{0x011a84, 0x011a89, EGCB_Prepend },
|
||||
{0x011a8a, 0x011a96, EGCB_Extend },
|
||||
{0x011a97, 0x011a97, EGCB_SpacingMark },
|
||||
{0x011a98, 0x011a99, EGCB_Extend },
|
||||
|
@ -1330,9 +1331,11 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x011d97, 0x011d97, EGCB_Extend },
|
||||
{0x011ef3, 0x011ef4, EGCB_Extend },
|
||||
{0x011ef5, 0x011ef6, EGCB_SpacingMark },
|
||||
{0x013430, 0x013438, EGCB_Control },
|
||||
{0x016af0, 0x016af4, EGCB_Extend },
|
||||
{0x016b30, 0x016b36, EGCB_Extend },
|
||||
{0x016f51, 0x016f7e, EGCB_SpacingMark },
|
||||
{0x016f4f, 0x016f4f, EGCB_Extend },
|
||||
{0x016f51, 0x016f87, EGCB_SpacingMark },
|
||||
{0x016f8f, 0x016f92, EGCB_Extend },
|
||||
{0x01bc9d, 0x01bc9e, EGCB_Extend },
|
||||
{0x01bca0, 0x01bca3, EGCB_Control },
|
||||
|
@ -1357,6 +1360,8 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
|
|||
{0x01e01b, 0x01e021, EGCB_Extend },
|
||||
{0x01e023, 0x01e024, EGCB_Extend },
|
||||
{0x01e026, 0x01e02a, EGCB_Extend },
|
||||
{0x01e130, 0x01e136, EGCB_Extend },
|
||||
{0x01e2ec, 0x01e2ef, EGCB_Extend },
|
||||
{0x01e8d0, 0x01e8d6, EGCB_Extend },
|
||||
{0x01e944, 0x01e94a, EGCB_Extend },
|
||||
{0x01f1e6, 0x01f1ff, EGCB_Regional_Indicator },
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +1,7 @@
|
|||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
|
||||
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
|
||||
/* Computed positions: -k'3,6' */
|
||||
|
||||
|
||||
|
@ -225,5 +225,3 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])
|
|||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
|
||||
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
|
||||
/* Computed positions: -k'3,6,9' */
|
||||
|
||||
|
||||
|
@ -135,5 +135,3 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])
|
|||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
utf16_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
|
|||
/*
 * Number of bytes needed to encode `code` in UTF-16LE.
 *
 * In the if/else form below: returns 2 for code points up to 0xffff,
 * 4 for code points from 0x10000 through 0x10ffff, and
 * ONIGERR_INVALID_CODE_POINT_VALUE for anything above 0x10ffff.
 */
static int
|
||||
utf16le_code_to_mbclen(OnigCodePoint code)
|
||||
{
|
||||
/* NOTE(review): this one-line return and the if/else that follows are
   shown together by the diff view; presumably this line is the removed
   (pre-update) form, which has no upper-bound check, and the if/else is
   its replacement -- confirm against the actual utf16_le.c. */
return (code > 0xffff ? 4 : 2);
|
||||
if (code > 0xffff) {
|
||||
/* Values above 0x10ffff are reported as an error rather than a length. */
if (code > 0x10ffff)
|
||||
return ONIGERR_INVALID_CODE_POINT_VALUE;
|
||||
else
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
|
|||
const UChar* end1 = end - 1;
|
||||
|
||||
while (p < end1) {
|
||||
p += utf16le_mbc_enc_len(p);
|
||||
int len = utf16le_mbc_enc_len(p);
|
||||
if (len == 4) {
|
||||
if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
|
||||
return FALSE;
|
||||
|
||||
p += len;
|
||||
}
|
||||
|
||||
if (p != end)
|
||||
|
@ -184,7 +201,7 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
|||
|
||||
static int
|
||||
utf16le_mbc_case_fold(OnigCaseFoldType flag,
|
||||
const UChar** pp, const UChar* end, UChar* fold)
|
||||
const UChar** pp, const UChar* end, UChar* fold)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
|
@ -207,13 +224,13 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
|
|||
}
|
||||
else
|
||||
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
|
||||
fold);
|
||||
fold);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int
|
||||
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
|
||||
const UChar* end)
|
||||
const UChar* end)
|
||||
{
|
||||
const UChar* p = *pp;
|
||||
|
||||
|
@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
|
|||
s--;
|
||||
}
|
||||
|
||||
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
|
||||
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
|
||||
UTF16_IS_SURROGATE_FIRST(*(s-1)))
|
||||
s -= 2;
|
||||
|
||||
return (UChar* )s;
|
||||
|
@ -263,7 +281,7 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
|
||||
flag, p, end, items);
|
||||
flag, p, end, items);
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingUTF16_LE = {
|
||||
|
@ -286,6 +304,6 @@ OnigEncodingType OnigEncodingUTF16_LE = {
|
|||
init,
|
||||
0, /* is_initialized */
|
||||
is_valid_mbc_string,
|
||||
ENC_FLAG_UNICODE,
|
||||
ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
|
||||
0, 0
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue