MdeModulePkg RegularExpressionDxe: Update Oniguruma from v6.9.0 to v6.9.3

BZ: https://bugzilla.tianocore.org/show_bug.cgi?id=2066
Update Oniguruma to the latest version, v6.9.3.
Oniguruma: https://github.com/kkos/oniguruma
This is a security fix release. It includes the following changes:
- Fixed CVE-2019-13224
- Fixed CVE-2019-13225
- Fixed many problems (found by libFuzzer programs)

Verified the VS2015 and GCC5 builds.
Verified the RegularExpressionProtocol GetInfo() and Match() functions.

Cc: Jian J Wang <jian.j.wang@intel.com>
Cc: Hao A Wu <hao.a.wu@intel.com>
Cc: Cinnamon Shia <cinnamon.shia@hpe.com>
Signed-off-by: Liming Gao <liming.gao@intel.com>
Reviewed-by: Hao A Wu <hao.a.wu@intel.com>
This commit is contained in:
Liming Gao 2019-08-08 19:53:03 +08:00
parent ecc32c90ee
commit b26691c471
25 changed files with 16407 additions and 13648 deletions

View File

@ -113,6 +113,6 @@ OnigEncodingType OnigEncodingASCII = {
init,
0, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
ENC_FLAG_ASCII_COMPATIBLE,
ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};

View File

@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -38,9 +38,9 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 9
#define ONIGURUMA_VERSION_TEENY 0
#define ONIGURUMA_VERSION_TEENY 3
#define ONIGURUMA_VERSION_INT 60900
#define ONIGURUMA_VERSION_INT 60903
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@ -54,6 +54,7 @@ extern "C" {
# define PV_(args) args
#endif
#ifndef ONIG_STATIC
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(ONIGURUMA_EXPORT)
@ -67,6 +68,9 @@ extern "C" {
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
#else
#define ONIG_EXTERN extern
#endif
/* PART: character encoding */
@ -387,8 +391,10 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_DIGIT_IS_ASCII (ONIG_OPTION_WORD_IS_ASCII << 1)
#define ONIG_OPTION_SPACE_IS_ASCII (ONIG_OPTION_DIGIT_IS_ASCII << 1)
#define ONIG_OPTION_POSIX_IS_ASCII (ONIG_OPTION_SPACE_IS_ASCII << 1)
#define ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER (ONIG_OPTION_POSIX_IS_ASCII << 1)
#define ONIG_OPTION_TEXT_SEGMENT_WORD (ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER << 1)
#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_IS_ASCII /* limit */
#define ONIG_OPTION_MAXBIT ONIG_OPTION_TEXT_SEGMENT_WORD /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@ -492,10 +498,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (1U<<23) /* \R \r\n else [\x0a-\x0d] */
#define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (1U<<24) /* \N (?-m:.), \O (?m:.) */
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */
#define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */
#define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* obsoleted: use next */
#define ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (1U<<26) /* \X \y \Y */
#define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... */
#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */
#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (1U<<29) /* (*name) (*name{a,..}) */
#define ONIG_SYN_OP2_OPTION_ONIGURUMA (1U<<30) /* (?imxWDSPy) */
/* syntax (behavior) */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
@ -515,6 +523,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
#define ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (1U<<26)
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
@ -764,6 +773,8 @@ int onig_init P_((void));
ONIG_EXTERN
int EFIAPI onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
ONIG_EXTERN
int onig_is_error_code_needs_param PV_((int code));
ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -77,6 +77,17 @@ enc_is_inited(OnigEncoding enc)
return 0;
}
/* Non-zero once onigenc_init() has run; lets repeated calls be no-ops. */
static int OnigEncInited;

/*
 * One-time initialisation hook for the encoding layer.
 * Marks the layer as initialised on the first call and does nothing on
 * later calls.  Always returns 0.
 */
extern int
onigenc_init(void)
{
  if (OnigEncInited == 0) {
    OnigEncInited = 1;
  }

  return 0;
}
extern int
onigenc_end(void)
{
@ -86,15 +97,10 @@ onigenc_end(void)
InitedList[i].enc = 0;
InitedList[i].inited = 0;
}
InitedListNum = 0;
return ONIG_NORMAL;
}
extern int
onigenc_init(void)
{
return 0;
OnigEncInited = 0;
return ONIG_NORMAL;
}
extern int
@ -167,7 +173,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
const UChar* start, const UChar* s, const UChar** prev)
const UChar* start, const UChar* s, const UChar** prev)
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
@ -231,7 +237,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
{
int n = 0;
UChar* q = (UChar* )p;
while (q < end) {
q += ONIGENC_MBC_ENC_LEN(enc, q);
n++;
@ -244,7 +250,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
{
int n = 0;
UChar* p = (UChar* )s;
while (1) {
if (*p == '\0') {
UChar* q;
@ -511,7 +517,7 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
extern int
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
OnigApplyAllCaseFoldFunc f, void* arg)
{
OnigCodePoint code;
int i, r;
@ -533,8 +539,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
OnigCaseFoldCodeItem items[])
const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
@ -554,7 +560,7 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
OnigApplyAllCaseFoldFunc f, void* arg)
{
static OnigCodePoint ss[] = { 0x73, 0x73 };
@ -600,7 +606,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p + 0x20);
if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
/* SS */
items[1].byte_len = 2;
items[1].code_len = 1;
@ -615,7 +621,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
items[0].code_len = 1;
items[0].code[0] = (OnigCodePoint )(*p - 0x20);
if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
/* ss */
items[1].byte_len = 2;
items[1].code_len = 1;
@ -653,16 +659,16 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
for (i = 0; i < map_size; i++) {
if (*p == map[i].from) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = map[i].to;
return 1;
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = map[i].to;
return 1;
}
else if (*p == map[i].to) {
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = map[i].from;
return 1;
items[0].byte_len = 1;
items[0].code_len = 1;
items[0].code[0] = map[i].from;
return 1;
}
}
}
@ -673,8 +679,8 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
OnigCodePoint* sb_out ARG_UNUSED,
const OnigCodePoint* ranges[] ARG_UNUSED)
OnigCodePoint* sb_out ARG_UNUSED,
const OnigCodePoint* ranges[] ARG_UNUSED)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@ -691,7 +697,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
/* for single byte encodings */
extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
const UChar*end ARG_UNUSED, UChar* lower)
const UChar*end ARG_UNUSED, UChar* lower)
{
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
@ -702,7 +708,7 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
#if 0
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
const UChar** pp, const UChar* end)
const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
@ -738,35 +744,35 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
const UChar* s)
const UChar* s)
{
return (UChar* )s;
}
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED)
const UChar* end ARG_UNUSED)
{
return TRUE;
}
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED)
const UChar* end ARG_UNUSED)
{
return FALSE;
}
extern int
onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
const UChar* end ARG_UNUSED)
const UChar* end ARG_UNUSED)
{
return TRUE;
}
extern int
onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
const UChar* p, const UChar* end)
const UChar* p, const UChar* end)
{
while (p < end) {
p += enclen(enc, p);
@ -805,7 +811,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
const UChar** pp, const UChar* end ARG_UNUSED,
UChar* lower)
UChar* lower)
{
int len;
const UChar *p = *pp;
@ -847,6 +853,8 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
/*
 * Byte length needed to store `code` in an encoding of at most two bytes.
 * Returns ONIGERR_INVALID_CODE_POINT_VALUE when any bit above the low 16
 * is set, 2 when the second-lowest byte is non-zero, and 1 otherwise.
 */
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
  /* Reject values that do not fit in 16 bits. */
  if ((code & (~0xffff)) != 0)
    return ONIGERR_INVALID_CODE_POINT_VALUE;

  return ((code & 0xff00) != 0) ? 2 : 1;
}
@ -946,7 +954,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
@ -961,7 +969,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
unsigned int ctype)
unsigned int ctype)
{
if (code < 128)
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);

View File

@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -114,6 +114,7 @@ struct PropertyNameCtype {
/* #define USE_CRNL_AS_LINE_TERMINATOR */
#define USE_UNICODE_PROPERTIES
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
#define USE_UNICODE_WORD_BREAK
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
@ -121,8 +122,20 @@ struct PropertyNameCtype {
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/*
 * Bit layout of an encoding's `flag` member, as defined by the macros below:
 *   bit 0      ENC_FLAG_ASCII_COMPATIBLE
 *   bit 1      ENC_FLAG_UNICODE
 *   bits 2-4   skip-offset field (ENC_FLAG_SKIP_OFFSET_MASK)
 *
 * ENC_SKIP_OFFSET_1_OR_0 (7) is a field value that sets all three bits of
 * the skip-offset field.
 * NOTE(review): judging by its name it means "offset is 1 or 0" rather than
 * a literal offset of 7 -- confirm against the code that reads the field.
 */
#define ENC_SKIP_OFFSET_1_OR_0 7
#define ENC_FLAG_ASCII_COMPATIBLE (1<<0)
#define ENC_FLAG_UNICODE (1<<1)
#define ENC_FLAG_SKIP_OFFSET_MASK (7<<2)
#define ENC_FLAG_SKIP_OFFSET_0 0
#define ENC_FLAG_SKIP_OFFSET_1 (1<<2)
#define ENC_FLAG_SKIP_OFFSET_2 (2<<2)
#define ENC_FLAG_SKIP_OFFSET_3 (3<<2)
#define ENC_FLAG_SKIP_OFFSET_4 (4<<2)
#define ENC_FLAG_SKIP_OFFSET_1_OR_0 (ENC_SKIP_OFFSET_1_OR_0<<2)
/* Extract the skip-offset field (0..7) from enc->flag. */
#define ENC_GET_SKIP_OFFSET(enc) \
(((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK)>>2)
/* for encoding system implementation (internal) */
@ -162,15 +175,19 @@ extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, u
extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len));
extern struct PropertyNameCtype* onigenc_sjis_lookup_property_name P_((register const char *str, register size_t len));
/* in enc/unicode.c */
/* in unicode.c */
extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));
extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
#ifdef USE_UNICODE_WORD_BREAK
extern int onigenc_wb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
#endif
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
@ -252,7 +269,7 @@ extern const unsigned short OnigEncAsciiCtypeTable[];
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
#define ONIGENC_IS_UNICODE_ENCODING(enc) \
(((enc)->flag & ENC_FLAG_UNICODE) != 0)

View File

@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -31,13 +31,7 @@
#if 0
#include <stdio.h> /* for vsnprintf() */
#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
#define va_init_list(a,b) va_start(a,b)
#else
#include <varargs.h>
#define va_init_list(a,b) va_start(a)
#endif
#endif
extern UChar*
@ -213,13 +207,17 @@ static void sprint_byte_with_x(char* s, unsigned int v)
}
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
UChar buf[], int buf_size, int *is_over)
UChar buf[], int buf_size, int *is_over)
{
int len;
UChar *p;
OnigCodePoint code;
if (ONIGENC_MBC_MINLEN(enc) > 1) {
if (!s) {
len = 0;
*is_over = 0;
}
else if (ONIGENC_MBC_MINLEN(enc) > 1) {
p = s;
len = 0;
while (p < end) {
@ -249,7 +247,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
if (len >= buf_size) break;
}
*is_over = ((p < end) ? 1 : 0);
*is_over = p < end;
}
else {
len = MIN((int )(end - s), buf_size);
@ -261,19 +259,27 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
}
/*
 * Report whether `code` is one of the error codes listed below.
 * Returns 1 for those codes and 0 for every other value.
 * NOTE(review): by its name, these are the errors whose message needs an
 * extra parameter -- confirm against onig_error_code_to_str().
 */
extern int
onig_is_error_code_needs_param(int code)
{
  if (code == ONIGERR_UNDEFINED_NAME_REFERENCE ||
      code == ONIGERR_UNDEFINED_GROUP_REFERENCE ||
      code == ONIGERR_MULTIPLEX_DEFINED_NAME ||
      code == ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL ||
      code == ONIGERR_INVALID_GROUP_NAME ||
      code == ONIGERR_INVALID_CHAR_IN_GROUP_NAME ||
      code == ONIGERR_INVALID_CHAR_PROPERTY_NAME) {
    return 1;
  }

  return 0;
}
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
extern int
EFIAPI
#ifdef HAVE_STDARG_PROTOTYPES
onig_error_code_to_str(UChar* s, int code, ...)
#else
onig_error_code_to_str(s, code, va_alist)
UChar* s;
int code;
va_dcl
#endif
extern int EFIAPI onig_error_code_to_str(UChar* s, int code, ...)
{
UChar *p, *q;
OnigErrorInfo* einfo;
@ -333,21 +339,8 @@ onig_error_code_to_str(s, code, va_alist)
}
void
EFIAPI
#ifdef HAVE_STDARG_PROTOTYPES
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
#else
onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
UChar buf[];
int bufsize;
OnigEncoding enc;
UChar* pat;
UChar* pat_end;
const UChar *fmt;
va_dcl
#endif
void EFIAPI onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
{
int n, need, len;
UChar *p, *s, *bp;

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
reggnu.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -39,7 +39,7 @@ re_free_registers(OnigRegion* r)
extern int
re_adjust_startpos(regex_t* reg, const char* string, int size,
int startpos, int range)
int startpos, int range)
{
if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
UChar *p;
@ -59,20 +59,20 @@ re_adjust_startpos(regex_t* reg, const char* string, int size,
extern int
re_match(regex_t* reg, const char* str, int size, int pos,
struct re_registers* regs)
struct re_registers* regs)
{
return onig_match(reg, (UChar* )str, (UChar* )(str + size),
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
(UChar* )(str + pos), regs, ONIG_OPTION_NONE);
}
extern int
re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
struct re_registers* regs)
struct re_registers* regs)
{
return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
(UChar* )(string + startpos),
(UChar* )(string + startpos + range),
regs, ONIG_OPTION_NONE);
(UChar* )(string + startpos),
(UChar* )(string + startpos + range),
regs, ONIG_OPTION_NONE);
}
extern int
@ -103,9 +103,9 @@ re_alloc_pattern(regex_t** reg)
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
return onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
ONIGENC_CASE_FOLD_DEFAULT,
OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
ONIGENC_CASE_FOLD_DEFAULT,
OnigEncDefaultCharEncoding,
OnigDefaultSyntax);
}
extern void

View File

@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -54,44 +54,37 @@
#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
#ifdef __GNUC__
#define USE_GOTO_LABELS_AS_VALUES
#endif
/* config */
/* spec. config */
#define USE_CALL
#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
#define USE_RETRY_LIMIT_IN_MATCH
#ifdef USE_GOTO_LABELS_AS_VALUES
#define USE_THREADED_CODE
#define USE_DIRECT_THREADED_CODE
#endif
/* internal config */
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
#define USE_WORD_BEGIN_END /* "\<", "\>" */
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#include "regenc.h"
#ifdef __cplusplus
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
#ifdef HAVE_STDARG_H
# ifndef HAVE_STDARG_PROTOTYPES
# define HAVE_STDARG_PROTOTYPES 1
# endif
#endif
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
@ -103,12 +96,6 @@
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#define USE_WORD_BEGIN_END /* "\<", "\>" */
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
@ -150,17 +137,10 @@
#define xstrcat(dest,src,size) strcat(dest,src)
#endif
// #include <stddef.h>
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif
#if 0
#ifdef HAVE_STDLIB_H
#include <stddef.h>
#include <limits.h>
#include <stdlib.h>
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
@ -170,11 +150,7 @@
#include <alloca.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <string.h>
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
@ -226,6 +202,7 @@ typedef UINTN uintptr_t;
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
#define CHAR_MAP_SIZE 256
#define INFINITE_LEN ONIG_INFINITE_DISTANCE
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@ -290,64 +267,6 @@ typedef struct {
#endif
typedef struct {
const UChar* pattern;
const UChar* pattern_end;
#ifdef USE_CALLOUT
void* tag_table;
int callout_num;
int callout_list_alloc;
CalloutListEntry* callout_list; /* index: callout num */
#endif
} RegexExt;
#define REG_EXTP(reg) ((RegexExt* )((reg)->chain))
#define REG_EXTPL(reg) ((reg)->chain)
struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
unsigned char* p; /* compiled pattern */
unsigned int used; /* used space for p */
unsigned int alloc; /* allocated space for p */
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
int num_comb_exp_check; /* no longer used (combination explosion check) */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
unsigned int bt_mem_end; /* need backtrack flag */
int stack_pop_level;
int repeat_range_alloc;
OnigRepeatRange* repeat_range;
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
OnigCaseFoldType case_fold_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
int *int_map; /* BM skip for exact_len > 255 */
int *int_map_backward; /* BM skip for backward search */
OnigLen dmin; /* min-distance of exact or map */
OnigLen dmax; /* max-distance of exact or map */
/* regex_t link chain */
struct re_pattern_buffer* chain; /* escape compile-conflict */
};
/* stack pop level */
enum StackPopLevel {
STACK_POP_LEVEL_FREE = 0,
@ -357,12 +276,13 @@ enum StackPopLevel {
/* optimize flags */
enum OptimizeType {
OPTIMIZE_NONE = 0,
OPTIMIZE_EXACT = 1, /* Slow Search */
OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */
OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */
OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */
OPTIMIZE_MAP = 5 /* char map */
OPTIMIZE_NONE = 0,
OPTIMIZE_STR, /* Slow Search */
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
OPTIMIZE_MAP /* char map */
};
/* bit status */
@ -436,8 +356,8 @@ typedef unsigned int MemStatusType;
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
#define REPEAT_INFINITE -1
#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
#define INFINITE_REPEAT -1
#define IS_INFINITE_REPEAT(n) ((n) == INFINITE_REPEAT)
/* bitset */
#define BITS_PER_BYTE 8
@ -475,7 +395,7 @@ typedef struct _BBuf {
unsigned int alloc;
} BBuf;
#define BB_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
#define BB_INIT(buf,size) bbuf_init((BBuf* )(buf), (size))
#define BB_SIZE_INC(buf,inc) do{\
(buf)->alloc += (inc);\
@ -551,32 +471,32 @@ typedef struct _BBuf {
/* has body */
#define ANCHOR_PREC_READ (1<<0)
#define ANCHOR_PREC_READ_NOT (1<<1)
#define ANCHOR_LOOK_BEHIND (1<<2)
#define ANCHOR_LOOK_BEHIND_NOT (1<<3)
#define ANCR_PREC_READ (1<<0)
#define ANCR_PREC_READ_NOT (1<<1)
#define ANCR_LOOK_BEHIND (1<<2)
#define ANCR_LOOK_BEHIND_NOT (1<<3)
/* no body */
#define ANCHOR_BEGIN_BUF (1<<4)
#define ANCHOR_BEGIN_LINE (1<<5)
#define ANCHOR_BEGIN_POSITION (1<<6)
#define ANCHOR_END_BUF (1<<7)
#define ANCHOR_SEMI_END_BUF (1<<8)
#define ANCHOR_END_LINE (1<<9)
#define ANCHOR_WORD_BOUNDARY (1<<10)
#define ANCHOR_NO_WORD_BOUNDARY (1<<11)
#define ANCHOR_WORD_BEGIN (1<<12)
#define ANCHOR_WORD_END (1<<13)
#define ANCHOR_ANYCHAR_INF (1<<14)
#define ANCHOR_ANYCHAR_INF_ML (1<<15)
#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16)
#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17)
#define ANCR_BEGIN_BUF (1<<4)
#define ANCR_BEGIN_LINE (1<<5)
#define ANCR_BEGIN_POSITION (1<<6)
#define ANCR_END_BUF (1<<7)
#define ANCR_SEMI_END_BUF (1<<8)
#define ANCR_END_LINE (1<<9)
#define ANCR_WORD_BOUNDARY (1<<10)
#define ANCR_NO_WORD_BOUNDARY (1<<11)
#define ANCR_WORD_BEGIN (1<<12)
#define ANCR_WORD_END (1<<13)
#define ANCR_ANYCHAR_INF (1<<14)
#define ANCR_ANYCHAR_INF_ML (1<<15)
#define ANCR_TEXT_SEGMENT_BOUNDARY (1<<16)
#define ANCR_NO_TEXT_SEGMENT_BOUNDARY (1<<17)
#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF)
#define ANCHOR_HAS_BODY(a) ((a)->type < ANCR_BEGIN_BUF)
#define IS_WORD_ANCHOR_TYPE(type) \
((type) == ANCHOR_WORD_BOUNDARY || (type) == ANCHOR_NO_WORD_BOUNDARY || \
(type) == ANCHOR_WORD_BEGIN || (type) == ANCHOR_WORD_END)
((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \
(type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END)
/* operation code */
enum OpCode {
@ -605,9 +525,6 @@ enum OpCode {
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
#ifdef USE_OP_CCLASS_NODE
OP_CCLASS_NODE, /* pointer to CClassNode node */
#endif
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
@ -625,8 +542,7 @@ enum OpCode {
OP_WORD_BEGIN,
OP_WORD_END,
OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY,
OP_TEXT_SEGMENT_BOUNDARY,
OP_BEGIN_BUF,
OP_END_BUF,
@ -642,6 +558,7 @@ enum OpCode {
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
OP_BACKREF_WITH_LEVEL_IC, /* \k<xxx+n>, \k<xxx-n> */
OP_BACKREF_CHECK, /* (?(n)), (?('name')) */
OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */
@ -657,7 +574,9 @@ enum OpCode {
OP_PUSH,
OP_PUSH_SUPER,
OP_POP_OUT,
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
#endif
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
OP_REPEAT, /* {n,m} */
OP_REPEAT_NG, /* {n,m}? (non greedy) */
@ -704,6 +623,11 @@ enum UpdateVarType {
UPDATE_VAR_RIGHT_RANGE_INIT = 4,
};
/*
 * Kind of text segment a boundary test refers to.  Stored in the
 * `text_segment_boundary.type` operand of an Operation.
 */
enum TextSegmentBoundaryType {
EXTENDED_GRAPHEME_CLUSTER_BOUNDARY = 0,
WORD_BOUNDARY = 1,
};
typedef int RelAddrType;
typedef int AbsAddrType;
typedef int LengthType;
@ -747,13 +671,16 @@ typedef int ModeType;
/* op-code + arg size */
#if 0
#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_POP_OUT SIZE_OPCODE
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#endif
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
@ -786,6 +713,56 @@ typedef int ModeType;
#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM)
#endif
#else /* if 0 */
/* for relative address increment to go next op. */
#define SIZE_INC_OP 1
#define SIZE_OP_ANYCHAR_STAR 1
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT 1
#define SIZE_OP_JUMP 1
#define SIZE_OP_PUSH 1
#define SIZE_OP_PUSH_SUPER 1
#define SIZE_OP_POP_OUT 1
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 1
#endif
#define SIZE_OP_PUSH_IF_PEEK_NEXT 1
#define SIZE_OP_REPEAT 1
#define SIZE_OP_REPEAT_INC 1
#define SIZE_OP_REPEAT_INC_NG 1
#define SIZE_OP_WORD_BOUNDARY 1
#define SIZE_OP_PREC_READ_START 1
#define SIZE_OP_PREC_READ_NOT_START 1
#define SIZE_OP_PREC_READ_END 1
#define SIZE_OP_PREC_READ_NOT_END 1
#define SIZE_OP_BACKREF 1
#define SIZE_OP_FAIL 1
#define SIZE_OP_MEMORY_START 1
#define SIZE_OP_MEMORY_START_PUSH 1
#define SIZE_OP_MEMORY_END_PUSH 1
#define SIZE_OP_MEMORY_END_PUSH_REC 1
#define SIZE_OP_MEMORY_END 1
#define SIZE_OP_MEMORY_END_REC 1
#define SIZE_OP_ATOMIC_START 1
#define SIZE_OP_ATOMIC_END 1
#define SIZE_OP_EMPTY_CHECK_START 1
#define SIZE_OP_EMPTY_CHECK_END 1
#define SIZE_OP_LOOK_BEHIND 1
#define SIZE_OP_LOOK_BEHIND_NOT_START 1
#define SIZE_OP_LOOK_BEHIND_NOT_END 1
#define SIZE_OP_CALL 1
#define SIZE_OP_RETURN 1
#define SIZE_OP_PUSH_SAVE_VAL 1
#define SIZE_OP_UPDATE_VAR 1
#ifdef USE_CALLOUT
#define SIZE_OP_CALLOUT_CONTENTS 1
#define SIZE_OP_CALLOUT_NAME 1
#endif
#endif /* if 0 */
#define MC_ESC(syn) (syn)->meta_char_table.esc
#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
@ -837,8 +814,186 @@ typedef int ModeType;
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
extern void onig_add_end_call(void (*func)(void));
/*
 * One element of a compiled pattern's operation array.
 *
 * The first member identifies the operation: an opaque address (`opaddr`)
 * when USE_DIRECT_THREADED_CODE is defined, otherwise an `enum OpCode`.
 * The anonymous union that follows holds the operands, with one member
 * struct per operation or operation family; only the member matching the
 * operation is meaningful.
 *
 * The unions here are anonymous, so operand structs are reached directly
 * from the Operation (e.g. `op->jump.addr`); this needs C11 or an
 * equivalent compiler extension.
 */
typedef struct {
#ifdef USE_DIRECT_THREADED_CODE
const void* opaddr;
#else
enum OpCode opcode;
#endif
union {
struct {
UChar s[16]; /* Now used first 7 bytes only. */
} exact;
struct {
UChar* s;
LengthType n; /* number of chars */
} exact_n; /* EXACTN, EXACTN_IC, EXACTMB2N, EXACTMB3N */
struct {
UChar* s;
LengthType n; /* number of chars */
LengthType len; /* char byte length */
} exact_len_n; /* EXACTMBN */
struct {
BitSetRef bsp;
} cclass;
struct {
void* mb;
} cclass_mb;
struct {
void* mb; /* mb must be same position with cclass_mb for match_at(). */
BitSetRef bsp;
} cclass_mix;
struct {
UChar c;
} anychar_star_peek_next;
struct {
ModeType mode;
} word_boundary; /* OP_WORD_BOUNDARY, OP_NO_WORD_BOUNDARY, OP_WORD_BEGIN, OP_WORD_END */
struct {
enum TextSegmentBoundaryType type;
/* NOTE(review): presumably non-zero negates the boundary test -- confirm in the matcher. */
int not;
} text_segment_boundary;
struct {
/* Anonymous union: which member is valid depends on `num` below. */
union {
MemNumType n1; /* num == 1 */
MemNumType* ns; /* num > 1 */
};
int num;
int nest_level;
} backref_general; /* BACKREF_MULTI, BACKREF_MULTI_IC, BACKREF_WITH_LEVEL, BACKREF_CHECK, BACKREF_CHECK_WITH_LEVEL, */
struct {
MemNumType n1;
} backref_n; /* BACKREF_N, BACKREF_N_IC */
struct {
MemNumType num;
} memory_start; /* MEMORY_START, MEMORY_START_PUSH */
struct {
MemNumType num;
} memory_end; /* MEMORY_END, MEMORY_END_REC, MEMORY_END_PUSH, MEMORY_END_PUSH_REC */
struct {
RelAddrType addr;
} jump;
struct {
RelAddrType addr;
} push;
struct {
RelAddrType addr;
UChar c;
} push_or_jump_exact1;
struct {
RelAddrType addr;
UChar c;
} push_if_peek_next;
struct {
MemNumType id;
RelAddrType addr;
} repeat; /* REPEAT, REPEAT_NG */
struct {
MemNumType id;
} repeat_inc; /* REPEAT_INC, REPEAT_INC_SG, REPEAT_INC_NG, REPEAT_INC_NG_SG */
struct {
MemNumType mem;
} empty_check_start;
struct {
MemNumType mem;
} empty_check_end; /* EMPTY_CHECK_END, EMPTY_CHECK_END_MEMST, EMPTY_CHECK_END_MEMST_PUSH */
struct {
RelAddrType addr;
} prec_read_not_start;
struct {
LengthType len;
} look_behind;
struct {
LengthType len;
RelAddrType addr;
} look_behind_not_start;
struct {
AbsAddrType addr;
} call;
struct {
SaveType type;
MemNumType id;
} push_save_val;
struct {
UpdateVarType type;
MemNumType id;
} update_var;
#ifdef USE_CALLOUT
struct {
MemNumType num;
} callout_contents;
struct {
MemNumType num;
MemNumType id;
} callout_name;
#endif
};
} Operation;
/*
 * RegexExt: extension data reachable from a regex through the `extp`
 * member of struct re_pattern_buffer.
 *
 * It records a pattern byte range (pattern .. pattern_end) and, when
 * USE_CALLOUT is defined, the callout bookkeeping fields below.
 * NOTE(review): pattern_end is presumably one past the last pattern byte,
 * as elsewhere in this library -- confirm against onig_ext_set_pattern().
 */
typedef struct {
const UChar* pattern;
const UChar* pattern_end;
#ifdef USE_CALLOUT
void* tag_table;
int callout_num;
int callout_list_alloc;
CalloutListEntry* callout_list; /* index: callout num */
#endif
} RegexExt;
/*
 * struct re_pattern_buffer: the compiled regular expression object.
 *
 * Members fall into three groups, in declaration order:
 *   - the compiled program: the Operation array, its used/allocated counts
 *     and the string pool range;
 *   - per-pattern counters and settings (groups, repeats, calls, encoding,
 *     options, syntax, case-fold flag, name table);
 *   - optimization info used for searching (anchors, exact string,
 *     character map and min/max distances).
 * `extp` points at optional RegexExt data.
 */
struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
Operation* ops;
#ifdef USE_DIRECT_THREADED_CODE
/* Only present in direct-threaded builds, where Operation stores an
   address instead of an opcode.
   NOTE(review): presumably one OpCode per entry of ops -- confirm. */
enum OpCode* ocs;
#endif
Operation* ops_curr;
unsigned int ops_used; /* used space for ops */
unsigned int ops_alloc; /* allocated space for ops */
unsigned char* string_pool;
unsigned char* string_pool_end;
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
unsigned int bt_mem_start; /* need backtrack flag */
unsigned int bt_mem_end; /* need backtrack flag */
int stack_pop_level;
int repeat_range_alloc;
OnigRepeatRange* repeat_range;
OnigEncoding enc;
OnigOptionType options;
OnigSyntaxType* syntax;
OnigCaseFoldType case_fold_flag;
void* name_table;
/* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */
int map_offset;
OnigLen dmin; /* min-distance of exact or map */
OnigLen dmax; /* max-distance of exact or map */
RegexExt* extp;
};
/* COP(reg): the regex's current-operation pointer (reg->ops_curr). */
#define COP(reg) ((reg)->ops_curr)
/* COP_CURR_OFFSET(reg): index of the last used slot of reg->ops
   (ops_used - 1).
   NOTE(review): presumably the same entry ops_curr points at -- confirm. */
#define COP_CURR_OFFSET(reg) ((reg)->ops_used - 1)
/* COP_CURR_OFFSET_BYTES(reg, p): distance in bytes from the start of
   reg->ops to member `p` of the current operation, as an int. */
#define COP_CURR_OFFSET_BYTES(reg, p) \
((int )((char* )(&((reg)->ops_curr->p)) - (char* )((reg)->ops)))
extern void onig_add_end_call(void (*func)(void));
#ifdef ONIG_DEBUG
@ -854,13 +1009,12 @@ extern int onig_print_statistics P_((FILE* f));
extern void onig_warning(const char* s);
extern UChar* onig_error_code_to_format P_((int code));
extern void EFIAPI onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern void EFIAPI onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
extern RegexExt* onig_get_regex_ext(regex_t* reg);
extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end);
extern int onig_positive_int_multiply(int x, int y);
#ifdef USE_CALLOUT

View File

@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -31,6 +31,10 @@
#include "regint.h"
#define NODE_STRING_MARGIN 16
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
/* node type */
typedef enum {
NODE_STRING = 0,
@ -38,7 +42,7 @@ typedef enum {
NODE_CTYPE = 2,
NODE_BACKREF = 3,
NODE_QUANT = 4,
NODE_ENCLOSURE = 5,
NODE_BAG = 5,
NODE_ANCHOR = 6,
NODE_LIST = 7,
NODE_ALT = 8,
@ -46,161 +50,29 @@ typedef enum {
NODE_GIMMICK = 10
} NodeType;
enum BagType {
BAG_MEMORY = 0,
BAG_OPTION = 1,
BAG_STOP_BACKTRACK = 2,
BAG_IF_ELSE = 3,
};
enum GimmickType {
GIMMICK_FAIL = 0,
GIMMICK_KEEP = 1,
GIMMICK_SAVE = 2,
GIMMICK_UPDATE_VAR = 3,
GIMMICK_FAIL = 0,
GIMMICK_SAVE = 1,
GIMMICK_UPDATE_VAR = 2,
#ifdef USE_CALLOUT
GIMMICK_CALLOUT = 4,
GIMMICK_CALLOUT = 3,
#endif
};
/* node type bit */
#define NODE_TYPE2BIT(type) (1<<(type))
#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
#define NODE_BIT_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
#define NODE_IS_SIMPLE_TYPE(node) \
((NODE_TYPE2BIT(NODE_TYPE(node)) & \
(NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
#define NODE_TYPE(node) ((node)->u.base.node_type)
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
#define STR_(node) (&((node)->u.str))
#define CCLASS_(node) (&((node)->u.cclass))
#define CTYPE_(node) (&((node)->u.ctype))
#define BACKREF_(node) (&((node)->u.backref))
#define QUANT_(node) (&((node)->u.quant))
#define ENCLOSURE_(node) (&((node)->u.enclosure))
#define ANCHOR_(node) (&((node)->u.anchor))
#define CONS_(node) (&((node)->u.cons))
#define CALL_(node) (&((node)->u.call))
#define GIMMICK_(node) (&((node)->u.gimmick))
#define NODE_CAR(node) (CONS_(node)->car)
#define NODE_CDR(node) (CONS_(node)->cdr)
#define CTYPE_ANYCHAR -1
#define NODE_IS_ANYCHAR(node) \
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
#define CTYPE_OPTION(node, reg) \
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
enum EnclosureType {
ENCLOSURE_MEMORY = 0,
ENCLOSURE_OPTION = 1,
ENCLOSURE_STOP_BACKTRACK = 2,
ENCLOSURE_IF_ELSE = 3,
enum BodyEmptyType {
BODY_IS_NOT_EMPTY = 0,
BODY_IS_EMPTY_POSSIBILITY = 1,
BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
BODY_IS_EMPTY_POSSIBILITY_REC = 3
};
#define NODE_STRING_MARGIN 16
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
#define NODE_STRING_AMBIG (1<<1)
#define NODE_STRING_DONT_GET_OPT_INFO (1<<2)
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
#define NODE_STRING_IS_RAW(node) \
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
#define NODE_STRING_IS_AMBIG(node) \
(((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
enum QuantBodyEmpty {
QUANT_BODY_IS_NOT_EMPTY = 0,
QUANT_BODY_IS_EMPTY = 1,
QUANT_BODY_IS_EMPTY_MEM = 2,
QUANT_BODY_IS_EMPTY_REC = 3
};
/* node status bits */
#define NODE_ST_MIN_FIXED (1<<0)
#define NODE_ST_MAX_FIXED (1<<1)
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
#define NODE_ST_NAMED_GROUP (1<<9)
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
#define NODE_ST_IN_MULTI_ENTRY (1<<12)
#define NODE_ST_NEST_LEVEL (1<<13)
#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
#define NODE_ST_BACKREF (1<<16)
#define NODE_ST_CHECKER (1<<17)
#define NODE_ST_FIXED_OPTION (1<<18)
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
#define NODE_ST_SUPER (1<<20)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
#define NODE_BODY(node) ((node)->u.base.body)
#define NODE_QUANT_BODY(node) ((node)->body)
#define NODE_ENCLOSURE_BODY(node) ((node)->body)
#define NODE_CALL_BODY(node) ((node)->body)
#define NODE_ANCHOR_BODY(node) ((node)->body)
typedef struct {
NodeType node_type;
int status;
@ -208,7 +80,7 @@ typedef struct {
UChar* s;
UChar* end;
unsigned int flag;
int capa; /* (allocated size - 1) or 0: use buf[] */
int capacity; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STRING_BUF_SIZE];
} StrNode;
@ -229,7 +101,7 @@ typedef struct {
int lower;
int upper;
int greedy;
enum QuantBodyEmpty body_empty_info;
enum BodyEmptyType emptiness;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
@ -240,7 +112,7 @@ typedef struct {
int status;
struct _Node* body;
enum EnclosureType type;
enum BagType type;
union {
struct {
int regnum;
@ -262,7 +134,7 @@ typedef struct {
OnigLen max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_nodes() */
} EnclosureNode;
} BagNode;
#ifdef USE_CALL
@ -280,7 +152,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
struct _Node* body; /* to BagNode : BAG_MEMORY */
int by_number;
int group_num;
@ -350,7 +222,7 @@ typedef struct _Node {
StrNode str;
CClassNode cclass;
QuantNode quant;
EnclosureNode enclosure;
BagNode bag;
BackRefNode backref;
AnchorNode anchor;
ConsAltNode cons;
@ -362,9 +234,134 @@ typedef struct _Node {
} u;
} Node;
#define NULL_NODE ((Node* )0)
/* node type bit */
#define NODE_TYPE2BIT(type) (1<<(type))
#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
#define NODE_BIT_BAG NODE_TYPE2BIT(NODE_BAG)
#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
#define NODE_TYPE(node) ((node)->u.base.node_type)
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
#define STR_(node) (&((node)->u.str))
#define CCLASS_(node) (&((node)->u.cclass))
#define CTYPE_(node) (&((node)->u.ctype))
#define BACKREF_(node) (&((node)->u.backref))
#define QUANT_(node) (&((node)->u.quant))
#define BAG_(node) (&((node)->u.bag))
#define ANCHOR_(node) (&((node)->u.anchor))
#define CONS_(node) (&((node)->u.cons))
#define CALL_(node) (&((node)->u.call))
#define GIMMICK_(node) (&((node)->u.gimmick))
#define NODE_CAR(node) (CONS_(node)->car)
#define NODE_CDR(node) (CONS_(node)->cdr)
#define CTYPE_ANYCHAR -1
#define NODE_IS_ANYCHAR(node) \
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
#define CTYPE_OPTION(node, reg) \
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
#define NODE_STRING_AMBIG (1<<1)
#define NODE_STRING_GOOD_AMBIG (1<<2)
#define NODE_STRING_DONT_GET_OPT_INFO (1<<3)
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
#define NODE_STRING_IS_RAW(node) \
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
#define NODE_STRING_IS_AMBIG(node) \
(((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
#define NODE_STRING_IS_GOOD_AMBIG(node) \
(((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
/* node status bits */
#define NODE_ST_MIN_FIXED (1<<0)
#define NODE_ST_MAX_FIXED (1<<1)
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
#define NODE_ST_NAMED_GROUP (1<<9)
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
#define NODE_ST_IN_MULTI_ENTRY (1<<12)
#define NODE_ST_NEST_LEVEL (1<<13)
#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
#define NODE_ST_BACKREF (1<<16)
#define NODE_ST_CHECKER (1<<17)
#define NODE_ST_FIXED_OPTION (1<<18)
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
#define NODE_ST_SUPER (1<<20)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
#define NODE_IS_STRICT_REAL_REPEAT(node) \
((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
#define NODE_BODY(node) ((node)->u.base.body)
#define NODE_QUANT_BODY(node) ((node)->body)
#define NODE_BAG_BODY(node) ((node)->body)
#define NODE_CALL_BODY(node) ((node)->body)
#define NODE_ANCHOR_BODY(node) ((node)->body)
#define SCANENV_MEMENV_SIZE 8
#define SCANENV_MEMENV(senv) \
(IS_NOT_NULL((senv)->mem_env_dynamic) ? \
@ -434,7 +431,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_enclosure P_((int type));
extern Node* onig_node_new_bag P_((enum BagType type));
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));

View File

@ -2,7 +2,7 @@
regposerr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -37,13 +37,7 @@
//#include "config.h"
#include "onigposix.h"
#if 0
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#endif
//#include <string.h>
#if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
@ -86,7 +80,7 @@ static char* ESTRING[] = {
extern size_t
regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
size_t size)
size_t size)
{
char* s;
char tbuf[35];

View File

@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -30,6 +30,7 @@
#define regex_t onig_regex_t
#include "regint.h"
#undef regex_t
#include "onigposix.h"
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
@ -148,6 +149,8 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
OnigSyntaxType* syntax = OnigDefaultSyntax;
OnigOptionType options;
reg->onig = (void* )0;
if ((posix_options & REG_EXTENDED) == 0)
syntax = ONIG_SYNTAX_POSIX_BASIC;
@ -163,8 +166,8 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
options, OnigEncDefaultCharEncoding, syntax,
(OnigErrorInfo* )NULL);
options, OnigEncDefaultCharEncoding, syntax,
(OnigErrorInfo* )NULL);
if (r != ONIG_NORMAL) {
return onig2posix_error_code(r);
}
@ -175,7 +178,7 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
extern int
regexec(regex_t* reg, const char* str, size_t nmatch,
regmatch_t pmatch[], int posix_options)
regmatch_t pmatch[], int posix_options)
{
int r, i, len;
UChar* end;
@ -203,7 +206,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
end = (UChar* )(str + len);
r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
(OnigRegion* )pm, options);
(OnigRegion* )pm, options);
if (r >= 0) {
r = 0; /* Match */
@ -235,6 +238,7 @@ extern void
regfree(regex_t* reg)
{
onig_free(ONIG_C(reg));
reg->onig = (void* )0;
}
@ -272,7 +276,7 @@ typedef struct {
static int
i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
onig_regex_t* reg ARG_UNUSED, void* arg)
onig_regex_t* reg ARG_UNUSED, void* arg)
{
i_wrap* warg = (i_wrap* )arg;

View File

@ -2,7 +2,7 @@
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -67,8 +67,8 @@ OnigSyntaxType OnigSyntaxPosixExtended = {
ONIG_SYN_OP_BRACE_INTERVAL |
ONIG_SYN_OP_PLUS_ONE_INF | ONIG_SYN_OP_QMARK_ZERO_ONE | ONIG_SYN_OP_VBAR_ALT )
, 0
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
, ( ONIG_SYN_CONTEXT_INDEP_ANCHORS |
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS |
ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
@ -174,11 +174,12 @@ OnigSyntaxType OnigSyntaxPerl = {
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
@ -207,11 +208,12 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |

View File

@ -2,7 +2,7 @@
unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -504,6 +504,281 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
#endif
#ifdef USE_UNICODE_WORD_BREAK
/*
 * Word-break category of a code point, as returned by wb_get_type().
 * WB_Any (0) is the value used for code points that are not covered by
 * any entry of the WB_RANGES table.
 * NOTE(review): the enumerators are presumably referenced by name from the
 * generated unicode_wb_data.c -- keep both in sync when changing this list.
 */
enum WB_TYPE {
WB_Any = 0,
WB_ALetter,
WB_CR,
WB_Double_Quote,
WB_Extend,
WB_ExtendNumLet,
WB_Format,
WB_Hebrew_Letter,
WB_Katakana,
WB_LF,
WB_MidLetter,
WB_MidNum,
WB_MidNumLet,
WB_Newline,
WB_Numeric,
WB_Regional_Indicator,
WB_Single_Quote,
WB_WSegSpace,
WB_ZWJ,
};
/*
 * One row of the word-break lookup table: every code point in the
 * inclusive range [start, end] has category `type`.
 * wb_get_type() binary-searches the table on `end`, so rows are expected
 * to be sorted in ascending, non-overlapping order.
 */
typedef struct {
OnigCodePoint start;
OnigCodePoint end;
enum WB_TYPE type;
} WB_RANGE_TYPE;
#include "unicode_wb_data.c"
/*
 * Look up the word-break category of a code point.
 *
 * Performs a lower-bound binary search over WB_RANGES for the first row
 * whose `end` is not below `code`, then checks that the row really
 * contains `code`.
 *
 * code : code point to classify.
 *
 * Returns the row's type, or WB_Any when no row contains `code`.
 */
static enum WB_TYPE
wb_get_type(OnigCodePoint code)
{
  OnigCodePoint lo, hi, mid;

  lo = 0;
  hi = (OnigCodePoint )WB_RANGE_NUM;

  while (lo < hi) {
    mid = (lo + hi) >> 1;
    if (code > WB_RANGES[mid].end)
      lo = mid + 1;   /* code lies after this row */
    else
      hi = mid;       /* this row or an earlier one */
  }

  /* lo is now the first row with end >= code (or one past the table). */
  if (lo < (OnigCodePoint )WB_RANGE_NUM && code >= WB_RANGES[lo].start)
    return WB_RANGES[lo].type;

  return WB_Any;
}
/* True for Extend, Format and ZWJ: the categories that rule WB4 skips
   over when looking for the neighbouring "main" character. */
#define IS_WB_IGNORE_TAIL(t) ((t) == WB_Extend || (t) == WB_Format || (t) == WB_ZWJ)
/* AHLetter = ALetter | Hebrew_Letter. */
#define IS_WB_AHLetter(t) ((t) == WB_ALetter || (t) == WB_Hebrew_Letter)
/* MidNumLetQ = MidNumLet | Single_Quote. */
#define IS_WB_MidNumLetQ(t) ((t) == WB_MidNumLet || (t) == WB_Single_Quote)
/*
 * Find the next character after the one at `p` that is not an ignorable
 * tail (Extend / Format / ZWJ).
 *
 * enc   : encoding used to step over and decode characters.
 * p     : head of the current character; scanning starts at the
 *         character that follows it.
 * end   : end of the text; scanning stops once it is reached.
 * rcode : receives the code point that was found.
 * rtype : receives its word-break category.
 *
 * Returns 1 and fills *rcode / *rtype on success; returns 0 and leaves
 * both untouched when the end of the text is reached first.
 */
static int
wb_get_next_main_code(OnigEncoding enc, UChar* p, const UChar* end,
                      OnigCodePoint* rcode, enum WB_TYPE* rtype)
{
  OnigCodePoint c;
  enum WB_TYPE t;

  for (p += enclen(enc, p); p < end; p += enclen(enc, p)) {
    c = ONIGENC_MBC_TO_CODE(enc, p, end);
    t = wb_get_type(c);
    if (IS_WB_IGNORE_TAIL(t))
      continue;

    *rcode = c;
    *rtype = t;
    return 1;
  }

  return 0;
}
/*
 * Decide whether position `p` is a word boundary, by applying the rules
 * labelled WB1 .. WB999 below in the order in which they appear.
 *
 * enc   : encoding used to decode characters and to step backwards.
 * p     : position under test (head of the character after the boundary).
 * prev  : head of the character before `p`; when NULL it is looked up
 *         with onigenc_get_prev_char_head().
 * start : beginning of the text; lower limit for backward scanning.
 * end   : end of the text; limit for decoding and forward scanning.
 *
 * Returns TRUE when a break is allowed at `p`, FALSE otherwise.
 *
 * Note: `prev` and `from` are updated in place while scanning backwards
 * (WB4 and the look-behind loops), and later rules see the updated values,
 * so the order of the rule blocks matters.
 */
extern int
onigenc_wb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
const UChar* start, const UChar* end)
{
int r;
UChar* pp;
OnigCodePoint cfrom;
OnigCodePoint cfrom2;
OnigCodePoint cto;
OnigCodePoint cto2;
enum WB_TYPE from;
enum WB_TYPE from2;
enum WB_TYPE to;
enum WB_TYPE to2;
/* WB1: sot / Any */
if (p == start) return TRUE;
/* WB2: Any / eot */
if (p == end) return TRUE;
if (IS_NULL(prev)) {
prev = onigenc_get_prev_char_head(enc, start, p);
/* No previous character could be found: report a boundary. */
if (IS_NULL(prev)) return TRUE;
}
/* from/to are the categories of the characters on the left and right
   of the candidate boundary. */
cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
cto = ONIGENC_MBC_TO_CODE(enc, p, end);
from = wb_get_type(cfrom);
to = wb_get_type(cto);
/* short cut */
/* 0 is WB_Any: neither side is in the table, so no rule but WB999 applies. */
if (from == 0 && to == 0) goto WB999;
/* WB3: CR + LF */
if (from == WB_CR && to == WB_LF) return FALSE;
/* WB3a: (Newline|CR|LF) / */
if (from == WB_Newline || from == WB_CR || from == WB_LF) return TRUE;
/* WB3b: / (Newline|CR|LF) */
if (to == WB_Newline || to == WB_CR || to == WB_LF) return TRUE;
/* WB3c: ZWJ + {Extended_Pictographic} */
if (from == WB_ZWJ) {
if (onigenc_unicode_is_code_ctype(cto, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
return FALSE;
}
/* WB3d: WSegSpace + WSegSpace */
if (from == WB_WSegSpace && to == WB_WSegSpace) return FALSE;
/* WB4: X (Extend|Format|ZWJ)* -> X */
if (IS_WB_IGNORE_TAIL(to)) return FALSE;
if (IS_WB_IGNORE_TAIL(from)) {
/* Walk back over the ignorable run so that `from` becomes the category
   of the character it is attached to. If the start of the text is
   reached first, prev/from keep the last character examined. */
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
prev = pp;
cfrom = ONIGENC_MBC_TO_CODE(enc, prev, end);
from = wb_get_type(cfrom);
if (! IS_WB_IGNORE_TAIL(from))
break;
}
}
if (IS_WB_AHLetter(from)) {
/* WB5: AHLetter + AHLetter */
if (IS_WB_AHLetter(to)) return FALSE;
/* WB6: AHLetter + (MidLetter | MidNumLetQ) AHLetter */
if (to == WB_MidLetter || IS_WB_MidNumLetQ(to)) {
/* Look one main character past `to`; r == 0 means there is none. */
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
if (r == 1) {
if (IS_WB_AHLetter(to2)) return FALSE;
}
}
}
/* WB7: AHLetter (MidLetter | MidNumLetQ) + AHLetter */
if (from == WB_MidLetter || IS_WB_MidNumLetQ(from)) {
if (IS_WB_AHLetter(to)) {
/* from2 = category of the nearest non-ignorable character before
   `from`; it stays WB_Any when there is no earlier character. */
from2 = WB_Any;
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
prev = pp;
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
from2 = wb_get_type(cfrom2);
if (! IS_WB_IGNORE_TAIL(from2))
break;
}
if (IS_WB_AHLetter(from2)) return FALSE;
}
}
if (from == WB_Hebrew_Letter) {
/* WB7a: Hebrew_Letter + Single_Quote */
if (to == WB_Single_Quote) return FALSE;
/* WB7b: Hebrew_Letter + Double_Quote Hebrew_Letter */
if (to == WB_Double_Quote) {
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
if (r == 1) {
if (to2 == WB_Hebrew_Letter) return FALSE;
}
}
}
/* WB7c: Hebrew_Letter Double_Quote + Hebrew_Letter */
if (from == WB_Double_Quote) {
if (to == WB_Hebrew_Letter) {
/* Same backward look-behind as in WB7. */
from2 = WB_Any;
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
prev = pp;
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
from2 = wb_get_type(cfrom2);
if (! IS_WB_IGNORE_TAIL(from2))
break;
}
if (from2 == WB_Hebrew_Letter) return FALSE;
}
}
if (to == WB_Numeric) {
/* WB8: Numeric + Numeric */
if (from == WB_Numeric) return FALSE;
/* WB9: AHLetter + Numeric */
if (IS_WB_AHLetter(from)) return FALSE;
/* WB11: Numeric (MidNum | MidNumLetQ) + Numeric */
if (from == WB_MidNum || IS_WB_MidNumLetQ(from)) {
/* Same backward look-behind as in WB7. */
from2 = WB_Any;
while ((pp = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
prev = pp;
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
from2 = wb_get_type(cfrom2);
if (! IS_WB_IGNORE_TAIL(from2))
break;
}
if (from2 == WB_Numeric) return FALSE;
}
}
if (from == WB_Numeric) {
/* WB10: Numeric + AHLetter */
if (IS_WB_AHLetter(to)) return FALSE;
/* WB12: Numeric + (MidNum | MidNumLetQ) Numeric */
if (to == WB_MidNum || IS_WB_MidNumLetQ(to)) {
r = wb_get_next_main_code(enc, p, end, &cto2, &to2);
if (r == 1) {
if (to2 == WB_Numeric) return FALSE;
}
}
}
/* WB13: Katakana + Katakana */
if (from == WB_Katakana && to == WB_Katakana) return FALSE;
/* WB13a: (AHLetter | Numeric | Katakana | ExtendNumLet) + ExtendNumLet */
if (IS_WB_AHLetter(from) || from == WB_Numeric || from == WB_Katakana
|| from == WB_ExtendNumLet) {
if (to == WB_ExtendNumLet) return FALSE;
}
/* WB13b: ExtendNumLet + (AHLetter | Numeric | Katakana) */
if (from == WB_ExtendNumLet) {
if (IS_WB_AHLetter(to) || to == WB_Numeric || to == WB_Katakana)
return FALSE;
}
/* WB15: sot (RI RI)* RI + RI */
/* WB16: [^RI] (RI RI)* RI + RI */
if (from == WB_Regional_Indicator && to == WB_Regional_Indicator) {
/* n = number of consecutive Regional_Indicator characters directly
   before `from`. An even count means `from` opens a new pair with `to`,
   so the two must not be split. */
int n = 0;
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
cfrom2 = ONIGENC_MBC_TO_CODE(enc, prev, end);
from2 = wb_get_type(cfrom2);
if (from2 != WB_Regional_Indicator)
break;
n++;
}
if ((n % 2) == 0) return FALSE;
}
WB999:
/* WB999: Any / Any */
return TRUE;
}
#endif /* USE_UNICODE_WORD_BREAK */
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
enum EGCB_BREAK_TYPE {
@ -657,8 +932,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
if (from == 0x000d && to == 0x000a) return 0;
else return 1;
return from != 0x000d || to != 0x000a;
}
btype = unicode_egcb_is_break_2code(from, to);
@ -701,8 +975,7 @@ onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
return 1;
#else
if (from == 0x000d && to == 0x000a) return 0;
else return 1;
return from != 0x000d || to != 0x000a;
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
}
@ -729,6 +1002,7 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
int len;
int c;
char* s;
UChar* uname;
if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
@ -741,10 +1015,11 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
if (s == 0)
return ONIGERR_MEMORY;
uname = (UChar* )name;
n = 0;
for (i = 0; i < len; i++) {
c = name[i];
if (c <= 0 || c >= 0x80) {
c = uname[i];
if (c < 0x20 || c >= 0x80) {
xfree(s);
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
}

View File

@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
#define GRAPHEME_BREAK_PROPERTY_VERSION 11_0_0
#define GRAPHEME_BREAK_PROPERTY_VERSION 12_1_0
/*
CR
@ -43,7 +43,7 @@ V
ZWJ
*/
static int EGCB_RANGE_NUM = 1321;
static int EGCB_RANGE_NUM = 1326;
static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x000000, 0x000009, EGCB_Control },
{0x00000a, 0x00000a, EGCB_LF },
@ -197,8 +197,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x000e47, 0x000e4e, EGCB_Extend },
{0x000eb1, 0x000eb1, EGCB_Extend },
{0x000eb3, 0x000eb3, EGCB_SpacingMark },
{0x000eb4, 0x000eb9, EGCB_Extend },
{0x000ebb, 0x000ebc, EGCB_Extend },
{0x000eb4, 0x000ebc, EGCB_Extend },
{0x000ec8, 0x000ecd, EGCB_Extend },
{0x000f18, 0x000f19, EGCB_Extend },
{0x000f35, 0x000f35, EGCB_Extend },
@ -271,9 +270,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x001ab0, 0x001abe, EGCB_Extend },
{0x001b00, 0x001b03, EGCB_Extend },
{0x001b04, 0x001b04, EGCB_SpacingMark },
{0x001b34, 0x001b34, EGCB_Extend },
{0x001b35, 0x001b35, EGCB_SpacingMark },
{0x001b36, 0x001b3a, EGCB_Extend },
{0x001b34, 0x001b3a, EGCB_Extend },
{0x001b3b, 0x001b3b, EGCB_SpacingMark },
{0x001b3c, 0x001b3c, EGCB_Extend },
{0x001b3d, 0x001b41, EGCB_SpacingMark },
@ -305,7 +302,6 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x001ce1, 0x001ce1, EGCB_SpacingMark },
{0x001ce2, 0x001ce8, EGCB_Extend },
{0x001ced, 0x001ced, EGCB_Extend },
{0x001cf2, 0x001cf3, EGCB_SpacingMark },
{0x001cf4, 0x001cf4, EGCB_Extend },
{0x001cf7, 0x001cf7, EGCB_SpacingMark },
{0x001cf8, 0x001cf9, EGCB_Extend },
@ -348,8 +344,8 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x00a9b4, 0x00a9b5, EGCB_SpacingMark },
{0x00a9b6, 0x00a9b9, EGCB_Extend },
{0x00a9ba, 0x00a9bb, EGCB_SpacingMark },
{0x00a9bc, 0x00a9bc, EGCB_Extend },
{0x00a9bd, 0x00a9c0, EGCB_SpacingMark },
{0x00a9bc, 0x00a9bd, EGCB_Extend },
{0x00a9be, 0x00a9c0, EGCB_SpacingMark },
{0x00a9e5, 0x00a9e5, EGCB_Extend },
{0x00aa29, 0x00aa2e, EGCB_Extend },
{0x00aa2f, 0x00aa30, EGCB_SpacingMark },
@ -1177,7 +1173,6 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x00d789, 0x00d7a3, EGCB_LVT },
{0x00d7b0, 0x00d7c6, EGCB_V },
{0x00d7cb, 0x00d7fb, EGCB_T },
{0x00d800, 0x00dfff, EGCB_Control },
{0x00fb1e, 0x00fb1e, EGCB_Extend },
{0x00fe00, 0x00fe0f, EGCB_Extend },
{0x00fe20, 0x00fe2f, EGCB_Extend },
@ -1291,6 +1286,12 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x01182f, 0x011837, EGCB_Extend },
{0x011838, 0x011838, EGCB_SpacingMark },
{0x011839, 0x01183a, EGCB_Extend },
{0x0119d1, 0x0119d3, EGCB_SpacingMark },
{0x0119d4, 0x0119d7, EGCB_Extend },
{0x0119da, 0x0119db, EGCB_Extend },
{0x0119dc, 0x0119df, EGCB_SpacingMark },
{0x0119e0, 0x0119e0, EGCB_Extend },
{0x0119e4, 0x0119e4, EGCB_SpacingMark },
{0x011a01, 0x011a0a, EGCB_Extend },
{0x011a33, 0x011a38, EGCB_Extend },
{0x011a39, 0x011a39, EGCB_SpacingMark },
@ -1300,7 +1301,7 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x011a51, 0x011a56, EGCB_Extend },
{0x011a57, 0x011a58, EGCB_SpacingMark },
{0x011a59, 0x011a5b, EGCB_Extend },
{0x011a86, 0x011a89, EGCB_Prepend },
{0x011a84, 0x011a89, EGCB_Prepend },
{0x011a8a, 0x011a96, EGCB_Extend },
{0x011a97, 0x011a97, EGCB_SpacingMark },
{0x011a98, 0x011a99, EGCB_Extend },
@ -1330,9 +1331,11 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x011d97, 0x011d97, EGCB_Extend },
{0x011ef3, 0x011ef4, EGCB_Extend },
{0x011ef5, 0x011ef6, EGCB_SpacingMark },
{0x013430, 0x013438, EGCB_Control },
{0x016af0, 0x016af4, EGCB_Extend },
{0x016b30, 0x016b36, EGCB_Extend },
{0x016f51, 0x016f7e, EGCB_SpacingMark },
{0x016f4f, 0x016f4f, EGCB_Extend },
{0x016f51, 0x016f87, EGCB_SpacingMark },
{0x016f8f, 0x016f92, EGCB_Extend },
{0x01bc9d, 0x01bc9e, EGCB_Extend },
{0x01bca0, 0x01bca3, EGCB_Control },
@ -1357,6 +1360,8 @@ static EGCB_RANGE_TYPE EGCB_RANGES[] = {
{0x01e01b, 0x01e021, EGCB_Extend },
{0x01e023, 0x01e024, EGCB_Extend },
{0x01e026, 0x01e02a, EGCB_Extend },
{0x01e130, 0x01e136, EGCB_Extend },
{0x01e2ec, 0x01e2ef, EGCB_Extend },
{0x01e8d0, 0x01e8d6, EGCB_Extend },
{0x01e944, 0x01e94a, EGCB_Extend },
{0x01f1e6, 0x01f1ff, EGCB_Regional_Indicator },

View File

@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
/* Computed positions: -k'3,6' */
@ -225,5 +225,3 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])
}
return -1;
}

View File

@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
/* Command-line: /usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
/* Computed positions: -k'3,6,9' */
@ -135,5 +135,3 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])
}
return -1;
}

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
/*
 * Return the number of bytes needed to encode `code` in UTF-16LE.
 *
 *   code <= 0xffff              -> 2 (single 16-bit unit)
 *   0x10000 <= code <= 0x10ffff -> 4 (two 16-bit units)
 *   code > 0x10ffff             -> ONIGERR_INVALID_CODE_POINT_VALUE
 *
 * The range check must run before any length is returned; an
 * unconditional "return (code > 0xffff ? 4 : 2);" ahead of it would make
 * the check unreachable and report length 4 for out-of-range values.
 */
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
  /* Reject values above the largest code point accepted here. */
  if (code > 0x10ffff)
    return ONIGERR_INVALID_CODE_POINT_VALUE;

  return (code > 0xffff ? 4 : 2);
}
static int
@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
const UChar* end1 = end - 1;
while (p < end1) {
p += utf16le_mbc_enc_len(p);
int len = utf16le_mbc_enc_len(p);
if (len == 4) {
if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
return FALSE;
}
else
if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
return FALSE;
p += len;
}
if (p != end)
@ -184,7 +201,7 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
static int
utf16le_mbc_case_fold(OnigCaseFoldType flag,
const UChar** pp, const UChar* end, UChar* fold)
const UChar** pp, const UChar* end, UChar* fold)
{
const UChar* p = *pp;
@ -207,13 +224,13 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
}
else
return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
fold);
fold);
}
#if 0
static int
utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
const UChar* end)
const UChar* end)
{
const UChar* p = *pp;
@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
UTF16_IS_SURROGATE_FIRST(*(s-1)))
s -= 2;
return (UChar* )s;
@ -263,7 +281,7 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
flag, p, end, items);
flag, p, end, items);
}
OnigEncodingType OnigEncodingUTF16_LE = {
@ -286,6 +304,6 @@ OnigEncodingType OnigEncodingUTF16_LE = {
init,
0, /* is_initialized */
is_valid_mbc_string,
ENC_FLAG_UNICODE,
ENC_FLAG_UNICODE|ENC_FLAG_SKIP_OFFSET_1,
0, 0
};