mirror of https://github.com/acidanthera/audk.git
MdeModulePkg RegularExpressionDxe: Update Oniguruma to 6.9.0
https://bugzilla.tianocore.org/show_bug.cgi?id=1200
Update Oniguruma to the latest version, v6.9.0.
Oniguruma: https://github.com/kkos/oniguruma
Verified the VS2017 and GCC5 builds.
Verified the RegularExpressionProtocol GetInfo() and Match() functions.
Contributed-under: TianoCore Contribution Agreement 1.1
Signed-off-by: Dongao Guo <dongao.guo@intel.com>
Reviewed-by: Liming Gao <liming.gao@intel.com>
Reviewed-by: Cinnamon Shia <cinnamon.shia@hpe.com>
This commit is contained in:
parent
a364928195
commit
b602265d55
|
@ -1 +1 @@
|
|||
sndgk393 AT ybb DOT ne DOT jp (K.Kosako)
|
||||
<kkosako0@gmail.com> (K.Kosako)
|
||||
|
|
|
@ -1,28 +1,26 @@
|
|||
Oniguruma LICENSE
|
||||
-----------------
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
Copyright (c) 2002-2018 K.Kosako <kkosako0@gmail.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
|
|
@ -1,12 +1,30 @@
|
|||
README 2007/05/31
|
||||
README 2018/04/05
|
||||
|
||||
Oniguruma ---- (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
Oniguruma ---- (C) K.Kosako
|
||||
|
||||
http://www.geocities.jp/kosako3/oniguruma/
|
||||
https://github.com/kkos/oniguruma
|
||||
|
||||
Oniguruma is a regular expressions library.
|
||||
The characteristics of this library is that different character encoding
|
||||
for every regular expression object can be specified.
|
||||
FIXED Security Issues (in Oniguruma 6.3.0):
|
||||
CVE-2017-9224, CVE-2017-9225, CVE-2017-9226
|
||||
CVE-2017-9227, CVE-2017-9228, CVE-2017-9229
|
||||
|
||||
---
|
||||
Oniguruma is a modern and flexible regular expressions library. It
|
||||
encompasses features from different regular expression implementations
|
||||
that traditionally exist in different languages. It comes close to
|
||||
being a complete superset of all regular expression features found
|
||||
in other regular expression implementations.
|
||||
|
||||
Its features include:
|
||||
* Character encoding can be specified per regular expression object.
|
||||
* Several regular expression types are supported:
|
||||
* POSIX
|
||||
* Grep
|
||||
* GNU Regex
|
||||
* Perl
|
||||
* Java
|
||||
* Ruby
|
||||
* Emacs
|
||||
|
||||
Supported character encodings:
|
||||
|
||||
|
@ -30,18 +48,16 @@ Install
|
|||
|
||||
Case 1: Unix and Cygwin platform
|
||||
|
||||
1. ./configure
|
||||
2. make
|
||||
3. make install
|
||||
1. autoreconf -vfi (* case: configure script is not found.)
|
||||
|
||||
2. ./configure
|
||||
3. make
|
||||
4. make install
|
||||
|
||||
* uninstall
|
||||
|
||||
make uninstall
|
||||
|
||||
* test (ASCII/EUC-JP)
|
||||
|
||||
make atest
|
||||
|
||||
* configuration check
|
||||
|
||||
onig-config --cflags
|
||||
|
@ -51,18 +67,19 @@ Install
|
|||
|
||||
|
||||
|
||||
Case 2: Win32 platform (VC++)
|
||||
Case 2: Windows 64/32bit platform (Visual Studio)
|
||||
|
||||
1. copy win32\Makefile Makefile
|
||||
2. copy win32\config.h config.h
|
||||
3. nmake
|
||||
execute make_win64 or make_win32
|
||||
|
||||
onig_s.lib: static link library
|
||||
onig.dll: dynamic link library
|
||||
src/onig_s.lib: static link library
|
||||
src/onig.dll: dynamic link library
|
||||
|
||||
* test (ASCII/Shift_JIS)
|
||||
4. copy win32\testc.c testc.c
|
||||
5. nmake ctest
|
||||
1. cd src
|
||||
2. copy ..\windows\testc.c .
|
||||
3. nmake -f Makefile.windows ctest
|
||||
|
||||
(I have checked with Visual Studio Community 2015)
|
||||
|
||||
|
||||
|
||||
|
@ -103,6 +120,7 @@ Sample Programs
|
|||
sample/posix.c POSIX API sample.
|
||||
sample/sql.c example of the variable meta characters.
|
||||
(SQL-like pattern matching)
|
||||
sample/user_property.c example of user defined Unicode property.
|
||||
|
||||
Test Programs
|
||||
sample/syntax.c Perl, Java and ASIS syntax test.
|
||||
|
@ -136,54 +154,42 @@ Source Files
|
|||
regposerr.c POSIX error message function.
|
||||
regposix.c POSIX API functions.
|
||||
|
||||
enc/mktable.c character type table generator.
|
||||
enc/ascii.c ASCII encoding.
|
||||
enc/euc_jp.c EUC-JP encoding.
|
||||
enc/euc_tw.c EUC-TW encoding.
|
||||
enc/euc_kr.c EUC-KR, EUC-CN encoding.
|
||||
enc/sjis.c Shift_JIS encoding.
|
||||
enc/big5.c Big5 encoding.
|
||||
enc/gb18030.c GB18030 encoding.
|
||||
enc/koi8.c KOI8 encoding.
|
||||
enc/koi8_r.c KOI8-R encoding.
|
||||
enc/cp1251.c CP1251 encoding.
|
||||
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
|
||||
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
|
||||
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
|
||||
enc/iso8859_4.c ISO-8859-4 encoding. (Latin-4)
|
||||
enc/iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
|
||||
enc/iso8859_6.c ISO-8859-6 encoding. (Arabic)
|
||||
enc/iso8859_7.c ISO-8859-7 encoding. (Greek)
|
||||
enc/iso8859_8.c ISO-8859-8 encoding. (Hebrew)
|
||||
enc/iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
|
||||
enc/iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
|
||||
enc/iso8859_11.c ISO-8859-11 encoding. (Thai)
|
||||
enc/iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
|
||||
enc/iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
|
||||
enc/iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
|
||||
enc/iso8859_16.c ISO-8859-16 encoding.
|
||||
mktable.c character type table generator.
|
||||
ascii.c ASCII encoding.
|
||||
euc_jp.c EUC-JP encoding.
|
||||
euc_tw.c EUC-TW encoding.
|
||||
euc_kr.c EUC-KR, EUC-CN encoding.
|
||||
sjis.c Shift_JIS encoding.
|
||||
big5.c Big5 encoding.
|
||||
gb18030.c GB18030 encoding.
|
||||
koi8.c KOI8 encoding.
|
||||
koi8_r.c KOI8-R encoding.
|
||||
cp1251.c CP1251 encoding.
|
||||
iso8859_1.c ISO-8859-1 encoding. (Latin-1)
|
||||
iso8859_2.c ISO-8859-2 encoding. (Latin-2)
|
||||
iso8859_3.c ISO-8859-3 encoding. (Latin-3)
|
||||
iso8859_4.c ISO-8859-4 encoding. (Latin-4)
|
||||
iso8859_5.c ISO-8859-5 encoding. (Cyrillic)
|
||||
iso8859_6.c ISO-8859-6 encoding. (Arabic)
|
||||
iso8859_7.c ISO-8859-7 encoding. (Greek)
|
||||
iso8859_8.c ISO-8859-8 encoding. (Hebrew)
|
||||
iso8859_9.c ISO-8859-9 encoding. (Latin-5 or Turkish)
|
||||
iso8859_10.c ISO-8859-10 encoding. (Latin-6 or Nordic)
|
||||
iso8859_11.c ISO-8859-11 encoding. (Thai)
|
||||
iso8859_13.c ISO-8859-13 encoding. (Latin-7 or Baltic Rim)
|
||||
iso8859_14.c ISO-8859-14 encoding. (Latin-8 or Celtic)
|
||||
iso8859_15.c ISO-8859-15 encoding. (Latin-9 or West European with Euro)
|
||||
iso8859_16.c ISO-8859-16 encoding.
|
||||
(Latin-10 or South-Eastern European with Euro)
|
||||
enc/utf8.c UTF-8 encoding.
|
||||
enc/utf16_be.c UTF-16BE encoding.
|
||||
enc/utf16_le.c UTF-16LE encoding.
|
||||
enc/utf32_be.c UTF-32BE encoding.
|
||||
enc/utf32_le.c UTF-32LE encoding.
|
||||
enc/unicode.c Unicode information data.
|
||||
utf8.c UTF-8 encoding.
|
||||
utf16_be.c UTF-16BE encoding.
|
||||
utf16_le.c UTF-16LE encoding.
|
||||
utf32_be.c UTF-32BE encoding.
|
||||
utf32_le.c UTF-32LE encoding.
|
||||
unicode.c common codes of Unicode encoding.
|
||||
|
||||
win32/Makefile Makefile for Win32 (VC++)
|
||||
win32/config.h config.h for Win32
|
||||
|
||||
|
||||
|
||||
ToDo
|
||||
|
||||
? case fold flag: Katakana <-> Hiragana.
|
||||
? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
|
||||
?? \X (== \PM\pM*)
|
||||
?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
|
||||
?? transmission stopper. (return ONIG_STOP from match_at())
|
||||
|
||||
and I'm thankful to Akinori MUSHA.
|
||||
|
||||
|
||||
Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
/**********************************************************************
|
||||
ascii.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h" /* for USE_CALLOUT */
|
||||
|
||||
/*
 * Encoding init hook for US-ASCII; it is installed in the `init` slot of
 * the OnigEncodingASCII table in this file.
 *
 * When USE_CALLOUT is defined, it sets up the builtin callouts named
 * "FAIL", "MISMATCH", "MAX", "ERROR", "COUNT", "TOTAL_COUNT" and "CMP"
 * through the BC* helper macros.  Without USE_CALLOUT the body is empty.
 *
 * Returns ONIG_NORMAL when control reaches the end of the function.
 *
 * NOTE(review): the BC* macros are not defined in this file (presumably
 * they come from regint.h).  The locals `id` and `enc` are not referenced
 * directly below, so they are presumably consumed by those macros, which
 * may also return early on failure -- confirm against their definitions.
 */
static int
|
||||
init(void)
|
||||
{
|
||||
#ifdef USE_CALLOUT
|
||||
|
||||
int id;
|
||||
OnigEncoding enc;
|
||||
char* name;
|
||||
unsigned int args[4];
|
||||
OnigValue opts[4];
|
||||
|
||||
enc = ONIG_ENCODING_ASCII;
|
||||
|
||||
name = "FAIL"; BC0_P(name, fail);
|
||||
name = "MISMATCH"; BC0_P(name, mismatch);
|
||||
|
||||
name = "MAX";
|
||||
args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
|
||||
args[1] = ONIG_TYPE_CHAR;
|
||||
opts[0].c = 'X';
|
||||
BC_B_O(name, max, 2, args, 1, opts);
|
||||
|
||||
name = "ERROR";
|
||||
args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
|
||||
BC_P_O(name, error, 1, args, 1, opts);
|
||||
|
||||
name = "COUNT";
|
||||
args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
|
||||
BC_B_O(name, count, 1, args, 1, opts);
|
||||
|
||||
name = "TOTAL_COUNT";
|
||||
args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
|
||||
BC_B_O(name, total_count, 1, args, 1, opts);
|
||||
|
||||
name = "CMP";
|
||||
args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
|
||||
args[1] = ONIG_TYPE_STRING;
|
||||
args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
|
||||
BC_P(name, cmp, 3, args);
|
||||
|
||||
#endif /* USE_CALLOUT */
|
||||
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
/*
 * Compiled out on purpose: the is_initialized slot of the OnigEncodingASCII
 * table in this file is filled with 0 instead of this function.
 */
#if 0
|
||||
static int
|
||||
is_initialized(void)
|
||||
{
|
||||
/* Don't use this function */
|
||||
/* can't answer, because builtin callout entries removed in onig_end() */
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Character-type test used by the ASCII encoding table.
 *
 * code:  code point to classify.
 * ctype: character type to test for.
 *
 * For code < 128 the result is whatever
 * ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype) evaluates to; any other code
 * point yields FALSE.
 */
static int
|
||||
ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/*
 * Encoding descriptor for US-ASCII.
 *
 * The entries are positional initializers for OnigEncodingType, so their
 * order must follow the field order of that struct.  The encoding is
 * single-byte: both the maximum and minimum encoded length are 1.
 */
OnigEncodingType OnigEncodingASCII = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"US-ASCII", /* name */
|
||||
1, /* max enc length */
|
||||
1, /* min enc length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
ascii_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match,
|
||||
init,
|
||||
0, /* is_initialized */
|
||||
onigenc_always_true_is_valid_mbc_string,
|
||||
ENC_FLAG_ASCII_COMPATIBLE, /* flag */
|
||||
0, 0 /* sb_range, index */
|
||||
};
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,8 @@
|
|||
/**********************************************************************
|
||||
ascii.c - Oniguruma (regular expression library)
|
||||
onig_init.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2016-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -27,32 +27,19 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regenc.h"
|
||||
#include "regint.h"
|
||||
|
||||
static int
|
||||
ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
/* onig_init(): deprecated function */
|
||||
extern int
|
||||
onig_init(void)
|
||||
{
|
||||
if (code < 128)
|
||||
return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
|
||||
else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
OnigEncodingType OnigEncodingASCII = {
|
||||
onigenc_single_byte_mbc_enc_len,
|
||||
"US-ASCII", /* name */
|
||||
1, /* max byte length */
|
||||
1, /* min byte length */
|
||||
onigenc_is_mbc_newline_0x0a,
|
||||
onigenc_single_byte_mbc_to_code,
|
||||
onigenc_single_byte_code_to_mbclen,
|
||||
onigenc_single_byte_code_to_mbc,
|
||||
onigenc_ascii_mbc_case_fold,
|
||||
onigenc_ascii_apply_all_case_fold,
|
||||
onigenc_ascii_get_case_fold_codes_by_str,
|
||||
onigenc_minimum_property_name_to_ctype,
|
||||
ascii_is_code_ctype,
|
||||
onigenc_not_support_get_ctype_code_range,
|
||||
onigenc_single_byte_left_adjust_char_head,
|
||||
onigenc_always_true_is_allowed_reverse_match
|
||||
#if 0
|
||||
OnigEncoding encs[] = {
|
||||
ONIG_ENCODING_UTF8
|
||||
};
|
||||
|
||||
return onig_initialize(encs, sizeof(encs)/sizeof(encs[0]));
|
||||
#else
|
||||
return onig_initialize(0, 0);
|
||||
#endif
|
||||
}
|
|
@ -35,10 +35,12 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define RE_MBCTYPE_ASCII 0
|
||||
#define RE_MBCTYPE_EUC 1
|
||||
#define RE_MBCTYPE_SJIS 2
|
||||
#define RE_MBCTYPE_UTF8 3
|
||||
/*
 * RE_MBCTYPE_* identifiers expressed as enum constants.  The numeric
 * values (0..3) are the same as those of the #define forms they replace.
 */
enum {
|
||||
RE_MBCTYPE_ASCII = 0,
|
||||
RE_MBCTYPE_EUC = 1,
|
||||
RE_MBCTYPE_SJIS = 2,
|
||||
RE_MBCTYPE_UTF8 = 3
|
||||
};
|
||||
|
||||
/* GNU regex options */
|
||||
#ifndef RE_NREGS
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
onigposix.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -39,7 +39,7 @@ extern "C" {
|
|||
#define REG_NEWLINE (1<<1)
|
||||
#define REG_NOTBOL (1<<2)
|
||||
#define REG_NOTEOL (1<<3)
|
||||
#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */
|
||||
#define REG_EXTENDED (1<<4) /* if not set, Basic Onigular Expression */
|
||||
#define REG_NOSUB (1<<5)
|
||||
|
||||
/* POSIX error codes */
|
||||
|
@ -61,7 +61,7 @@ extern "C" {
|
|||
#define REG_EONIG_INTERNAL 14
|
||||
#define REG_EONIG_BADWC 15
|
||||
#define REG_EONIG_BADARG 16
|
||||
#define REG_EONIG_THREAD 17
|
||||
/* #define REG_EONIG_THREAD 17 */
|
||||
|
||||
/* character encodings (for reg_set_encoding()) */
|
||||
#define REG_POSIX_ENCODING_ASCII 0
|
||||
|
@ -97,7 +97,7 @@ typedef struct {
|
|||
|
||||
#ifndef ONIG_EXTERN
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#if defined(EXPORT)
|
||||
#if defined(ONIGURUMA_EXPORT)
|
||||
#define ONIG_EXTERN extern __declspec(dllexport)
|
||||
#else
|
||||
#define ONIG_EXTERN extern __declspec(dllimport)
|
||||
|
@ -128,6 +128,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
|
|||
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxOniguruma;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
|
||||
|
@ -138,6 +139,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
|||
#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
|
||||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
#define ONIG_SYNTAX_ONIGURUMA (&OnigSyntaxOniguruma)
|
||||
/* default syntax */
|
||||
#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
|
||||
|
||||
|
@ -147,6 +149,7 @@ ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax));
|
|||
ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
|
||||
ONIG_EXTERN const char* onig_version P_((void));
|
||||
ONIG_EXTERN const char* onig_copyright P_((void));
|
||||
ONIG_EXTERN int onig_end P_((void));
|
||||
|
||||
#endif /* ONIGURUMA_H */
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
oniguruma.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -36,31 +36,11 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
#define ONIGURUMA
|
||||
#define ONIGURUMA_VERSION_MAJOR 5
|
||||
#define ONIGURUMA_VERSION_MAJOR 6
|
||||
#define ONIGURUMA_VERSION_MINOR 9
|
||||
#define ONIGURUMA_VERSION_TEENY 6
|
||||
#define ONIGURUMA_VERSION_TEENY 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef HAVE_PROTOTYPES
|
||||
# define HAVE_PROTOTYPES 1
|
||||
# endif
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
|
||||
#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDARG_H
|
||||
# ifndef HAVE_STDARG_PROTOTYPES
|
||||
# define HAVE_STDARG_PROTOTYPES 1
|
||||
# endif
|
||||
#endif
|
||||
#define ONIGURUMA_VERSION_INT 60900
|
||||
|
||||
#ifndef P_
|
||||
#if defined(__STDC__) || defined(_WIN32)
|
||||
|
@ -71,16 +51,12 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
#ifndef PV_
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
# define PV_(args) args
|
||||
#else
|
||||
# define PV_(args) ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef ONIG_EXTERN
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#if defined(EXPORT) || defined(RUBY_EXPORT)
|
||||
#if defined(ONIGURUMA_EXPORT)
|
||||
#define ONIG_EXTERN extern __declspec(dllexport)
|
||||
#else
|
||||
#define ONIG_EXTERN extern __declspec(dllimport)
|
||||
|
@ -98,17 +74,12 @@ extern "C" {
|
|||
#define UChar OnigUChar
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <windows.h>
|
||||
typedef ULONG_PTR OnigCodePoint;
|
||||
#else
|
||||
typedef unsigned long OnigCodePoint;
|
||||
#endif
|
||||
typedef unsigned int OnigCodePoint;
|
||||
typedef unsigned char OnigUChar;
|
||||
typedef unsigned int OnigCtype;
|
||||
typedef unsigned int OnigDistance;
|
||||
typedef unsigned int OnigLen;
|
||||
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
|
||||
#define ONIG_INFINITE_DISTANCE ~((OnigLen )0)
|
||||
|
||||
typedef unsigned int OnigCaseFoldType; /* case fold flag */
|
||||
|
||||
|
@ -166,6 +137,12 @@ typedef struct OnigEncodingTypeST {
|
|||
int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
|
||||
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
|
||||
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
|
||||
int (*init)(void);
|
||||
int (*is_initialized)(void);
|
||||
int (*is_valid_mbc_string)(const OnigUChar* s, const OnigUChar* end);
|
||||
unsigned int flag;
|
||||
OnigCodePoint sb_range;
|
||||
int index;
|
||||
} OnigEncodingType;
|
||||
|
||||
typedef OnigEncodingType* OnigEncoding;
|
||||
|
@ -243,21 +220,24 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
|||
/* 18: 6(max-byte) * 3(case-fold chars) */
|
||||
|
||||
/* character types */
|
||||
#define ONIGENC_CTYPE_NEWLINE 0
|
||||
#define ONIGENC_CTYPE_ALPHA 1
|
||||
#define ONIGENC_CTYPE_BLANK 2
|
||||
#define ONIGENC_CTYPE_CNTRL 3
|
||||
#define ONIGENC_CTYPE_DIGIT 4
|
||||
#define ONIGENC_CTYPE_GRAPH 5
|
||||
#define ONIGENC_CTYPE_LOWER 6
|
||||
#define ONIGENC_CTYPE_PRINT 7
|
||||
#define ONIGENC_CTYPE_PUNCT 8
|
||||
#define ONIGENC_CTYPE_SPACE 9
|
||||
#define ONIGENC_CTYPE_UPPER 10
|
||||
#define ONIGENC_CTYPE_XDIGIT 11
|
||||
#define ONIGENC_CTYPE_WORD 12
|
||||
#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
|
||||
#define ONIGENC_CTYPE_ASCII 14
|
||||
/*
 * Standard character type codes.  The values are explicit and contiguous
 * from 0 (NEWLINE) to 14 (ASCII), matching the earlier #define constants;
 * ONIGENC_CTYPE_ASCII is the largest standard value.
 */
typedef enum {
|
||||
ONIGENC_CTYPE_NEWLINE = 0,
|
||||
ONIGENC_CTYPE_ALPHA = 1,
|
||||
ONIGENC_CTYPE_BLANK = 2,
|
||||
ONIGENC_CTYPE_CNTRL = 3,
|
||||
ONIGENC_CTYPE_DIGIT = 4,
|
||||
ONIGENC_CTYPE_GRAPH = 5,
|
||||
ONIGENC_CTYPE_LOWER = 6,
|
||||
ONIGENC_CTYPE_PRINT = 7,
|
||||
ONIGENC_CTYPE_PUNCT = 8,
|
||||
ONIGENC_CTYPE_SPACE = 9,
|
||||
ONIGENC_CTYPE_UPPER = 10,
|
||||
ONIGENC_CTYPE_XDIGIT = 11,
|
||||
ONIGENC_CTYPE_WORD = 12,
|
||||
ONIGENC_CTYPE_ALNUM = 13, /* alpha || digit */
|
||||
ONIGENC_CTYPE_ASCII = 14
|
||||
} OnigEncCtype;
|
||||
|
||||
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
|
||||
|
||||
|
||||
|
@ -270,7 +250,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
|||
#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
|
||||
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
|
||||
ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
|
||||
|
||||
#define ONIGENC_IS_MBC_WORD_ASCII(enc,s,end) onigenc_is_mbc_word_ascii(enc,s,end)
|
||||
|
||||
#define ONIGENC_NAME(enc) ((enc)->name)
|
||||
|
||||
|
@ -280,6 +260,8 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
|
|||
(enc)->is_allowed_reverse_match(s,end)
|
||||
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
|
||||
(enc)->left_adjust_char_head(start, s)
|
||||
#define ONIGENC_IS_VALID_MBC_STRING(enc,s,end) \
|
||||
(enc)->is_valid_mbc_string(s,end)
|
||||
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
|
||||
(enc)->apply_all_case_fold(case_fold_flag,f,arg)
|
||||
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
|
||||
|
@ -340,6 +322,8 @@ OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const
|
|||
ONIG_EXTERN
|
||||
int onigenc_init P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_initialize_encoding P_((OnigEncoding enc));
|
||||
ONIG_EXTERN
|
||||
int onigenc_set_default_encoding P_((OnigEncoding enc));
|
||||
ONIG_EXTERN
|
||||
OnigEncoding onigenc_get_default_encoding P_((void));
|
||||
|
@ -359,13 +343,17 @@ ONIG_EXTERN
|
|||
int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
ONIG_EXTERN
|
||||
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
|
||||
|
||||
ONIG_EXTERN
|
||||
int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onigenc_strdup P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
|
||||
|
||||
|
||||
/* PART: regular expression */
|
||||
|
||||
/* config parameters */
|
||||
#define ONIG_NREGION 10
|
||||
#define ONIG_MAX_CAPTURE_NUM 2147483647 /* 2**31 - 1 */
|
||||
#define ONIG_MAX_BACKREF_NUM 1000
|
||||
#define ONIG_MAX_REPEAT_NUM 100000
|
||||
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
|
||||
|
@ -378,6 +366,7 @@ typedef unsigned int OnigOptionType;
|
|||
|
||||
/* options */
|
||||
#define ONIG_OPTION_NONE 0U
|
||||
/* options (compile time) */
|
||||
#define ONIG_OPTION_IGNORECASE 1U
|
||||
#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
|
||||
#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
|
||||
|
@ -391,7 +380,15 @@ typedef unsigned int OnigOptionType;
|
|||
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
|
||||
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
|
||||
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
|
||||
#define ONIG_OPTION_CHECK_VALIDITY_OF_STRING (ONIG_OPTION_POSIX_REGION << 1)
|
||||
/* #define ONIG_OPTION_CRLF_AS_LINE_SEPARATOR (ONIG_OPTION_CHECK_VALIDITY_OF_STRING << 1) */
|
||||
/* options (compile time) */
|
||||
#define ONIG_OPTION_WORD_IS_ASCII (ONIG_OPTION_CHECK_VALIDITY_OF_STRING << 4)
|
||||
#define ONIG_OPTION_DIGIT_IS_ASCII (ONIG_OPTION_WORD_IS_ASCII << 1)
|
||||
#define ONIG_OPTION_SPACE_IS_ASCII (ONIG_OPTION_DIGIT_IS_ASCII << 1)
|
||||
#define ONIG_OPTION_POSIX_IS_ASCII (ONIG_OPTION_SPACE_IS_ASCII << 1)
|
||||
|
||||
#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_IS_ASCII /* limit */
|
||||
|
||||
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
|
||||
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
|
||||
|
@ -416,6 +413,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
|
|||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
||||
ONIG_EXTERN OnigSyntaxType OnigSyntaxOniguruma;
|
||||
|
||||
/* predefined syntaxes (see regsyntax.c) */
|
||||
#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
|
||||
|
@ -428,6 +426,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
|
|||
#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
|
||||
#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
|
||||
#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
|
||||
#define ONIG_SYNTAX_ONIGURUMA (&OnigSyntaxOniguruma)
|
||||
|
||||
/* default syntax */
|
||||
ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
||||
|
@ -465,6 +464,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
|
||||
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
|
||||
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
|
||||
#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{1OOOOOOOOOO} */
|
||||
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
|
||||
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
|
||||
|
@ -487,6 +487,15 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
|
||||
#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
|
||||
#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
|
||||
#define ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (1U<<21) /* (?(n)) (?(...)...|...) */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<22) /* \K */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (1U<<23) /* \R \r\n else [\x0a-\x0d] */
|
||||
#define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (1U<<24) /* \N (?-m:.), \O (?m:.) */
|
||||
#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */
|
||||
#define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */
|
||||
#define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... */
|
||||
#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */
|
||||
#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (1U<<29) /* (*name) (*name{a,..}) */
|
||||
|
||||
/* syntax (behavior) */
|
||||
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
|
||||
|
@ -526,6 +535,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIG_NORMAL 0
|
||||
#define ONIG_MISMATCH -1
|
||||
#define ONIG_NO_SUPPORT_CONFIG -2
|
||||
#define ONIG_ABORT -3
|
||||
|
||||
/* internal error */
|
||||
#define ONIGERR_MEMORY -5
|
||||
|
@ -535,8 +545,11 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIGERR_UNDEFINED_BYTECODE -13
|
||||
#define ONIGERR_UNEXPECTED_BYTECODE -14
|
||||
#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
|
||||
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
|
||||
#define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17
|
||||
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
|
||||
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
|
||||
#define ONIGERR_FAIL_TO_INITIALIZE -23
|
||||
/* general error */
|
||||
#define ONIGERR_INVALID_ARGUMENT -30
|
||||
/* syntax error */
|
||||
|
@ -573,6 +586,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
|
||||
#define ONIGERR_INVALID_BACKREF -208
|
||||
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
|
||||
#define ONIGERR_TOO_MANY_CAPTURES -210
|
||||
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
|
||||
#define ONIGERR_EMPTY_GROUP_NAME -214
|
||||
#define ONIGERR_INVALID_GROUP_NAME -215
|
||||
|
@ -584,17 +598,29 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
|
|||
#define ONIGERR_NEVER_ENDING_RECURSION -221
|
||||
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
|
||||
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
|
||||
#define ONIGERR_INVALID_IF_ELSE_SYNTAX -224
|
||||
#define ONIGERR_INVALID_ABSENT_GROUP_PATTERN -225
|
||||
#define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN -226
|
||||
#define ONIGERR_INVALID_CALLOUT_PATTERN -227
|
||||
#define ONIGERR_INVALID_CALLOUT_NAME -228
|
||||
#define ONIGERR_UNDEFINED_CALLOUT_NAME -229
|
||||
#define ONIGERR_INVALID_CALLOUT_BODY -230
|
||||
#define ONIGERR_INVALID_CALLOUT_TAG_NAME -231
|
||||
#define ONIGERR_INVALID_CALLOUT_ARG -232
|
||||
#define ONIGERR_INVALID_CODE_POINT_VALUE -400
|
||||
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
|
||||
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
|
||||
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
|
||||
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
|
||||
#define ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS -404
|
||||
#define ONIGERR_TOO_LONG_PROPERTY_NAME -405
|
||||
#define ONIGERR_LIBRARY_IS_NOT_INITIALIZED -500
|
||||
|
||||
/* errors related to thread */
|
||||
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
|
||||
/* #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 */
|
||||
|
||||
|
||||
/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
|
||||
/* must be smaller than MEM_STATUS_BITS_NUM (unsigned int * 8) */
|
||||
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
|
||||
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
|
||||
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
|
||||
|
@ -646,59 +672,8 @@ extern void onig_null_warn P_((const char* s));
|
|||
|
||||
#define ONIG_CHAR_TABLE_SIZE 256
|
||||
|
||||
/* regex_t state */
|
||||
#define ONIG_STATE_NORMAL 0
|
||||
#define ONIG_STATE_SEARCHING 1
|
||||
#define ONIG_STATE_COMPILING -1
|
||||
#define ONIG_STATE_MODIFY -2
|
||||
|
||||
#define ONIG_STATE(reg) \
|
||||
((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
|
||||
|
||||
typedef struct re_pattern_buffer {
|
||||
/* common members of BBuf(bytes-buffer) */
|
||||
unsigned char* p; /* compiled pattern */
|
||||
unsigned int used; /* used space for p */
|
||||
unsigned int alloc; /* allocated space for p */
|
||||
|
||||
int state; /* normal, searching, compiling */
|
||||
int num_mem; /* used memory(...) num counted from 1 */
|
||||
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
|
||||
int num_null_check; /* OP_NULL_CHECK_START/END id counter */
|
||||
int num_comb_exp_check; /* combination explosion check */
|
||||
int num_call; /* number of subexp call */
|
||||
unsigned int capture_history; /* (?@...) flag (1-31) */
|
||||
unsigned int bt_mem_start; /* need backtrack flag */
|
||||
unsigned int bt_mem_end; /* need backtrack flag */
|
||||
int stack_pop_level;
|
||||
int repeat_range_alloc;
|
||||
OnigRepeatRange* repeat_range;
|
||||
|
||||
OnigEncoding enc;
|
||||
OnigOptionType options;
|
||||
OnigSyntaxType* syntax;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
void* name_table;
|
||||
|
||||
/* optimization info (string search, char-map and anchors) */
|
||||
int optimize; /* optimize flag */
|
||||
int threshold_len; /* search str-length for apply optimize */
|
||||
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
|
||||
OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
|
||||
OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
|
||||
int sub_anchor; /* start-anchor for exact or map */
|
||||
unsigned char *exact;
|
||||
unsigned char *exact_end;
|
||||
unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
|
||||
int *int_map; /* BM skip for exact_len > 255 */
|
||||
int *int_map_backward; /* BM skip for backward search */
|
||||
OnigDistance dmin; /* min-distance of exact or map */
|
||||
OnigDistance dmax; /* max-distance of exact or map */
|
||||
|
||||
/* regex_t link chain */
|
||||
struct re_pattern_buffer* chain; /* escape compile-conflict */
|
||||
} OnigRegexType;
|
||||
|
||||
struct re_pattern_buffer;
|
||||
typedef struct re_pattern_buffer OnigRegexType;
|
||||
typedef OnigRegexType* OnigRegex;
|
||||
|
||||
#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
|
||||
|
@ -715,7 +690,75 @@ typedef struct {
|
|||
OnigCaseFoldType case_fold_flag;
|
||||
} OnigCompileInfo;
|
||||
|
||||
|
||||
/* types for callout */
|
||||
/*
 * Direction(s) in which a callout is invoked.  The two values occupy
 * separate bits so that they can be OR-ed together (see
 * ONIG_CALLOUT_IN_BOTH).
 */
typedef enum {
  ONIG_CALLOUT_IN_PROGRESS   = 1 << 0,  /* value 1 */
  ONIG_CALLOUT_IN_RETRACTION = 1 << 1   /* value 2 */
} OnigCalloutIn;
|
||||
|
||||
#define ONIG_CALLOUT_IN_BOTH (ONIG_CALLOUT_IN_PROGRESS | ONIG_CALLOUT_IN_RETRACTION)
|
||||
|
||||
/*
 * Kind of callout: one written as contents or one referenced by name.
 * NOTE(review): presumably these map to the (?{...}) and (*name) pattern
 * syntaxes respectively -- confirm against the parser.
 */
typedef enum {
  ONIG_CALLOUT_OF_CONTENTS = 0,
  ONIG_CALLOUT_OF_NAME     = 1
} OnigCalloutOf;
|
||||
|
||||
/*
 * Callout type passed as the `type` argument of onig_set_callout_of_name().
 *
 * The enumerator list deliberately has no trailing comma: a trailing comma
 * is rejected by strict C89 and C++98 compilers, and the other enums in this
 * header omit it as well.
 */
typedef enum {
  ONIG_CALLOUT_TYPE_SINGLE              = 0,
  ONIG_CALLOUT_TYPE_START_CALL          = 1,
  ONIG_CALLOUT_TYPE_BOTH_CALL           = 2,
  ONIG_CALLOUT_TYPE_START_MARK_END_CALL = 3
} OnigCalloutType;
|
||||
|
||||
|
||||
#define ONIG_NON_NAME_ID -1
|
||||
#define ONIG_NON_CALLOUT_NUM 0
|
||||
|
||||
#define ONIG_CALLOUT_MAX_ARGS_NUM 4
|
||||
#define ONIG_CALLOUT_DATA_SLOT_NUM 5
|
||||
|
||||
struct OnigCalloutArgsStruct;
|
||||
typedef struct OnigCalloutArgsStruct OnigCalloutArgs;
|
||||
|
||||
typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data);
|
||||
|
||||
/* callout function return values (less than -1: error code) */
|
||||
/*
 * Non-error results a callout function may return.
 * Enumerators are listed in ascending value order.
 */
typedef enum {
  ONIG_CALLOUT_SUCCESS = 0,
  ONIG_CALLOUT_FAIL    = 1
} OnigCalloutResult;
|
||||
|
||||
/*
 * Type tag for an OnigValue.  Apart from ONIG_TYPE_VOID (0) every type
 * occupies its own bit.
 *
 * The enumerator list deliberately has no trailing comma: a trailing comma
 * is rejected by strict C89 and C++98 compilers, and the other enums in this
 * header omit it as well.
 */
typedef enum {
  ONIG_TYPE_VOID    = 0,
  ONIG_TYPE_LONG    = 1<<0,
  ONIG_TYPE_CHAR    = 1<<1,
  ONIG_TYPE_STRING  = 1<<2,
  ONIG_TYPE_POINTER = 1<<3,
  ONIG_TYPE_TAG     = 1<<4
} OnigType;
|
||||
|
||||
typedef union {
|
||||
long l;
|
||||
OnigCodePoint c;
|
||||
struct {
|
||||
OnigUChar* start;
|
||||
OnigUChar* end;
|
||||
} s;
|
||||
void* p;
|
||||
int tag; /* tag -> callout_num */
|
||||
} OnigValue;
|
||||
|
||||
|
||||
struct OnigMatchParamStruct;
|
||||
typedef struct OnigMatchParamStruct OnigMatchParam;
|
||||
|
||||
|
||||
/* Oniguruma Native API */
|
||||
|
||||
ONIG_EXTERN
|
||||
int onig_initialize P_((OnigEncoding encodings[], int number_of_encodings));
|
||||
/* onig_init(): deprecated function. Use onig_initialize(). */
|
||||
ONIG_EXTERN
|
||||
int onig_init P_((void));
|
||||
ONIG_EXTERN
|
||||
|
@ -727,7 +770,7 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f));
|
|||
ONIG_EXTERN
|
||||
int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
|
||||
int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
|
@ -736,14 +779,16 @@ void onig_free P_((OnigRegex));
|
|||
ONIG_EXTERN
|
||||
void onig_free_body P_((OnigRegex));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
|
||||
ONIG_EXTERN
|
||||
int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
|
||||
int onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
|
||||
ONIG_EXTERN
|
||||
int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
int onig_search_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp));
|
||||
ONIG_EXTERN
|
||||
int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
|
||||
ONIG_EXTERN
|
||||
int onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp));
|
||||
ONIG_EXTERN
|
||||
OnigRegion* onig_region_new P_((void));
|
||||
ONIG_EXTERN
|
||||
void onig_region_init P_((OnigRegion* region));
|
||||
|
@ -816,12 +861,141 @@ unsigned int onig_get_match_stack_limit_size P_((void));
|
|||
ONIG_EXTERN
|
||||
int onig_set_match_stack_limit_size P_((unsigned int size));
|
||||
ONIG_EXTERN
|
||||
unsigned long onig_get_retry_limit_in_match P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_retry_limit_in_match P_((unsigned long n));
|
||||
ONIG_EXTERN
|
||||
unsigned int onig_get_parse_depth_limit P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_capture_num_limit P_((int num));
|
||||
ONIG_EXTERN
|
||||
int onig_set_parse_depth_limit P_((unsigned int depth));
|
||||
ONIG_EXTERN
|
||||
int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges));
|
||||
ONIG_EXTERN
|
||||
int onig_end P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_version P_((void));
|
||||
ONIG_EXTERN
|
||||
const char* onig_copyright P_((void));
|
||||
|
||||
/* for OnigMatchParam */
|
||||
ONIG_EXTERN
|
||||
OnigMatchParam* onig_new_match_param P_((void));
|
||||
ONIG_EXTERN
|
||||
void onig_free_match_param P_((OnigMatchParam* p));
|
||||
ONIG_EXTERN
|
||||
void onig_free_match_param_content P_((OnigMatchParam* p));
|
||||
ONIG_EXTERN
|
||||
int onig_initialize_match_param P_((OnigMatchParam* mp));
|
||||
ONIG_EXTERN
|
||||
int onig_set_match_stack_limit_size_of_match_param P_((OnigMatchParam* param, unsigned int limit));
|
||||
ONIG_EXTERN
|
||||
int onig_set_retry_limit_in_match_of_match_param P_((OnigMatchParam* param, unsigned long limit));
|
||||
ONIG_EXTERN
|
||||
int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f));
|
||||
ONIG_EXTERN
|
||||
int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f));
|
||||
ONIG_EXTERN
|
||||
int onig_set_callout_user_data_of_match_param P_((OnigMatchParam* param, void* user_data));
|
||||
|
||||
/* for callout functions */
|
||||
ONIG_EXTERN
|
||||
OnigCalloutFunc onig_get_progress_callout P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_progress_callout P_((OnigCalloutFunc f));
|
||||
ONIG_EXTERN
|
||||
OnigCalloutFunc onig_get_retraction_callout P_((void));
|
||||
ONIG_EXTERN
|
||||
int onig_set_retraction_callout P_((OnigCalloutFunc f));
|
||||
ONIG_EXTERN
|
||||
int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[]));
|
||||
ONIG_EXTERN
|
||||
OnigUChar* onig_get_callout_name_by_name_id P_((int id));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_num_by_tag P_((OnigRegex reg, const OnigUChar* tag, const OnigUChar* tag_end));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType type, OnigValue* val));
|
||||
|
||||
/* used in callout functions */
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_num_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
OnigCalloutIn onig_get_callout_in_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
int onig_get_name_id_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_contents_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_contents_end_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
int onig_get_args_num_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
int onig_get_passed_args_num_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
int onig_get_arg_by_callout_args P_((OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_string_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_string_end_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_start_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_right_range_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_current_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
OnigRegex onig_get_regex_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
unsigned long onig_get_retry_counter_by_callout_args P_((OnigCalloutArgs* args));
|
||||
ONIG_EXTERN
|
||||
int onig_callout_tag_is_exist_at_callout_num P_((OnigRegex reg, int callout_num));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_callout_tag_start P_((OnigRegex reg, int callout_num));
|
||||
ONIG_EXTERN
|
||||
const OnigUChar* onig_get_callout_tag_end P_((OnigRegex reg, int callout_num));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_data_dont_clear_old P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_data_by_callout_args_self_dont_clear_old P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_get_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_set_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_set_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_set_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val));
|
||||
ONIG_EXTERN
|
||||
int onig_get_capture_range_in_callout P_((OnigCalloutArgs* args, int mem_num, int* begin, int* end));
|
||||
ONIG_EXTERN
|
||||
int onig_get_used_stack_size_in_callout P_((OnigCalloutArgs* args, int* used_num, int* used_bytes));
|
||||
|
||||
/* builtin callout functions */
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_fail P_((OnigCalloutArgs* args, void* user_data));
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_mismatch P_((OnigCalloutArgs* args, void* user_data));
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_error P_((OnigCalloutArgs* args, void* user_data));
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_count P_((OnigCalloutArgs* args, void* user_data));
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_total_count P_((OnigCalloutArgs* args, void* user_data));
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_max P_((OnigCalloutArgs* args, void* user_data));
|
||||
ONIG_EXTERN
|
||||
int onig_builtin_cmp P_((OnigCalloutArgs* args, void* user_data));
|
||||
|
||||
ONIG_EXTERN
|
||||
int onig_setup_builtin_monitors_by_ascii_encoded_name P_((void* fp));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,11 +2,9 @@
|
|||
regenc.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
|
@ -33,12 +31,98 @@
|
|||
|
||||
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
|
||||
|
||||
#define INITED_LIST_SIZE 20
|
||||
|
||||
static int InitedListNum;
|
||||
|
||||
static struct {
|
||||
OnigEncoding enc;
|
||||
int inited;
|
||||
} InitedList[INITED_LIST_SIZE];
|
||||
|
||||
static int
|
||||
enc_inited_entry(OnigEncoding enc)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < InitedListNum; i++) {
|
||||
if (InitedList[i].enc == enc) {
|
||||
InitedList[i].inited = 1;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
i = InitedListNum;
|
||||
if (i < INITED_LIST_SIZE - 1) {
|
||||
InitedList[i].enc = enc;
|
||||
InitedList[i].inited = 1;
|
||||
InitedListNum++;
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
enc_is_inited(OnigEncoding enc)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < InitedListNum; i++) {
|
||||
if (InitedList[i].enc == enc) {
|
||||
return InitedList[i].inited;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_end(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < InitedListNum; i++) {
|
||||
InitedList[i].enc = 0;
|
||||
InitedList[i].inited = 0;
|
||||
}
|
||||
|
||||
InitedListNum = 0;
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
/*
 * Encoding-layer initialization hook.  It performs no work and always
 * returns 0.
 */
extern int
onigenc_init(void)
{
  return 0;
}
|
||||
|
||||
extern int
|
||||
onig_initialize_encoding(OnigEncoding enc)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (enc != ONIG_ENCODING_ASCII &&
|
||||
ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
|
||||
OnigEncoding ascii = ONIG_ENCODING_ASCII;
|
||||
if (ascii->init != 0 && enc_is_inited(ascii) == 0) {
|
||||
r = ascii->init();
|
||||
if (r != ONIG_NORMAL) return r;
|
||||
enc_inited_entry(ascii);
|
||||
}
|
||||
}
|
||||
|
||||
if (enc->init != 0 &&
|
||||
enc_is_inited(enc) == 0) {
|
||||
r = (enc->init)();
|
||||
if (r == ONIG_NORMAL)
|
||||
enc_inited_entry(enc);
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern OnigEncoding
|
||||
onigenc_get_default_encoding(void)
|
||||
{
|
||||
|
@ -52,6 +136,25 @@ onigenc_set_default_encoding(OnigEncoding enc)
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)
|
||||
{
|
||||
int slen, term_len, i;
|
||||
UChar *r;
|
||||
|
||||
slen = (int )(end - s);
|
||||
term_len = ONIGENC_MBC_MINLEN(enc);
|
||||
|
||||
r = (UChar* )xmalloc(slen + term_len);
|
||||
CHECK_NULL_RETURN(r);
|
||||
xmemcpy(r, s, slen);
|
||||
|
||||
for (i = 0; i < term_len; i++)
|
||||
r[slen + i] = (UChar )0;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
extern UChar*
|
||||
onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
|
||||
{
|
||||
|
@ -99,6 +202,20 @@ onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
|
|||
return (UChar* )s;
|
||||
}
|
||||
|
||||
#if 0
/*
 * Length of the character at `p` as reported by ONIGENC_MBC_ENC_LEN,
 * clamped to the number of bytes remaining before `end`.
 * (Currently compiled out.)
 */
extern int
onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
{
  int char_len  = ONIGENC_MBC_ENC_LEN(enc, p);
  int remaining = (int )(end - p);

  if (remaining < char_len)
    return remaining;

  return char_len;
}
#endif
|
||||
|
||||
extern UChar*
|
||||
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
|
||||
{
|
||||
|
@ -172,74 +289,74 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
|
|||
}
|
||||
|
||||
const UChar OnigEncAsciiToLowerCaseTable[] = {
|
||||
0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
|
||||
0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
|
||||
0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
|
||||
0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
|
||||
0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
|
||||
0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
|
||||
0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
|
||||
0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
|
||||
0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
|
||||
0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
|
||||
0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
|
||||
0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
|
||||
0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
|
||||
0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
|
||||
0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
|
||||
0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
|
||||
0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
|
||||
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
|
||||
0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
|
||||
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
|
||||
0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
|
||||
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
|
||||
0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
|
||||
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
|
||||
0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
|
||||
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
|
||||
0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
|
||||
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
|
||||
0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
|
||||
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
|
||||
0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
|
||||
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
|
||||
};
|
||||
|
||||
#ifdef USE_UPPER_CASE_TABLE
|
||||
const UChar OnigEncAsciiToUpperCaseTable[256] = {
|
||||
0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
|
||||
0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
|
||||
0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
|
||||
0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
|
||||
0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
|
||||
0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
|
||||
0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
|
||||
0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
|
||||
0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
|
||||
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
|
||||
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
|
||||
0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
|
||||
0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
|
||||
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
|
||||
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
|
||||
0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
|
||||
0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
|
||||
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
|
||||
0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
|
||||
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
|
||||
0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
|
||||
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
|
||||
0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
|
||||
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
|
||||
0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
|
||||
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
|
||||
0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
|
||||
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
|
||||
0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
|
||||
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
|
||||
0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
|
||||
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -279,74 +396,74 @@ const unsigned short OnigEncAsciiCtypeTable[256] = {
|
|||
};
|
||||
|
||||
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
|
||||
0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
|
||||
0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
|
||||
0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
|
||||
0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
|
||||
0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
|
||||
0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
|
||||
0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
|
||||
0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
|
||||
0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
|
||||
0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
|
||||
0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
|
||||
0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
|
||||
0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
|
||||
0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
|
||||
0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
|
||||
0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
|
||||
0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
|
||||
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
|
||||
0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
|
||||
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
|
||||
0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
|
||||
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
|
||||
0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
|
||||
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
|
||||
0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
|
||||
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
|
||||
0360, 0361, 0362, 0363, 0364, 0365, 0366, 0327,
|
||||
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0337,
|
||||
0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
|
||||
0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
|
||||
0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
|
||||
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
|
||||
'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
|
||||
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
|
||||
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
|
||||
'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
|
||||
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
|
||||
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
|
||||
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
|
||||
};
|
||||
|
||||
#ifdef USE_UPPER_CASE_TABLE
|
||||
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
|
||||
0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
|
||||
0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
|
||||
0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
|
||||
0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
|
||||
0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
|
||||
0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
|
||||
0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
|
||||
0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
|
||||
0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
|
||||
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
|
||||
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
|
||||
0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
|
||||
0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
|
||||
0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
|
||||
0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
|
||||
0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
|
||||
0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
|
||||
0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
|
||||
0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
|
||||
0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
|
||||
0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
|
||||
0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
|
||||
0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
|
||||
0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
|
||||
0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
|
||||
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
|
||||
0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
|
||||
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
|
||||
0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
|
||||
0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
|
||||
0320, 0321, 0322, 0323, 0324, 0325, 0326, 0367,
|
||||
0330, 0331, 0332, 0333, 0334, 0335, 0336, 0377,
|
||||
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
|
||||
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
|
||||
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
|
||||
'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
|
||||
'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
|
||||
'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
|
||||
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
|
||||
'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
|
||||
'\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
|
||||
'\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
|
||||
'\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
|
||||
'\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
|
||||
'\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
|
||||
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
|
||||
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
|
||||
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
|
||||
'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
|
||||
'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
|
||||
'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
|
||||
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
|
||||
'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
|
||||
'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
|
||||
'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
|
||||
'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
|
||||
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -640,6 +757,33 @@ onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
|
||||
const UChar* end ARG_UNUSED)
|
||||
{
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
|
||||
const UChar* p, const UChar* end)
|
||||
{
|
||||
while (p < end) {
|
||||
p += enclen(enc, p);
|
||||
}
|
||||
|
||||
if (p != end)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)
|
||||
{
|
||||
return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);
|
||||
}
|
||||
|
||||
extern OnigCodePoint
|
||||
onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
|
||||
{
|
||||
|
@ -790,6 +934,16 @@ onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
|
|||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
|
||||
{
|
||||
OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
|
||||
|
||||
if (code > 127) return 0;
|
||||
|
||||
return ONIGENC_IS_ASCII_CODE_WORD(code);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
|
||||
unsigned int ctype)
|
||||
|
@ -839,66 +993,29 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Property management */
|
||||
static int
|
||||
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
|
||||
extern int
|
||||
onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)
|
||||
{
|
||||
int size;
|
||||
const OnigCodePoint **list = *plist;
|
||||
int i;
|
||||
|
||||
size = sizeof(OnigCodePoint*) * new_size;
|
||||
if (IS_NULL(list)) {
|
||||
list = (const OnigCodePoint** )xmalloc(size);
|
||||
for (i = 0; i < n; i++) {
|
||||
if (a[i] != b[i])
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*));
|
||||
}
|
||||
|
||||
if (IS_NULL(list)) return ONIGERR_MEMORY;
|
||||
|
||||
*plist = list;
|
||||
*psize = new_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
|
||||
hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
|
||||
int *psize)
|
||||
onig_codes_byte_at(OnigCodePoint codes[], int at)
|
||||
{
|
||||
#define PROP_INIT_SIZE 16
|
||||
int index;
|
||||
int b;
|
||||
OnigCodePoint code;
|
||||
|
||||
int r;
|
||||
index = at / 3;
|
||||
b = at % 3;
|
||||
code = codes[index];
|
||||
|
||||
if (*psize <= *pnum) {
|
||||
int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
|
||||
r = resize_property_list(new_size, plist, psize);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
|
||||
(*plist)[*pnum] = prop;
|
||||
|
||||
if (ONIG_IS_NULL(*table)) {
|
||||
*table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
|
||||
if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
|
||||
}
|
||||
|
||||
*pnum = *pnum + 1;
|
||||
onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE),
|
||||
(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_property_list_init(int (*f)(void))
|
||||
{
|
||||
int r;
|
||||
|
||||
THREAD_ATOMIC_START;
|
||||
|
||||
r = f();
|
||||
|
||||
THREAD_ATOMIC_END;
|
||||
return r;
|
||||
return ((code >> ((2 - b) * 8)) & 0xff);
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
regenc.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -29,11 +29,13 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef PACKAGE
|
||||
/* PACKAGE is defined in config.h */
|
||||
//#include "config.h"
|
||||
#ifndef ONIGURUMA_EXPORT
|
||||
#define ONIGURUMA_EXPORT
|
||||
#endif
|
||||
|
||||
//#include "config.h"
|
||||
//#include <stddef.h>
|
||||
|
||||
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#undef ONIG_ESCAPE_UCHAR_COLLISION
|
||||
#endif
|
||||
|
@ -71,6 +73,8 @@ typedef struct {
|
|||
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
|
||||
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
|
||||
|
||||
#define MAX_CODE_POINT (~((OnigCodePoint )0))
|
||||
|
||||
#define enclen(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
|
||||
|
||||
/* character types bit flag */
|
||||
|
@ -102,88 +106,157 @@ typedef struct {
|
|||
short int len;
|
||||
} PosixBracketEntryType;
|
||||
|
||||
/*
 * Pairs a property name with a ctype value.  Pointers to this struct are
 * returned by the onigenc_*_lookup_property_name() functions declared in
 * this header.
 */
struct PropertyNameCtype {
  char* name;   /* property name string */
  int   ctype;  /* ctype value associated with name */
};
|
||||
|
||||
/* #define USE_CRNL_AS_LINE_TERMINATOR */
|
||||
#define USE_UNICODE_PROPERTIES
|
||||
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
|
||||
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
|
||||
|
||||
|
||||
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
|
||||
|
||||
|
||||
#define ENC_FLAG_ASCII_COMPATIBLE (1<<0)
|
||||
#define ENC_FLAG_UNICODE (1<<1)
|
||||
|
||||
|
||||
/* for encoding system implementation (internal) */
|
||||
ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
|
||||
extern int onigenc_end(void);
|
||||
extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
extern int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
|
||||
extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
|
||||
|
||||
|
||||
/* methods for single byte encoding */
|
||||
ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
|
||||
ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
|
||||
ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
extern int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
|
||||
extern OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
|
||||
extern int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
|
||||
extern int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
|
||||
extern UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
|
||||
extern int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
extern int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
|
||||
extern int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end));
|
||||
extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));
|
||||
|
||||
/* methods for multi byte encoding */
|
||||
ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
|
||||
ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
|
||||
extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
|
||||
extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
|
||||
extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
|
||||
extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
|
||||
extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end));
|
||||
extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
|
||||
extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
|
||||
extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
|
||||
extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len));
|
||||
extern struct PropertyNameCtype* onigenc_sjis_lookup_property_name P_((register const char *str, register size_t len));
|
||||
|
||||
/* in enc/unicode.c */
|
||||
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
|
||||
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
|
||||
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
|
||||
extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
|
||||
extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));
|
||||
extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
|
||||
extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
|
||||
extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
|
||||
extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end));
|
||||
|
||||
|
||||
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
|
||||
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
|
||||
|
||||
/* from unicode generated codes */
|
||||
#define FOLDS1_FOLD(i) (OnigUnicodeFolds1 + (i))
|
||||
#define FOLDS2_FOLD(i) (OnigUnicodeFolds2 + (i))
|
||||
#define FOLDS3_FOLD(i) (OnigUnicodeFolds3 + (i))
|
||||
#define FOLDS1_UNFOLDS_NUM(i) (OnigUnicodeFolds1[(i)+1])
|
||||
#define FOLDS2_UNFOLDS_NUM(i) (OnigUnicodeFolds2[(i)+2])
|
||||
#define FOLDS3_UNFOLDS_NUM(i) (OnigUnicodeFolds3[(i)+3])
|
||||
#define FOLDS1_UNFOLDS(i) (OnigUnicodeFolds1 + (i) + 2)
|
||||
#define FOLDS2_UNFOLDS(i) (OnigUnicodeFolds2 + (i) + 3)
|
||||
#define FOLDS3_UNFOLDS(i) (OnigUnicodeFolds3 + (i) + 4)
|
||||
#define FOLDS1_NEXT_INDEX(i) ((i) + 2 + OnigUnicodeFolds1[(i)+1])
|
||||
#define FOLDS2_NEXT_INDEX(i) ((i) + 3 + OnigUnicodeFolds2[(i)+2])
|
||||
#define FOLDS3_NEXT_INDEX(i) ((i) + 4 + OnigUnicodeFolds3[(i)+3])
|
||||
|
||||
#define FOLDS_FOLD_ADDR_BUK(buk, addr) do {\
|
||||
if ((buk)->fold_len == 1)\
|
||||
addr = OnigUnicodeFolds1 + (buk)->index;\
|
||||
else if ((buk)->fold_len == 2)\
|
||||
addr = OnigUnicodeFolds2 + (buk)->index;\
|
||||
else if ((buk)->fold_len == 3)\
|
||||
addr = OnigUnicodeFolds3 + (buk)->index;\
|
||||
else\
|
||||
addr = 0;\
|
||||
} while (0)
|
||||
|
||||
extern OnigCodePoint OnigUnicodeFolds1[];
|
||||
extern OnigCodePoint OnigUnicodeFolds2[];
|
||||
extern OnigCodePoint OnigUnicodeFolds3[];
|
||||
|
||||
struct ByUnfoldKey {
|
||||
OnigCodePoint code;
|
||||
short int index;
|
||||
short int fold_len;
|
||||
};
|
||||
|
||||
extern const struct ByUnfoldKey* onigenc_unicode_unfold_key(OnigCodePoint code);
|
||||
extern int onigenc_unicode_fold1_key(OnigCodePoint code[]);
|
||||
extern int onigenc_unicode_fold2_key(OnigCodePoint code[]);
|
||||
extern int onigenc_unicode_fold3_key(OnigCodePoint code[]);
|
||||
|
||||
extern int onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n);
|
||||
extern int onig_codes_byte_at(OnigCodePoint code[], int at);
|
||||
|
||||
|
||||
|
||||
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToLowerCaseTable[c]
|
||||
#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
|
||||
OnigEncISO_8859_1_ToUpperCaseTable[c]
|
||||
|
||||
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
|
||||
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
|
||||
extern const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
|
||||
extern const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
|
||||
|
||||
ONIG_EXTERN int
|
||||
extern int
|
||||
onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
|
||||
ONIG_EXTERN UChar*
|
||||
extern UChar*
|
||||
onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
|
||||
|
||||
/* defined in regexec.c, but used in enc/xxx.c */
|
||||
extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
|
||||
|
||||
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
|
||||
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
|
||||
ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
|
||||
ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
|
||||
extern OnigEncoding OnigEncDefaultCharEncoding;
|
||||
extern const UChar OnigEncAsciiToLowerCaseTable[];
|
||||
extern const UChar OnigEncAsciiToUpperCaseTable[];
|
||||
extern const unsigned short OnigEncAsciiCtypeTable[];
|
||||
|
||||
|
||||
#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
|
||||
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
|
||||
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
|
||||
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
|
||||
((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
#define ONIGENC_IS_ASCII_CODE_WORD(code) \
|
||||
((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ONIGENC_CTYPE_WORD)) != 0)
|
||||
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
|
||||
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
|
||||
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
|
||||
|
||||
#define ONIGENC_IS_UNICODE_ENCODING(enc) \
|
||||
(((enc)->flag & ENC_FLAG_UNICODE) != 0)
|
||||
|
||||
#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) \
|
||||
(((enc)->flag & ENC_FLAG_ASCII_COMPATIBLE) != 0)
|
||||
|
||||
#endif /* REGENC_H */
|
||||
|
|
|
@ -2,11 +2,9 @@
|
|||
regerror.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
|
@ -29,10 +27,8 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define HAVE_STDARG_PROTOTYPES
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#define HAVE_STDARG_PROTOTYPES
|
||||
#if 0
|
||||
#include <stdio.h> /* for vsnprintf() */
|
||||
|
||||
|
@ -50,17 +46,21 @@ onig_error_code_to_format(int code)
|
|||
{
|
||||
char *p;
|
||||
|
||||
if (code >= 0) return (UChar* )0;
|
||||
|
||||
switch (code) {
|
||||
case ONIG_MISMATCH:
|
||||
p = "mismatch"; break;
|
||||
case ONIG_NO_SUPPORT_CONFIG:
|
||||
p = "no support in this configuration"; break;
|
||||
case ONIG_ABORT:
|
||||
p = "abort"; break;
|
||||
case ONIGERR_MEMORY:
|
||||
p = "fail to memory allocation"; break;
|
||||
case ONIGERR_MATCH_STACK_LIMIT_OVER:
|
||||
p = "match-stack limit over"; break;
|
||||
case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
|
||||
p = "parse depth limit over"; break;
|
||||
case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER:
|
||||
p = "retry-limit-in-match over"; break;
|
||||
case ONIGERR_TYPE_BUG:
|
||||
p = "undefined type (bug)"; break;
|
||||
case ONIGERR_PARSER_BUG:
|
||||
|
@ -75,6 +75,8 @@ onig_error_code_to_format(int code)
|
|||
p = "default multibyte-encoding is not setted"; break;
|
||||
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
|
||||
p = "can't convert to wide-char on specified multibyte-encoding"; break;
|
||||
case ONIGERR_FAIL_TO_INITIALIZE:
|
||||
p = "fail to initialize"; break;
|
||||
case ONIGERR_INVALID_ARGUMENT:
|
||||
p = "invalid argument"; break;
|
||||
case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
|
||||
|
@ -138,13 +140,11 @@ onig_error_code_to_format(int code)
|
|||
case ONIGERR_TOO_BIG_BACKREF_NUMBER:
|
||||
p = "too big backref number"; break;
|
||||
case ONIGERR_INVALID_BACKREF:
|
||||
#ifdef USE_NAMED_GROUP
|
||||
p = "invalid backref number/name"; break;
|
||||
#else
|
||||
p = "invalid backref number"; break;
|
||||
#endif
|
||||
case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
|
||||
p = "numbered backref/call is not allowed. (use name)"; break;
|
||||
case ONIGERR_TOO_MANY_CAPTURES:
|
||||
p = "too many captures"; break;
|
||||
case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
|
||||
p = "too big wide-char value"; break;
|
||||
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
|
||||
|
@ -156,11 +156,7 @@ onig_error_code_to_format(int code)
|
|||
case ONIGERR_INVALID_GROUP_NAME:
|
||||
p = "invalid group name <%n>"; break;
|
||||
case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
|
||||
#ifdef USE_NAMED_GROUP
|
||||
p = "invalid char in group name <%n>"; break;
|
||||
#else
|
||||
p = "invalid char in group number <%n>"; break;
|
||||
#endif
|
||||
case ONIGERR_UNDEFINED_NAME_REFERENCE:
|
||||
p = "undefined name <%n> reference"; break;
|
||||
case ONIGERR_UNDEFINED_GROUP_REFERENCE:
|
||||
|
@ -175,12 +171,30 @@ onig_error_code_to_format(int code)
|
|||
p = "group number is too big for capture history"; break;
|
||||
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
|
||||
p = "invalid character property name {%n}"; break;
|
||||
case ONIGERR_INVALID_IF_ELSE_SYNTAX:
|
||||
p = "invalid if-else syntax"; break;
|
||||
case ONIGERR_INVALID_ABSENT_GROUP_PATTERN:
|
||||
p = "invalid absent group pattern"; break;
|
||||
case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN:
|
||||
p = "invalid absent group generator pattern"; break;
|
||||
case ONIGERR_INVALID_CALLOUT_PATTERN:
|
||||
p = "invalid callout pattern"; break;
|
||||
case ONIGERR_INVALID_CALLOUT_NAME:
|
||||
p = "invalid callout name"; break;
|
||||
case ONIGERR_UNDEFINED_CALLOUT_NAME:
|
||||
p = "undefined callout name"; break;
|
||||
case ONIGERR_INVALID_CALLOUT_BODY:
|
||||
p = "invalid callout body"; break;
|
||||
case ONIGERR_INVALID_CALLOUT_TAG_NAME:
|
||||
p = "invalid callout tag name"; break;
|
||||
case ONIGERR_INVALID_CALLOUT_ARG:
|
||||
p = "invalid callout arg"; break;
|
||||
case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
|
||||
p = "not supported encoding combination"; break;
|
||||
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
|
||||
p = "invalid combination of options"; break;
|
||||
case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
|
||||
p = "over thread pass limit count"; break;
|
||||
case ONIGERR_LIBRARY_IS_NOT_INITIALIZED:
|
||||
p = "library is not initialized"; break;
|
||||
|
||||
default:
|
||||
p = "undefined error code"; break;
|
||||
|
@ -309,10 +323,6 @@ onig_error_code_to_str(s, code, va_alist)
|
|||
|
||||
default:
|
||||
q = onig_error_code_to_format(code);
|
||||
if (q == NULL) {
|
||||
len = 0;
|
||||
break;
|
||||
}
|
||||
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
|
||||
xmemcpy(s, q, len);
|
||||
s[len] = '\0';
|
||||
|
@ -357,21 +367,12 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
|||
|
||||
p = pat;
|
||||
while (p < pat_end) {
|
||||
if (*p == '\\') {
|
||||
*s++ = *p++;
|
||||
len = enclen(enc, p);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (*p == '/') {
|
||||
*s++ = (unsigned char )'\\';
|
||||
*s++ = *p++;
|
||||
}
|
||||
else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
|
||||
if (ONIGENC_IS_MBC_HEAD(enc, p)) {
|
||||
len = enclen(enc, p);
|
||||
if (ONIGENC_MBC_MINLEN(enc) == 1) {
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else { /* for UTF16 */
|
||||
else { /* for UTF16/32 */
|
||||
int blen;
|
||||
|
||||
while (len-- > 0) {
|
||||
|
@ -382,6 +383,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (*p == '\\') {
|
||||
*s++ = *p++;
|
||||
len = enclen(enc, p);
|
||||
while (len-- > 0) *s++ = *p++;
|
||||
}
|
||||
else if (*p == '/') {
|
||||
*s++ = (unsigned char )'\\';
|
||||
*s++ = *p++;
|
||||
}
|
||||
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
|
||||
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
|
||||
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,11 +2,9 @@
|
|||
reggnu.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
|
@ -30,10 +28,7 @@
|
|||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
#ifndef ONIGGNU_H
|
||||
#include "oniggnu.h"
|
||||
#endif
|
||||
|
||||
extern void
|
||||
re_free_registers(OnigRegion* r)
|
||||
|
@ -95,29 +90,6 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
|
|||
return r;
|
||||
}
|
||||
|
||||
#ifdef USE_RECOMPILE_API
/*
 * Recompiles reg from the size bytes starting at pattern, reusing
 * reg->options together with OnigEncDefaultCharEncoding and
 * OnigDefaultSyntax.
 *
 * Returns the onig_recompile() result.  When that result is not
 * ONIG_NORMAL and ebuf is non-NULL, onig_error_code_to_str() is called
 * with ebuf as its output argument.
 */
extern int
re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
{
  OnigErrorInfo einfo;
  UChar* pat_start = (UChar* )pattern;
  UChar* pat_end   = (UChar* )(pattern + size);
  int r;

  /* I think encoding and options should be arguments of this function.
     But this is adapted to present re.c. (2002/11/29)
  */
  r = onig_recompile(reg, pat_start, pat_end, reg->options,
                     OnigEncDefaultCharEncoding, OnigDefaultSyntax, &einfo);

  if (r != ONIG_NORMAL && IS_NOT_NULL(ebuf))
    (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);

  return r;
}
#endif
|
||||
|
||||
extern void
|
||||
re_free_pattern(regex_t* reg)
|
||||
{
|
||||
|
@ -151,9 +123,9 @@ re_mbcinit(int mb_code)
|
|||
case RE_MBCTYPE_ASCII:
|
||||
enc = ONIG_ENCODING_ASCII;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
onig_initialize(&enc, 1);
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -4,11 +4,9 @@
|
|||
regparse.h - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
|
@ -34,172 +32,239 @@
|
|||
#include "regint.h"
|
||||
|
||||
/* node type */
|
||||
#define NT_STR 0
|
||||
#define NT_CCLASS 1
|
||||
#define NT_CTYPE 2
|
||||
#define NT_CANY 3
|
||||
#define NT_BREF 4
|
||||
#define NT_QTFR 5
|
||||
#define NT_ENCLOSE 6
|
||||
#define NT_ANCHOR 7
|
||||
#define NT_LIST 8
|
||||
#define NT_ALT 9
|
||||
#define NT_CALL 10
|
||||
typedef enum {
|
||||
NODE_STRING = 0,
|
||||
NODE_CCLASS = 1,
|
||||
NODE_CTYPE = 2,
|
||||
NODE_BACKREF = 3,
|
||||
NODE_QUANT = 4,
|
||||
NODE_ENCLOSURE = 5,
|
||||
NODE_ANCHOR = 6,
|
||||
NODE_LIST = 7,
|
||||
NODE_ALT = 8,
|
||||
NODE_CALL = 9,
|
||||
NODE_GIMMICK = 10
|
||||
} NodeType;
|
||||
|
||||
enum GimmickType {
|
||||
GIMMICK_FAIL = 0,
|
||||
GIMMICK_KEEP = 1,
|
||||
GIMMICK_SAVE = 2,
|
||||
GIMMICK_UPDATE_VAR = 3,
|
||||
#ifdef USE_CALLOUT
|
||||
GIMMICK_CALLOUT = 4,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/* node type bit */
|
||||
#define NTYPE2BIT(type) (1<<(type))
|
||||
#define NODE_TYPE2BIT(type) (1<<(type))
|
||||
|
||||
#define BIT_NT_STR NTYPE2BIT(NT_STR)
|
||||
#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
|
||||
#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
|
||||
#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
|
||||
#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
|
||||
#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
|
||||
#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
|
||||
#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
|
||||
#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
|
||||
#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
|
||||
#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
|
||||
#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
|
||||
#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
|
||||
#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
|
||||
#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
|
||||
#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
|
||||
#define NODE_BIT_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
|
||||
#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
|
||||
#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
|
||||
#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
|
||||
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
|
||||
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
|
||||
|
||||
#define IS_NODE_TYPE_SIMPLE(type) \
|
||||
((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
|
||||
BIT_NT_CANY | BIT_NT_BREF)) != 0)
|
||||
#define NODE_IS_SIMPLE_TYPE(node) \
|
||||
((NODE_TYPE2BIT(NODE_TYPE(node)) & \
|
||||
(NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
|
||||
|
||||
#define NTYPE(node) ((node)->u.base.type)
|
||||
#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
|
||||
#define NODE_TYPE(node) ((node)->u.base.node_type)
|
||||
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
|
||||
|
||||
#define NSTR(node) (&((node)->u.str))
|
||||
#define NCCLASS(node) (&((node)->u.cclass))
|
||||
#define NCTYPE(node) (&((node)->u.ctype))
|
||||
#define NBREF(node) (&((node)->u.bref))
|
||||
#define NQTFR(node) (&((node)->u.qtfr))
|
||||
#define NENCLOSE(node) (&((node)->u.enclose))
|
||||
#define NANCHOR(node) (&((node)->u.anchor))
|
||||
#define NCONS(node) (&((node)->u.cons))
|
||||
#define NCALL(node) (&((node)->u.call))
|
||||
#define STR_(node) (&((node)->u.str))
|
||||
#define CCLASS_(node) (&((node)->u.cclass))
|
||||
#define CTYPE_(node) (&((node)->u.ctype))
|
||||
#define BACKREF_(node) (&((node)->u.backref))
|
||||
#define QUANT_(node) (&((node)->u.quant))
|
||||
#define ENCLOSURE_(node) (&((node)->u.enclosure))
|
||||
#define ANCHOR_(node) (&((node)->u.anchor))
|
||||
#define CONS_(node) (&((node)->u.cons))
|
||||
#define CALL_(node) (&((node)->u.call))
|
||||
#define GIMMICK_(node) (&((node)->u.gimmick))
|
||||
|
||||
#define NCAR(node) (NCONS(node)->car)
|
||||
#define NCDR(node) (NCONS(node)->cdr)
|
||||
#define NODE_CAR(node) (CONS_(node)->car)
|
||||
#define NODE_CDR(node) (CONS_(node)->cdr)
|
||||
|
||||
#define CTYPE_ANYCHAR -1
|
||||
#define NODE_IS_ANYCHAR(node) \
|
||||
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
||||
|
||||
#define CTYPE_OPTION(node, reg) \
|
||||
(NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options)
|
||||
|
||||
|
||||
|
||||
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
|
||||
#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML)
|
||||
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
|
||||
|
||||
#define ENCLOSE_MEMORY (1<<0)
|
||||
#define ENCLOSE_OPTION (1<<1)
|
||||
#define ENCLOSE_STOP_BACKTRACK (1<<2)
|
||||
enum EnclosureType {
|
||||
ENCLOSURE_MEMORY = 0,
|
||||
ENCLOSURE_OPTION = 1,
|
||||
ENCLOSURE_STOP_BACKTRACK = 2,
|
||||
ENCLOSURE_IF_ELSE = 3,
|
||||
};
|
||||
|
||||
#define NODE_STR_MARGIN 16
|
||||
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_STRING_MARGIN 16
|
||||
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
||||
#define NODE_BACKREFS_SIZE 6
|
||||
|
||||
#define NSTR_RAW (1<<0) /* by backslashed number */
|
||||
#define NSTR_AMBIG (1<<1)
|
||||
#define NSTR_DONT_GET_OPT_INFO (1<<2)
|
||||
#define NODE_STRING_RAW (1<<0) /* by backslashed number */
|
||||
#define NODE_STRING_AMBIG (1<<1)
|
||||
#define NODE_STRING_DONT_GET_OPT_INFO (1<<2)
|
||||
|
||||
#define NSTRING_LEN(node) ((int)((node)->u.str.end - (node)->u.str.s))
|
||||
#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
|
||||
#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
|
||||
#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
|
||||
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
|
||||
(node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
|
||||
#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
|
||||
#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
|
||||
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
|
||||
(((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
|
||||
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
|
||||
#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
|
||||
#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
|
||||
#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
|
||||
#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
|
||||
(node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
|
||||
#define NODE_STRING_IS_RAW(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_RAW) != 0)
|
||||
#define NODE_STRING_IS_AMBIG(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
|
||||
#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
|
||||
(((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
|
||||
|
||||
#define BACKREFS_P(br) \
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
|
||||
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
||||
|
||||
#define NQ_TARGET_ISNOT_EMPTY 0
|
||||
#define NQ_TARGET_IS_EMPTY 1
|
||||
#define NQ_TARGET_IS_EMPTY_MEM 2
|
||||
#define NQ_TARGET_IS_EMPTY_REC 3
|
||||
enum QuantBodyEmpty {
|
||||
QUANT_BODY_IS_NOT_EMPTY = 0,
|
||||
QUANT_BODY_IS_EMPTY = 1,
|
||||
QUANT_BODY_IS_EMPTY_MEM = 2,
|
||||
QUANT_BODY_IS_EMPTY_REC = 3
|
||||
};
|
||||
|
||||
/* status bits */
|
||||
#define NST_MIN_FIXED (1<<0)
|
||||
#define NST_MAX_FIXED (1<<1)
|
||||
#define NST_CLEN_FIXED (1<<2)
|
||||
#define NST_MARK1 (1<<3)
|
||||
#define NST_MARK2 (1<<4)
|
||||
#define NST_MEM_BACKREFED (1<<5)
|
||||
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
|
||||
#define NST_RECURSION (1<<7)
|
||||
#define NST_CALLED (1<<8)
|
||||
#define NST_ADDR_FIXED (1<<9)
|
||||
#define NST_NAMED_GROUP (1<<10)
|
||||
#define NST_NAME_REF (1<<11)
|
||||
#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
|
||||
#define NST_NEST_LEVEL (1<<13)
|
||||
#define NST_BY_NUMBER (1<<14) /* {n,m} */
|
||||
/* node status bits */
|
||||
#define NODE_ST_MIN_FIXED (1<<0)
|
||||
#define NODE_ST_MAX_FIXED (1<<1)
|
||||
#define NODE_ST_CLEN_FIXED (1<<2)
|
||||
#define NODE_ST_MARK1 (1<<3)
|
||||
#define NODE_ST_MARK2 (1<<4)
|
||||
#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
|
||||
#define NODE_ST_RECURSION (1<<6)
|
||||
#define NODE_ST_CALLED (1<<7)
|
||||
#define NODE_ST_ADDR_FIXED (1<<8)
|
||||
#define NODE_ST_NAMED_GROUP (1<<9)
|
||||
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
|
||||
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
|
||||
#define NODE_ST_IN_MULTI_ENTRY (1<<12)
|
||||
#define NODE_ST_NEST_LEVEL (1<<13)
|
||||
#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
|
||||
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
|
||||
#define NODE_ST_BACKREF (1<<16)
|
||||
#define NODE_ST_CHECKER (1<<17)
|
||||
#define NODE_ST_FIXED_OPTION (1<<18)
|
||||
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
|
||||
#define NODE_ST_SUPER (1<<20)
|
||||
|
||||
#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
|
||||
#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
|
||||
|
||||
#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
|
||||
#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
|
||||
#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
|
||||
#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
|
||||
#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
|
||||
#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
|
||||
#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
|
||||
#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
|
||||
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
|
||||
(((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
|
||||
#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
|
||||
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
|
||||
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
|
||||
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
|
||||
|
||||
#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
|
||||
#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
|
||||
#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
|
||||
#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
|
||||
#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
|
||||
#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
|
||||
#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
|
||||
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
|
||||
#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
|
||||
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
|
||||
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
|
||||
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
|
||||
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
|
||||
#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NODE_ST_ADDR_FIXED) != 0)
|
||||
#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_CLEN_FIXED) != 0)
|
||||
#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MIN_FIXED) != 0)
|
||||
#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NODE_ST_MAX_FIXED) != 0)
|
||||
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
|
||||
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
|
||||
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
|
||||
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
|
||||
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
|
||||
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
|
||||
#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NODE_ST_FIXED_OPTION) != 0)
|
||||
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
|
||||
#define NODE_IS_PROHIBIT_RECURSION(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
|
||||
#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
|
||||
((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
|
||||
|
||||
#define NODE_BODY(node) ((node)->u.base.body)
|
||||
#define NODE_QUANT_BODY(node) ((node)->body)
|
||||
#define NODE_ENCLOSURE_BODY(node) ((node)->body)
|
||||
#define NODE_CALL_BODY(node) ((node)->body)
|
||||
#define NODE_ANCHOR_BODY(node) ((node)->body)
|
||||
|
||||
#define CALLNODE_REFNUM_UNDEF -1
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
||||
UChar* s;
|
||||
UChar* end;
|
||||
unsigned int flag;
|
||||
int capa; /* (allocated size - 1) or 0: use buf[] */
|
||||
UChar buf[NODE_STR_BUF_SIZE];
|
||||
UChar buf[NODE_STRING_BUF_SIZE];
|
||||
} StrNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
struct _Node* target;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
||||
unsigned int flags;
|
||||
BitSet bs;
|
||||
BBuf* mbuf; /* multi-byte info or NULL */
|
||||
} CClassNode;
|
||||
|
||||
typedef struct {
|
||||
NodeType node_type;
|
||||
int status;
|
||||
struct _Node* body;
|
||||
|
||||
int lower;
|
||||
int upper;
|
||||
int greedy;
|
||||
int target_empty_info;
|
||||
enum QuantBodyEmpty body_empty_info;
|
||||
struct _Node* head_exact;
|
||||
struct _Node* next_head_exact;
|
||||
int is_refered; /* include called node. don't eliminate even if {0} */
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
|
||||
#endif
|
||||
} QtfrNode;
|
||||
} QuantNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
int type;
|
||||
int regnum;
|
||||
OnigOptionType option;
|
||||
struct _Node* target;
|
||||
AbsAddrType call_addr;
|
||||
/* for multiple call reference */
|
||||
OnigDistance min_len; /* min length (byte) */
|
||||
OnigDistance max_len; /* max length (byte) */
|
||||
int char_len; /* character length */
|
||||
int opt_count; /* referenced count in optimize_node_left() */
|
||||
} EncloseNode;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
struct _Node* body;
|
||||
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
enum EnclosureType type;
|
||||
union {
|
||||
struct {
|
||||
int regnum;
|
||||
AbsAddrType called_addr;
|
||||
int entry_count;
|
||||
int called_state;
|
||||
} m;
|
||||
struct {
|
||||
OnigOptionType options;
|
||||
} o;
|
||||
struct {
|
||||
/* body is condition */
|
||||
struct _Node* Then;
|
||||
struct _Node* Else;
|
||||
} te;
|
||||
};
|
||||
/* for multiple call reference */
|
||||
OnigLen min_len; /* min length (byte) */
|
||||
OnigLen max_len; /* max length (byte) */
|
||||
int char_len; /* character length */
|
||||
int opt_count; /* referenced count in optimize_nodes() */
|
||||
} EnclosureNode;
|
||||
|
||||
#ifdef USE_CALL
|
||||
|
||||
typedef struct {
|
||||
int offset;
|
||||
|
@ -213,101 +278,140 @@ typedef struct {
|
|||
} UnsetAddrList;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
|
||||
|
||||
int by_number;
|
||||
int group_num;
|
||||
UChar* name;
|
||||
UChar* name_end;
|
||||
struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
|
||||
UnsetAddrList* unset_addr_list;
|
||||
int entry_count;
|
||||
} CallNode;
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
int state;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
||||
int back_num;
|
||||
int back_static[NODE_BACKREFS_SIZE];
|
||||
int* back_dynamic;
|
||||
int nest_level;
|
||||
} BRefNode;
|
||||
} BackRefNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
struct _Node* body;
|
||||
|
||||
int type;
|
||||
struct _Node* target;
|
||||
int char_len;
|
||||
int ascii_mode;
|
||||
} AnchorNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
||||
struct _Node* car;
|
||||
struct _Node* cdr;
|
||||
} ConsAltNode;
|
||||
|
||||
typedef struct {
|
||||
NodeBase base;
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
||||
int ctype;
|
||||
int not;
|
||||
OnigOptionType options;
|
||||
int ascii_mode;
|
||||
} CtypeNode;
|
||||
|
||||
typedef struct {
|
||||
NodeType node_type;
|
||||
int status;
|
||||
|
||||
enum GimmickType type;
|
||||
int detail_type;
|
||||
int num;
|
||||
int id;
|
||||
} GimmickNode;
|
||||
|
||||
typedef struct _Node {
|
||||
union {
|
||||
NodeBase base;
|
||||
struct {
|
||||
NodeType node_type;
|
||||
int status;
|
||||
struct _Node* body;
|
||||
} base;
|
||||
|
||||
StrNode str;
|
||||
CClassNode cclass;
|
||||
QtfrNode qtfr;
|
||||
EncloseNode enclose;
|
||||
BRefNode bref;
|
||||
QuantNode quant;
|
||||
EnclosureNode enclosure;
|
||||
BackRefNode backref;
|
||||
AnchorNode anchor;
|
||||
ConsAltNode cons;
|
||||
CtypeNode ctype;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
#ifdef USE_CALL
|
||||
CallNode call;
|
||||
#endif
|
||||
GimmickNode gimmick;
|
||||
} u;
|
||||
} Node;
|
||||
|
||||
|
||||
#define NULL_NODE ((Node* )0)
|
||||
|
||||
#define SCANENV_MEMNODES_SIZE 8
|
||||
#define SCANENV_MEM_NODES(senv) \
|
||||
(IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
|
||||
(senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
|
||||
#define SCANENV_MEMENV_SIZE 8
|
||||
#define SCANENV_MEMENV(senv) \
|
||||
(IS_NOT_NULL((senv)->mem_env_dynamic) ? \
|
||||
(senv)->mem_env_dynamic : (senv)->mem_env_static)
|
||||
|
||||
typedef struct {
|
||||
OnigOptionType option;
|
||||
Node* node;
|
||||
#if 0
|
||||
int in;
|
||||
int recursion;
|
||||
#endif
|
||||
} MemEnv;
|
||||
|
||||
typedef struct {
|
||||
enum SaveType type;
|
||||
} SaveItem;
|
||||
|
||||
typedef struct {
|
||||
OnigOptionType options;
|
||||
OnigCaseFoldType case_fold_flag;
|
||||
OnigEncoding enc;
|
||||
OnigSyntaxType* syntax;
|
||||
BitStatusType capture_history;
|
||||
BitStatusType bt_mem_start;
|
||||
BitStatusType bt_mem_end;
|
||||
BitStatusType backrefed_mem;
|
||||
MemStatusType capture_history;
|
||||
MemStatusType bt_mem_start;
|
||||
MemStatusType bt_mem_end;
|
||||
MemStatusType backrefed_mem;
|
||||
UChar* pattern;
|
||||
UChar* pattern_end;
|
||||
UChar* error;
|
||||
UChar* error_end;
|
||||
regex_t* reg; /* for reg->names only */
|
||||
int num_call;
|
||||
#ifdef USE_SUBEXP_CALL
|
||||
#ifdef USE_CALL
|
||||
UnsetAddrList* unset_addr_list;
|
||||
int has_call_zero;
|
||||
#endif
|
||||
int num_mem;
|
||||
#ifdef USE_NAMED_GROUP
|
||||
int num_named;
|
||||
#endif
|
||||
int mem_alloc;
|
||||
Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
|
||||
Node** mem_nodes_dynamic;
|
||||
#ifdef USE_COMBINATION_EXPLOSION_CHECK
|
||||
int num_comb_exp_check;
|
||||
int comb_exp_max_regnum;
|
||||
int curr_max_regnum;
|
||||
int has_recursion;
|
||||
#endif
|
||||
MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
|
||||
MemEnv* mem_env_dynamic;
|
||||
unsigned int parse_depth;
|
||||
|
||||
int keep_num;
|
||||
int save_num;
|
||||
int save_alloc_num;
|
||||
SaveItem* saves;
|
||||
} ScanEnv;
|
||||
|
||||
|
||||
|
@ -315,13 +419,11 @@ typedef struct {
|
|||
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
|
||||
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
|
||||
|
||||
#ifdef USE_NAMED_GROUP
|
||||
typedef struct {
|
||||
int new_val;
|
||||
} GroupNumRemap;
|
||||
|
||||
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
|
||||
#endif
|
||||
|
||||
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
||||
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
|
||||
|
@ -332,22 +434,25 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
|
|||
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
|
||||
extern void onig_node_free P_((Node* node));
|
||||
extern Node* onig_node_new_enclose P_((int type));
|
||||
extern Node* onig_node_new_anchor P_((int type));
|
||||
extern Node* onig_node_new_enclosure P_((int type));
|
||||
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
|
||||
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
||||
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
||||
extern Node* onig_node_list_add P_((Node* list, Node* x));
|
||||
extern Node* onig_node_new_alt P_((Node* left, Node* right));
|
||||
extern void onig_node_str_clear P_((Node* node));
|
||||
extern int onig_free_node_list P_((void));
|
||||
extern int onig_names_free P_((regex_t* reg));
|
||||
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
||||
extern int onig_free_shared_cclass_table P_((void));
|
||||
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
||||
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
|
||||
|
||||
#ifdef USE_CALLOUT
|
||||
extern int onig_global_callout_names_free(void);
|
||||
#endif
|
||||
|
||||
#ifdef ONIG_DEBUG
|
||||
#ifdef USE_NAMED_GROUP
|
||||
extern int onig_print_names(FILE*, regex_t*);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* REGPARSE_H */
|
||||
|
|
|
@ -2,11 +2,9 @@
|
|||
regposerr.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
|
@ -29,7 +27,14 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*#include "config.h"*/
|
||||
/* Can't include regint.h etc.. for conflict of regex_t.
|
||||
Define ONIGURUMA_EXPORT here for onigposix.h.
|
||||
*/
|
||||
#ifndef ONIGURUMA_EXPORT
|
||||
#define ONIGURUMA_EXPORT
|
||||
#endif
|
||||
|
||||
//#include "config.h"
|
||||
#include "onigposix.h"
|
||||
|
||||
#if 0
|
||||
|
@ -46,6 +51,14 @@
|
|||
# define ARG_UNUSED
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(__GNUC__)
|
||||
#define xsnprintf sprintf_s
|
||||
#define xstrncpy(dest,src,size) strncpy_s(dest,size,src,_TRUNCATE)
|
||||
#else
|
||||
#define xsnprintf snprintf
|
||||
#define xstrncpy strncpy
|
||||
#endif
|
||||
|
||||
static char* ESTRING[] = {
|
||||
NULL,
|
||||
"failed to match", /* REG_NOMATCH */
|
||||
|
@ -65,8 +78,7 @@ static char* ESTRING[] = {
|
|||
/* Extended errors */
|
||||
"internal error", /* REG_EONIG_INTERNAL */
|
||||
"invalid wide char value", /* REG_EONIG_BADWC */
|
||||
"invalid argument", /* REG_EONIG_BADARG */
|
||||
"multi-thread error" /* REG_EONIG_THREAD */
|
||||
"invalid argument" /* REG_EONIG_BADARG */
|
||||
};
|
||||
|
||||
//#include <stdio.h>
|
||||
|
|
|
@ -2,11 +2,9 @@
|
|||
regposix.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
|
@ -60,8 +58,10 @@ onig2posix_error_code(int code)
|
|||
static const O2PERR o2p[] = {
|
||||
{ ONIG_MISMATCH, REG_NOMATCH },
|
||||
{ ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
|
||||
{ ONIG_ABORT, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_MEMORY, REG_ESPACE },
|
||||
{ ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
|
||||
|
@ -69,6 +69,7 @@ onig2posix_error_code(int code)
|
|||
{ ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED, REG_EONIG_BADARG },
|
||||
{ ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
|
||||
{ ONIGERR_FAIL_TO_INITIALIZE, REG_EONIG_INTERNAL },
|
||||
{ ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
|
||||
{ ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
|
||||
{ ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
|
||||
|
@ -115,9 +116,17 @@ onig2posix_error_code(int code)
|
|||
{ ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
|
||||
{ ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_IF_ELSE_SYNTAX, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_ABSENT_GROUP_PATTERN, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CALLOUT_PATTERN, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CALLOUT_NAME, REG_BADPAT },
|
||||
{ ONIGERR_UNDEFINED_CALLOUT_NAME, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CALLOUT_BODY, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CALLOUT_TAG_NAME, REG_BADPAT },
|
||||
{ ONIGERR_INVALID_CALLOUT_ARG, REG_BADPAT },
|
||||
{ ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
|
||||
{ ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT, REG_EONIG_THREAD }
|
||||
|
||||
{ ONIGERR_LIBRARY_IS_NOT_INITIALIZED, REG_EONIG_INTERNAL }
|
||||
};
|
||||
|
||||
int i;
|
||||
|
@ -241,11 +250,10 @@ reg_set_encoding(int mb_code)
|
|||
case REG_POSIX_ENCODING_UTF16_LE:
|
||||
enc = ONIG_ENCODING_UTF16_LE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
onig_initialize(&enc, 1);
|
||||
|
||||
onigenc_set_default_encoding(enc);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
regsyntax.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -168,13 +168,22 @@ OnigSyntaxType OnigSyntaxJava = {
|
|||
OnigSyntaxType OnigSyntaxPerl = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
|
||||
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
|
||||
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
|
||||
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
|
||||
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT )
|
||||
, SYN_GNU_REGEX_BV
|
||||
, ONIG_OPTION_SINGLELINE
|
||||
,
|
||||
|
@ -192,16 +201,26 @@ OnigSyntaxType OnigSyntaxPerl = {
|
|||
OnigSyntaxType OnigSyntaxPerl_NG = {
|
||||
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
|
||||
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
|
||||
ONIG_SYN_OP_ESC_CONTROL_CHARS |
|
||||
ONIG_SYN_OP_ESC_C_CONTROL )
|
||||
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
|
||||
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
|
||||
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
|
||||
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
|
||||
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
|
||||
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
|
||||
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
|
||||
ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
|
||||
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
|
||||
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP |
|
||||
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
|
||||
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
|
||||
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
|
||||
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
|
||||
ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL )
|
||||
, ( SYN_GNU_REGEX_BV |
|
||||
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
|
||||
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
|
||||
|
@ -223,7 +242,7 @@ extern int
|
|||
onig_set_default_syntax(OnigSyntaxType* syntax)
|
||||
{
|
||||
if (IS_NULL(syntax))
|
||||
syntax = ONIG_SYNTAX_RUBY;
|
||||
syntax = ONIG_SYNTAX_ONIGURUMA;
|
||||
|
||||
OnigDefaultSyntax = syntax;
|
||||
return 0;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
regversion.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -27,8 +27,8 @@
|
|||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
//#include "config.h"
|
||||
#include "oniguruma.h"
|
||||
#include "regint.h"
|
||||
//#include <stdio.h>
|
||||
|
||||
extern const char*
|
||||
|
@ -36,9 +36,7 @@ onig_version(void)
|
|||
{
|
||||
static char s[12];
|
||||
|
||||
sprintf_s(s,
|
||||
sizeof(s),
|
||||
"%d.%d.%d",
|
||||
sprintf_s(s, sizeof(s), "%d.%d.%d",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
|
@ -50,9 +48,8 @@ onig_copyright(void)
|
|||
{
|
||||
static char s[58];
|
||||
|
||||
sprintf_s(s,
|
||||
sizeof(s),
|
||||
"Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako",
|
||||
sprintf_s(s, sizeof(s),
|
||||
"Oniguruma %d.%d.%d : Copyright (C) 2002-2018 K.Kosako",
|
||||
ONIGURUMA_VERSION_MAJOR,
|
||||
ONIGURUMA_VERSION_MINOR,
|
||||
ONIGURUMA_VERSION_TEENY);
|
||||
|
|
|
@ -117,8 +117,7 @@ new_size(size)
|
|||
|
||||
for (i = 0, newsize = MINSIZE;
|
||||
i < (int )(sizeof(primes)/sizeof(primes[0]));
|
||||
i++, newsize <<= 1)
|
||||
{
|
||||
i++, newsize <<= 1) {
|
||||
if (newsize > size) return primes[i];
|
||||
}
|
||||
/* Ran out of polynomials */
|
||||
|
@ -131,11 +130,13 @@ static int collision = 0;
|
|||
static int init_st = 0;
|
||||
|
||||
static void
|
||||
stat_col()
|
||||
stat_col(void)
|
||||
{
|
||||
FILE *f = fopen("/tmp/col", "w");
|
||||
fprintf(f, "collision: %d\n", collision);
|
||||
fclose(f);
|
||||
if (f == 0) return ;
|
||||
|
||||
(void) fprintf(f, "collision: %d\n", collision);
|
||||
(void) fclose(f);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -156,11 +157,16 @@ st_init_table_with_size(type, size)
|
|||
size = new_size(size); /* round up to prime number */
|
||||
|
||||
tbl = alloc(st_table);
|
||||
CHECK_NULL_RETURN(tbl);
|
||||
if (tbl == 0) return 0;
|
||||
|
||||
tbl->type = type;
|
||||
tbl->num_entries = 0;
|
||||
tbl->num_bins = size;
|
||||
tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
|
||||
if (tbl->bins == 0) {
|
||||
free(tbl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return tbl;
|
||||
}
|
||||
|
@ -259,19 +265,15 @@ st_lookup(table, key, value)
|
|||
}
|
||||
}
|
||||
|
||||
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
|
||||
#define ADD_DIRECT(table, key, value, hash_val, bin_pos, ret) \
|
||||
do {\
|
||||
st_table_entry *entry;\
|
||||
if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
|
||||
rehash(table);\
|
||||
bin_pos = hash_val % table->num_bins;\
|
||||
}\
|
||||
\
|
||||
entry = alloc(st_table_entry);\
|
||||
if (entry == NULL) {\
|
||||
break;\
|
||||
}\
|
||||
\
|
||||
if (IS_NULL(entry)) return ret;\
|
||||
entry->hash = hash_val;\
|
||||
entry->key = key;\
|
||||
entry->record = value;\
|
||||
|
@ -293,7 +295,7 @@ st_insert(table, key, value)
|
|||
FIND_ENTRY(table, ptr, hash_val, bin_pos);
|
||||
|
||||
if (ptr == 0) {
|
||||
ADD_DIRECT(table, key, value, hash_val, bin_pos);
|
||||
ADD_DIRECT(table, key, value, hash_val, bin_pos, ONIGERR_MEMORY);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
|
@ -312,7 +314,7 @@ st_add_direct(table, key, value)
|
|||
|
||||
hash_val = do_hash(key, table);
|
||||
bin_pos = hash_val % table->num_bins;
|
||||
ADD_DIRECT(table, key, value, hash_val, bin_pos);
|
||||
ADD_DIRECT(table, key, value, hash_val, bin_pos,);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -325,7 +327,7 @@ rehash(table)
|
|||
|
||||
new_num_bins = new_size(old_num_bins+1);
|
||||
new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
|
||||
if (new_bins == NULL) {
|
||||
if (new_bins == 0) {
|
||||
return ;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,9 +10,10 @@
|
|||
# include <windows.h>
|
||||
typedef ULONG_PTR st_data_t;
|
||||
#else
|
||||
typedef unsigned long st_data_t;
|
||||
// typedef unsigned long st_data_t;
|
||||
#endif
|
||||
#define ST_DATA_T_DEFINED
|
||||
typedef UINTN st_data_t;
|
||||
|
||||
typedef struct st_table st_table;
|
||||
|
||||
|
|
|
@ -0,0 +1,873 @@
|
|||
/**********************************************************************
|
||||
unicode.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "regint.h"
|
||||
|
||||
struct PoolPropertyNameCtype {
|
||||
short int name;
|
||||
short int ctype;
|
||||
};
|
||||
|
||||
#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
|
||||
((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
|
||||
|
||||
static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
|
||||
0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
|
||||
0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
|
||||
0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
|
||||
0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
|
||||
0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
|
||||
0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
|
||||
0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
|
||||
0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
|
||||
0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
|
||||
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
|
||||
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
|
||||
};
|
||||
|
||||
#include "st.h"
|
||||
|
||||
#include "unicode_fold_data.c"
|
||||
|
||||
extern int
|
||||
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
|
||||
OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
|
||||
UChar* fold)
|
||||
{
|
||||
const struct ByUnfoldKey* buk;
|
||||
|
||||
OnigCodePoint code;
|
||||
int i, len, rlen;
|
||||
const UChar *p = *pp;
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
len = enclen(enc, p);
|
||||
*pp += len;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (code == 0x0130) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
|
||||
}
|
||||
#if 0
|
||||
if (code == 0x0049) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0) {
|
||||
if (buk->fold_len == 1) {
|
||||
return ONIGENC_CODE_TO_MBC(enc, *FOLDS1_FOLD(buk->index), fold);
|
||||
}
|
||||
else {
|
||||
OnigCodePoint* addr;
|
||||
|
||||
FOLDS_FOLD_ADDR_BUK(buk, addr);
|
||||
rlen = 0;
|
||||
for (i = 0; i < buk->fold_len; i++) {
|
||||
OnigCodePoint c = addr[i];
|
||||
len = ONIGENC_CODE_TO_MBC(enc, c, fold);
|
||||
fold += len;
|
||||
rlen += len;
|
||||
}
|
||||
return rlen;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
*fold++ = *p++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
apply_case_fold1(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
int i, j, k, n, r;
|
||||
|
||||
for (i = from; i < to; ) {
|
||||
OnigCodePoint fold = *FOLDS1_FOLD(i);
|
||||
n = FOLDS1_UNFOLDS_NUM(i);
|
||||
for (j = 0; j < n; j++) {
|
||||
OnigCodePoint unfold = FOLDS1_UNFOLDS(i)[j];
|
||||
|
||||
r = (*f)(fold, &unfold, 1, arg);
|
||||
if (r != 0) return r;
|
||||
r = (*f)(unfold, &fold, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < j; k++) {
|
||||
OnigCodePoint unfold2 = FOLDS1_UNFOLDS(i)[k];
|
||||
r = (*f)(unfold, &unfold2, 1, arg);
|
||||
if (r != 0) return r;
|
||||
r = (*f)(unfold2, &unfold, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
|
||||
i = FOLDS1_NEXT_INDEX(i);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
apply_case_fold2(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
int i, j, k, n, r;
|
||||
|
||||
for (i = from; i < to; ) {
|
||||
OnigCodePoint* fold = FOLDS2_FOLD(i);
|
||||
n = FOLDS2_UNFOLDS_NUM(i);
|
||||
for (j = 0; j < n; j++) {
|
||||
OnigCodePoint unfold = FOLDS2_UNFOLDS(i)[j];
|
||||
|
||||
r = (*f)(unfold, fold, 2, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < j; k++) {
|
||||
OnigCodePoint unfold2 = FOLDS2_UNFOLDS(i)[k];
|
||||
r = (*f)(unfold, &unfold2, 1, arg);
|
||||
if (r != 0) return r;
|
||||
r = (*f)(unfold2, &unfold, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
|
||||
i = FOLDS2_NEXT_INDEX(i);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
apply_case_fold3(int from, int to, OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
int i, j, k, n, r;
|
||||
|
||||
for (i = from; i < to; ) {
|
||||
OnigCodePoint* fold = FOLDS3_FOLD(i);
|
||||
n = FOLDS3_UNFOLDS_NUM(i);
|
||||
for (j = 0; j < n; j++) {
|
||||
OnigCodePoint unfold = FOLDS3_UNFOLDS(i)[j];
|
||||
|
||||
r = (*f)(unfold, fold, 3, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
for (k = 0; k < j; k++) {
|
||||
OnigCodePoint unfold2 = FOLDS3_UNFOLDS(i)[k];
|
||||
r = (*f)(unfold, &unfold2, 1, arg);
|
||||
if (r != 0) return r;
|
||||
r = (*f)(unfold2, &unfold, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
}
|
||||
|
||||
i = FOLDS3_NEXT_INDEX(i);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
|
||||
OnigApplyAllCaseFoldFunc f, void* arg)
|
||||
{
|
||||
int r;
|
||||
|
||||
r = apply_case_fold1(0, FOLDS1_NORMAL_END_INDEX, f, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
code = 0x0131;
|
||||
r = (*f)(0x0049, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
code = 0x0049;
|
||||
r = (*f)(0x0131, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
code = 0x0130;
|
||||
r = (*f)(0x0069, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
code = 0x0069;
|
||||
r = (*f)(0x0130, &code, 1, arg);
|
||||
if (r != 0) return r;
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
r = apply_case_fold1(FOLDS1_NORMAL_END_INDEX, FOLDS1_END_INDEX, f, arg);
|
||||
if (r != 0) return r;
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
|
||||
return 0;
|
||||
|
||||
r = apply_case_fold2(0, FOLDS2_NORMAL_END_INDEX, f, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
|
||||
#endif
|
||||
r = apply_case_fold2(FOLDS2_NORMAL_END_INDEX, FOLDS2_END_INDEX, f, arg);
|
||||
if (r != 0) return r;
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
}
|
||||
#endif
|
||||
|
||||
r = apply_case_fold3(0, FOLDS3_NORMAL_END_INDEX, f, arg);
|
||||
if (r != 0) return r;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
|
||||
OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
|
||||
OnigCaseFoldCodeItem items[])
|
||||
{
|
||||
int n, m, i, j, k, len;
|
||||
OnigCodePoint code, codes[3];
|
||||
const struct ByUnfoldKey* buk;
|
||||
|
||||
n = 0;
|
||||
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
len = enclen(enc, p);
|
||||
|
||||
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
|
||||
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
|
||||
if (code == 0x0049) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0131;
|
||||
return 1;
|
||||
}
|
||||
else if (code == 0x0130) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0069;
|
||||
return 1;
|
||||
}
|
||||
else if (code == 0x0131) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0049;
|
||||
return 1;
|
||||
}
|
||||
else if (code == 0x0069) {
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = 0x0130;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0) {
|
||||
if (buk->fold_len == 1) {
|
||||
int un;
|
||||
items[0].byte_len = len;
|
||||
items[0].code_len = 1;
|
||||
items[0].code[0] = *FOLDS1_FOLD(buk->index);
|
||||
n++;
|
||||
|
||||
un = FOLDS1_UNFOLDS_NUM(buk->index);
|
||||
for (i = 0; i < un; i++) {
|
||||
OnigCodePoint unfold = FOLDS1_UNFOLDS(buk->index)[i];
|
||||
if (unfold != code) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
code = items[0].code[0]; /* for multi-code to unfold search. */
|
||||
}
|
||||
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
|
||||
OnigCodePoint cs[3][4];
|
||||
int fn, ncs[3];
|
||||
|
||||
if (buk->fold_len == 2) {
|
||||
m = FOLDS2_UNFOLDS_NUM(buk->index);
|
||||
for (i = 0; i < m; i++) {
|
||||
OnigCodePoint unfold = FOLDS2_UNFOLDS(buk->index)[i];
|
||||
if (unfold == code) continue;
|
||||
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
}
|
||||
|
||||
for (fn = 0; fn < 2; fn++) {
|
||||
int index;
|
||||
cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
|
||||
index = onigenc_unicode_fold1_key(&cs[fn][0]);
|
||||
if (index >= 0) {
|
||||
int m = FOLDS1_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
|
||||
}
|
||||
ncs[fn] = m + 1;
|
||||
}
|
||||
else
|
||||
ncs[fn] = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 2;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else { /* fold_len == 3 */
|
||||
m = FOLDS3_UNFOLDS_NUM(buk->index);
|
||||
for (i = 0; i < m; i++) {
|
||||
OnigCodePoint unfold = FOLDS3_UNFOLDS(buk->index)[i];
|
||||
if (unfold == code) continue;
|
||||
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = unfold;
|
||||
n++;
|
||||
}
|
||||
|
||||
for (fn = 0; fn < 3; fn++) {
|
||||
int index;
|
||||
cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
|
||||
index = onigenc_unicode_fold1_key(&cs[fn][0]);
|
||||
if (index >= 0) {
|
||||
int m = FOLDS1_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
|
||||
}
|
||||
ncs[fn] = m + 1;
|
||||
}
|
||||
else
|
||||
ncs[fn] = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ncs[0]; i++) {
|
||||
for (j = 0; j < ncs[1]; j++) {
|
||||
for (k = 0; k < ncs[2]; k++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 3;
|
||||
items[n].code[0] = cs[0][i];
|
||||
items[n].code[1] = cs[1][j];
|
||||
items[n].code[2] = cs[2][k];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* multi char folded code is not head of another folded multi char */
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int index = onigenc_unicode_fold1_key(&code);
|
||||
if (index >= 0) {
|
||||
int m = FOLDS1_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS1_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) == 0)
|
||||
return n;
|
||||
|
||||
p += len;
|
||||
if (p < end) {
|
||||
int clen;
|
||||
int index;
|
||||
|
||||
codes[0] = code;
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0 && buk->fold_len == 1) {
|
||||
codes[1] = *FOLDS1_FOLD(buk->index);
|
||||
}
|
||||
else
|
||||
codes[1] = code;
|
||||
|
||||
clen = enclen(enc, p);
|
||||
len += clen;
|
||||
|
||||
index = onigenc_unicode_fold2_key(codes);
|
||||
if (index >= 0) {
|
||||
m = FOLDS2_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS2_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
p += clen;
|
||||
if (p < end) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
buk = onigenc_unicode_unfold_key(code);
|
||||
if (buk != 0 && buk->fold_len == 1) {
|
||||
codes[2] = *FOLDS1_FOLD(buk->index);
|
||||
}
|
||||
else
|
||||
codes[2] = code;
|
||||
|
||||
clen = enclen(enc, p);
|
||||
len += clen;
|
||||
|
||||
index = onigenc_unicode_fold3_key(codes);
|
||||
if (index >= 0) {
|
||||
m = FOLDS3_UNFOLDS_NUM(index);
|
||||
for (i = 0; i < m; i++) {
|
||||
items[n].byte_len = len;
|
||||
items[n].code_len = 1;
|
||||
items[n].code[0] = FOLDS3_UNFOLDS(index)[i];
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
#ifdef USE_UNICODE_PROPERTIES
|
||||
#include "unicode_property_data.c"
|
||||
#else
|
||||
#include "unicode_property_data_posix.c"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
|
||||
enum EGCB_BREAK_TYPE {
|
||||
EGCB_NOT_BREAK = 0,
|
||||
EGCB_BREAK = 1,
|
||||
EGCB_BREAK_UNDEF_GB11 = 2,
|
||||
EGCB_BREAK_UNDEF_RI_RI = 3
|
||||
};
|
||||
|
||||
enum EGCB_TYPE {
|
||||
EGCB_Other = 0,
|
||||
EGCB_CR = 1,
|
||||
EGCB_LF = 2,
|
||||
EGCB_Control = 3,
|
||||
EGCB_Extend = 4,
|
||||
EGCB_Prepend = 5,
|
||||
EGCB_Regional_Indicator = 6,
|
||||
EGCB_SpacingMark = 7,
|
||||
EGCB_ZWJ = 8,
|
||||
#if 0
|
||||
/* obsoleted */
|
||||
EGCB_E_Base = 9,
|
||||
EGCB_E_Base_GAZ = 10,
|
||||
EGCB_E_Modifier = 11,
|
||||
EGCB_Glue_After_Zwj = 12,
|
||||
#endif
|
||||
EGCB_L = 13,
|
||||
EGCB_LV = 14,
|
||||
EGCB_LVT = 15,
|
||||
EGCB_T = 16,
|
||||
EGCB_V = 17
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
OnigCodePoint start;
|
||||
OnigCodePoint end;
|
||||
enum EGCB_TYPE type;
|
||||
} EGCB_RANGE_TYPE;
|
||||
|
||||
#include "unicode_egcb_data.c"
|
||||
|
||||
static enum EGCB_TYPE
|
||||
egcb_get_type(OnigCodePoint code)
|
||||
{
|
||||
OnigCodePoint low, high, x;
|
||||
enum EGCB_TYPE type;
|
||||
|
||||
for (low = 0, high = (OnigCodePoint )EGCB_RANGE_NUM; low < high; ) {
|
||||
x = (low + high) >> 1;
|
||||
if (code > EGCB_RANGES[x].end)
|
||||
low = x + 1;
|
||||
else
|
||||
high = x;
|
||||
}
|
||||
|
||||
type = (low < (OnigCodePoint )EGCB_RANGE_NUM &&
|
||||
code >= EGCB_RANGES[low].start) ?
|
||||
EGCB_RANGES[low].type : EGCB_Other;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
#define IS_CONTROL_CR_LF(code) ((code) <= EGCB_Control && (code) >= EGCB_CR)
|
||||
#define IS_HANGUL(code) ((code) >= EGCB_L)
|
||||
|
||||
/* GB1 and GB2 are outside of this function. */
|
||||
static enum EGCB_BREAK_TYPE
|
||||
unicode_egcb_is_break_2code(OnigCodePoint from_code, OnigCodePoint to_code)
|
||||
{
|
||||
enum EGCB_TYPE from;
|
||||
enum EGCB_TYPE to;
|
||||
|
||||
from = egcb_get_type(from_code);
|
||||
to = egcb_get_type(to_code);
|
||||
|
||||
/* short cut */
|
||||
if (from == 0 && to == 0) goto GB999;
|
||||
|
||||
/* GB3 */
|
||||
if (from == EGCB_CR && to == EGCB_LF) return EGCB_NOT_BREAK;
|
||||
/* GB4 */
|
||||
if (IS_CONTROL_CR_LF(from)) return EGCB_BREAK;
|
||||
/* GB5 */
|
||||
if (IS_CONTROL_CR_LF(to)) return EGCB_BREAK;
|
||||
|
||||
if (IS_HANGUL(from) && IS_HANGUL(to)) {
|
||||
/* GB6 */
|
||||
if (from == EGCB_L && to != EGCB_T) return EGCB_NOT_BREAK;
|
||||
/* GB7 */
|
||||
if ((from == EGCB_LV || from == EGCB_V)
|
||||
&& (to == EGCB_V || to == EGCB_T)) return EGCB_NOT_BREAK;
|
||||
|
||||
/* GB8 */
|
||||
if ((to == EGCB_T) && (from == EGCB_LVT || from == EGCB_T))
|
||||
return EGCB_NOT_BREAK;
|
||||
|
||||
goto GB999;
|
||||
}
|
||||
|
||||
/* GB9 */
|
||||
if (to == EGCB_Extend || to == EGCB_ZWJ) return EGCB_NOT_BREAK;
|
||||
|
||||
/* GB9a */
|
||||
if (to == EGCB_SpacingMark) return EGCB_NOT_BREAK;
|
||||
/* GB9b */
|
||||
if (from == EGCB_Prepend) return EGCB_NOT_BREAK;
|
||||
|
||||
/* GB10 removed */
|
||||
|
||||
/* GB11 */
|
||||
if (from == EGCB_ZWJ) {
|
||||
if (onigenc_unicode_is_code_ctype(to_code, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
|
||||
return EGCB_BREAK_UNDEF_GB11;
|
||||
|
||||
goto GB999;
|
||||
}
|
||||
|
||||
/* GB12, GB13 */
|
||||
if (from == EGCB_Regional_Indicator && to == EGCB_Regional_Indicator) {
|
||||
return EGCB_BREAK_UNDEF_RI_RI;
|
||||
}
|
||||
|
||||
GB999:
|
||||
return EGCB_BREAK;
|
||||
}
|
||||
|
||||
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
||||
|
||||
extern int
|
||||
onigenc_egcb_is_break_position(OnigEncoding enc, UChar* p, UChar* prev,
|
||||
const UChar* start, const UChar* end)
|
||||
{
|
||||
OnigCodePoint from;
|
||||
OnigCodePoint to;
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
enum EGCB_BREAK_TYPE btype;
|
||||
enum EGCB_TYPE type;
|
||||
#endif
|
||||
|
||||
/* GB1 and GB2 */
|
||||
if (p == start) return 1;
|
||||
if (p == end) return 1;
|
||||
|
||||
if (IS_NULL(prev)) {
|
||||
prev = onigenc_get_prev_char_head(enc, start, p);
|
||||
if (IS_NULL(prev)) return 1;
|
||||
}
|
||||
|
||||
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
to = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
|
||||
#ifdef USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER
|
||||
if (! ONIGENC_IS_UNICODE_ENCODING(enc)) {
|
||||
if (from == 0x000d && to == 0x000a) return 0;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
btype = unicode_egcb_is_break_2code(from, to);
|
||||
switch (btype) {
|
||||
case EGCB_NOT_BREAK:
|
||||
return 0;
|
||||
break;
|
||||
case EGCB_BREAK:
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case EGCB_BREAK_UNDEF_GB11:
|
||||
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
if (onigenc_unicode_is_code_ctype(from, PROP_INDEX_EXTENDEDPICTOGRAPHIC))
|
||||
return 0;
|
||||
|
||||
type = egcb_get_type(from);
|
||||
if (type != EGCB_Extend)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case EGCB_BREAK_UNDEF_RI_RI:
|
||||
{
|
||||
int n = 0;
|
||||
while ((prev = onigenc_get_prev_char_head(enc, start, prev)) != NULL) {
|
||||
from = ONIGENC_MBC_TO_CODE(enc, prev, end);
|
||||
type = egcb_get_type(from);
|
||||
if (type != EGCB_Regional_Indicator)
|
||||
break;
|
||||
|
||||
n++;
|
||||
}
|
||||
if ((n % 2) == 0) return 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
||||
#else
|
||||
if (from == 0x000d && to == 0x000a) return 0;
|
||||
else return 1;
|
||||
#endif /* USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER */
|
||||
}
|
||||
|
||||
|
||||
#define USER_DEFINED_PROPERTY_MAX_NUM 20
|
||||
|
||||
typedef struct {
|
||||
int ctype;
|
||||
OnigCodePoint* ranges;
|
||||
} UserDefinedPropertyValue;
|
||||
|
||||
static int UserDefinedPropertyNum;
|
||||
static UserDefinedPropertyValue
|
||||
UserDefinedPropertyRanges[USER_DEFINED_PROPERTY_MAX_NUM];
|
||||
static st_table* UserDefinedPropertyTable;
|
||||
|
||||
extern int
|
||||
onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
|
||||
{
|
||||
UserDefinedPropertyValue* e;
|
||||
int r;
|
||||
int i;
|
||||
int n;
|
||||
int len;
|
||||
int c;
|
||||
char* s;
|
||||
|
||||
if (UserDefinedPropertyNum >= USER_DEFINED_PROPERTY_MAX_NUM)
|
||||
return ONIGERR_TOO_MANY_USER_DEFINED_OBJECTS;
|
||||
|
||||
len = (int )strlen_s(name,MAX_STRING_SIZE);
|
||||
if (len >= PROPERTY_NAME_MAX_SIZE)
|
||||
return ONIGERR_TOO_LONG_PROPERTY_NAME;
|
||||
|
||||
s = (char* )xmalloc(len + 1);
|
||||
if (s == 0)
|
||||
return ONIGERR_MEMORY;
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
c = name[i];
|
||||
if (c <= 0 || c >= 0x80) {
|
||||
xfree(s);
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
||||
if (c != ' ' && c != '-' && c != '_') {
|
||||
s[n] = c;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
s[n] = '\0';
|
||||
|
||||
if (UserDefinedPropertyTable == 0) {
|
||||
UserDefinedPropertyTable = onig_st_init_strend_table_with_size(10);
|
||||
}
|
||||
|
||||
e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
|
||||
e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
|
||||
e->ranges = ranges;
|
||||
r = onig_st_insert_strend(UserDefinedPropertyTable,
|
||||
(const UChar* )s, (const UChar* )s + n,
|
||||
(hash_data_type )((void* )e));
|
||||
if (r < 0) return r;
|
||||
|
||||
UserDefinedPropertyNum++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
|
||||
{
|
||||
if (
|
||||
#ifdef USE_UNICODE_PROPERTIES
|
||||
ctype <= ONIGENC_MAX_STD_CTYPE &&
|
||||
#endif
|
||||
code < 256) {
|
||||
return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
|
||||
}
|
||||
|
||||
if (ctype >= CODE_RANGES_NUM) {
|
||||
int index = ctype - CODE_RANGES_NUM;
|
||||
if (index < UserDefinedPropertyNum)
|
||||
return onig_is_in_code_range((UChar* )UserDefinedPropertyRanges[index].ranges, code);
|
||||
else
|
||||
return ONIGERR_TYPE_BUG;
|
||||
}
|
||||
|
||||
return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[])
|
||||
{
|
||||
if (ctype >= CODE_RANGES_NUM) {
|
||||
int index = ctype - CODE_RANGES_NUM;
|
||||
if (index < UserDefinedPropertyNum) {
|
||||
*ranges = UserDefinedPropertyRanges[index].ranges;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
return ONIGERR_TYPE_BUG;
|
||||
}
|
||||
|
||||
*ranges = CodeRanges[ctype];
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
|
||||
const OnigCodePoint* ranges[])
|
||||
{
|
||||
*sb_out = 0x00;
|
||||
return onigenc_unicode_ctype_code_range(ctype, ranges);
|
||||
}
|
||||
|
||||
extern int
|
||||
onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
|
||||
{
|
||||
int len;
|
||||
UChar *p;
|
||||
OnigCodePoint code;
|
||||
const struct PoolPropertyNameCtype* pc;
|
||||
char buf[PROPERTY_NAME_MAX_SIZE];
|
||||
|
||||
p = name;
|
||||
len = 0;
|
||||
while (p < end) {
|
||||
code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
||||
if (code >= 0x80)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
|
||||
if (code != ' ' && code != '-' && code != '_') {
|
||||
buf[len++] = (char )code;
|
||||
if (len >= PROPERTY_NAME_MAX_SIZE)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
||||
|
||||
p += enclen(enc, p);
|
||||
}
|
||||
|
||||
buf[len] = 0;
|
||||
|
||||
if (UserDefinedPropertyTable != 0) {
|
||||
UserDefinedPropertyValue* e;
|
||||
e = (UserDefinedPropertyValue* )NULL;
|
||||
onig_st_lookup_strend(UserDefinedPropertyTable,
|
||||
(const UChar* )buf, (const UChar* )buf + len,
|
||||
(hash_data_type* )((void* )(&e)));
|
||||
if (e != 0) {
|
||||
return e->ctype;
|
||||
}
|
||||
}
|
||||
|
||||
pc = unicode_lookup_property_name(buf, len);
|
||||
if (pc != 0) {
|
||||
/* fprintf(stderr, "LOOKUP: %s: %d\n", buf, pc->ctype); */
|
||||
#ifndef USE_UNICODE_PROPERTIES
|
||||
if (pc->ctype > ONIGENC_MAX_STD_CTYPE)
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
#endif
|
||||
|
||||
return (int )pc->ctype;
|
||||
}
|
||||
|
||||
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,229 @@
|
|||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
|
||||
/* Computed positions: -k'3,6' */
|
||||
|
||||
|
||||
|
||||
/* This gperf source file was generated by make_unicode_fold_data.py */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
//#include <string.h>
|
||||
#include "regenc.h"
|
||||
|
||||
#define TOTAL_KEYWORDS 59
|
||||
#define MIN_WORD_LENGTH 6
|
||||
#define MAX_WORD_LENGTH 6
|
||||
#define MIN_HASH_VALUE 0
|
||||
#define MAX_HASH_VALUE 58
|
||||
/* maximum key range = 59, duplicates = 0 */
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
inline
|
||||
#endif
|
||||
#endif
|
||||
/*ARGSUSED*/
|
||||
static unsigned int
|
||||
hash(OnigCodePoint codes[])
|
||||
{
|
||||
static const unsigned char asso_values[] =
|
||||
{
|
||||
58, 57, 56, 55, 54, 53, 52, 16, 50, 59,
|
||||
15, 59, 25, 59, 59, 59, 59, 59, 59, 3,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 49, 48, 47, 46, 45, 44, 43, 42,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 21,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 2, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 40, 20, 39, 38,
|
||||
37, 14, 5, 36, 20, 7, 25, 34, 29, 32,
|
||||
16, 59, 31, 59, 59, 2, 1, 59, 25, 15,
|
||||
59, 14, 59, 59, 28, 59, 2, 59, 59, 59,
|
||||
11, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 24, 59, 22, 59, 59, 11, 59, 59,
|
||||
59, 59, 59, 7, 59, 0, 59, 59, 16, 59,
|
||||
1, 59, 59, 16, 59, 59, 59, 15, 59, 59,
|
||||
59, 6, 59, 59, 59, 59, 0, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59, 59, 59, 59, 59,
|
||||
59, 59, 59, 59, 59, 59
|
||||
};
|
||||
return asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)];
|
||||
}
|
||||
|
||||
int
|
||||
onigenc_unicode_fold2_key(OnigCodePoint codes[])
|
||||
{
|
||||
static const short int wordlist[] =
|
||||
{
|
||||
|
||||
101,
|
||||
|
||||
253,
|
||||
|
||||
76,
|
||||
|
||||
29,
|
||||
|
||||
24,
|
||||
|
||||
239,
|
||||
|
||||
96,
|
||||
|
||||
71,
|
||||
|
||||
92,
|
||||
|
||||
67,
|
||||
|
||||
4,
|
||||
|
||||
62,
|
||||
|
||||
8,
|
||||
|
||||
58,
|
||||
|
||||
234,
|
||||
|
||||
109,
|
||||
|
||||
164,
|
||||
|
||||
88,
|
||||
|
||||
84,
|
||||
|
||||
80,
|
||||
|
||||
214,
|
||||
|
||||
0,
|
||||
|
||||
54,
|
||||
|
||||
261,
|
||||
|
||||
50,
|
||||
|
||||
105,
|
||||
|
||||
121,
|
||||
|
||||
125,
|
||||
|
||||
257,
|
||||
|
||||
42,
|
||||
|
||||
38,
|
||||
|
||||
249,
|
||||
|
||||
46,
|
||||
|
||||
117,
|
||||
|
||||
12,
|
||||
|
||||
113,
|
||||
|
||||
244,
|
||||
|
||||
229,
|
||||
|
||||
224,
|
||||
|
||||
219,
|
||||
|
||||
209,
|
||||
|
||||
16,
|
||||
|
||||
204,
|
||||
|
||||
199,
|
||||
|
||||
194,
|
||||
|
||||
189,
|
||||
|
||||
184,
|
||||
|
||||
179,
|
||||
|
||||
174,
|
||||
|
||||
169,
|
||||
|
||||
20,
|
||||
|
||||
34,
|
||||
|
||||
159,
|
||||
|
||||
154,
|
||||
|
||||
149,
|
||||
|
||||
144,
|
||||
|
||||
139,
|
||||
|
||||
134,
|
||||
|
||||
129
|
||||
};
|
||||
|
||||
if (0 == 0)
|
||||
{
|
||||
int key = hash(codes);
|
||||
|
||||
if (key <= MAX_HASH_VALUE)
|
||||
{
|
||||
int index = wordlist[key];
|
||||
|
||||
if (index >= 0 && onig_codes_cmp(codes, OnigUnicodeFolds2 + index, 2) == 0)
|
||||
return index;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
/* This file was converted by gperf_fold_key_conv.py
|
||||
from gperf output file. */
|
||||
/* ANSI-C code produced by gperf version 3.1 */
|
||||
/* Command-line: /usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
|
||||
/* Computed positions: -k'3,6,9' */
|
||||
|
||||
|
||||
|
||||
/* This gperf source file was generated by make_unicode_fold_data.py */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
//#include <string.h>
|
||||
#include "regenc.h"
|
||||
|
||||
/* Parameters of the gperf-generated perfect-hash lookup implemented by hash()
   and onigenc_unicode_fold3_key() below. */
/* Number of keys; wordlist[] in onigenc_unicode_fold3_key() has this many entries. */
#define TOTAL_KEYWORDS 14
|
||||
/* Key length bounds. Not referenced by the code visible here; presumably the
   key length in bytes (hash() reads byte positions 2, 5 and 8) -- TODO confirm. */
#define MIN_WORD_LENGTH 9
|
||||
#define MAX_WORD_LENGTH 9
|
||||
/* Smallest hash value that maps to a wordlist[] slot. */
#define MIN_HASH_VALUE 0
|
||||
/* Largest hash value that maps to a wordlist[] slot; onigenc_unicode_fold3_key()
   treats anything above it as a miss before indexing wordlist[]. */
#define MAX_HASH_VALUE 13
|
||||
/* maximum key range = 14, duplicates = 0 */
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
inline
|
||||
#endif
|
||||
#endif
|
||||
/*ARGSUSED*/
|
||||
static unsigned int
|
||||
hash(OnigCodePoint codes[])
|
||||
{
|
||||
static const unsigned char asso_values[] =
|
||||
{
|
||||
6, 3, 14, 14, 14, 14, 14, 14, 1, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 0,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 0, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 4, 14, 14, 5, 14, 14, 4, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 10, 14, 14,
|
||||
14, 14, 14, 9, 14, 1, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 0, 14, 14,
|
||||
14, 8, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14
|
||||
};
|
||||
return asso_values[(unsigned char)onig_codes_byte_at(codes, 8)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)];
|
||||
}
|
||||
|
||||
int
|
||||
onigenc_unicode_fold3_key(OnigCodePoint codes[])
|
||||
{
|
||||
static const short int wordlist[] =
|
||||
{
|
||||
|
||||
62,
|
||||
|
||||
47,
|
||||
|
||||
31,
|
||||
|
||||
57,
|
||||
|
||||
41,
|
||||
|
||||
25,
|
||||
|
||||
52,
|
||||
|
||||
36,
|
||||
|
||||
20,
|
||||
|
||||
67,
|
||||
|
||||
15,
|
||||
|
||||
10,
|
||||
|
||||
5,
|
||||
|
||||
0
|
||||
};
|
||||
|
||||
if (0 == 0)
|
||||
{
|
||||
int key = hash(codes);
|
||||
|
||||
if (key <= MAX_HASH_VALUE)
|
||||
{
|
||||
int index = wordlist[key];
|
||||
|
||||
if (index >= 0 && onig_codes_cmp(codes, OnigUnicodeFolds3 + index, 3) == 0)
|
||||
return index;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
utf16_le.c - Oniguruma (regular expression library)
|
||||
**********************************************************************/
|
||||
/*-
|
||||
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -26,8 +26,52 @@
|
|||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
#include "regint.h" /* for USE_CALLOUT */
|
||||
|
||||
#include "regenc.h"
|
||||
/*
 * Initializer for the UTF-16LE encoding.
 *
 * When USE_CALLOUT is defined, hands seven callout names (FAIL, MISMATCH,
 * MAX, ERROR, COUNT, TOTAL_COUNT, CMP) to the BC0_P / BC_B_O / BC_P_O / BC_P
 * macros together with their argument-type and option-default arrays.
 * Those macros are defined outside this chunk; presumably they register the
 * built-in callouts for `enc` -- TODO confirm in regint.h.
 *
 * Every name literal is spelled as UTF-16LE: each ASCII character is followed
 * by a \000 byte and the string ends with an explicit \000\000 pair.
 *
 * Returns ONIG_NORMAL when the end of the function is reached.
 * NOTE(review): the BC* macros may return early on failure -- confirm.
 */
static int
|
||||
init(void)
|
||||
{
|
||||
#ifdef USE_CALLOUT
|
||||
|
||||
/* `id` and `enc` are not read directly below; presumably the BC* macros
   refer to them by name, so they must keep these exact identifiers. */
int id;
|
||||
OnigEncoding enc;
|
||||
char* name;
|
||||
/* Scratch arrays, refilled before each macro invocation. */
unsigned int args[4];
|
||||
OnigValue opts[4];
|
||||
|
||||
enc = ONIG_ENCODING_UTF16_LE;
|
||||
|
||||
/* FAIL and MISMATCH: passed with a name only. */
name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail);
|
||||
name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch);
|
||||
|
||||
/* MAX: two argument types (tag-or-long, char); one option default 'X'. */
name = "M\000A\000X\000\000\000";
|
||||
args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
|
||||
args[1] = ONIG_TYPE_CHAR;
|
||||
opts[0].c = 'X';
|
||||
BC_B_O(name, max, 2, args, 1, opts);
|
||||
|
||||
/* ERROR: one long argument; option default ONIG_ABORT. */
name = "E\000R\000R\000O\000R\000\000\000";
|
||||
args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
|
||||
BC_P_O(name, error, 1, args, 1, opts);
|
||||
|
||||
/* COUNT: one char argument; option default '>'. */
name = "C\000O\000U\000N\000T\000\000\000";
|
||||
args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
|
||||
BC_B_O(name, count, 1, args, 1, opts);
|
||||
|
||||
/* TOTAL_COUNT: same argument/option setup as COUNT. */
name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000";
|
||||
args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
|
||||
BC_B_O(name, total_count, 1, args, 1, opts);
|
||||
|
||||
/* CMP: three argument types (tag-or-long, string, tag-or-long); no options. */
name = "C\000M\000P\000\000\000";
|
||||
args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
|
||||
args[1] = ONIG_TYPE_STRING;
|
||||
args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
|
||||
BC_P(name, cmp, 3, args);
|
||||
|
||||
#endif /* USE_CALLOUT */
|
||||
|
||||
return ONIG_NORMAL;
|
||||
}
|
||||
|
||||
static const int EncLen_UTF16[] = {
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
|
@ -60,6 +104,21 @@ utf16le_mbc_enc_len(const UChar* p)
|
|||
return EncLen_UTF16[*(p+1)];
|
||||
}
|
||||
|
||||
static int
|
||||
is_valid_mbc_string(const UChar* p, const UChar* end)
|
||||
{
|
||||
const UChar* end1 = end - 1;
|
||||
|
||||
while (p < end1) {
|
||||
p += utf16le_mbc_enc_len(p);
|
||||
}
|
||||
|
||||
if (p != end)
|
||||
return FALSE;
|
||||
else
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_is_mbc_newline(const UChar* p, const UChar* end)
|
||||
{
|
||||
|
@ -73,6 +132,7 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
|
|||
#endif
|
||||
*p == 0x85) && *(p+1) == 0x00)
|
||||
return 1;
|
||||
|
||||
if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
|
||||
return 1;
|
||||
#endif
|
||||
|
@ -109,8 +169,8 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
|
|||
plane = (code >> 16) - 1;
|
||||
high = (code & 0xff00) >> 8;
|
||||
|
||||
*p++ = (UChar)(((plane & 0x03) << 6) + (high >> 2));
|
||||
*p++ = (UChar)((plane >> 2) + 0xd8);
|
||||
*p++ = ((plane & 0x03) << 6) + (high >> 2);
|
||||
*p++ = (plane >> 2) + 0xd8;
|
||||
*p++ = (UChar )(code & 0xff);
|
||||
*p = (high & 0x03) + 0xdc;
|
||||
return 4;
|
||||
|
@ -209,8 +269,8 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
|
|||
OnigEncodingType OnigEncodingUTF16_LE = {
|
||||
utf16le_mbc_enc_len,
|
||||
"UTF-16LE", /* name */
|
||||
4, /* max byte length */
|
||||
2, /* min byte length */
|
||||
4, /* max enc length */
|
||||
2, /* min enc length */
|
||||
utf16le_is_mbc_newline,
|
||||
utf16le_mbc_to_code,
|
||||
utf16le_code_to_mbclen,
|
||||
|
@ -222,5 +282,10 @@ OnigEncodingType OnigEncodingUTF16_LE = {
|
|||
onigenc_unicode_is_code_ctype,
|
||||
onigenc_utf16_32_get_ctype_code_range,
|
||||
utf16le_left_adjust_char_head,
|
||||
onigenc_always_false_is_allowed_reverse_match
|
||||
onigenc_always_false_is_allowed_reverse_match,
|
||||
init,
|
||||
0, /* is_initialized */
|
||||
is_valid_mbc_string,
|
||||
ENC_FLAG_UNICODE,
|
||||
0, 0
|
||||
};
|
|
@ -29,6 +29,7 @@
|
|||
Oniguruma/OnigurumaIntrinsics.c | MSFT
|
||||
|
||||
# Upstream Oniguruma code
|
||||
Oniguruma/onig_init.c
|
||||
Oniguruma/oniguruma.h
|
||||
Oniguruma/regcomp.c
|
||||
Oniguruma/regenc.c
|
||||
|
@ -50,9 +51,13 @@
|
|||
Oniguruma/st.h
|
||||
|
||||
# Supported Character Encodings
|
||||
Oniguruma/enc/ascii.c
|
||||
Oniguruma/enc/unicode.c
|
||||
Oniguruma/enc/utf16_le.c
|
||||
Oniguruma/ascii.c
|
||||
Oniguruma/unicode.c
|
||||
Oniguruma/unicode_fold1_key.c
|
||||
Oniguruma/unicode_fold2_key.c
|
||||
Oniguruma/unicode_fold3_key.c
|
||||
Oniguruma/unicode_unfold_key.c
|
||||
Oniguruma/utf16_le.c
|
||||
|
||||
[Packages]
|
||||
MdePkg/MdePkg.dec
|
||||
|
@ -75,10 +80,7 @@
|
|||
|
||||
[BuildOptions]
|
||||
# Override MSFT build option to remove /Oi and /GL
|
||||
MSFT:DEBUG_*_IA32_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2 /FIAutoGen.h /EHs-c- /GR- /GF /Gy /Zi /Gm
|
||||
MSFT:RELEASE_*_IA32_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2 /FIAutoGen.h /EHs-c- /GR- /GF
|
||||
MSFT:DEBUG_*_X64_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2s /FIAutoGen.h /EHs-c- /GR- /GF /Gy /Zi /Gm /X
|
||||
MSFT:RELEASE_*_X64_CC_FLAGS == /nologo /c /WX /GS- /W4 /Gs32768 /D UNICODE /O1b2s /FIAutoGen.h /EHs-c- /GR- /GF /Gy /X
|
||||
MSFT:*_*_*_CC_FLAGS = /GL-
|
||||
INTEL:*_*_*_CC_FLAGS = /Oi-
|
||||
|
||||
# Oniguruma: potentially uninitialized local variable used
|
||||
|
@ -93,3 +95,17 @@
|
|||
# Oniguruma: 'type cast' : truncation from 'OnigUChar *' to 'unsigned int'
|
||||
MSFT:*_*_*_CC_FLAGS = /wd4305 /wd4306
|
||||
|
||||
# Oniguruma: nameless union declared in regparse.h
|
||||
MSFT:*_*_*_CC_FLAGS = /wd4201
|
||||
|
||||
# Oniguruma: 'type cast' : "int" to "OnigUChar", function pointer to "void *"
|
||||
MSFT:*_*_*_CC_FLAGS = /wd4244 /wd4054
|
||||
|
||||
# Oniguruma: previous local declaration
|
||||
MSFT:*_*_*_CC_FLAGS = /wd4456
|
||||
|
||||
# Oniguruma: signed and unsigned mismatch/cast
|
||||
MSFT:*_*_*_CC_FLAGS = /wd4018 /wd4245 /wd4389
|
||||
|
||||
# Oniguruma: error: variable 'fp' set but not used
|
||||
GCC:*_*_*_CC_FLAGS = -Wno-error=unused-but-set-variable
|
||||
|
|
Loading…
Reference in New Issue