diff --git a/src/big5.c b/src/big5.c index 79ae1e34..faff845a 100644 --- a/src/big5.c +++ b/src/big5.c @@ -2,7 +2,7 @@ big5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,15 @@ static int big5_code_to_mbclen(OnigCodePoint code) { if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - if ((code & 0xff00) != 0) return 2; - if (EncLen_BIG5[(int )(code & 0xff)] == 1) return 1; + + if ((code & 0xff00) != 0) { + if (EncLen_BIG5[(int )(code >> 8) & 0xff] == 2) + return 2; + } + else { + if (EncLen_BIG5[(int )(code & 0xff)] == 1) + return 1; + } return ONIGERR_INVALID_CODE_POINT_VALUE; } diff --git a/src/euc_jp.c b/src/euc_jp.c index 640b3e3c..6ddd91c7 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@ euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -114,10 +114,20 @@ static int code_to_mbclen(OnigCodePoint code) { if (ONIGENC_IS_CODE_ASCII(code)) return 1; - else if ((code & 0xff0000) != 0) return 3; - else if ((code & 0xff00) != 0) return 2; - else - return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff0000) != 0) { + if (EncLen_EUCJP[(int )(code >> 16) & 0xff] == 3) + return 3; + } + else if ((code & 0xff00) != 0) { + if (EncLen_EUCJP[(int )(code >> 8) & 0xff] == 2) + return 2; + } + else if (code < 256) { + if (EncLen_EUCJP[(int )(code & 0xff)] == 1) + return 1; + } + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static int diff --git a/src/euc_kr.c b/src/euc_kr.c index 7fa50afb..b0e9fbff 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@ euc_kr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,15 @@ static int euckr_code_to_mbclen(OnigCodePoint code) { if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - if ((code & 0xff00) != 0) return 2; - if (EncLen_EUCKR[(int )(code & 0xff)] == 1) return 1; + + if ((code & 0xff00) != 0) { + if (EncLen_EUCKR[(int )(code >> 8) & 0xff] == 2) + return 2; + } + else { + if (EncLen_EUCKR[(int )(code & 0xff)] == 1) + return 1; + } return ONIGERR_INVALID_CODE_POINT_VALUE; } diff --git a/src/euc_tw.c b/src/euc_tw.c index 8e72b970..99dc5eca 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@ euc_tw.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,15 +57,22 @@ euctw_mbc_enc_len(const UChar* p) static int euctw_code_to_mbclen(OnigCodePoint code) { - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - else if ((code & 0xff00) != 0) return 2; + if ((code & 0xff000000) != 0) { + if (EncLen_EUCTW[(int )(code >> 24) & 0xff] == 4) + return 4; + } + else if ((code & 0xff0000) != 0) + return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff00) != 0) { + if (EncLen_EUCTW[(int )(code >> 8) & 0xff] == 2) + return 2; + } else { if (EncLen_EUCTW[(int )(code & 0xff)] == 1) return 1; - - return ONIGERR_INVALID_CODE_POINT_VALUE; } + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static int diff --git a/src/gb18030.c b/src/gb18030.c index 1385a7fa..7409d3e0 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -89,15 +89,25 @@ gb18030_mbc_enc_len(const UChar* p) static int gb18030_code_to_mbclen(OnigCodePoint code) { - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; - else if ((code & 0xff00) != 0) return 2; + if ((code & 0xff000000) != 0) { + if (GB18030_MAP[(int )(code >> 24) & 0xff] == CM) + if (GB18030_MAP[(int )(code >> 16) & 0xff] == C4) + return 4; + } + else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE; + else if ((code & 0xff00) != 0) { + if (GB18030_MAP[(int )(code >> 8) & 0xff] == CM) { + char c = GB18030_MAP[(int )code & 0xff]; + if (c == CM || c == C2) + return 2; + } + } else { - if (GB18030_MAP[(int )(code & 0xff)] == CM) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - return 1; + if (GB18030_MAP[(int )(code & 0xff)] != CM) + return 1; } + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static int diff --git a/src/regexec.c b/src/regexec.c index 1b6895d6..cd4c1c19 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2362,6 +2362,10 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, p1++; p2++; } + if (s2 >= end2) { + if (s1 < end1) return 0; + else break; + } } *ps2 = s2; diff --git a/src/regparse.c b/src/regparse.c index cc015a76..e815b941 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -4564,7 +4564,7 @@ fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env) /* \M-, \C-, \c, or \... */ static int -fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) +fetch_escaped_value_raw(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) { int v; OnigCodePoint c; @@ -4583,7 +4583,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) if (PEND) return ONIGERR_END_PATTERN_AT_META; PFETCH_S(c); if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env, &c); + v = fetch_escaped_value_raw(&p, end, env, &c); if (v < 0) return v; } c = ((c & 0xff) | 0x80); @@ -4612,7 +4612,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) } else { if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env, &c); + v = fetch_escaped_value_raw(&p, end, env, &c); if (v < 0) return v; } c &= 0x9f; @@ -4634,6 +4634,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) return 0; } +static int +fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) +{ + int r; + int len; + + r = fetch_escaped_value_raw(src, end, env, val); + if (r != 0) return r; + + len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val); + if (len < 0) return len; + + return 0; +} + static int fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env); static OnigCodePoint @@ -5192,7 +5207,7 @@ fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env, int state) else { int curr_state; - curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START; + curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START; r = check_code_point_sequence_cc(p, end, tok->base_num, enc, curr_state); if (r < 0) return r; @@ -8419,8 +8434,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end, case TK_CODE_POINT: { UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); + len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code); if (len < 0) return len; + len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG *np = node_new_str_crude(buf, buf + len, env->options); #else diff --git a/src/sjis.c b/src/sjis.c index 1fd92d93..10afd9d3 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2020 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -113,13 +113,15 @@ static int code_to_mbclen(OnigCodePoint code) { if (code < 256) { - return EncLen_SJIS[(int )code] == 1; + if (EncLen_SJIS[(int )code] == 1) + return 1; } - else if (code <= 0xffff) { - return 2; + else if (code < 0x10000) { + if (EncLen_SJIS[(int )(code >> 8) & 0xff] == 2) + return 2; } - else - return ONIGERR_INVALID_CODE_POINT_VALUE; + + return ONIGERR_INVALID_CODE_POINT_VALUE; } static OnigCodePoint