Skip to content

Commit

Permalink
minor tweaks to validate_utf() to avoid truncation of error offset
Browse files Browse the repository at this point in the history
e8cdae3 (Correct an incorrect cast., 2017-04-14) started changing
some of the casts to fit the type of the offset variable, so
complete that, and for consitency add them to the non UTF-8 code.
  • Loading branch information
carenas committed Dec 29, 2022
1 parent 0746b3d commit d60fe62
Showing 1 changed file with 24 additions and 24 deletions.
48 changes: 24 additions & 24 deletions src/pcre2_valid_utf.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ for (p = string; length > 0; p++)

if (((d = *(++p)) & 0xc0) != 0x80)
{
*erroroffset = (int)(p - string) - 1;
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
return PCRE2_ERROR_UTF8_ERR6;
}

Expand All @@ -186,7 +186,7 @@ for (p = string; length > 0; p++)

case 1: if ((c & 0x3e) == 0)
{
*erroroffset = (int)(p - string) - 1;
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
return PCRE2_ERROR_UTF8_ERR15;
}
break;
Expand All @@ -198,17 +198,17 @@ for (p = string; length > 0; p++)
case 2:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if (c == 0xe0 && (d & 0x20) == 0)
{
*erroroffset = (int)(p - string) - 2;
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR16;
}
if (c == 0xed && d >= 0xa0)
{
*erroroffset = (int)(p - string) - 2;
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR14;
}
break;
Expand All @@ -220,22 +220,22 @@ for (p = string; length > 0; p++)
case 3:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if (c == 0xf0 && (d & 0x30) == 0)
{
*erroroffset = (int)(p - string) - 3;
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR17;
}
if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
{
*erroroffset = (int)(p - string) - 3;
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR13;
}
break;
Expand All @@ -251,22 +251,22 @@ for (p = string; length > 0; p++)
case 4:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR9;
}
if (c == 0xf8 && (d & 0x38) == 0)
{
*erroroffset = (int)(p - string) - 4;
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR18;
}
break;
Expand All @@ -277,27 +277,27 @@ for (p = string; length > 0; p++)
case 5:
if ((*(++p) & 0xc0) != 0x80) /* Third byte */
{
*erroroffset = (int)(p - string) - 2;
*erroroffset = (PCRE2_SIZE)(p - string) - 2;
return PCRE2_ERROR_UTF8_ERR7;
}
if ((*(++p) & 0xc0) != 0x80) /* Fourth byte */
{
*erroroffset = (int)(p - string) - 3;
*erroroffset = (PCRE2_SIZE)(p - string) - 3;
return PCRE2_ERROR_UTF8_ERR8;
}
if ((*(++p) & 0xc0) != 0x80) /* Fifth byte */
{
*erroroffset = (int)(p - string) - 4;
*erroroffset = (PCRE2_SIZE)(p - string) - 4;
return PCRE2_ERROR_UTF8_ERR9;
}
if ((*(++p) & 0xc0) != 0x80) /* Sixth byte */
{
*erroroffset = (int)(p - string) - 5;
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
return PCRE2_ERROR_UTF8_ERR10;
}
if (c == 0xfc && (d & 0x3c) == 0)
{
*erroroffset = (int)(p - string) - 5;
*erroroffset = (PCRE2_SIZE)(p - string) - 5;
return PCRE2_ERROR_UTF8_ERR19;
}
break;
Expand All @@ -309,7 +309,7 @@ for (p = string; length > 0; p++)

if (ab > 3)
{
*erroroffset = (int)(p - string) - ab;
*erroroffset = (PCRE2_SIZE)(p - string) - ab;
return (ab == 4)? PCRE2_ERROR_UTF8_ERR11 : PCRE2_ERROR_UTF8_ERR12;
}
}
Expand Down Expand Up @@ -340,21 +340,21 @@ for (p = string; length > 0; p++)
/* High surrogate. Must be a followed by a low surrogate. */
if (length == 0)
{
*erroroffset = p - string;
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF16_ERR1;
}
p++;
length--;
if ((*p & 0xfc00) != 0xdc00)
{
*erroroffset = p - string - 1;
*erroroffset = (PCRE2_SIZE)(p - string) - 1;
return PCRE2_ERROR_UTF16_ERR2;
}
}
else
{
/* Isolated low surrogate. Always an error. */
*erroroffset = p - string;
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF16_ERR3;
}
}
Expand All @@ -379,14 +379,14 @@ for (p = string; length > 0; length--, p++)
/* Normal UTF-32 code point. Neither high nor low surrogate. */
if (c > 0x10ffffu)
{
*erroroffset = p - string;
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF32_ERR2;
}
}
else
{
/* A surrogate */
*erroroffset = p - string;
*erroroffset = (PCRE2_SIZE)(p - string);
return PCRE2_ERROR_UTF32_ERR1;
}
}
Expand Down

0 comments on commit d60fe62

Please sign in to comment.