Skip to content

Commit

Permalink
Merge pull request #14 from earl/fix-read-string
Browse files Browse the repository at this point in the history
Fix encoding detection for READ/string
  • Loading branch information
zsx committed Nov 18, 2014
2 parents 93a700d + b5294df commit f48c1ae
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/core/p-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ REBINT Mode_Syms[] = {
if (args & (AM_READ_STRING | AM_READ_LINES)) {
REBSER *nser = Decode_UTF_String(BIN_HEAD(ser), file->actual, -1);
if (nser == NULL) {
nser = ser;
Trap0(RE_BAD_DECODE);
}
Set_String(ds, nser);
if (args & AM_READ_LINES) Set_Block(ds, Split_Lines(ds));
Expand Down
15 changes: 9 additions & 6 deletions src/core/s-unicode.c
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,9 @@ ConversionResult ConvertUTF8toUTF32 (
** Handle all the details of decoding a string.
** Input is a byte series. Len is the length of the input.
** The utf is 0, 8, +/-16, +/-32.
** A special -1 means use the BOM.
** A special -1 means use the BOM, if present, or UTF-8 otherwise.
**
** Returns the decoded string or NULL for unsupported encodings.
**
***********************************************************************/
{
Expand All @@ -931,17 +933,17 @@ ConversionResult ConvertUTF8toUTF32 (
REBINT size;

if (utf == -1) {
// Try to detect UTF encoding from a BOM. Returns 0 if no BOM present.
utf = What_UTF(bp, len);
if (utf) {
if (utf != 0) {
if (utf == 8) bp += 3, len -= 3;
else if (utf == -16 || utf == 16) bp += 2, len -= 2;
//else if (utf == -32 || utf == 32) bp += 4, len -= 4;
else return NULL;
} else {
return NULL;
}
}
else if (utf == 0 || utf == 8) {

if (utf == 0 || utf == 8) {
size = Decode_UTF8((REBUNI*)Reset_Buffer(ser, len), bp, len, TRUE);
}
else if (utf == -16 || utf == 16) {
Expand All @@ -951,7 +953,8 @@ ConversionResult ConvertUTF8toUTF32 (
// size = Decode_UTF32((REBUNI*)Reset_Buffer(ser, len/4 + 1), bp, len, utf < 0, TRUE);
// }
else {
return NULL; /* should never be here */
// Encoding is unsupported or not yet implemented.
return NULL;
}

if (size < 0) {
Expand Down

0 comments on commit f48c1ae

Please sign in to comment.