Skip to content

Commit

Permalink
[utf] More debug. Refs #249
Browse files: browse the repository at this point in the history
  • Loading branch information
rkd77 committed Jul 31, 2023
1 parent 54d712a commit a5694d0
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
19 changes: 18 additions & 1 deletion src/document/html/renderer.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <stdint.h>
#endif

#include <stdio.h>
/* Our current implementation of combining characters requires
* wcwidth(). Therefore the configure script should have disabled
* CONFIG_COMBINE if wcwidth() doesn't exist. */
Expand Down Expand Up @@ -504,6 +505,8 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,
if (realloc_spaces(part, x + charslen))
return 0;

fprintf(stderr, "set_hline: chars='%s', charslen=%d\n", chars, charslen);

/* U+00AD SOFT HYPHEN characters in HTML documents are
* supposed to be displayed only if the word is broken at that
* point. ELinks currently does not use them, so it should
Expand Down Expand Up @@ -556,6 +559,7 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,
unicode_val_T data;

if (document->buf_length) {
fprintf(stderr, "%s:%d:%s document->buf_length=%d\n", __FILE__, __LINE__, __FUNCTION__, document->buf_length);
/* previous char was broken in the middle */
int length = utf8charlen(document->buf);
unsigned char i;
Expand All @@ -566,7 +570,10 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,
}
document->buf_length = i;
document->buf[i] = '\0';
fprintf(stderr, "%s:%d:%s document->buf_length=%d\n", __FILE__, __LINE__, __FUNCTION__, document->buf_length);
data = utf8_to_unicode(&buf_ptr, buf_ptr + i);
fprintf(stderr, "%s:%d:%s data=%d\n", __FILE__, __LINE__, __FUNCTION__, data);

if (data != UCS_NO_CHAR) {
/* FIXME: If there was invalid
* UTF-8 in the buffer,
Expand All @@ -579,6 +586,7 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,
* each byte may have arrived in
* a separate call. */
document->buf_length = 0;
fprintf(stderr, "%s:%d:%s document->buf_length=%d\n", __FILE__, __LINE__, __FUNCTION__, document->buf_length);
goto good_char;
} else {
/* Still not full char */
Expand All @@ -592,11 +600,15 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,

while (chars < end) {
/* ELinks does not use NBSP_CHAR in UTF-8. */

fprintf(stderr, "%s:%d:%s document->buf_length=%d\n", __FILE__, __LINE__, __FUNCTION__, document->buf_length);
data = utf8_to_unicode((char **)&chars, end);
fprintf(stderr, "%s:%d:%s data=%d\n", __FILE__, __LINE__, __FUNCTION__, data);

if (data == UCS_NO_CHAR) {
part->spaces[x] = 0;
if (charslen == 1) {
fprintf(stderr, "%s:%d:%s charslen=1\n", __FILE__, __LINE__, __FUNCTION__);

/* HR */
unsigned char attr = schar->attr;

Expand All @@ -613,12 +625,14 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,
document->buf[i] = *chars++;
}
document->buf_length = i;
fprintf(stderr, "%s:%d:%s document->buf_length=%d\n", __FILE__, __LINE__, __FUNCTION__, document->buf_length);
break;
}
/* not reached */
}

good_char:
fprintf(stderr, "%s:%d:%s good_char data=%d\n", __FILE__, __LINE__, __FUNCTION__, data);
if (data == UCS_SOFT_HYPHEN)
continue;

Expand Down Expand Up @@ -732,7 +746,10 @@ set_hline(struct html_context *html_context, const char *chars, int charslen,
while (chars < end) {
unicode_val_T data;

fprintf(stderr, "%s:%d:%s charslen=%d\n", __FILE__, __LINE__, __FUNCTION__, charslen);
data = utf8_to_unicode((char **)&chars, end);
fprintf(stderr, "%s:%d:%s data=%d\n", __FILE__, __LINE__, __FUNCTION__, data);

#ifdef CONFIG_COMBINE
if (data == UCS_SOFT_HYPHEN
|| (data != UCS_NO_CHAR && wcwidth((wchar_t)data) == 0))
Expand Down
2 changes: 2 additions & 0 deletions src/intl/charsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,8 @@ utf8_to_unicode(char **string, const char *end)
unicode_val_T u;
int length;

fprintf(stderr, "utf8_to_unicode: str='%s', end='%s'\n", str, end);

length = utf8char_len_tab[str[0]];

if (str + length > (const unsigned char *)end) {
Expand Down
2 changes: 1 addition & 1 deletion test/chars.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
U+00C0 1À2Á3Â4Ã5Ä6Å7Æ8Ç9È
5Ä6Å7Æ8Ç9È

0 comments on commit a5694d0

Please sign in to comment.