Skip to content

Commit

Permalink
uptex: support IVS up to VS256, new kcatcode latin_ucs(14) (#46,#150,#153)
Browse files Browse the repository at this point in the history
  • Loading branch information
t-tk committed Apr 6, 2024
1 parent 6f1cf69 commit 92aeca9
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 38 deletions.
3 changes: 2 additions & 1 deletion source/texk/web2c/euptexdir/euptex.ch1
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@ if j=1 then
begin cur_chr:=fromBUFF(ustringcast(buffer), limit+1, loc);
cur_tok:=kcat_code(kcatcodekey(cur_chr));
if (multistrlen(ustringcast(buffer), limit+1,loc)>1)and
check_kcat_code(cur_tok) then
check_kcat_code(cur_tok,cur_chr) then
begin if (cur_tok=not_cjk) then cur_tok:=other_kchar;
if (cur_tok=latin_ucs) then cur_tok:=other_token;
cur_tok:=cur_chr+cur_tok*max_cjk_val;
loc:=loc+multistrlen(ustringcast(buffer), limit+1,loc);
end
Expand Down
2 changes: 2 additions & 0 deletions source/texk/web2c/euptexdir/euptex.defines
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
@define function ischarkanji ();
@define function ismultiprn ();
@define function calcpos ();
@define function ktokentocmd ();
@define function ktokentochr ();
@define function kcatcodekey ();
@define function multilenbuffchar ();
@define function nrestmultichr ();
Expand Down
6 changes: 3 additions & 3 deletions source/texk/web2c/euptexdir/pdfstrcmp-eup-post.ch
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
@x
if (cc=not_cjk) then cc:=other_kchar;
@y
if cat>=kanji then cc:=cat else if (cc=not_cjk) then cc:=other_kchar;
if (cat>=kanji)and(cat<=modifier) then cc:=cat else if (cc=not_cjk) then cc:=other_kchar;
@z
@x
Expand All @@ -17,15 +17,15 @@
else t:=left_brace_token*cat+t;
@y
if (t=" ")and(cat=0) then t:=space_token
else if (cat=0)or(cat>=kanji) then t:=other_token+t
else if (cat=0)or((cat>=kanji)and(cat<=modifier)) then t:=other_token+t
else if cat=active_char then t:= cs_token_flag + active_base + t
else t:=left_brace_token*cat+t;
@z
@x
@d illegal_Ucharcat_wchar_catcode(#)==(#<kanji)or(#>other_kchar)
@y
@d illegal_Ucharcat_wchar_catcode(#)==(#<kanji)or(#>hangul)
@d illegal_Ucharcat_wchar_catcode(#)==(#<kanji)or(#>modifier)
@z
@x
Expand Down
44 changes: 40 additions & 4 deletions source/texk/web2c/uptexdir/kanji.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,36 @@
#include "kanji.h"

/* Token values at or above this are rejected outright by check_kanji(). */
#define CS_TOKEN_FLAG 0x1FFFFFFF
/* Exclusive upper bound accepted by is_char_kanji() in internal-upTeX mode;
   also the final entry of the ucs_range[] table. */
#define IVS_CHAR_LIMIT 0x4400000
/* Radix separating the category part from the character part of a token:
   ktoken_to_cmd() divides by it, ktoken_to_chr() takes the remainder. */
#define CJK_CHAR_LIMIT 0x1000000
/* Upper bound check_kanji() applies to the character part of tokens whose
   category lies in CAT_LEFT_BRACE..CAT_DELIM_NUM. */
#define UCS_CHAR_LIMIT 0x120000
/* Mask selecting the low 24 bits (character part) of a token. */
#define CJK_TOKEN_FLAG 0xFFFFFF
/* Inclusive category range tested first in check_kanji(). */
#define CAT_LEFT_BRACE 1
#define CAT_DELIM_NUM 15
/* Inclusive kcatcode range tested second in check_kanji(). */
#define KCAT_KANJI 16
#define KCAT_MODIFIER 20
/* Base category for IVS tokens: check_kanji() accepts categories
   KCAT_KANJI_IVS+1 .. KCAT_KANJI_IVS+4 and subtracts
   KCAT_KANJI_IVS * CJK_CHAR_LIMIT to recover the character value. */
#define KCAT_KANJI_IVS 23

/* TOKEN */
/*
 * check_kanji: decide whether token c carries a character value that
 * is_char_kanji() accepts.
 *
 *   c  token value (category part above the low 24 bits, character
 *      part in the low 24 bits, or an IVS token offset by
 *      KCAT_KANJI_IVS * CJK_CHAR_LIMIT).
 *
 * Returns false for any value at or above CS_TOKEN_FLAG and for any
 * category outside the three ranges handled below; otherwise returns
 * the result of is_char_kanji() on the extracted character value.
 *
 * The earlier two-line form (reject unless the category is in
 * KCAT_KANJI..KCAT_MODIFIER, otherwise test the masked character) always
 * returned and so made the category and IVS branches unreachable; it is
 * subsumed by the second branch below.
 */
boolean check_kanji (integer c)
{
    integer c0, c1;

    if (c >= CS_TOKEN_FLAG) return false;

    c0 = c & CJK_TOKEN_FLAG;   /* character part: low 24 bits */
    c1 = XXHi(c);              /* NOTE(review): presumably the category byte
                                  above the low 24 bits -- confirm against
                                  the XXHi definition */
    if (c1>=CAT_LEFT_BRACE && c1<=CAT_DELIM_NUM &&
        c0 < UCS_CHAR_LIMIT) {
        /* categories 1..15 with a character part below UCS_CHAR_LIMIT */
        return is_char_kanji(c0);
    }
    else if (c1>=KCAT_KANJI && c1<=KCAT_MODIFIER) {
        /* kcatcode range 16..20 */
        return is_char_kanji(c0);
    }
    else if (c1>=KCAT_KANJI_IVS+1 && c1<=KCAT_KANJI_IVS+4) {
        /* IVS token: remove the KCAT_KANJI_IVS offset; the remaining value
           is at or above CJK_CHAR_LIMIT, so the mask must not be applied */
        return is_char_kanji(c - KCAT_KANJI_IVS * CJK_CHAR_LIMIT);
    }
    return false;
}

boolean is_char_ascii(integer c)
Expand All @@ -27,7 +46,7 @@ boolean is_char_ascii(integer c)
/*
 * is_char_kanji: test whether character value c is acceptable as a
 * kanji character.
 *
 * When is_internalUPTEX() is true, any value in [0, IVS_CHAR_LIMIT) is
 * accepted. The bound is IVS_CHAR_LIMIT rather than CJK_CHAR_LIMIT
 * because check_kanji() passes IVS values at or above CJK_CHAR_LIMIT and
 * the ucs_range[] table ends at IVS_CHAR_LIMIT.
 * Otherwise the result is iskanji1() on Hi(c) combined with iskanji2()
 * on Lo(c).
 */
boolean is_char_kanji(integer c)
{
    if (is_internalUPTEX())
        return ((c >= 0)&&(c<IVS_CHAR_LIMIT));
    else
        return iskanji1(Hi(c)) && iskanji2(Lo(c));
}
Expand Down Expand Up @@ -55,6 +74,22 @@ integer calc_pos(integer c)
return(c1 + c2); /* ret = 0..255 */
}

/*
 * ktoken_to_cmd: extract the category (command) part of token c.
 *
 * Tokens strictly above KCAT_KANJI_IVS * CJK_CHAR_LIMIT are reported as
 * KCAT_KANJI; every other token yields c / CJK_CHAR_LIMIT.
 */
integer ktoken_to_cmd(integer c)
{
    const integer ivs_base = KCAT_KANJI_IVS * CJK_CHAR_LIMIT;

    return (c > ivs_base) ? KCAT_KANJI : (c / CJK_CHAR_LIMIT);
}

/*
 * ktoken_to_chr: extract the character part of token c.
 *
 * Tokens strictly above KCAT_KANJI_IVS * CJK_CHAR_LIMIT have that offset
 * subtracted; every other token yields c % CJK_CHAR_LIMIT.
 */
integer ktoken_to_chr(integer c)
{
    const integer ivs_base = KCAT_KANJI_IVS * CJK_CHAR_LIMIT;

    if (c <= ivs_base)
        return (c % CJK_CHAR_LIMIT);
    return (c - ivs_base);
}

/* Ref. http://www.unicode.org/Public/UNIDATA/Blocks.txt */
/* # Blocks-15.1.0.txt */
/* # Date: 2023-07-28, 15:47:20 GMT */
Expand Down Expand Up @@ -426,7 +461,8 @@ static long ucs_range[]={
0x400000, /* Standardized Variation Sequence */
0x800000, /* Emoji Keycap Sequence */
0x800080, /* Ideographic Variation Sequence */ /* 0x16C */
CJK_CHAR_LIMIT
CJK_CHAR_LIMIT, /* Ideographic Variation Sequence, VS49..VS256 */
IVS_CHAR_LIMIT
};

#define NUCS_RANGE (sizeof(ucs_range)/sizeof(ucs_range[0]))
Expand Down
4 changes: 4 additions & 0 deletions source/texk/web2c/uptexdir/kanji.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ extern boolean is_char_kanji (integer c);
extern boolean ismultiprn (integer c);
extern integer calc_pos (integer c);
#define calcpos calc_pos
/* Split a token into its category (cmd) and character (chr) parts;
   defined in kanji.c. */
extern integer ktoken_to_cmd (integer c);
extern integer ktoken_to_chr (integer c);
/* Underscore-free aliases matching the names listed with @define in
   euptex.defines. */
#define ktokentocmd ktoken_to_cmd
#define ktokentochr ktoken_to_chr
extern integer kcatcodekey (integer c);
extern integer multilenbuffchar (integer c);

Expand Down
Loading

0 comments on commit 92aeca9

Please sign in to comment.