-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathicu_spoof.c
122 lines (102 loc) · 3.22 KB
/
icu_spoof.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
* icu_spoof.c
*
* Part of icu_ext: a PostgreSQL extension to expose functionality from ICU
* (see http://icu-project.org)
*
* By Daniel Vérité, 2018-2023. See LICENSE.md
*/
#include "icu_ext.h"
#include "funcapi.h"
#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "unicode/uspoof.h"
PG_FUNCTION_INFO_V1(icu_confusable_string_skeleton);
PG_FUNCTION_INFO_V1(icu_spoof_check);
PG_FUNCTION_INFO_V1(icu_confusable_strings_check);
/*
* Get the "skeleton" for an input string.
* Two strings are confusable if their skeletons are identical.
*/
Datum
icu_confusable_string_skeleton(PG_FUNCTION_ARGS)
{
text *txt1 = PG_GETARG_TEXT_PP(0);
int32_t len1 = VARSIZE_ANY_EXHDR(txt1);
UErrorCode status = U_ZERO_ERROR;
USpoofChecker *sc;
int32_t ulen1, ulen_skel, result_len;
UChar *uchar1, *uchar_skel;
char *result;
sc = uspoof_open(&status);
if (!sc)
elog(ERROR, "ICU uspoof_open failed");
ulen1 = icu_to_uchar(&uchar1, text_to_cstring(txt1), len1);
// maximum of equal length sounds like a sane guess for the first try
ulen_skel = ulen1;
uchar_skel = (UChar*) palloc((ulen_skel)*sizeof(UChar));
ulen_skel = uspoof_getSkeleton(sc, 0, uchar1, ulen1, uchar_skel, ulen_skel, &status);
if (U_FAILURE(status) && status == U_BUFFER_OVERFLOW_ERROR) {
// try again with a properly sized buffer
status = U_ZERO_ERROR;
pfree(uchar_skel);
uchar_skel = (UChar*) palloc((ulen_skel)*sizeof(UChar));
ulen_skel = uspoof_getSkeleton(sc, 0, uchar1, ulen1, uchar_skel, ulen_skel, &status);
}
uspoof_close(sc);
if (U_FAILURE(status))
elog(ERROR, "ICU uspoof_getSkeleton failed: %s", u_errorName(status));
result_len = icu_from_uchar(&result, uchar_skel, ulen_skel);
PG_RETURN_TEXT_P(cstring_to_text_with_len(result, result_len));
}
/*
* Check whether the input string is likely to be an attempt at
* confusing a reader.
*/
Datum
icu_spoof_check(PG_FUNCTION_ARGS)
{
text *txt1 = PG_GETARG_TEXT_PP(0);
int32_t len1 = VARSIZE_ANY_EXHDR(txt1);
UErrorCode status = U_ZERO_ERROR;
USpoofChecker *sc;
int32_t bitmask;
int32_t ulen1;
UChar *uchar1;
sc = uspoof_open(&status);
if (!sc)
elog(ERROR, "ICU uspoof_open failed");
ulen1 = icu_to_uchar(&uchar1, text_to_cstring(txt1), len1);
bitmask = uspoof_check(sc, uchar1, ulen1, NULL, &status);
uspoof_close(sc);
if (U_FAILURE(status))
elog(ERROR, "ICU uspoof_areConfusable failed: %s", u_errorName(status));
PG_RETURN_BOOL(bitmask != 0);
}
/*
* Check whether the two input strings are visually confusable with
* each other.
*/
Datum
icu_confusable_strings_check(PG_FUNCTION_ARGS)
{
text *txt1 = PG_GETARG_TEXT_PP(0);
int32_t len1 = VARSIZE_ANY_EXHDR(txt1);
text *txt2 = PG_GETARG_TEXT_PP(1);
int32_t len2 = VARSIZE_ANY_EXHDR(txt2);
int32_t ulen1, ulen2;
UChar *uchar1, *uchar2;
USpoofChecker *sc;
UErrorCode status = U_ZERO_ERROR;
int32_t bitmask;
sc = uspoof_open(&status);
if (!sc)
elog(ERROR, "ICU uspoof_open failed");
ulen1 = icu_to_uchar(&uchar1, text_to_cstring(txt1), len1);
ulen2 = icu_to_uchar(&uchar2, text_to_cstring(txt2), len2);
bitmask = uspoof_areConfusable(sc, uchar1, ulen1, uchar2, ulen2, &status);
uspoof_close(sc);
if (U_FAILURE(status))
elog(ERROR, "ICU uspoof_areConfusable failed: %s", u_errorName(status));
PG_RETURN_BOOL(bitmask != 0);
}