Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix conversion of varchar to binary/varbinary and vice versa #1957

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
982793f
initial commit
tanscorpio7 Oct 26, 2023
745aaeb
fix indent
tanscorpio7 Oct 26, 2023
d9391a2
allow 0x00 when encoding to UTF-8
tanscorpio7 Oct 26, 2023
cd3bbfc
add tests
tanscorpio7 Oct 26, 2023
d2db3fb
correct upgrade tests for babel_function_string
tanscorpio7 Oct 26, 2023
3011a28
update tests
tanscorpio7 Oct 31, 2023
a8ddfae
Merge branch 'BABEL_3_X_DEV' into BABEL_1940
tanscorpio7 Nov 2, 2023
c05919d
use correct func to build typename
tanscorpio7 Nov 2, 2023
7f9da36
empty commit rerun actions
tanscorpio7 Nov 2, 2023
e033ba4
release syscache tuples
tanscorpio7 Nov 3, 2023
e948fd0
empty commit rerun actions post engine changes
tanscorpio7 Nov 3, 2023
cdd4e35
rerun actions with empty commit
tanscorpio7 Nov 3, 2023
62729cf
add few more tests
tanscorpio7 Nov 3, 2023
a193211
rerun actions with empty commit
tanscorpio7 Nov 7, 2023
d2e85f6
Merge remote-tracking branch 'upstream/BABEL_3_X_DEV' into BABEL_1940
tanscorpio7 Nov 7, 2023
d16c126
empty commit
tanscorpio7 Nov 7, 2023
5f4507c
reject null byte
tanscorpio7 Nov 8, 2023
3454376
Merge branch 'BABEL_3_X_DEV' into BABEL_1940
tanscorpio7 Nov 8, 2023
b798d50
empty commit to rerun tests
tanscorpio7 Nov 8, 2023
c0b7cf5
allow trailing null byte in cast to varchar
tanscorpio7 Nov 8, 2023
66d3e7f
remove varchar varbinary cast from geography/geometry cast
tanscorpio7 Nov 9, 2023
cdf2397
push fix
tanscorpio7 Nov 9, 2023
78b5e53
try fix for geometry test failures
tanscorpio7 Nov 9, 2023
c3a3b99
add cast for binary to var binary
tanscorpio7 Nov 9, 2023
3cb99b8
Merge branch 'BABEL_3_X_DEV' into BABEL_1940
tanscorpio7 Nov 9, 2023
0d8bbf6
empty commit
tanscorpio7 Nov 9, 2023
25fda37
fix typmod for binary data type
tanscorpio7 Nov 9, 2023
b768e82
remove typmod fix for now
tanscorpio7 Nov 9, 2023
d944eb0
push correct test out file
tanscorpio7 Nov 9, 2023
f2e7f81
just comments
tanscorpio7 Nov 10, 2023
278c0d5
add some varbinary binary test case
tanscorpio7 Nov 10, 2023
0bbf3c8
some minor refactoring
tanscorpio7 Nov 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions contrib/babelfishpg_common/src/encoding/mb/conv.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,10 +401,6 @@ TsqlLocalToUtf(const unsigned char *iso, int len,
unsigned char b3 = 0;
unsigned char b4 = 0;

/* "break" cases all represent errors */
if (*iso == '\0')
break;

tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved
if (!IS_HIGHBIT_SET(*iso))
{
/* ASCII case is easy, assume it's one-to-one conversion */
Expand Down
43 changes: 31 additions & 12 deletions contrib/babelfishpg_common/src/varbinary.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
#include "access/hash.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "collation.h"
#include "common/int.h"
#include "encoding/encoding.h"
#include "lib/hyperloglog.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
Expand Down Expand Up @@ -621,13 +623,16 @@ Datum
varcharvarbinary(PG_FUNCTION_ARGS)
{
VarChar *source = PG_GETARG_VARCHAR_PP(0);
char *data = VARDATA_ANY(source);
char *data = VARDATA_ANY(source); /* Source string is UTF-8 */
char *encoded_data;
char *rp;
size_t len = VARSIZE_ANY_EXHDR(source);
int32 typmod = PG_GETARG_INT32(1);
bool isExplicit = PG_GETARG_BOOL(2);
int32 maxlen;
bytea *result;
coll_info collInfo;
int encodedByteLen;

if (!isExplicit)
ereport(ERROR,
Expand All @@ -636,20 +641,26 @@ varcharvarbinary(PG_FUNCTION_ARGS)
"varbinary is not allowed. Use the CONVERT function "
"to run this query.")));

/* If typmod is -1 (or invalid), use the actual length */
collInfo = lookup_collation_table(get_server_collation_oid_internal(false));
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved
encoded_data = encoding_conv_util(data, len, PG_UTF8, collInfo.enc, &encodedByteLen);

/*
* If typmod is -1 (or invalid), use the actual length of the encoded data.
* The length must be checked after encoding into the server encoding.
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved
*/
if (typmod < (int32) VARHDRSZ)
maxlen = len;
maxlen = encodedByteLen;
else
maxlen = typmod - VARHDRSZ;

if (len > maxlen)
len = maxlen;
if (encodedByteLen > maxlen)
encodedByteLen = maxlen;

result = (bytea *) palloc(len + VARHDRSZ);
SET_VARSIZE(result, len + VARHDRSZ);
result = (bytea *) palloc(encodedByteLen + VARHDRSZ);
SET_VARSIZE(result, encodedByteLen + VARHDRSZ);

rp = VARDATA(result);
memcpy(rp, data, len);
memcpy(rp, encoded_data, encodedByteLen);

PG_RETURN_BYTEA_P(result);
}
Expand Down Expand Up @@ -697,21 +708,29 @@ Datum
varbinaryvarchar(PG_FUNCTION_ARGS)
{
bytea *source = PG_GETARG_BYTEA_PP(0);
char *data = VARDATA_ANY(source);
char *data = VARDATA_ANY(source); /* Source data is server encoded */
VarChar *result;
char *encoded_result;
size_t len = VARSIZE_ANY_EXHDR(source);
int32 typmod = PG_GETARG_INT32(1);
int32 maxlen = typmod - VARHDRSZ;
VarChar *result;
coll_info collInfo;
int encodedByteLen;

collInfo = lookup_collation_table(get_server_collation_oid_internal(false));
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved

/*
* Cast the entire input binary data if maxlen is invalid (negative) or the
* supplied data fits within it.
*/
if (maxlen < 0 || len <= maxlen)
result = (VarChar *) cstring_to_text_with_len(data, len);
encoded_result = encoding_conv_util(data, len, collInfo.enc, PG_UTF8, &encodedByteLen);
/* Else truncate it */
else
result = (VarChar *) cstring_to_text_with_len(data, maxlen);
encoded_result = encoding_conv_util(data, maxlen, collInfo.enc, PG_UTF8, &encodedByteLen);

result = (VarChar *) cstring_to_text_with_len(encoded_result, encodedByteLen);

PG_RETURN_VARCHAR_P(result);
}

Expand Down
32 changes: 31 additions & 1 deletion contrib/babelfishpg_tsql/src/pltsql_coerce.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "catalog/pg_type.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_namespace.h"
#include "collation.h"
#include "executor/spi.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
Expand All @@ -32,6 +33,7 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "pltsql_instr.h"
#include "parser/parse_target.h"


#include <math.h>
Expand Down Expand Up @@ -972,7 +974,7 @@ tsql_coerce_string_literal_hook(ParseCallbackState *pcbstate, Oid targetTypeId,
if (ccontext != COERCION_EXPLICIT)
{
/*
* T-SQL may forbid casting from string literal to certain
* T-SQL forbids implicit casting from string literal to certain
* datatypes (i.e. binary, varbinary)
*/
if ((*common_utility_plugin_ptr->is_tsql_binary_datatype) (baseTypeId))
Expand Down Expand Up @@ -1100,6 +1102,34 @@ tsql_coerce_string_literal_hook(ParseCallbackState *pcbstate, Oid targetTypeId,
newcon->constvalue = stringTypeDatum(baseType, value, inputTypeMod);
}
}
else if ((*common_utility_plugin_ptr->is_tsql_binary_datatype) (baseTypeId) ||
(*common_utility_plugin_ptr->is_tsql_varbinary_datatype) (baseTypeId))
{
/*
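* Binary datatypes should be passed in client encoding when an
* explicit cast is requested. The string literal is wrapped in a
* text constant and coerced to the target type through an
* explicit cast.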
*/

TypeName *varcharTypeName = makeTypeName("varchar");
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved
Node *result;
Const *tempcon;

typenameTypeIdAndMod(NULL, (const TypeName *)varcharTypeName, &baseTypeId, &baseTypeMod);
baseType = typeidType(baseTypeId);
pfree(varcharTypeName);

tempcon = makeConst(TEXTOID, -1,
tsql_get_server_collation_oid_internal(false),
-1, PointerGetDatum(cstring_to_text(value)),
false, false);
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved

result = coerce_to_target_type(NULL, (Node *) tempcon, baseTypeId,
targetTypeId, targetTypeMod,
COERCION_EXPLICIT,
COERCE_EXPLICIT_CAST,
location);
return result;
}
else
{
newcon->constvalue = stringTypeDatum(baseType, value, inputTypeMod);
Expand Down
217 changes: 217 additions & 0 deletions test/JDBC/expected/BABEL_1940.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
SELECT CONVERT(VARCHAR(10), 0x123456789)
GO
~~START~~
varchar
#Eg‰
~~END~~


SELECT CONVERT(VARCHAR(10), 0x80)
GO
~~START~~
varchar
~~END~~


SELECT CONVERT(VARCHAR(10), 0xaaa)
GO
~~START~~
varchar
<newline>ª
~~END~~


SELECT CONVERT(VARCHAR(10), 0x330033)
GO
~~START~~
varchar
3
~~END~~


SELECT CONVERT(VARBINARY(10), 'ウ')
GO
~~START~~
varbinary
3F
~~END~~


SELECT CONVERT(VARBINARY(10), 'パ')
GO
~~START~~
varbinary
3F3F
~~END~~


SELECT CONVERT(VARBINARY(10), 'A')
GO
~~START~~
varbinary
41
~~END~~


SELECT CONVERT(VARBINARY(10), 'ア')
GO
~~START~~
varbinary
3F
~~END~~


SELECT CONVERT(VARBINARY(10), 0x81)
GO
~~START~~
varbinary
81
~~END~~


SELECT CONVERT(VARBINARY(10), 0x330033)
GO
~~START~~
varbinary
330033
~~END~~


DECLARE @key varchar(20) = 'part1'
DECLARE @email varchar(20) = 'part2'
SELECT CONVERT(VARCHAR(10), HASHBYTES('SHA1', @key + LOWER(@email)))
GO
~~START~~
varchar
æ/fact¢+Ó
~~END~~



CREATE TABLE babel_1940_t1 (a VARBINARY(9))
GO

INSERT INTO babel_1940_t1 VALUES(0x80)
INSERT INTO babel_1940_t1 VALUES(0xaaa)
INSERT INTO babel_1940_t1 VALUES(0x123456789)
GO
~~ROW COUNT: 1~~

~~ROW COUNT: 1~~

~~ROW COUNT: 1~~


SELECT * FROM babel_1940_t1
GO
~~START~~
varbinary
80
0AAA
0123456789
~~END~~


SELECT CONVERT(VARCHAR(9), a) FROM babel_1940_t1
GO
~~START~~
varchar
<newline>ª
#Eg‰
~~END~~


SELECT CAST(a as VARCHAR(9)) FROM babel_1940_t1
GO
~~START~~
varchar
<newline>ª
#Eg‰
~~END~~


SELECT CAST(a as VARCHAR(10)) FROM babel_1940_t1
GO
~~START~~
varchar
<newline>ª
#Eg‰
~~END~~



CREATE TABLE babel_1940_t2(a varchar(10) collate japanese_cs_as);
GO

insert into babel_1940_t2 values ('ウ'), ('C'), ('パ'), ('3'), ('c'), ('イ'), ('C'),('ハ'),('1'),
('ア'),('パ'), ('b'), ('2'), ('B'),('1'), ('A'),('ア'),('A'), ('a'),('AbC'), ('aBc');
GO
~~ROW COUNT: 21~~


SELECT CONVERT(VARBINARY(10), a) FROM babel_1940_t2
GO
~~START~~
varbinary
3F
43
3F3F
3F
63
3F
3F
3F
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved
31
3F
3F
62
32
42
3F
3F
3F
41
61
416243
614263
~~END~~


SELECT CONVERT(VARBINARY(10), CONVERT(VARCHAR(10), CONVERT(VARCHAR(10), a))) FROM babel_1940_t2
tanscorpio7 marked this conversation as resolved.
Show resolved Hide resolved
GO
~~START~~
varbinary
3F
43
3F3F
3F
63
3F
3F
3F
31
3F
3F
62
32
42
3F
3F
3F
41
61
416243
614263
~~END~~



DROP TABLE babel_1940_t2
GO

DROP TABLE babel_1940_t1
GO
Loading
Loading