Skip to content

Commit

Permalink
Added wrapper function for Substring to handle binary datatypes and m…
Browse files Browse the repository at this point in the history
…ultibyte character (babelfish-for-postgresql#2747)

This commit aims to fix following issue --

1. SUBSTRING() function with last argument missing raises strange error message.
2. SUBSTRING() should work on [VAR]BINARY, Image argument.

In order to fix these issues,  added error message handling in ANTLR parser to throw appropriate error message for
different number of arguments. Also, added wrapper function handling for SUBSTRING() function to handle binary
datatypes and multibyte characters.

Engine PR: babelfish-for-postgresql/postgresql_modified_for_babelfish#415

Task: BABEL- 4118, BABEL- 3658
Signed-off-by: Anikait Agrawal <[email protected]>
  • Loading branch information
Anikait143 authored and staticlibs committed Oct 18, 2024
1 parent aa3fafd commit d1f9dfd
Show file tree
Hide file tree
Showing 46 changed files with 9,242 additions and 81 deletions.
1 change: 1 addition & 0 deletions contrib/babelfishpg_common/src/babelfishpg_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ get_common_utility_plugin(void)
common_utility_plugin_var.TdsGetMetaData = &TdsGetMetaData;
common_utility_plugin_var.TdsGetVariantBaseType = &TdsGetVariantBaseType;
common_utility_plugin_var.lookup_tsql_datatype_oid = &lookup_tsql_datatype_oid;
common_utility_plugin_var.resolve_pg_type_to_tsql = &resolve_pg_type_to_tsql;
common_utility_plugin_var.GetUTF8CodePoint = &GetUTF8CodePoint;
}
return &common_utility_plugin_var;
Expand Down
1 change: 1 addition & 0 deletions contrib/babelfishpg_common/src/babelfishpg_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ typedef struct common_utility_plugin
bool *isBaseDec, bool *isBaseBin,
bool *isBaseDate, int *variantHeaderLen);
Oid (*lookup_tsql_datatype_oid) (const char *typestr);
const char *(*resolve_pg_type_to_tsql) (Oid oid);
int32_t (*GetUTF8CodePoint) (const unsigned char *in, int len, int *consumed_p);

} common_utility_plugin;
16 changes: 16 additions & 0 deletions contrib/babelfishpg_common/src/typecode.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,22 @@ lookup_tsql_datatype_oid(const char *typename)
return typoid;
}

/* type_infos will return const char * so caller should not attempt to modify it */
const char *
resolve_pg_type_to_tsql(Oid oid)
{
ht_oid2typecode_entry_t *entry;

if (OidIsValid(oid))
{
entry = hash_search(ht_oid2typecode, &oid, HASH_FIND, NULL);

if (entry && entry->persist_id < TOTAL_TYPECODE_COUNT)
return type_infos[entry->persist_id].tsql_typname;
}
return NULL;
}

bool
is_tsql_sysname_datatype(Oid oid)
{
Expand Down
1 change: 1 addition & 0 deletions contrib/babelfishpg_common/src/typecode.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ extern void handle_type_and_collation(struct Node *node, Oid typid, Oid collatio
extern bool check_target_type_is_sys_varchar(Oid funcid);
extern type_info_t get_tsql_type_info(uint8_t type_code);
extern Datum translate_pg_type_to_tsql(PG_FUNCTION_ARGS);
extern const char *resolve_pg_type_to_tsql(Oid oid);
extern Oid get_tsql_datatype_oid(char *type_name);

/*
Expand Down
47 changes: 41 additions & 6 deletions contrib/babelfishpg_tsql/sql/sys_functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2955,23 +2955,58 @@ IS 'This function returns column or parameter information. Currently only works
-- substring --
CREATE OR REPLACE FUNCTION sys.substring(string TEXT, i INTEGER, j INTEGER)
RETURNS sys.VARCHAR
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE PARALLEL SAFE;
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string NTEXT, i INTEGER, j INTEGER)
RETURNS sys.NVARCHAR
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string sys.VARCHAR, i INTEGER, j INTEGER)
RETURNS sys.VARCHAR
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE PARALLEL SAFE;
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string sys.VARCHAR, i INTEGER, j INTEGER)
CREATE OR REPLACE FUNCTION sys.substring(string sys.BPCHAR, i INTEGER, j INTEGER)
RETURNS sys.VARCHAR
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE PARALLEL SAFE;
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string sys.NVARCHAR, i INTEGER, j INTEGER)
RETURNS sys.NVARCHAR
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE PARALLEL SAFE;
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string sys.NCHAR, i INTEGER, j INTEGER)
RETURNS sys.NVARCHAR
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE PARALLEL SAFE;
AS 'babelfishpg_tsql', 'tsql_varchar_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string sys.VARBINARY, i INTEGER, j INTEGER)
RETURNS sys.VARBINARY
AS 'babelfishpg_tsql', 'tsql_varbinary_substr' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

CREATE OR REPLACE FUNCTION sys.substring(string ANYELEMENT, i INTEGER, j INTEGER)
RETURNS sys.VARBINARY
AS
$BODY$
DECLARE
type_oid oid;
string_arg_datatype text;
string_basetype oid;
BEGIN
type_oid := pg_typeof(string);
string_arg_datatype := sys.translate_pg_type_to_tsql(type_oid);
IF string_arg_datatype IS NULL THEN
-- for User Defined Datatype, use immediate base type to check for argument datatype validation
string_basetype := sys.bbf_get_immediate_base_type_of_UDT(type_oid);
string_arg_datatype := sys.translate_pg_type_to_tsql(string_basetype);
END IF;

-- restricting arguments with invalid datatypes for substring function
IF string_arg_datatype NOT IN ('binary', 'image') THEN
RAISE EXCEPTION 'Argument data type % is invalid for argument 1 of substring function.', string_arg_datatype;
END IF;

RETURN sys.substring(string::sys.VARBINARY, i, j);
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;

-- wrapper functions for upper --
-- Function to handle datatypes which are implicitly convertable to VARCHAR
Expand Down
6 changes: 3 additions & 3 deletions contrib/babelfishpg_tsql/src/hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ static void fill_missing_values_in_copyfrom(Relation rel, Datum *values, bool *n
/*****************************************
* Utility Hooks
*****************************************/
static void pltsql_report_proc_not_found_error(List *names, List *argnames, Oid *input_typeids, int nargs, ParseState *pstate, int location, bool proc_call);
static void pltsql_report_proc_not_found_error(List *names, List *fargs, List *argnames, Oid *input_typeids, int nargs, ParseState *pstate, int location, bool proc_call);
extern PLtsql_execstate *get_outermost_tsql_estate(int *nestlevel);
extern PLtsql_execstate *get_current_tsql_estate();
static void pltsql_store_view_definition(const char *queryString, ObjectAddress address);
Expand Down Expand Up @@ -2069,7 +2069,7 @@ get_trigger_object_address(List *object, Relation *relp, bool missing_ok, bool o

/* Generate similar error message with SQL Server when function/procedure is not found if possible. */
void
pltsql_report_proc_not_found_error(List *names, List *given_argnames, Oid *input_typeids, int nargs, ParseState *pstate, int location, bool proc_call)
pltsql_report_proc_not_found_error(List *names, List *fargs, List *given_argnames, Oid *input_typeids, int nargs, ParseState *pstate, int location, bool proc_call)
{
FuncCandidateList candidates = NULL,
current_candidate = NULL;
Expand Down Expand Up @@ -2127,7 +2127,7 @@ pltsql_report_proc_not_found_error(List *names, List *given_argnames, Oid *input
* Check whether function is an special function or not, and
* report appropriate error if applicable
*/
validate_special_function(schemaname, funcname, nargs, input_typeids);
validate_special_function(schemaname, funcname, fargs, nargs, input_typeids);
}

ereport(ERROR,
Expand Down
2 changes: 1 addition & 1 deletion contrib/babelfishpg_tsql/src/pltsql.h
Original file line number Diff line number Diff line change
Expand Up @@ -2132,7 +2132,7 @@ extern void exec_alter_role_cmd(char *query_str, RoleSpec *role);
/*
* Functions in pltsql_coerce.c
*/
extern bool validate_special_function(char *proc_nsname, char *proc_name, int nargs, Oid *input_typeids);
extern bool validate_special_function(char *proc_nsname, char *proc_name, List* fargs, int nargs, Oid *input_typeids);
extern void init_special_function_list(void);

#endif /* PLTSQL_H */
69 changes: 61 additions & 8 deletions contrib/babelfishpg_tsql/src/pltsql_coerce.c
Original file line number Diff line number Diff line change
Expand Up @@ -1006,15 +1006,15 @@ init_special_function_list()
* and also validate the input argument data types.
*/
bool
validate_special_function(char *func_nsname, char *func_name, int nargs, Oid *input_typeids)
validate_special_function(char *func_nsname, char *func_name, List* fargs, int nargs, Oid *input_typeids)
{
tsql_special_function_t *special_func;
bool type_match;
Oid input_type_id, valid_type_id, base_type_id;
Oid sys_varcharoid;

/* Sanity checks */
if (func_name == NULL || (nargs != 0 && input_typeids == NULL))
if (func_name == NULL || (nargs != 0 && input_typeids == NULL) || fargs == NIL)
return false;

/*
Expand Down Expand Up @@ -1045,15 +1045,67 @@ validate_special_function(char *func_nsname, char *func_name, int nargs, Oid *in

/* If function is not a special function no additional handling required */
if (special_func == NULL)
{
/* report error for case when NULL casted to different datatypes and passed as 2nd or 3rd argument of SUBSTRING() function */
if (strlen(func_name) == 9 && strncmp(func_name, "substring", 9) == 0)
{
for (int i = 1; i < nargs; i++)
{
Node *arg = (Node *) lfirst(list_nth_cell(fargs, i));

if (input_typeids[i] == UNKNOWNOID)
continue;

/* Throw error when input is constant and NULL */
if (IsA(arg, Const) && ((Const *)arg)->constisnull)
{
const char *typ_name;
int len;

if (common_utility_plugin_ptr == NULL)
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("Failed to find common utility plugin.")));

typ_name = (*common_utility_plugin_ptr->resolve_pg_type_to_tsql) (input_typeids[i]);
if(typ_name)
{
len = strlen(typ_name);

if (!((len == 3 && strncmp(typ_name,"int", 3) == 0) ||
(len == 7 && strncmp(typ_name,"tinyint", 7) == 0) ||
(len == 8 && strncmp(typ_name,"smallint", 8) == 0) ||
(len == 6 && strncmp(typ_name,"bigint", 6) == 0)))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("Argument data type %s is invalid for argument %d of substring function.",
format_type_be(input_typeids[i]), i+1)));
}
}
}
}
return false;
}

sys_varcharoid = get_sys_varcharoid();

/* Report error in case of invalid argument datatype */
for (int i = 0; i < special_func->nargs; i++)
{
/* for unknown literals consider its type as sys.VARCHAR */
input_type_id = (input_typeids[i] == UNKNOWNOID) ? sys_varcharoid : input_typeids[i];
/*
* if argument is NULL then keep its typeId as UNKNOWN and skip the report error handling
* otherwise consider it as sys.VARCHAR
*/
if (input_typeids[i] == UNKNOWNOID)
{
Node *arg = (Node *) lfirst(list_nth_cell(fargs, i));
if (IsA(arg, Const) && ((Const *)arg)->constisnull)
continue;
else
input_type_id = sys_varcharoid;
}
else
input_type_id = input_typeids[i];

/* for UDT use its base type for input argument datatype validation */
base_type_id = get_immediate_base_type_of_UDT_internal(input_type_id);
Expand Down Expand Up @@ -1090,7 +1142,7 @@ validate_special_function(char *func_nsname, char *func_name, int nargs, Oid *in
* based on matching return type. Also throw error in case of invalid argument data type.
*/
static FuncCandidateList
tsql_func_select_candidate_for_special_func(List *names, int nargs, Oid *input_typeids, FuncCandidateList candidates)
tsql_func_select_candidate_for_special_func(List *names, List *fargs, int nargs, Oid *input_typeids, FuncCandidateList candidates)
{
FuncCandidateList current_candidate, best_candidate;
Oid expr_result_type;
Expand All @@ -1102,7 +1154,7 @@ tsql_func_select_candidate_for_special_func(List *names, int nargs, Oid *input_t

DeconstructQualifiedName(names, &proc_nsname, &proc_name);

is_func_validated = validate_special_function(proc_nsname, proc_name, nargs, input_typeids);
is_func_validated = validate_special_function(proc_nsname, proc_name, fargs, nargs, input_typeids);

/* Return NULL if function is not a special function */
if (!is_func_validated)
Expand All @@ -1121,7 +1173,7 @@ tsql_func_select_candidate_for_special_func(List *names, int nargs, Oid *input_t
if ((*common_utility_plugin_ptr->is_tsql_nvarchar_datatype)(input_typeids[1])
|| (*common_utility_plugin_ptr->is_tsql_nchar_datatype)(input_typeids[1]))
{
expr_result_type = (*common_utility_plugin_ptr->lookup_tsql_datatype_oid) ("nvarchar");
expr_result_type = (*common_utility_plugin_ptr->lookup_tsql_datatype_oid) ("nvarchar");
}
else if ((*common_utility_plugin_ptr->is_tsql_varchar_datatype)(input_typeids[1])
|| (*common_utility_plugin_ptr->is_tsql_bpchar_datatype)(input_typeids[1])
Expand Down Expand Up @@ -1194,6 +1246,7 @@ tsql_func_select_candidate_for_special_func(List *names, int nargs, Oid *input_t

static FuncCandidateList
tsql_func_select_candidate(List *names,
List *fargs,
int nargs,
Oid *input_typeids,
FuncCandidateList candidates,
Expand All @@ -1207,7 +1260,7 @@ tsql_func_select_candidate(List *names,
bool candidates_are_opers = false;

if (is_special)
return tsql_func_select_candidate_for_special_func(names, nargs, input_typeids, candidates);
return tsql_func_select_candidate_for_special_func(names, fargs, nargs, input_typeids, candidates);

if (unknowns_resolved)
{
Expand Down
16 changes: 13 additions & 3 deletions contrib/babelfishpg_tsql/src/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ PG_FUNCTION_INFO_V1(quotename);
PG_FUNCTION_INFO_V1(string_escape);
PG_FUNCTION_INFO_V1(formatmessage);
PG_FUNCTION_INFO_V1(tsql_varchar_substr);
PG_FUNCTION_INFO_V1(tsql_varbinary_substr);
PG_FUNCTION_INFO_V1(float_str);

/*
Expand Down Expand Up @@ -497,14 +498,23 @@ prepare_format_string(StringInfo buf, char *msg_string, int nargs,
Datum
tsql_varchar_substr(PG_FUNCTION_ARGS)
{
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
PG_RETURN_NULL();

PG_RETURN_VARCHAR_P(DirectFunctionCall3(text_substr, PG_GETARG_DATUM(0),
PG_GETARG_INT32(1),
PG_GETARG_INT32(2)));
}

/*
* tsql_varbinary_substr()
* Return a substring starting at the specified position.
*/
Datum
tsql_varbinary_substr(PG_FUNCTION_ARGS)
{
PG_RETURN_BYTEA_P(DirectFunctionCall3(bytea_substr, PG_GETARG_DATUM(0),
PG_GETARG_INT32(1),
PG_GETARG_INT32(2)));
}

/*
* Returns character data converted from numeric data. The character data
* is right-justified, with a specified length and decimal precision.
Expand Down
Loading

0 comments on commit d1f9dfd

Please sign in to comment.