Skip to content

Commit

Permalink
Update age_load to load scalar property values with appropriate type (#…
Browse files Browse the repository at this point in the history
…1519) (#1525) (#1581)

* Update age_load to load scalar property values with appropriate type (#1519) (#1525)

Previously, property values from csv files were always loaded as strings. This
patch adds a boolean function parameter `load_as_agtype`. When this parameter
is true, values are converted to an appropriate scalar type (i.e. string, bool,
numeric, null) while loading. Otherwise, values are loaded as string. It uses
the agtype_value_from_cstring() function for conversion.

Additional change(s):
-------------------
 - Fix: for csv rows in edge files, create_agtype_from_list_i()'s start_index
   is corrected to 4

Note:
----
 - This commit applies both patches, #1519 and #1525.

Conflicts:
        src/backend/utils/load/ag_load_labels.c
        src/backend/utils/load/age_load.c
        src/include/utils/load/ag_load_edges.h
        src/include/utils/load/age_load.h

* Borrow json_validate() from PG16

The json_validate() function is borrowed from PG16 with minor changes,
because it does not exist in other versions of PG.

* Update json_validate() to support PG12 and below

For PG12 and below, pg_parse_json() does not return an error code.
A TRY-CATCH block is used to catch errors and return false instead.
  • Loading branch information
rafsun42 authored Feb 9, 2024
1 parent 601550b commit d993734
Show file tree
Hide file tree
Showing 14 changed files with 336 additions and 26 deletions.
18 changes: 18 additions & 0 deletions age--1.5.0--y.y.y.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,21 @@
-- Please add all additions, deletions, and modifications to the end of this
-- file. We need to keep the order of these changes.

--
-- Replace the previous four-argument load_labels_from_file(name, name, text,
-- bool) with a version that takes an additional `load_as_agtype` flag.
-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise
-- they are loaded as strings. The flag defaults to false.
--
DROP FUNCTION IF EXISTS ag_catalog.load_labels_from_file(name, name, text, bool);
CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name,
label_name name,
file_path text,
id_field_exists bool default true,
load_as_agtype bool default false)
RETURNS void
LANGUAGE c
AS 'MODULE_PATHNAME';

--
-- Replace the previous three-argument load_edges_from_file(name, name, text)
-- with a version that takes an additional `load_as_agtype` flag.
-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise
-- they are loaded as strings. The flag defaults to false.
--
DROP FUNCTION IF EXISTS ag_catalog.load_edges_from_file(name, name, text);
CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name,
label_name name,
file_path text,
load_as_agtype bool default false)
RETURNS void
LANGUAGE c
AS 'MODULE_PATHNAME';
7 changes: 7 additions & 0 deletions regress/age_load/data/conversion_edges.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
start_id, start_vertex_type, end_id, end_vertex_type, string, bool, numeric,
1, Person1, 1, Person2, "John Smith", "true", 1
1, Person1, 1, Person2, "John", "false", "-2"
1, Person1, 1, Person2, John Smith, true, 1.4
1, Person1, 1, Person2, """John""", false, -1e10
1, Person1, 1, Person2, null, false, 0
1, Person1, 1, Person2, nUll, false, "3.14"
7 changes: 7 additions & 0 deletions regress/age_load/data/conversion_vertices.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
id, string, bool, numeric,
1, "John Smith", "true", 1
2, "John", "false", "-2"
3, John Smith, true, 1.4
4, """John""", false, -1e10
5, null, false, 0
6, nUll, false, "3.14"
124 changes: 124 additions & 0 deletions regress/expected/age_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,127 @@ NOTICE: graph "agload_test_graph" has been dropped

(1 row)

--
-- Test property type conversion
--
SELECT create_graph('agload_conversion');
NOTICE: graph "agload_conversion" has been created
create_graph
--------------

(1 row)

-- vertex: load as agtype
SELECT create_vlabel('agload_conversion','Person1');
NOTICE: VLabel "Person1" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true);
load_labels_from_file
-----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype);
a
------------------------------------------------------------------------------------
{"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1}
{"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2}
{"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4}
{"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0}
{"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0}
{"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14}
(6 rows)

-- vertex: load as string
SELECT create_vlabel('agload_conversion','Person2');
NOTICE: VLabel "Person2" has been created
create_vlabel
---------------

(1 row)

SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false);
load_labels_from_file
-----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype);
a
-------------------------------------------------------------------------------------
{"id": "1", "bool": "true", "__id__": 1, "string": "John Smith", "numeric": "1"}
{"id": "2", "bool": "false", "__id__": 2, "string": "John", "numeric": "-2"}
{"id": "3", "bool": "true", "__id__": 3, "string": "John Smith", "numeric": "1.4"}
{"id": "4", "bool": "false", "__id__": 4, "string": "\"John\"", "numeric": "-1e10"}
{"id": "5", "bool": "false", "__id__": 5, "string": "null", "numeric": "0"}
{"id": "6", "bool": "false", "__id__": 6, "string": "nUll", "numeric": "3.14"}
(6 rows)

-- edge: load as agtype
SELECT create_elabel('agload_conversion','Edges1');
NOTICE: ELabel "Edges1" has been created
create_elabel
---------------

(1 row)

SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true);
load_edges_from_file
----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype);
a
--------------------------------------------------------------
{"bool": true, "string": "John Smith", "numeric": 1}
{"bool": false, "string": "John", "numeric": -2}
{"bool": true, "string": "John Smith", "numeric": 1.4}
{"bool": false, "string": "John", "numeric": -10000000000.0}
{"bool": false, "string": null, "numeric": 0}
{"bool": false, "string": "nUll", "numeric": 3.14}
(6 rows)

-- edge: load as string
SELECT create_elabel('agload_conversion','Edges2');
NOTICE: ELabel "Edges2" has been created
create_elabel
---------------

(1 row)

SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false);
load_edges_from_file
----------------------

(1 row)

SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype);
a
-------------------------------------------------------------
{"bool": "true", "string": "John Smith", "numeric": "1"}
{"bool": "false", "string": "John", "numeric": "-2"}
{"bool": "true", "string": "John Smith", "numeric": "1.4"}
{"bool": "false", "string": "\"John\"", "numeric": "-1e10"}
{"bool": "false", "string": "null", "numeric": "0"}
{"bool": "false", "string": "nUll", "numeric": "3.14"}
(6 rows)

SELECT drop_graph('agload_conversion', true);
NOTICE: drop cascades to 6 other objects
DETAIL: drop cascades to table agload_conversion._ag_label_vertex
drop cascades to table agload_conversion._ag_label_edge
drop cascades to table agload_conversion."Person1"
drop cascades to table agload_conversion."Person2"
drop cascades to table agload_conversion."Edges1"
drop cascades to table agload_conversion."Edges2"
NOTICE: graph "agload_conversion" has been dropped
drop_graph
------------

(1 row)

27 changes: 27 additions & 0 deletions regress/sql/age_load.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,30 @@ SELECT * FROM cypher('agload_test_graph', $$
$$) AS (result_1 agtype, result_2 agtype);

SELECT drop_graph('agload_test_graph', true);

--
-- Test property type conversion
--
-- Each CSV file is loaded twice, once with load_as_agtype = true and once with
-- load_as_agtype = false, into separate labels so the results can be compared.
--
SELECT create_graph('agload_conversion');

-- vertex: load as agtype (id_field_exists = true, load_as_agtype = true)
SELECT create_vlabel('agload_conversion','Person1');
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true);
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype);

-- vertex: load as string (id_field_exists = true, load_as_agtype = false)
SELECT create_vlabel('agload_conversion','Person2');
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false);
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype);

-- edge: load as agtype (load_as_agtype = true)
SELECT create_elabel('agload_conversion','Edges1');
SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true);
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype);

-- edge: load as string (load_as_agtype = false)
SELECT create_elabel('agload_conversion','Edges2');
SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false);
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype);

-- clean up: drop the graph along with the labels created above
SELECT drop_graph('agload_conversion', true);
10 changes: 8 additions & 2 deletions sql/age_main.sql
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,23 @@ CREATE FUNCTION ag_catalog.drop_label(graph_name name, label_name name,
LANGUAGE c
AS 'MODULE_PATHNAME';

--
-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise
-- loaded as string.
--
CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name,
label_name name,
file_path text,
id_field_exists bool default true)
id_field_exists bool default true,
load_as_agtype bool default false)
RETURNS void
LANGUAGE c
AS 'MODULE_PATHNAME';

CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name,
label_name name,
file_path text)
file_path text,
load_as_agtype bool default false)
RETURNS void
LANGUAGE c
AS 'MODULE_PATHNAME';
Expand Down
3 changes: 1 addition & 2 deletions src/backend/utils/adt/agtype.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ typedef enum /* type categories for datum_to_agtype */
} agt_type_category;

static inline Datum agtype_from_cstring(char *str, int len);
static inline agtype_value *agtype_value_from_cstring(char *str, int len);
size_t check_string_length(size_t len);
static void agtype_in_agtype_annotation(void *pstate, char *annotation);
static void agtype_in_object_start(void *pstate);
Expand Down Expand Up @@ -352,7 +351,7 @@ Datum agtype_out(PG_FUNCTION_ARGS)
* Uses the agtype parser (with hooks) to construct an agtype.
*/

static inline agtype_value *agtype_value_from_cstring(char *str, int len)
agtype_value *agtype_value_from_cstring(char *str, int len)
{
agtype_lex_context *lex;
agtype_in_state state;
Expand Down
6 changes: 4 additions & 2 deletions src/backend/utils/load/ag_load_edges.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data)
end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int);

props = create_agtype_from_list_i(cr->header, cr->fields,
n_fields, 3);
n_fields, 4, cr->load_as_agtype);

insert_edge_simple(cr->graph_id, cr->object_name,
object_graph_id, start_vertex_graph_id,
Expand Down Expand Up @@ -158,7 +158,8 @@ int create_edges_from_csv_file(char *file_path,
char *graph_name,
Oid graph_id,
char *object_name,
int object_id )
int object_id,
bool load_as_agtype)
{

FILE *fp;
Expand Down Expand Up @@ -195,6 +196,7 @@ int create_edges_from_csv_file(char *file_path,
cr.graph_id = graph_id;
cr.object_name = object_name;
cr.object_id = object_id;
cr.load_as_agtype = load_as_agtype;

while ((bytes_read=fread(buf, 1, 1024, fp)) > 0)
{
Expand Down
7 changes: 5 additions & 2 deletions src/backend/utils/load/ag_load_labels.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data)
object_graph_id = make_graphid(cr->object_id, label_id_int);

props = create_agtype_from_list(cr->header, cr->fields,
n_fields, label_id_int);
n_fields, label_id_int,
cr->load_as_agtype);
insert_vertex_simple(cr->graph_id, cr->object_name,
object_graph_id, props);
pfree(props);
Expand Down Expand Up @@ -145,7 +146,8 @@ int create_labels_from_csv_file(char *file_path,
Oid graph_id,
char *object_name,
int object_id,
bool id_field_exists)
bool id_field_exists,
bool load_as_agtype)
{

FILE *fp;
Expand Down Expand Up @@ -184,6 +186,7 @@ int create_labels_from_csv_file(char *file_path,
cr.object_name = object_name;
cr.object_id = object_id;
cr.id_field_exists = id_field_exists;
cr.load_as_agtype = load_as_agtype;



Expand Down
Loading

0 comments on commit d993734

Please sign in to comment.