diff --git a/.github/workflows/installcheck.yaml b/.github/workflows/installcheck.yaml
index fa8b9a4ef..a40c0a9c9 100644
--- a/.github/workflows/installcheck.yaml
+++ b/.github/workflows/installcheck.yaml
@@ -22,25 +22,39 @@ jobs:
           path: ~/pg16
-          key: ${{ runner.os }}-v1-pg16-${{ env.PG_COMMIT_HASH }}
+          # v2: a tree cached before the contrib extensions were added lacks them.
+          key: ${{ runner.os }}-v2-pg16-${{ env.PG_COMMIT_HASH }}
 
-      - name: Install PostgreSQL 16
+      - name: Install PostgreSQL 16 and some extensions
         if: steps.pg16cache.outputs.cache-hit != 'true'
         run: |
           git clone --depth 1 --branch REL_16_STABLE git://git.postgresql.org/git/postgresql.git ~/pg16source
           cd ~/pg16source
           ./configure --prefix=$HOME/pg16 CFLAGS="-std=gnu99 -ggdb -O0" --enable-cassert
           make install -j$(nproc) > /dev/null
+          cd contrib
+          cd fuzzystrmatch
+          make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null
+          cd ../pg_trgm
+          make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null
 
       - uses: actions/checkout@v3
 
-      - name: Build
+      - name: Build AGE
         id: build
         run: |
           make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc)
+
+      - name: Pull and build pgvector
+        id: pgvector
+        run: |
+          # Pin a release so regress/expected/pgvector.out stays reproducible.
+          git clone --depth 1 --branch v0.8.0 https://github.com/pgvector/pgvector.git
+          cd pgvector
+          make PG_CONFIG=$HOME/pg16/bin/pg_config install -j$(nproc) > /dev/null
 
       - name: Regression tests
         id: regression_tests
         run: |
-          make PG_CONFIG=$HOME/pg16/bin/pg_config installcheck
+          make PG_CONFIG=$HOME/pg16/bin/pg_config installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm"
         continue-on-error: true
 
       - name: Dump regression test errors
diff --git a/Makefile b/Makefile
index 400d5a7a1..c0a847830 100644
--- a/Makefile
+++ b/Makefile
@@ -112,8 +112,16 @@ REGRESS = scan \
           name_validation \
           jsonb_operators \
           list_comprehension \
-          map_projection \
-          drop
+          map_projection
+
+# Optional tests for third-party extensions; run them with e.g.
+#   make installcheck EXTRA_TESTS="pgvector fuzzystrmatch pg_trgm"
+ifneq ($(EXTRA_TESTS),)
+  REGRESS += $(EXTRA_TESTS)
+endif
+
+# drop is appended last so it still runs after any EXTRA_TESTS.
+REGRESS += drop
 
 srcdir=`pwd`
 
diff --git a/regress/expected/expr.out b/regress/expected/expr.out
index 51ee9c359..53d737c8c 100644
--- a/regress/expected/expr.out
+++ b/regress/expected/expr.out
@@ -8767,25 +8767,6 @@ SELECT *
FROM cypher('issue_1988', $$ {"id": 844424930131969, "label": "Part", "properties": {"set": "set", "match": "match", "merge": "merge", "create": "create", "delete": "delete", "part_num": 123}}::vertex (4 rows) --- --- Test external extension function logic for fuzzystrmatch --- -SELECT * FROM create_graph('fuzzystrmatch'); -NOTICE: graph "fuzzystrmatch" has been created - create_graph --------------- - -(1 row) - --- These should fail with extension not installed -SELECT * FROM cypher('fuzzystrmatch', $$ RETURN soundex("hello world!") $$) AS (result agtype); -ERROR: extension fuzzystrmatch is not installed for function soundex -LINE 1: SELECT * FROM cypher('fuzzystrmatch', $$ RETURN soundex("hel... - ^ -SELECT * FROM cypher('fuzzystrmatch', $$ RETURN difference("hello world!", "hello world!") $$) AS (result agtype); -ERROR: extension fuzzystrmatch is not installed for function difference -LINE 1: SELECT * FROM cypher('fuzzystrmatch', $$ RETURN difference("... - ^ -- -- Issue 2093: Server crashes when executing SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); -- @@ -8804,16 +8785,6 @@ SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); -- -- Cleanup -- -SELECT * FROM drop_graph('fuzzystrmatch', true); -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to table fuzzystrmatch._ag_label_vertex -drop cascades to table fuzzystrmatch._ag_label_edge -NOTICE: graph "fuzzystrmatch" has been dropped - drop_graph ------------- - -(1 row) - SELECT * FROM drop_graph('issue_1988', true); NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table issue_1988._ag_label_vertex diff --git a/regress/expected/fuzzystrmatch.out b/regress/expected/fuzzystrmatch.out new file mode 100644 index 000000000..1d4613ee3 --- /dev/null +++ b/regress/expected/fuzzystrmatch.out @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path=ag_catalog; +SELECT create_graph('graph'); +NOTICE: graph "graph" has been created + create_graph +-------------- + +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); +ERROR: function soundex does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$)... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Create the extension in the public schema +CREATE EXTENSION fuzzystrmatch SCHEMA public; +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); +ERROR: function soundex does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$)... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. 
+-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); + n +--- +(0 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return soundex(p.name) $$) AS (n agtype); + n +-------- + "J500" + "J500" + "J500" + "J200" + "J200" + "J200" + "J400" + "J420" + "J400" + "J400" + "J450" +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return levenshtein(p.name, "John") $$) AS (n agtype); + n +--- + 3 + 0 + 2 + 3 + 3 + 3 + 4 + 5 + 3 + 5 + 4 +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return difference(p.name, "John") $$) AS (n agtype); + n +--- + 4 + 4 + 4 + 3 + 3 + 3 + 3 + 2 + 3 + 3 + 2 +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return metaphone(p.name, 4) $$) AS (n agtype); + n +------- + "JN" + "JN" + "JN" + "JK" + "JKS" + "JK" + "JL" + "JLS" + "JL" + "JL" + "JLN" +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return dmetaphone(p.name) $$) AS (n agtype); + n +------- + "JN" + "JN" + "JN" + "JK" + "JKS" + "JK" + "JL" + "JLS" + "JL" + "JL" + "JLN" +(11 rows) + +-- Difference is basically similarity using soundex, https://www.postgresql.org/docs/current/fuzzystrmatch.html +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jon") DESC LIMIT 3$$) AS (n agtype); + n +------------------------------------------------------------------------------------ + {"id": 844424930131970, "label": "Person", "properties": {"name": "John"}}::vertex + {"id": 844424930131971, "label": "Person", "properties": {"name": "Jone"}}::vertex + {"id": 844424930131969, "label": "Person", "properties": {"name": "Jane"}}::vertex +(3 rows) + +SELECT * FROM cypher('graph', $$ 
MATCH (p) return p ORDER BY difference(p.name, "Jak") DESC LIMIT 3$$) AS (n agtype); + n +------------------------------------------------------------------------------------ + {"id": 844424930131972, "label": "Person", "properties": {"name": "Jack"}}::vertex + {"id": 844424930131973, "label": "Person", "properties": {"name": "Jax"}}::vertex + {"id": 844424930131974, "label": "Person", "properties": {"name": "Jake"}}::vertex +(3 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jil") DESC LIMIT 3$$) AS (n agtype); + n +-------------------------------------------------------------------------------------- + {"id": 844424930131975, "label": "Person", "properties": {"name": "Julie"}}::vertex + {"id": 844424930131977, "label": "Person", "properties": {"name": "Jill"}}::vertex + {"id": 844424930131978, "label": "Person", "properties": {"name": "Jillie"}}::vertex +(3 rows) + +-- Clean up +SELECT drop_graph('graph', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table graph._ag_label_vertex +drop cascades to table graph._ag_label_edge +drop cascades to table graph."Person" +NOTICE: graph "graph" has been dropped + drop_graph +------------ + +(1 row) + +DROP EXTENSION fuzzystrmatch CASCADE; diff --git a/regress/expected/pg_trgm.out b/regress/expected/pg_trgm.out new file mode 100644 index 000000000..5b2a2b9c8 --- /dev/null +++ b/regress/expected/pg_trgm.out @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path=ag_catalog; +SELECT create_graph('graph'); +NOTICE: graph "graph" has been created + create_graph +-------------- + +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); +ERROR: function show_trgm does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Create the extension in the public schema +CREATE EXTENSION pg_trgm SCHEMA public; +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); +ERROR: function show_trgm does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. 
+-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); + n +--- +(0 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) return show_trgm(p.name) $$) AS (n text[]); + n +------------------------------------- + {" j"," ja",ane,jan,"ne "} + {" j"," jo","hn ",joh,ohn} + {" j"," jo",jon,"ne ",one} + {" j"," ja",ack,"ck ",jac} + {" j"," ja","ax ",jax} + {" j"," ja",ake,jak,"ke "} + {" j"," ju","ie ",jul,lie,uli} + {" j"," ju",ius,jul,liu,uli,"us "} + {" j"," ji",ill,jil,"ll "} + {" j"," ji","ie ",ill,jil,lie,lli} + {" j"," ju","an ",ian,jul,lia,uli} +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) with p, similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); + n | s +----------+------------ + "Jone" | 0.5 + "John" | 0.2857143 + "Jax" | 0.14285715 + "Jane" | 0.125 + "Jack" | 0.125 + "Jake" | 0.125 + "Jill" | 0.125 + "Julie" | 0.11111111 + "Julius" | 0.1 + "Julian" | 0.1 + "Jillie" | 0.1 +(11 rows) + +SELECT * FROM cypher('graph', $$ MATCH (p) with p, word_similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); + n | s +----------+------------ + "Jone" | 0.6 + "John" | 0.4 + "Jax" | 0.25 + "Jane" | 0.2 + "Jack" | 0.2 + "Jake" | 0.2 + "Jill" | 0.2 + "Julie" | 0.16666667 + "Julius" | 0.14285715 + "Julian" | 0.14285715 + "Jillie" | 0.14285715 +(11 rows) + +-- Clean up +SELECT drop_graph('graph', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table graph._ag_label_vertex +drop cascades to table graph._ag_label_edge +drop cascades to table graph."Person" +NOTICE: graph "graph" has been dropped + 
drop_graph +------------ + +(1 row) + +DROP EXTENSION pg_trgm CASCADE; diff --git a/regress/expected/pgvector.out b/regress/expected/pgvector.out new file mode 100644 index 000000000..f1bd53ed4 --- /dev/null +++ b/regress/expected/pgvector.out @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +LOAD 'age'; +SET search_path=ag_catalog; +SELECT create_graph('graph'); +NOTICE: graph "graph" has been created + create_graph +-------------- + +(1 row) + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); +ERROR: function cosine_distance does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,... + ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Create the extension in the public schema +CREATE EXTENSION vector SCHEMA public; +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); +ERROR: function cosine_distance does not exist +LINE 1: SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,... 
+ ^ +HINT: If the function is from an external extension, make sure the extension is installed and the function is in the search path. +-- Should work +SET search_path=ag_catalog, public; +SELECT create_graph('graph'); +ERROR: graph "graph" already exists +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vector); + n +------------------ + [1.22,2.22,3.33] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); + n +--------------------------------- + [1.2197266,2.2207031,3.3300781] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); + n +-------------------------- + {1:1.22,2:2.22,3:3.33}/3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +------ + 17.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +--------------------- + 0.00853986601633272 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l1_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); + n +----- + 1.0 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN vector_dims("[1,2,3]") $$) AS (n agtype); + n +--- + 3 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN vector_norm("[1,2,3]") $$) AS (n agtype); + n +-------------------- + 3.7416573867739413 +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector); + n +----------------------------------- + [0.26726124,0.5345225,0.80178374] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); + n +------------------------------------- + [0.26726124, 0.5345225, 0.80178374] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector); + n +----------- + [2,3,4,5] +(1 row) + +SELECT 
* FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); + n +-------------- + [2, 3, 4, 5] +(1 row) + +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); + n +----- + 111 +(1 row) + +-- An example usage +SELECT * FROM cypher('graph', $$ + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic 
Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 
0.14327449]"}), + (:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) +$$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); + title | embedding +------------------------------+--------------------------------------------------- + "The Matrix" | [-0.07594558,0.04081754,0.2959212,-0.11921061] + "The Matrix Reloaded" | [0.30228978,-0.22839354,0.35070437,0.01262819] + "The Matrix Revolutions" | [0.12240622,-0.2975246,0.22620453,0.24454723] + "The Matrix Resurrections" | [0.34717247,-0.13820869,0.29214212,0.08090488] + "Inception" | [0.03923657,0.39284107,-0.20927092,-0.17770818] + "Interstellar" | [-0.29302418,-0.39615032,-0.23393948,-0.09601383] + "Avatar" | [-0.13663386,0.00635589,-0.03038832,-0.08252723] + "Blade Runner" | [0.27215558,-0.1479577,-0.09972772,-0.08234394] + "Blade Runner 2049" | [0.21560574,-0.07505179,-0.01331814,0.13403068] + "Minority Report" | [0.24008012,0.44954908,-0.30905488,0.15195407] + "Total Recall" | 
[-0.17471036,0.14695261,-0.06272433,-0.21795064] + "Elysium" | [-0.33280966,0.07733926,0.11015328,0.5338284] + "Gattaca" | [-0.21629286,0.31114665,0.08303899,0.4619976] + "The Fifth Element" | [-0.11528205,-0.0208782,-0.0735215,0.14327449] + "The Terminator" | [0.33666933,0.18040994,-0.01075103,-0.11117851] + "Terminator 2: Judgment Day" | [0.34698868,0.06439331,0.06232323,-0.19534875] + "Jurassic Park" | [0.01794725,-0.11434246,-0.46831816,-0.01049593] + "The Avengers" | [0.00546514,-0.3700517,-0.4261284,0.07968612] +(18 rows) + +-- Check the dimension of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.embedding) $$) AS (title agtype, dimension int); + title | dimension +------------------------------+----------- + "The Matrix" | 4 + "The Matrix Reloaded" | 4 + "The Matrix Revolutions" | 4 + "The Matrix Resurrections" | 4 + "Inception" | 4 + "Interstellar" | 4 + "Avatar" | 4 + "Blade Runner" | 4 + "Blade Runner 2049" | 4 + "Minority Report" | 4 + "Total Recall" | 4 + "Elysium" | 4 + "Gattaca" | 4 + "The Fifth Element" | 4 + "The Terminator" | 4 + "Terminator 2: Judgment Day" | 4 + "Jurassic Park" | 4 + "The Avengers" | 4 +(18 rows) + +-- Get top 4 most similar movies to The Terminator using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + +-- Get top 4 most similar movies to The Matrix using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total 
Recall" +(4 rows) + +-- l2 norm of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); + title | embedding +------------------------------+---------------------------------------------------- + "The Matrix" | "[-0.22980669,0.12351139,0.89543957,-0.36072403]" + "The Matrix Reloaded" | "[0.58534974,-0.44225806,0.6790991,0.024453051]" + "The Matrix Revolutions" | "[0.26431033,-0.6424414,0.4884408,0.528048]" + "The Matrix Resurrections" | "[0.72151977,-0.28723562,0.60715157,0.16814256]" + "Inception" | "[0.08159459,0.81693435,-0.43519026,-0.3695538]" + "Interstellar" | "[-0.5290723,-0.71527255,-0.4223914,-0.17335857]" + "Avatar" | "[-0.84023285,0.039085682,-0.18687363,-0.507503]" + "Blade Runner" | "[0.81074023,-0.44075987,-0.29708475,-0.2452992]" + "Blade Runner 2049" | "[0.8134027,-0.28314334,-0.05024454,0.50564945]" + "Minority Report" | "[0.39031598,0.7308651,-0.5024533,0.24704295]" + "Total Recall" | "[-0.54291505,0.4566574,-0.19491677,-0.67728484]" + "Elysium" | "[-0.517338,0.12022049,0.17122844,0.82981277]" + "Gattaca" | "[-0.35853538,0.51576865,0.13764863,0.765825]" + "The Fifth Element" | "[-0.5788842,-0.10483904,-0.36918527,0.7194471]" + "The Terminator" | "[0.84599304,0.45333964,-0.02701552,-0.27937278]" + "Terminator 2: Judgment Day" | "[0.8501332,0.15776564,0.15269388,-0.4786106]" + "Jurassic Park" | "[0.037194606,-0.23696794,-0.9705615,-0.02175219]" + "The Avengers" | "[0.009587915,-0.6492101,-0.7475897,0.13979948]" +(18 rows) + +-- Get top 4 most similar movies to The Terminator using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +------------------------------ + "The Terminator" + "Terminator 2: Judgment Day" + "Minority Report" + "Blade Runner" +(4 rows) + 
+-- Get top 4 most similar movies to The Matrix using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + title +---------------------------- + "The Matrix" + "The Matrix Reloaded" + "The Matrix Resurrections" + "Total Recall" +(4 rows) + +SELECT drop_graph('graph', true); +NOTICE: drop cascades to 3 other objects +DETAIL: drop cascades to table graph._ag_label_vertex +drop cascades to table graph._ag_label_edge +drop cascades to table graph."Movie" +NOTICE: graph "graph" has been dropped + drop_graph +------------ + +(1 row) + +DROP EXTENSION vector CASCADE; diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql index 8519fed40..c3ddee862 100644 --- a/regress/sql/expr.sql +++ b/regress/sql/expr.sql @@ -3536,14 +3536,6 @@ SELECT * FROM cypher('issue_1988', $$ SELECT * FROM cypher('issue_1988', $$ MATCH (p) RETURN p $$) as (p agtype); --- --- Test external extension function logic for fuzzystrmatch --- -SELECT * FROM create_graph('fuzzystrmatch'); --- These should fail with extension not installed -SELECT * FROM cypher('fuzzystrmatch', $$ RETURN soundex("hello world!") $$) AS (result agtype); -SELECT * FROM cypher('fuzzystrmatch', $$ RETURN difference("hello world!", "hello world!") $$) AS (result agtype); - -- -- Issue 2093: Server crashes when executing SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); -- @@ -3553,7 +3545,6 @@ SELECT agtype_hash_cmp(agtype_in('[null, null, null, null, null]')); -- -- Cleanup -- -SELECT * FROM drop_graph('fuzzystrmatch', true); SELECT * FROM drop_graph('issue_1988', true); SELECT * FROM drop_graph('issue_1953', true); SELECT * FROM drop_graph('expanded_map', true); diff --git a/regress/sql/fuzzystrmatch.sql b/regress/sql/fuzzystrmatch.sql new file mode 100644 index 000000000..b850f46ed --- /dev/null +++ b/regress/sql/fuzzystrmatch.sql @@ -0,0 +1,61 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path=ag_catalog; + +SELECT create_graph('graph'); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); + +-- Create the extension in the public schema +CREATE EXTENSION fuzzystrmatch SCHEMA public; + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN soundex("hello") $$) AS (n agtype); + +-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return soundex(p.name) $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return levenshtein(p.name, "John") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return difference(p.name, "John") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return metaphone(p.name, 4) $$) AS (n agtype); +SELECT * FROM 
cypher('graph', $$ MATCH (p) return dmetaphone(p.name) $$) AS (n agtype);
+
+-- Difference is basically similarity using soundex, https://www.postgresql.org/docs/current/fuzzystrmatch.html
+SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jon") DESC LIMIT 3$$) AS (n agtype);
+SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jak") DESC LIMIT 3$$) AS (n agtype);
+SELECT * FROM cypher('graph', $$ MATCH (p) return p ORDER BY difference(p.name, "Jil") DESC LIMIT 3$$) AS (n agtype);
+
+-- Clean up
+SELECT drop_graph('graph', true);
+DROP EXTENSION fuzzystrmatch CASCADE;
diff --git a/regress/sql/pg_trgm.sql b/regress/sql/pg_trgm.sql
new file mode 100644
index 000000000..a276e5913
--- /dev/null
+++ b/regress/sql/pg_trgm.sql
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +LOAD 'age'; +SET search_path=ag_catalog; + +SELECT create_graph('graph'); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); + +-- Create the extension in the public schema +CREATE EXTENSION pg_trgm SCHEMA public; + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN show_trgm("hello") $$) AS (n agtype); + +-- Should work +SET search_path=ag_catalog, public; +SELECT * FROM cypher('graph', $$ CREATE (:Person {name: "Jane"}), + (:Person {name: "John"}), + (:Person {name: "Jone"}), + (:Person {name: "Jack"}), + (:Person {name: "Jax"}), + (:Person {name: "Jake"}), + (:Person {name: "Julie"}), + (:Person {name: "Julius"}), + (:Person {name: "Jill"}), + (:Person {name: "Jillie"}), + (:Person {name: "Julian"}) +$$) AS (n agtype); +SELECT * FROM cypher('graph', $$ MATCH (p) return show_trgm(p.name) $$) AS (n text[]); +SELECT * FROM cypher('graph', $$ MATCH (p) with p, similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); +SELECT * FROM cypher('graph', $$ MATCH (p) with p, word_similarity(p.name, "Jon") as sim return p.name, sim ORDER BY sim DESC $$) AS (n agtype, s real); + +-- Clean up +SELECT drop_graph('graph', true); +DROP EXTENSION pg_trgm CASCADE; \ No newline at end of file diff --git a/regress/sql/pgvector.sql b/regress/sql/pgvector.sql new file mode 100644 index 000000000..816d6eb9f --- /dev/null +++ b/regress/sql/pgvector.sql @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +LOAD 'age'; +SET search_path=ag_catalog; + +SELECT create_graph('graph'); + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); + +-- Create the extension in the public schema +CREATE EXTENSION vector SCHEMA public; + +-- Should error out +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype); + +-- Should work +SET search_path=ag_catalog, public; + +SELECT create_graph('graph'); +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec); +SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec); + +SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN l1_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN vector_dims("[1,2,3]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN vector_norm("[1,2,3]") $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector); +SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n 
vector); +SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype); +SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit); + +-- An example usage +SELECT * FROM cypher('graph', $$ + CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: "[-0.07594558, 0.04081754, 0.29592122, -0.11921061]"}), + (:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: "[0.30228977, -0.22839354, 0.35070436, 0.01262819]"}), + (:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: "[ 0.12240622, -0.29752459, 0.22620453, 0.24454723]"}), + (:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: "[ 0.34717246, -0.13820869, 0.29214213, 0.08090488]"}), + (:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: "[ 0.03923657, 0.39284106, -0.20927092, -0.17770818]"}), + (:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: "[-0.29302418, -0.39615033, -0.23393948, -0.09601383]"}), + (:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn 
between following orders and protecting the world he feels is his home.", embedding: "[-0.13663386, 0.00635589, -0.03038832, -0.08252723]"}), + (:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: "[ 0.27215557, -0.1479577, -0.09972772, -0.08234394]"}), + (:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: "[ 0.21560573, -0.07505179, -0.01331814, 0.13403069]"}), + (:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: "[ 0.24008012, 0.44954908, -0.30905488, 0.15195407]"}), + (:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: "[-0.17471036, 0.14695261, -0.06272433, -0.21795064]"}), + (:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: "[-0.33280967, 0.07733926, 0.11015328, 0.53382836]"}), + (:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: "[-0.21629286, 0.31114665, 0.08303899, 0.46199759]"}), + (:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: "[-0.11528205, -0.0208782, -0.0735215, 0.14327449]"}), + (:Movie {title: "The Terminator", year: 
1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: "[ 0.33666933, 0.18040994, -0.01075103, -0.11117851]"}), + (:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: "[ 0.34698868, 0.06439331, 0.06232323, -0.19534876]"}), + (:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: "[ 0.01794725, -0.11434246, -0.46831815, -0.01049593]"}), + (:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: "[ 0.00546514, -0.37005171, -0.42612838, 0.07968612]"}) +$$) AS (result agtype); +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector); + +-- Check the dimension of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.embedding) $$) AS (title agtype, dimension int); + +-- Get top 4 most similar movies to The Terminator using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); +-- Get top 4 most similar movies to The Matrix using cosine distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); +-- l2 norm of the embedding +SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=(l2_normalize(m.embedding))::text return m.title, m.embedding $$) AS (title agtype, embedding agtype); + +-- 
Get top 4 most similar movies to The Terminator using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); +-- Get top 4 most similar movies to The Matrix using l2 distance +SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"}) + RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4 +$$) AS (title agtype); + +SELECT drop_graph('graph', true); +DROP EXTENSION vector CASCADE; \ No newline at end of file diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c index dc454ffd5..2f26b020c 100644 --- a/src/backend/parser/cypher_expr.c +++ b/src/backend/parser/cypher_expr.c @@ -25,6 +25,8 @@ #include "postgres.h" #include "catalog/pg_proc.h" +#include "catalog/dependency.h" +#include "commands/extension.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -34,6 +36,7 @@ #include "parser/cypher_clause.h" #include "parser/parse_oper.h" #include "parser/parse_relation.h" +#include "parser/parse_type.h" #include "utils/builtins.h" #include "utils/catcache.h" #include "utils/float.h" @@ -97,14 +100,24 @@ static Node *transform_column_ref_for_indirection(cypher_parsestate *cpstate, ColumnRef *cr); static Node *transform_cypher_list_comprehension(cypher_parsestate *cpstate, cypher_unwind *expr); -static bool is_fuzzystrmatch_function(FuncCall *fn); -static void check_for_extension_functions(char *extension, FuncCall *fn); -static List *cast_agtype_input_to_other_type(cypher_parsestate *cpstate, - FuncCall *fn, List *targs); -static Node *cast_input_to_output_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid); +static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, + FuncCall *fn, List *targs, + Form_pg_proc procform, + char *extension); +static List 
*cast_agtype_args_to_target_type(cypher_parsestate *cpstate, + Form_pg_proc procform, + List *fargs, + Oid *target_types); +static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, + Oid source_oid, Oid target_oid); static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, FuncExpr *fexpr); +static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found); +static char *get_mapped_extension(Oid func_oid); +static bool is_extension_external(char *extension); +static bool is_pgvector_datatype(char *typename); +static char *construct_age_function_name(char *funcname); +static bool function_exists(char *funcname, char *extension); /* transform a cypher expression */ Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr, @@ -1549,6 +1562,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, { List *fname; FuncCall *fnode; + ParseState *pstate; /* verify input parameter */ Assert (cpstate != NULL); @@ -1556,6 +1570,7 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, /* create the qualified function name, schema first */ fname = list_make1(makeString("ag_catalog")); + pstate = &cpstate->pstate; /* append the name of the requested typecast function */ if (pg_strcasecmp(ctypecast->typecast, "edge") == 0) @@ -1600,7 +1615,40 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, { fname = lappend(fname, makeString(FUNC_AGTYPE_TYPECAST_PG_TEXT)); } + else if (is_pgvector_datatype(ctypecast->typecast)) + { + TypeName *target_typname; + Oid source_oid; + Oid target_oid; + Node *expr; + + /* transform the expr before casting */ + expr = transform_cypher_expr_recurse(cpstate, + ctypecast->expr); + /* get the source and target oids */ + target_typname = makeTypeNameFromNameList(list_make1( + makeString(ctypecast->typecast))); + target_oid = typenameTypeId(pstate, target_typname); + source_oid = exprType(expr); + + if (source_oid == AGTYPEOID) + { + /* + * Cast to text and then to target 
type, since we cant + * directly cast agtype to pgvector datatypes. + */ + expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); + expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); + } + else + { + /* try a direct cast, it will error out if not possible */ + expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); + } + + return expr; + } /* if none was found, error out */ else { @@ -1617,164 +1665,120 @@ static Node *transform_cypher_typecast(cypher_parsestate *cpstate, return transform_FuncCall(cpstate, fnode); } -/* is the function part of the fuzzystrmatch extension */ -static bool is_fuzzystrmatch_function(FuncCall *fn) +static Node *transform_external_ext_FuncCall(cypher_parsestate *cpstate, + FuncCall *fn, List *targs, + Form_pg_proc procform, + char *extension) { - char *funcname = (((String*)linitial(fn->funcname))->sval); + ParseState *pstate = &cpstate->pstate; + FuncExpr *fexpr = NULL; + Node *retval = NULL; + Node *last_srf = pstate->p_last_srf; + Oid *proargtypes; - if (pg_strcasecmp(funcname, "soundex") == 0 || - pg_strcasecmp(funcname, "difference") == 0 || - pg_strcasecmp(funcname, "daitch_mokotoff") == 0 || - pg_strcasecmp(funcname, "soundex_tsvector") == 0 || - pg_strcasecmp(funcname, "levenshtein") == 0 || - pg_strcasecmp(funcname, "levenshtein_less_equal") == 0 || - pg_strcasecmp(funcname, "metaphone") == 0 || - pg_strcasecmp(funcname, "dmetaphone") == 0) - { - return true; - } - return false; -} + /* make sure procform in not NULL */ + Assert(procform != NULL); + proargtypes = procform->proargtypes.values; -/* - * Cast a function's input parameter list from agtype to that function's input - * type. This is used for functions that don't take agtype as input and where - * there isn't an implicit cast to do this for us. 
- */ -static List *cast_agtype_input_to_other_type(cypher_parsestate *cpstate, - FuncCall *fn, List *targs) -{ - char *funcname = (((String*)linitial(fn->funcname))->sval); - int nargs = fn->args->length; - CatCList *catlist = NULL; - List *new_targs = NIL; - ListCell *lc = NULL; - int i = 0; + /* cast the agtype arguments to the types accepted by function */ + targs = cast_agtype_args_to_target_type(cpstate, procform, targs, proargtypes); - /* get a list of matching functions from the sys cache */ - catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname)); + /* now get the function node for the external function */ + fexpr = (FuncExpr *)ParseFuncOrColumn(pstate, fn->funcname, targs, + last_srf, fn, false, + fn->location); - /* iterate through the list of functions for ones that match */ - for (i = 0; i < catlist->n_members; i++) + /* + * This will cast TEXT output to AGTYPE. It will error out if this is + * not possible to do. For TEXT to AGTYPE we need to wrap the output + * due to issues with creating a cast from TEXT to AGTYPE. + */ + if (fexpr->funcresulttype == TEXTOID) { - HeapTuple proctup = &catlist->members[i]->tuple; - Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); - - /* check that the names, number of args, and variadic match */ - if (pg_strcasecmp(funcname, procform->proname.data) == 0 && - nargs == procform->pronargs && - fn->func_variadic == procform->provariadic) - { - Oid *proargtypes = procform->proargtypes.values; - int j = 0; - - /* - * Rebuild targs with castings to the function's input types from - * targ's output type. - */ - foreach(lc, targs) - { - Oid poid = proargtypes[j]; - Node *targ = lfirst(lc); - Oid toid = exprType(targ); - - /* cast the arg. this will error out if it can't be done. 
*/ - targ = cast_input_to_output_type(cpstate, targ, toid, poid); + retval = wrap_text_output_to_agtype(cpstate, fexpr); + } + else + { + retval = (Node *)fexpr; + } - /* add it to the new argument list */ - new_targs = lappend(new_targs, targ); - j++; - } + /* additional casts or wraps can be done here for other types */ - /* free the old args and replace them with the new ones */ - pfree_if_not_null(targs); - targs = new_targs; - break; - } + /* flag that an aggregate was found during a transform */ + if (retval != NULL && retval->type == T_Aggref) + { + cpstate->exprHasAgg = true; } - /* we need to release the cache list */ - ReleaseSysCacheList(catlist); - return targs; + + /* we can just return it here */ + return retval; } /* - * Verify that a called function, that is mapped to a specific - * function in some other extension, is loaded. Otherwise, bail - * out with an error, stating the issue. - * - * Note: some code borrowed from FuncnameGetCandidates + * Cast a function's input parameter list from agtype to that function's input + * type. This is used for functions that don't take agtype as input and where + * there isn't an implicit cast to do this for us. 
*/ -static void check_for_extension_functions(char *extension, FuncCall *fn) +static List *cast_agtype_args_to_target_type(cypher_parsestate *cpstate, + Form_pg_proc procform, + List *fargs, + Oid *target_types) { - char *funcname = (((String*)linitial(fn->funcname))->sval); - CatCList *catlist = NULL; - bool found = false; + char *funcname = NameStr(procform->proname); + int nargs = procform->pronargs; + ListCell *lc = NULL; int i = 0; - /* get a list of matching functions */ - catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname)); - - /* if the catalog list is empty, the extension isn't loaded */ - if (catlist->n_members == 0) + /* verify the length of args are same */ + if (list_length(fargs) != nargs) { ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("extension %s is not installed for function %s", - extension, funcname))); + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("function %s requires %d arguments, %d given", + funcname, nargs, list_length(fargs)))); } - /* iterate through them and verify that they are in the search path */ - for (i = 0; i < catlist->n_members; i++) + /* iterate through the function's args */ + foreach (lc, fargs) { - HeapTuple proctup = &catlist->members[i]->tuple; - Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); - List *asp = fetch_search_path(false); - ListCell *nsp; + char *target_typname; + Node *expr = lfirst(lc); + Oid source_oid = exprType(expr); + Oid target_oid = target_types[i]; - /* - * Consider only procs that are in the search path and are not in - * the temp namespace. 
- */ - foreach(nsp, asp) - { - Oid oid = lfirst_oid(nsp); + /* get the typename from target_oid */ + target_typname = format_type_be(target_oid); - if (procform->pronamespace == oid && - isTempNamespace(procform->pronamespace) == false) - { - pfree_if_not_null(asp); - found = true; - break; - } + /* cast the agtype to the target type */ + if (source_oid == AGTYPEOID && is_pgvector_datatype(target_typname)) + { + /* + * There is no cast from agtype to vector, so we first + * cast agtype to text and then text to vector. + */ + expr = cast_to_target_type(cpstate, expr, source_oid, TEXTOID); + expr = cast_to_target_type(cpstate, expr, TEXTOID, target_oid); } - - if (found) + /* additional casts can be added here for other types */ + else { - break; + /* try a direct cast, it will error out if not possible */ + expr = cast_to_target_type(cpstate, expr, source_oid, target_oid); } - pfree_if_not_null(asp); - } - - /* if we didn't find it, it isn't in the search path */ - if (!found) - { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("extension %s is not in search path for function %s", - extension, funcname))); + lfirst(lc) = expr; + i++; } - /* release the system cache list */ - ReleaseSysCacheList(catlist); + return fargs; } /* * Cast an input type to an output type, error out if not possible. * Thanks to Taha for this idea. */ -static Node *cast_input_to_output_type(cypher_parsestate *cpstate, Node *expr, - Oid source_oid, Oid target_oid) +static Node *cast_to_target_type(cypher_parsestate *cpstate, Node *expr, + Oid source_oid, Oid target_oid) { ParseState *pstate = &cpstate->pstate; @@ -1832,6 +1836,181 @@ static Node *wrap_text_output_to_agtype(cypher_parsestate *cpstate, return retval; } +/* + * Returns Form_pg_proc struct for given function, if the function + * is not in search path, it is not considered. 
+ */ +static Form_pg_proc get_procform(FuncCall *fn, bool err_not_found) +{ + CatCList *catlist = NULL; + Form_pg_proc procform = NULL; + int nargs; + int i = 0; + List *asp; + bool found = false; + char *funcname = (((String*)linitial(fn->funcname))->sval); + + /* get a list of matching functions */ + catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname)); + + if (catlist->n_members == 0) + { + ReleaseSysCacheList(catlist); + return NULL; + } + + asp = fetch_search_path(false); + nargs = list_length(fn->args); + + /* iterate through them and verify that they are in the search path */ + for (i = 0; i < catlist->n_members; i++) + { + ListCell *nsp; + HeapTuple proctup = &catlist->members[i]->tuple; + procform = (Form_pg_proc) GETSTRUCT(proctup); + + /* + * Check if the function name, number of arguments, and + * variadic match before checking if it is in the search + * path. + */ + if (pg_strcasecmp(funcname, procform->proname.data) == 0 && + nargs == procform->pronargs && + fn->func_variadic == procform->provariadic) + { + foreach(nsp, asp) + { + Oid oid = lfirst_oid(nsp); + + if (procform->pronamespace == oid && + isTempNamespace(procform->pronamespace) == false) + { + found = true; + break; + } + } + } + + if (found) + { + break; + } + + /* reset procform */ + procform = NULL; + } + + /* Error out if function not found */ + if (err_not_found && (procform == NULL)) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", funcname), + errhint("If the function is from an external extension, " + "make sure the extension is installed and the " + "function is in the search path."))); + } + + /* we need to release the cache list */ + ReleaseSysCacheList(catlist); + pfree_if_not_null(asp); + + return procform; +} + +static char *get_mapped_extension(Oid func_oid) +{ + Oid extension_oid; + char *extension = NULL; + + extension_oid = getExtensionOfObject(ProcedureRelationId, func_oid); + extension = 
get_extension_name(extension_oid); + + return extension; +} + +/* Returns true when extension is non-NULL and is not "age" (case-insensitive). */ +static bool is_extension_external(char *extension) +{ + return ((extension != NULL) && + (pg_strcasecmp(extension, "age") != 0)); +} + +/* + * Returns true when typename is one of the pgvector type names handled + * here: vector, halfvec or sparsevec (compared case-insensitively). + * pg_strcasecmp returns 0 on a match, so each result is tested against 0. + */ +static bool is_pgvector_datatype(char *typename) +{ + return (pg_strcasecmp(typename, "vector") == 0 || + pg_strcasecmp(typename, "halfvec") == 0 || + pg_strcasecmp(typename, "sparsevec") == 0); +} + +/* + * Returns a newly palloc'd string holding funcname converted to lower + * case and prefixed with age_. + */ +static char *construct_age_function_name(char *funcname) +{ + int pnlen = strlen(funcname); + char *ag_name = palloc(pnlen + 5); + int i; + + /* copy in the prefix - all AGE functions are prefixed with age_ */ + strncpy(ag_name, "age_", 4); + + /* + * All AGE function names are in lower case. So, copy in the funcname + * in lower case. The cast keeps a negative char value from reaching + * tolower, which is undefined behavior. + */ + for (i = 0; i < pnlen; i++) + { + ag_name[i + 4] = tolower((unsigned char) funcname[i]); + } + + /* terminate it with 0 */ + ag_name[i + 4] = 0; + + return ag_name; +} + + +/* + * Checks if a function exists. If the extension name is given, + * then it checks if the function exists in that extension. 
+ */ +static bool function_exists(char *funcname, char *extension) +{ + CatCList *catlist = NULL; + bool found = false; + int i = 0; + + /* get a list of matching functions */ + catlist = SearchSysCacheList1(PROCNAMEARGSNSP, CStringGetDatum(funcname)); + + if (catlist->n_members == 0) + { + ReleaseSysCacheList(catlist); + return false; + } + else if (extension == NULL) + { + ReleaseSysCacheList(catlist); + return true; + } + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple proctup = &catlist->members[i]->tuple; + Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); + char *ext = get_mapped_extension(procform->oid); + + if (ext != NULL && pg_strcasecmp(ext, extension) == 0) + { + found = true; + break; + } + } + + /* we need to release the cache list */ + ReleaseSysCacheList(catlist); + + return found; +} + /* * Code borrowed from PG's transformFuncCall and updated for AGE */ @@ -1843,7 +2022,6 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn) List *fname = NIL; ListCell *arg; Node *retval = NULL; - bool found = false; /* Transform the list of arguments ... */ foreach(arg, fn->args) @@ -1857,120 +2035,84 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn) /* within group should not happen */ Assert(!fn->agg_within_group); - /* - * Check for cypher functions that map to the fuzzystrmatch extension and - * verify that the external functions exist. - */ - if (is_fuzzystrmatch_function(fn)) + /* If it is a qualified function call, let it through. */ + if (list_length(fn->funcname) > 1) { - /* abort if the extension isn't loaded or in the path */ - check_for_extension_functions("fuzzystrmatch", fn); - - /* everything looks good so mark found as true */ - found = true; + fname = fn->funcname; } - /* - * If we found a function that is part of an extension, which is in the - * search_path, then cast the agtype inputs to that function's type inputs. 
+ * Else We need to check if the function call is for + * age or for some external extension. */ - if (found) + else { - FuncExpr *fexpr = NULL; - - /* - * Coerce agtype inputs to function's inputs. this will error out if - * this is not possible to do. - */ - targs = cast_agtype_input_to_other_type(cpstate, fn, targs); - - /* now get the function node for the external function */ - fexpr = (FuncExpr *)ParseFuncOrColumn(pstate, fn->funcname, targs, - last_srf, fn, false, - fn->location); + char *name = strVal(linitial(fn->funcname)); + char *ag_name = construct_age_function_name(name); - /* - * This will cast TEXT outputs to AGTYPE. It will error out if this is - * not possible to do. For TEXT to AGTYPE we need to wrap the output - * due to issues with creating a cast from TEXT to AGTYPE. - */ - if (fexpr->funcresulttype == TEXTOID) + if (function_exists(ag_name, "age")) { - retval = wrap_text_output_to_agtype(cpstate, fexpr); - } - else - { - retval = (Node *)fexpr; - } + /* qualify the name with our schema name */ + fname = list_make2(makeString("ag_catalog"), makeString(ag_name)); - /* additional casts or wraps can be done here for other types */ + /* + * Currently 3 functions need the graph name passed in as the first + * argument - in addition to the other arguments: startNode, endNode, + * and vle. So, check for those 3 functions here and that the arg list + * is not empty. Then prepend the graph name if necessary. 
+ */ + if ((list_length(targs) != 0) && + (strcmp("startNode", name) == 0 || + strcmp("endNode", name) == 0 || + strcmp("vle", name) == 0 || + strcmp("vertex_stats", name) == 0)) + { + char *graph_name = cpstate->graph_name; + Datum d = string_to_agtype(graph_name); + Const *c = makeConst(AGTYPEOID, -1, InvalidOid, -1, d, false, + false); - /* flag that an aggregate was found during a transform */ - if (retval != NULL && retval->type == T_Aggref) - { - cpstate->exprHasAgg = true; + targs = lcons(c, targs); + } } - - /* we can just return it here */ - return retval; - } - - /* - * If the function name is not qualified and not from an extension, then it - * is one of ours. We need to construct its name, and qualify it, so that PG - * can find it. - */ - if (list_length(fn->funcname) == 1) - { - /* get the name, size, and the ag name allocated */ - char *name = ((String*)linitial(fn->funcname))->sval; - int pnlen = strlen(name); - char *ag_name = palloc(pnlen + 5); - int i; - - /* copy in the prefix - all AGE functions are prefixed with age_ */ - strncpy(ag_name, "age_", 4); - - /* - * All AGE function names are in lower case. So, copy in the name - * in lower case. + /* + * If it's not in age, check if it's a potential call to some function + * in another installed extension. */ - for (i = 0; i < pnlen; i++) + else if(function_exists(name, NULL)) { - ag_name[i + 4] = tolower(name[i]); - } - - /* terminate it with 0 */ - ag_name[i + 4] = 0; + Form_pg_proc procform = get_procform(fn, true); + char *extension = get_mapped_extension(procform->oid); - /* qualify the name with our schema name */ - fname = list_make2(makeString("ag_catalog"), makeString(ag_name)); - - /* - * Currently 3 functions need the graph name passed in as the first - * argument - in addition to the other arguments: startNode, endNode, - * and vle. So, check for those 3 functions here and that the arg list - * is not empty. Then prepend the graph name if necessary. 
- */ - if ((list_length(targs) != 0) && - (strcmp("startNode", name) == 0 || - strcmp("endNode", name) == 0 || - strcmp("vle", name) == 0 || - strcmp("vertex_stats", name) == 0)) + /* + * If the function is from another extension, transform + * it if possible and return the function expr. + */ + if (is_extension_external(extension)) + { + retval = transform_external_ext_FuncCall(cpstate, fn, targs, + procform, extension); + return retval; + } + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", name), + errhint("If the function is from an external extension, " + "make sure the extension is installed and the " + "function is in the search path."))); + } + } + /* no function found */ + else { - char *graph_name = cpstate->graph_name; - Datum d = string_to_agtype(graph_name); - Const *c = makeConst(AGTYPEOID, -1, InvalidOid, -1, d, false, - false); - - targs = lcons(c, targs); + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", name), + errhint("If the function is from an external extension, " + "make sure the extension is installed and the " + "function is in the search path."))); } - - } - /* If it is not one of our functions, pass the name list through */ - else - { - fname = fn->funcname; } /* ... and hand off to ParseFuncOrColumn */