Skip to content

Commit

Permalink
Add support for pgvector.
Browse files Browse the repository at this point in the history
[closes #612]

Signed-off-by: Mark Paluch <[email protected]>
  • Loading branch information
mp911de committed Dec 7, 2023
1 parent d193789 commit fc546e4
Show file tree
Hide file tree
Showing 9 changed files with 949 additions and 54 deletions.
98 changes: 50 additions & 48 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,54 +425,55 @@ When available, the driver registers also an array variant of the codec.

This reference table shows the type mapping between [PostgreSQL][p] and Java data types:

| PostgreSQL Type | Supported Data Type |
|:------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------|
| [`bigint`][psql-bigint-ref] | [**`Long`**][java-long-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref] |
| [`bit`][psql-bit-ref] | Not yet supported.|
| [`bit varying`][psql-bit-ref] | Not yet supported.|
| [`boolean or bool`][psql-boolean-ref] | [`Boolean`][java-boolean-ref]|
| [`box`][psql-box-ref] | **`Box`**|
| [`bytea`][psql-bytea-ref] | [**`ByteBuffer`**][java-ByteBuffer-ref], [`byte[]`][java-byte-ref], [`Blob`][r2dbc-blob-ref]|
| [`character`][psql-character-ref] | [`String`][java-string-ref]|
| [`character varying`][psql-character-ref] | [`String`][java-string-ref]|
| [`cidr`][psql-cidr-ref] | Not yet supported.|
| [`circle`][psql-circle-ref] | **`Circle`**|
| [`date`][psql-date-ref] | [`LocalDate`][java-ld-ref]|
| [`double precision`][psql-floating-point-ref] | [**`Double`**][java-double-ref], [`Float`][java-float-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [enumerated types][psql-enum-ref] | Client code `Enum` types through `EnumCodec`|
| [`geometry`][postgis-ref] | **`org.locationtech.jts.geom.Geometry`**|
| [`hstore`][psql-hstore-ref] | [**`Map`**][java-map-ref]|
| [`inet`][psql-inet-ref] | [**`InetAddress`**][java-inet-ref]|
| [`integer`][psql-integer-ref] | [**`Integer`**][java-integer-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`interval`][psql-interval-ref] | **`Interval`**|
| [`json`][psql-json-ref] | **`Json`**, [`String`][java-string-ref]. Reading: `ByteBuf`[`byte[]`][java-primitive-ref], [`ByteBuffer`][java-ByteBuffer-ref], [`String`][java-string-ref], [`InputStream`][java-inputstream-ref]|
| [`jsonb`][psql-json-ref] | **`Json`**, [`String`][java-string-ref]. Reading: `ByteBuf`[`byte[]`][java-primitive-ref], [`ByteBuffer`][java-ByteBuffer-ref], [`String`][java-string-ref], [`InputStream`][java-inputstream-ref]|
| [`line`][psql-line-ref] | **`Line`**|
| [`lseg`][psql-lseq-ref] | **`Lseg`**|
| [`macaddr`][psql-macaddr-ref] | Not yet supported.|
| [`macaddr8`][psql-macaddr8-ref] | Not yet supported.|
| [`money`][psql-money-ref] | Not yet supported. Please don't use this type. It is a very poor implementation. |
| [`name`][psql-name-ref] | [**`String`**][java-string-ref]
| [`numeric`][psql-bignumeric-ref] | [`BigDecimal`][java-bigdecimal-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigInteger`][java-biginteger-ref]|
| [`oid`][psql-oid-ref] | [**`Integer`**][java-integer-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`path`][psql-path-ref] | **`Path`**|
| [`pg_lsn`][psql-pg_lsn-ref] | Not yet supported.|
| [`point`][psql-point-ref] | **`Point`**|
| [`polygon`][psql-polygon-ref] | **`Polygon`**|
| [`real`][psql-real-ref] | [**`Float`**][java-float-ref], [`Double`][java-double-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`smallint`][psql-smallint-ref] | [**`Short`**][java-short-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`smallserial`][psql-smallserial-ref] | [**`Integer`**][java-integer-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`serial`][psql-serial-ref] | [**`Long`**][java-long-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`text`][psql-text-ref] | [**`String`**][java-string-ref], [`Clob`][r2dbc-clob-ref]|
| [`time [without time zone]`][psql-time-ref] | [`LocalTime`][java-lt-ref]|
| [`time [with time zone]`][psql-time-ref] | [`OffsetTime`][java-ot-ref]|
| [`timestamp [without time zone]`][psql-time-ref]|[**`LocalDateTime`**][java-ldt-ref], [`LocalTime`][java-lt-ref], [`LocalDate`][java-ld-ref], [`java.util.Date`][java-legacy-date-ref]|
| [`timestamp [with time zone]`][psql-time-ref] | [**`OffsetDatetime`**][java-odt-ref], [`ZonedDateTime`][java-zdt-ref], [`Instant`][java-instant-ref]|
| [`tsquery`][psql-tsquery-ref] | Not yet supported.|
| [`tsvector`][psql-tsvector-ref] | Not yet supported.|
| [`txid_snapshot`][psql-txid_snapshot-ref] | Not yet supported.|
| [`uuid`][psql-uuid-ref] | [**`UUID`**][java-uuid-ref], [`String`][java-string-ref]||
| [`xml`][psql-xml-ref] | Not yet supported. |
| PostgreSQL Type | Supported Data Type |
|:-------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------|
| [`bigint`][psql-bigint-ref] | [**`Long`**][java-long-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref] |
| [`bit`][psql-bit-ref] | Not yet supported.|
| [`bit varying`][psql-bit-ref] | Not yet supported.|
| [`boolean or bool`][psql-boolean-ref] | [`Boolean`][java-boolean-ref]|
| [`box`][psql-box-ref] | **`Box`**|
| [`bytea`][psql-bytea-ref] | [**`ByteBuffer`**][java-ByteBuffer-ref], [`byte[]`][java-byte-ref], [`Blob`][r2dbc-blob-ref]|
| [`character`][psql-character-ref] | [`String`][java-string-ref]|
| [`character varying`][psql-character-ref] | [`String`][java-string-ref]|
| [`cidr`][psql-cidr-ref] | Not yet supported.|
| [`circle`][psql-circle-ref] | **`Circle`**|
| [`date`][psql-date-ref] | [`LocalDate`][java-ld-ref]|
| [`double precision`][psql-floating-point-ref] | [**`Double`**][java-double-ref], [`Float`][java-float-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [enumerated types][psql-enum-ref] | Client code `Enum` types through `EnumCodec`|
| [`geometry`][postgis-ref] | **`org.locationtech.jts.geom.Geometry`**|
| [`hstore`][psql-hstore-ref] | [**`Map`**][java-map-ref]|
| [`inet`][psql-inet-ref] | [**`InetAddress`**][java-inet-ref]|
| [`integer`][psql-integer-ref] | [**`Integer`**][java-integer-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`interval`][psql-interval-ref] | **`Interval`**|
| [`json`][psql-json-ref] | **`Json`**, [`String`][java-string-ref]. Reading: `ByteBuf`, [`byte[]`][java-primitive-ref], [`ByteBuffer`][java-ByteBuffer-ref], [`String`][java-string-ref], [`InputStream`][java-inputstream-ref]|
| [`jsonb`][psql-json-ref] | **`Json`**, [`String`][java-string-ref]. Reading: `ByteBuf`, [`byte[]`][java-primitive-ref], [`ByteBuffer`][java-ByteBuffer-ref], [`String`][java-string-ref], [`InputStream`][java-inputstream-ref]|
| [`line`][psql-line-ref] | **`Line`**|
| [`lseg`][psql-lseq-ref] | **`Lseg`**|
| [`macaddr`][psql-macaddr-ref] | Not yet supported.|
| [`macaddr8`][psql-macaddr8-ref] | Not yet supported.|
| [`money`][psql-money-ref] | Not yet supported. Please don't use this type. It is a very poor implementation. |
| [`name`][psql-name-ref] | [**`String`**][java-string-ref]|
| [`numeric`][psql-bignumeric-ref] | [`BigDecimal`][java-bigdecimal-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigInteger`][java-biginteger-ref]|
| [`oid`][psql-oid-ref] | [**`Integer`**][java-integer-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`path`][psql-path-ref] | **`Path`**|
| [`pg_lsn`][psql-pg_lsn-ref] | Not yet supported.|
| [`point`][psql-point-ref] | **`Point`**|
| [`polygon`][psql-polygon-ref] | **`Polygon`**|
| [`real`][psql-real-ref] | [**`Float`**][java-float-ref], [`Double`][java-double-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`smallint`][psql-smallint-ref] | [**`Short`**][java-short-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Integer`][java-integer-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`smallserial`][psql-smallserial-ref] | [**`Integer`**][java-integer-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Long`][java-long-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`serial`][psql-serial-ref] | [**`Long`**][java-long-ref], [`Boolean`][java-boolean-ref], [`Byte`][java-byte-ref], [`Short`][java-short-ref], [`Integer`][java-integer-ref], [`BigDecimal`][java-bigdecimal-ref], [`BigInteger`][java-biginteger-ref]|
| [`text`][psql-text-ref] | [**`String`**][java-string-ref], [`Clob`][r2dbc-clob-ref]|
| [`time [without time zone]`][psql-time-ref] | [`LocalTime`][java-lt-ref]|
| [`time [with time zone]`][psql-time-ref] | [`OffsetTime`][java-ot-ref]|
| [`timestamp [without time zone]`][psql-time-ref] |[**`LocalDateTime`**][java-ldt-ref], [`LocalTime`][java-lt-ref], [`LocalDate`][java-ld-ref], [`java.util.Date`][java-legacy-date-ref]|
| [`timestamp [with time zone]`][psql-time-ref] | [**`OffsetDateTime`**][java-odt-ref], [`ZonedDateTime`][java-zdt-ref], [`Instant`][java-instant-ref]|
| [`tsquery`][psql-tsquery-ref] | Not yet supported.|
| [`tsvector`][psql-tsvector-ref] | Not yet supported.|
| [`txid_snapshot`][psql-txid_snapshot-ref] | Not yet supported.|
| [`uuid`][psql-uuid-ref] | [**`UUID`**][java-uuid-ref], [`String`][java-string-ref]|
| [`xml`][psql-xml-ref] | Not yet supported. |
| [`vector`][psql-vector-ref] | **`Vector`**, [`float[]`][java-float-ref] |

Types in **bold** indicate the native (default) Java type.

Expand Down Expand Up @@ -550,6 +551,7 @@ Support for the following single-dimensional arrays (read and write):
[psql-xml-ref]: https://www.postgresql.org/docs/current/datatype-xml.html
[psql-runtime-config]: https://www.postgresql.org/docs/current/runtime-config-client.html
[postgis-ref]: http://postgis.net/workshops/postgis-intro/geometries.html
[psql-vector-ref]: https://github.com/pgvector/pgvector

[r2dbc-blob-ref]: https://r2dbc.io/spec/0.9.0.RELEASE/api/io/r2dbc/spi/Blob.html
[r2dbc-clob-ref]: https://r2dbc.io/spec/0.9.0.RELEASE/api/io/r2dbc/spi/Clob.html
Expand Down
17 changes: 11 additions & 6 deletions src/main/java/io/r2dbc/postgresql/codec/BuiltinDynamicCodecs.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import reactor.util.annotation.Nullable;

import java.util.Arrays;
import java.util.Collections;
import java.util.stream.Collectors;

/**
Expand All @@ -44,21 +45,24 @@ enum BuiltinCodec {
public boolean isSupported() {
return this.jtsPresent;
}
};
}, VECTOR("vector");

private final String name;

BuiltinCodec(String name) {
this.name = name;
}

public Codec<?> createCodec(ByteBufAllocator byteBufAllocator, int oid) {
public Iterable<Codec<?>> createCodec(ByteBufAllocator byteBufAllocator, int oid, int typarray) {

switch (this) {
case HSTORE:
return new HStoreCodec(byteBufAllocator, oid);
return Collections.singletonList(new HStoreCodec(byteBufAllocator, oid));
case POSTGIS_GEOMETRY:
return new PostgisGeometryCodec(oid);
return Collections.singletonList(new PostgisGeometryCodec(oid));
case VECTOR:
VectorCodec vectorCodec = new VectorCodec(byteBufAllocator, oid, typarray);
return Arrays.asList(vectorCodec, new VectorCodec.VectorArrayCodec(byteBufAllocator, vectorCodec), new VectorFloatCodec(byteBufAllocator, oid));
default:
throw new UnsupportedOperationException(String.format("Codec %s for OID %d not supported", name(), oid));
}
Expand Down Expand Up @@ -93,11 +97,12 @@ public Publisher<Void> register(PostgresqlConnection connection, ByteBufAllocato
.flatMap(it -> it.map((row, rowMetadata) -> {

int oid = PostgresqlObjectId.toInt(row.get("oid", Long.class));
int typarray = PostgresqlObjectId.toInt(row.get("typarray", Long.class));
String typname = row.get("typname", String.class);

BuiltinCodec lookup = BuiltinCodec.lookup(typname);
if (lookup.isSupported()) {
registry.addLast(lookup.createCodec(byteBufAllocator, oid));
lookup.createCodec(byteBufAllocator, oid, typarray).forEach(registry::addLast);
}

return EMPTY;
Expand All @@ -106,7 +111,7 @@ public Publisher<Void> register(PostgresqlConnection connection, ByteBufAllocato
}

private PostgresqlStatement createQuery(PostgresqlConnection connection) {
return connection.createStatement(String.format("SELECT oid, typname FROM pg_catalog.pg_type WHERE typname IN (%s)", getPlaceholders()));
return connection.createStatement(String.format("SELECT oid, typname, typarray FROM pg_catalog.pg_type WHERE typname IN (%s)", getPlaceholders()));
}

private static String getPlaceholders() {
Expand Down
128 changes: 128 additions & 0 deletions src/main/java/io/r2dbc/postgresql/codec/Vector.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* Copyright 2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.r2dbc.postgresql.codec;

import io.r2dbc.postgresql.util.Assert;

import java.util.Arrays;
import java.util.Collection;

/**
* Value object that maps to the {@code vector} datatype provided by Postgres pgvector.
*
* @since 1.0.3
*/
public class Vector {

    /** Shared instance returned for every zero-length vector. */
    private static final Vector EMPTY = new Vector(new float[0]);

    /** Backing values; never handed out or retained by reference so the object stays immutable. */
    private final float[] vec;

    /**
     * Create a {@link Vector} that takes ownership of {@code vec}.
     * Callers inside this class must pass an array that nobody else holds a reference to.
     *
     * @param vec the vector values, checked with {@code Assert.requireNonNull}
     */
    private Vector(float[] vec) {
        this.vec = Assert.requireNonNull(vec, "Vector must not be null");
    }

    /**
     * Create a new empty {@link Vector}.
     *
     * @return the empty {@link Vector} object
     */
    public static Vector empty() {
        return EMPTY;
    }

    /**
     * Create a new {@link Vector} given {@code vector} points.
     * The given array is copied, later changes to it are not reflected in the returned object.
     *
     * @param vec the vector values
     * @return the new {@link Vector} object, or the shared empty instance if {@code vec} has no elements
     */
    public static Vector of(float... vec) {
        Assert.requireNonNull(vec, "Vector must not be null");
        // Defensive copy: without it the caller could mutate the array afterwards and thereby
        // change the result of equals/hashCode/toString of an already created Vector.
        return vec.length == 0 ? empty() : new Vector(vec.clone());
    }

    /**
     * Create a new {@link Vector} given {@code vector} points.
     * Each element is converted through {@link Number#floatValue()}.
     *
     * @param vec the vector values, must not contain {@code null} elements
     * @return the new {@link Vector} object, or the shared empty instance if {@code vec} is empty
     */
    public static Vector of(Collection<? extends Number> vec) {
        Assert.requireNonNull(vec, "Vector must not be null");

        if (vec.isEmpty()) {
            return empty();
        }

        float[] floats = new float[vec.size()];
        int index = 0;
        for (Number number : vec) {
            Number next = Assert.requireNonNull(number, "Vector must not contain null elements");
            floats[index++] = next.floatValue();
        }

        // floats was allocated here and is not visible to anyone else, no further copy required.
        return new Vector(floats);
    }

    /**
     * Return the vector values.
     *
     * @return a copy of the vector values; modifying the returned array does not affect this object.
     */
    public float[] getVector() {
        if (this.vec.length == 0) {
            // A zero-length array has no elements that could be modified, so it is safe to share.
            return this.vec;
        }
        return this.vec.clone();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        Vector other = (Vector) o;
        return Arrays.equals(this.vec, other.vec);
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(this.vec);
    }

    /**
     * Render the values as a comma-separated list enclosed in square brackets without
     * whitespace, for example {@code [1.0,2.0,3.0]}.
     *
     * @return the textual representation of this vector
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append('[');

        for (int i = 0; i < this.vec.length; i++) {
            if (i != 0) {
                builder.append(',');
            }
            builder.append(this.vec[i]);
        }
        builder.append(']');

        return builder.toString();
    }
}
Loading

0 comments on commit fc546e4

Please sign in to comment.