Skip to content

Commit

Permalink
feat/sql source (#185)
Browse files Browse the repository at this point in the history
* implement sqlite version of sql indexer and downloader

* add integration test for sqlite source connector

* Drop ids from copied filedata

* migrate env setup over to integration test folder

* add postgres source connector with tests

* bump changelog
  • Loading branch information
rbiseck3 authored Oct 21, 2024
1 parent 7306ac1 commit 977fc0a
Show file tree
Hide file tree
Showing 105 changed files with 2,303 additions and 29 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
## 0.1.1-dev0
## 0.1.1-dev1

### Enhancements

* **Update KDB.AI vectorstore integration to 1.4**
* **Add sqlite and postgres source connectors**

## 0.1.0

Expand Down
1 change: 1 addition & 0 deletions requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pytest-mock
unstructured
pytest-asyncio
pytest_tagging
faker

# Connector specific deps
cryptography
Expand Down
14 changes: 9 additions & 5 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file was autogenerated by uv via the following command:
# uv pip compile test.in --output-file test.txt --no-strip-extras
# uv pip compile test.in --output-file test.txt --no-strip-extras --python-version 3.9
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
Expand Down Expand Up @@ -27,7 +27,7 @@ click==8.1.7
# python-oxmsg
coverage[toml]==7.6.3
# via pytest-cov
cryptography==43.0.1
cryptography==43.0.3
# via
# -r test.in
# unstructured-client
Expand All @@ -43,6 +43,8 @@ exceptiongroup==1.2.2
# via
# anyio
# pytest
faker==30.6.0
# via -r test.in
filetype==1.2.0
# via unstructured
fsspec==2024.5.0
Expand Down Expand Up @@ -91,7 +93,7 @@ googleapis-common-protos[grpc]==1.65.0
# grpcio-status
grpc-google-iam-v1==0.13.1
# via google-cloud-resource-manager
grpcio==1.66.2
grpcio==1.67.0
# via
# -c ./common/constraints.txt
# google-api-core
Expand Down Expand Up @@ -121,7 +123,7 @@ langdetect==1.0.9
# via unstructured
lxml==5.3.0
# via unstructured
marshmallow==3.22.0
marshmallow==3.23.0
# via dataclasses-json
mypy-extensions==1.0.0
# via typing-inspect
Expand Down Expand Up @@ -159,7 +161,7 @@ protobuf==4.23.4
# grpc-google-iam-v1
# grpcio-status
# proto-plus
psutil==6.0.0
psutil==6.1.0
# via unstructured
pyasn1==0.6.1
# via
Expand Down Expand Up @@ -194,6 +196,7 @@ pytest-tagging==1.5.3
# via -r test.in
python-dateutil==2.8.2
# via
# faker
# google-cloud-bigquery
# unstructured-client
python-iso639==2024.4.27
Expand Down Expand Up @@ -242,6 +245,7 @@ tqdm==4.66.5
typing-extensions==4.12.2
# via
# anyio
# faker
# pydantic
# pydantic-core
# pypdf
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Docker Compose definition for a single PostgreSQL container that is seeded
# from ./postgres-schema.sql and published on host port 5433.
services:
  # NOTE(review): the service key says "pgvector" but the image is plain
  # `postgres`; the key is kept unchanged so existing references keep working.
  pgvector:
    image: postgres
    restart: always
    container_name: postgres_src
    ports:
      # host:container. Quoted so the mapping is always read as a string.
      - "5433:5432"
    environment:
      POSTGRES_DB: test_db
      POSTGRES_USER: unstructured
      POSTGRES_PASSWORD: test
    volumes:
      # The official postgres image runs *.sql files found in
      # /docker-entrypoint-initdb.d when it initializes an empty data directory.
      - ./postgres-schema.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # CMD-SHELL takes ONE command string that is handed to the shell. When the
      # flags are given as separate list items they are not passed to
      # pg_isready, so the database/user options must be part of that string.
      test: ["CMD-SHELL", "pg_isready -d test_db -U unstructured"]
      interval: 5s
      timeout: 60s
      retries: 5
      start_period: 5s
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Creates the `cars` table: a primary key plus two required columns.
-- NOTE(review): SERIAL is PostgreSQL syntax, so this is presumably the
-- Postgres fixture schema; confirm against the file name.
CREATE TABLE cars (
-- SERIAL: integer column whose default is drawn from an implicit sequence.
car_id SERIAL PRIMARY KEY,
-- Required free-text value.
brand TEXT NOT NULL,
-- Required integer value; unit/currency is not specified here.
price INTEGER NOT NULL
);
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Creates the `cars` table: an integer primary key plus two required columns.
-- NOTE(review): presumably the SQLite fixture schema (no SERIAL type is used);
-- confirm against the file name.
CREATE TABLE cars (
-- Integer primary key. If this runs on SQLite, an INTEGER PRIMARY KEY column
-- is assigned automatically when an insert omits it; verify the target engine.
car_id INTEGER PRIMARY KEY,
-- Required free-text value.
brand TEXT NOT NULL,
-- Required integer value; unit/currency is not specified here.
price INTEGER NOT NULL
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"directory_structure": [
"cars-1-5fb93ce5.csv",
"cars-10-5fb93ce5.csv",
"cars-11-5fb93ce5.csv",
"cars-12-5fb93ce5.csv",
"cars-13-5fb93ce5.csv",
"cars-14-5fb93ce5.csv",
"cars-15-5fb93ce5.csv",
"cars-16-5fb93ce5.csv",
"cars-17-5fb93ce5.csv",
"cars-18-5fb93ce5.csv",
"cars-19-5fb93ce5.csv",
"cars-2-5fb93ce5.csv",
"cars-20-5fb93ce5.csv",
"cars-21-5fb93ce5.csv",
"cars-22-5fb93ce5.csv",
"cars-23-5fb93ce5.csv",
"cars-24-5fb93ce5.csv",
"cars-25-5fb93ce5.csv",
"cars-26-5fb93ce5.csv",
"cars-27-5fb93ce5.csv",
"cars-28-5fb93ce5.csv",
"cars-29-5fb93ce5.csv",
"cars-3-5fb93ce5.csv",
"cars-30-5fb93ce5.csv",
"cars-31-5fb93ce5.csv",
"cars-32-5fb93ce5.csv",
"cars-33-5fb93ce5.csv",
"cars-34-5fb93ce5.csv",
"cars-35-5fb93ce5.csv",
"cars-36-5fb93ce5.csv",
"cars-37-5fb93ce5.csv",
"cars-38-5fb93ce5.csv",
"cars-39-5fb93ce5.csv",
"cars-4-5fb93ce5.csv",
"cars-40-5fb93ce5.csv",
"cars-5-5fb93ce5.csv",
"cars-6-5fb93ce5.csv",
"cars-7-5fb93ce5.csv",
"cars-8-5fb93ce5.csv",
"cars-9-5fb93ce5.csv"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-1-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.358926",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-1-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-10-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.376682",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-10-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-11-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-11-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-12-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-12-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-13-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-13-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-14-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-14-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-15-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-15-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-16-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.400699",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-16-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-17-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.400699",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-17-5fb93ce5.csv"
}
Loading

0 comments on commit 977fc0a

Please sign in to comment.