Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat/sql source #185

Merged
merged 6 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
## 0.1.1-dev0
## 0.1.1-dev1

### Enhancements

* **Update KDB.AI vectorstore integration to 1.4**
* **Add sqlite and postgres source connectors**

## 0.1.0

Expand Down
1 change: 1 addition & 0 deletions requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pytest-mock
unstructured
pytest-asyncio
pytest_tagging
faker

# Connector specific deps
cryptography
Expand Down
14 changes: 9 additions & 5 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This file was autogenerated by uv via the following command:
# uv pip compile test.in --output-file test.txt --no-strip-extras
# uv pip compile test.in --output-file test.txt --no-strip-extras --python-version 3.9
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
Expand Down Expand Up @@ -27,7 +27,7 @@ click==8.1.7
# python-oxmsg
coverage[toml]==7.6.3
# via pytest-cov
cryptography==43.0.1
cryptography==43.0.3
# via
# -r test.in
# unstructured-client
Expand All @@ -43,6 +43,8 @@ exceptiongroup==1.2.2
# via
# anyio
# pytest
faker==30.6.0
# via -r test.in
filetype==1.2.0
# via unstructured
fsspec==2024.5.0
Expand Down Expand Up @@ -91,7 +93,7 @@ googleapis-common-protos[grpc]==1.65.0
# grpcio-status
grpc-google-iam-v1==0.13.1
# via google-cloud-resource-manager
grpcio==1.66.2
grpcio==1.67.0
# via
# -c ./common/constraints.txt
# google-api-core
Expand Down Expand Up @@ -121,7 +123,7 @@ langdetect==1.0.9
# via unstructured
lxml==5.3.0
# via unstructured
marshmallow==3.22.0
marshmallow==3.23.0
# via dataclasses-json
mypy-extensions==1.0.0
# via typing-inspect
Expand Down Expand Up @@ -159,7 +161,7 @@ protobuf==4.23.4
# grpc-google-iam-v1
# grpcio-status
# proto-plus
psutil==6.0.0
psutil==6.1.0
# via unstructured
pyasn1==0.6.1
# via
Expand Down Expand Up @@ -194,6 +196,7 @@ pytest-tagging==1.5.3
# via -r test.in
python-dateutil==2.8.2
# via
# faker
# google-cloud-bigquery
# unstructured-client
python-iso639==2024.4.27
Expand Down Expand Up @@ -242,6 +245,7 @@ tqdm==4.66.5
typing-extensions==4.12.2
# via
# anyio
# faker
# pydantic
# pydantic-core
# pypdf
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Compose stack that starts a PostgreSQL container for the SQL source tests.
services:
  pgvector:
    image: postgres
    restart: always
    container_name: postgres_src
    ports:
      # Host port 5433 -> container port 5432. Quoted so every YAML parser
      # reads the mapping as a string rather than guessing a numeric type.
      - "5433:5432"
    environment:
      POSTGRES_DB: test_db
      POSTGRES_USER: unstructured
      POSTGRES_PASSWORD: test
    volumes:
      # Schema file mounted into the image's init directory as init.sql.
      - ./postgres-schema.sql:/docker-entrypoint-initdb.d/init.sql
    healthcheck:
      # CMD-SHELL takes ONE command string. Splitting the flags into separate
      # list items hands them to the shell as positional parameters, so
      # pg_isready would run without -d/-U. Keep the command as one string.
      test: ["CMD-SHELL", "pg_isready -d test_db -U unstructured"]
      interval: 5s
      timeout: 60s
      retries: 5
      start_period: 5s
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test fixture table: one row per car, with a brand name and a price.
-- NOTE(review): SERIAL is a PostgreSQL type, so this is presumably the
-- Postgres variant of the schema; confirm against the file name.
CREATE TABLE cars (
-- Primary key; SERIAL lets the database assign the value on insert.
car_id SERIAL PRIMARY KEY,
brand TEXT NOT NULL,
price INTEGER NOT NULL
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Test fixture table: one row per car, with a brand name and a price.
-- NOTE(review): this copy uses a plain INTEGER primary key instead of SERIAL,
-- so it is presumably the SQLite variant of the schema; confirm against the
-- file name.
CREATE TABLE cars (
-- Primary key column.
car_id INTEGER PRIMARY KEY,
brand TEXT NOT NULL,
price INTEGER NOT NULL
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"directory_structure": [
"cars-1-5fb93ce5.csv",
"cars-10-5fb93ce5.csv",
"cars-11-5fb93ce5.csv",
"cars-12-5fb93ce5.csv",
"cars-13-5fb93ce5.csv",
"cars-14-5fb93ce5.csv",
"cars-15-5fb93ce5.csv",
"cars-16-5fb93ce5.csv",
"cars-17-5fb93ce5.csv",
"cars-18-5fb93ce5.csv",
"cars-19-5fb93ce5.csv",
"cars-2-5fb93ce5.csv",
"cars-20-5fb93ce5.csv",
"cars-21-5fb93ce5.csv",
"cars-22-5fb93ce5.csv",
"cars-23-5fb93ce5.csv",
"cars-24-5fb93ce5.csv",
"cars-25-5fb93ce5.csv",
"cars-26-5fb93ce5.csv",
"cars-27-5fb93ce5.csv",
"cars-28-5fb93ce5.csv",
"cars-29-5fb93ce5.csv",
"cars-3-5fb93ce5.csv",
"cars-30-5fb93ce5.csv",
"cars-31-5fb93ce5.csv",
"cars-32-5fb93ce5.csv",
"cars-33-5fb93ce5.csv",
"cars-34-5fb93ce5.csv",
"cars-35-5fb93ce5.csv",
"cars-36-5fb93ce5.csv",
"cars-37-5fb93ce5.csv",
"cars-38-5fb93ce5.csv",
"cars-39-5fb93ce5.csv",
"cars-4-5fb93ce5.csv",
"cars-40-5fb93ce5.csv",
"cars-5-5fb93ce5.csv",
"cars-6-5fb93ce5.csv",
"cars-7-5fb93ce5.csv",
"cars-8-5fb93ce5.csv",
"cars-9-5fb93ce5.csv"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-1-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.358926",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-1-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-10-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.376682",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-10-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-11-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-11-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-12-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-12-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-13-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-13-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-14-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-14-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-15-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.388928",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-15-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-16-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.400699",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-16-5fb93ce5.csv"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"identifier": "cars-17-5fb93ce5",
"connector_type": "postgres",
"source_identifiers": null,
"doc_type": "file",
"metadata": {
"url": null,
"version": null,
"record_locator": null,
"date_created": null,
"date_modified": null,
"date_processed": "1729522956.400699",
"permissions_data": null,
"filesize_bytes": null
},
"additional_metadata": {
"table_name": "cars",
"id_column": "car_id"
},
"reprocess": false,
"local_download_path": "/private/var/folders/n8/rps3wl195pj4p_0vyxqj5jrw0000gn/T/tmpxvakfpd4/cars-17-5fb93ce5.csv"
}
Loading