Skip to content

Commit

Permalink
Merge branch 'master' into improve_typo
Browse files: browse the repository at this point in the history
  • Loading branch information
gmechali authored Oct 28, 2024
2 parents: d2926dc + 499b981; commit 2754ef8
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
29 changes: 22 additions & 7 deletions build/cdc_data/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ if [[ $OUTPUT_DIR == "" ]]; then
exit 1
fi

if [[ $DATA_RUN_MODE != "" ]]; then
if [[ $DATA_RUN_MODE != "schemaupdate" ]]; then
echo "DATA_RUN_MODE must be either empty or 'schemaupdate'"
exit 1
fi
echo "DATA_RUN_MODE=$DATA_RUN_MODE"
else
DATA_RUN_MODE="customdc"
fi

echo "INPUT_DIR=$INPUT_DIR"
echo "OUTPUT_DIR=$OUTPUT_DIR"

Expand All @@ -51,7 +61,7 @@ ADDITIONAL_CATALOG_PATH=$DC_NL_EMBEDDINGS_DIR/custom_catalog.yaml
CUSTOM_EMBEDDINGS_INDEX=user_all_minilm_mem

# Set IS_CUSTOM_DC var to true.
# This is used by the embeddings builder to set up a custom dc env.
# This is used by the embeddings builder to set up a custom dc env.
export IS_CUSTOM_DC=true

if [[ $USE_SQLITE == "true" ]]; then
Expand All @@ -67,15 +77,20 @@ cd $WORKSPACE_DIR/import/simple
# Run importer.
python3 -m stats.main \
--input_dir=$INPUT_DIR \
--output_dir=$DC_OUTPUT_DIR
--output_dir=$DC_OUTPUT_DIR \
--mode=$DATA_RUN_MODE

# cd back to workspace dir to run the embeddings builder.
cd $WORKSPACE_DIR

# Run embeddings builder.
python3 -m tools.nl.embeddings.build_embeddings \
--embeddings_name=$CUSTOM_EMBEDDINGS_INDEX \
if [[ $DATA_RUN_MODE == "schemaupdate" ]]; then
echo "Skipping embeddings builder because run mode is 'schemaupdate'."
echo "Schema update complete."
else
# Run embeddings builder.
python3 -m tools.nl.embeddings.build_embeddings \
--embeddings_name=$CUSTOM_EMBEDDINGS_INDEX \
--output_dir=$DC_NL_EMBEDDINGS_DIR \
--additional_catalog_path=$ADDITIONAL_CATALOG_PATH

echo "Data loading completed."
echo "Data loading complete."
fi
2 changes: 1 addition & 1 deletion import
Submodule import updated 38 files
+5 −5 run_test.sh
+1 −1 simple/run_stats.sh
+45 −18 simple/stats/db.py
+56 −39 simple/stats/runner.py
+143 −22 simple/tests/stats/db_test.py
+2 −2 simple/tests/stats/entities_importer_test.py
+2 −2 simple/tests/stats/events_importer_test.py
+2 −2 simple/tests/stats/mcf_importer_test.py
+2 −2 simple/tests/stats/observations_importer_test.py
+25 −3 simple/tests/stats/runner_test.py
+2 −2 simple/tests/stats/schema_test.py
+40 −0 simple/tests/stats/test_data/db/input/sqlite_current_schema_populated.sql
+36 −0 simple/tests/stats/test_data/db/input/sqlite_old_schema_populated.sql
+2 −0 ...tests/stats/test_data/runner/expected/input_dir_driven_with_existing_old_schema_data/key_value_store.db.csv
+5 −0 simple/tests/stats/test_data/runner/expected/input_dir_driven_with_existing_old_schema_data/nl/sentences.csv
+31 −0 ...le/tests/stats/test_data/runner/expected/input_dir_driven_with_existing_old_schema_data/observations.db.csv
+109 −0 simple/tests/stats/test_data/runner/expected/input_dir_driven_with_existing_old_schema_data/triples.db.csv
+2 −0 simple/tests/stats/test_data/runner/expected/schema_update_only/key_value_store.db.csv
+4 −0 simple/tests/stats/test_data/runner/expected/schema_update_only/observations.db.csv
+6 −0 simple/tests/stats/test_data/runner/expected/schema_update_only/triples.db.csv
+4 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/article_entities.csv
+4 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/author_entities.csv
+44 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/config.json
+15 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/countries.csv
+36 −0 ...stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/sqlite_old_schema_populated.sql
+5 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/variable_per_row.csv
+12 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/variables.mcf
+3 −0 simple/tests/stats/test_data/runner/input/input_dir_driven_with_existing_old_schema_data/wikidataids.csv
+4 −0 simple/tests/stats/test_data/runner/input/schema_update_only/article_entities.csv
+4 −0 simple/tests/stats/test_data/runner/input/schema_update_only/author_entities.csv
+44 −0 simple/tests/stats/test_data/runner/input/schema_update_only/config.json
+15 −0 simple/tests/stats/test_data/runner/input/schema_update_only/countries.csv
+36 −0 simple/tests/stats/test_data/runner/input/schema_update_only/sqlite_old_schema_populated.sql
+5 −0 simple/tests/stats/test_data/runner/input/schema_update_only/variable_per_row.csv
+12 −0 simple/tests/stats/test_data/runner/input/schema_update_only/variables.mcf
+3 −0 simple/tests/stats/test_data/runner/input/schema_update_only/wikidataids.csv
+32 −0 simple/tests/stats/test_util.py
+2 −2 simple/tests/stats/variable_per_row_importer_test.py

0 comments on commit 2754ef8

Please sign in to comment.