Skip to content

Commit

Permalink
Do not export text for now
Browse files: browse the repository at this point in the history
  • Loading branch information
smolnar committed May 29, 2024
1 parent 0df8926 commit 6ed7900
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 14 deletions.
14 changes: 1 addition & 13 deletions lib/tasks/ml.rake
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,7 @@ def batch_decrees(batch_size: 10_000, include_text: false)
Arel.sql(
'ARRAY_AGG(DISTINCT legislation_subareas.value) FILTER (WHERE legislation_subareas.value IS NOT NULL)'
),
Arel.sql('ARRAY_AGG(DISTINCT legislations.value) FILTER (WHERE legislations.value IS NOT NULL)'),
Arel.sql(
"
ARRAY_TO_STRING(
(
SELECT ARRAY_AGG(decree_pages.text ORDER BY decree_pages.number ASC) FROM decree_pages
WHERE decree_pages.decree_id = decrees.id
GROUP BY decree_pages.decree_id
),
''
) AS text
"
)
Arel.sql('ARRAY_AGG(DISTINCT legislations.value) FILTER (WHERE legislations.value IS NOT NULL)')
)
end

Expand Down
2 changes: 1 addition & 1 deletion ml/decree-embeddings/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def base_embed_decrees(vocabulary, decrees):
vectorizer_fit_time_in_ms = (time() - vectorizer_fit_start_time) * 1000

logger.info(
f"Vectorized [{len(vectorizer.get_feature_names_out())}] features of [{len(decrees)}] decrees in [{vectorizer_fit_time_in_ms:.2f}ms]"
f"Vectorized [{len(vectorizer.get_feature_names_out())}] features of [{len(decrees)}] in [{vectorizer_fit_time_in_ms:.2f}ms]"
)

embeddings = []
Expand Down

0 comments on commit 6ed7900

Please sign in to comment.