Skip to content

Commit

Permalink
Release of 8.16.0 (#354)
Browse files Browse the repository at this point in the history
* add to release

* update formatting
  • Loading branch information
joemcelroy authored Nov 13, 2024
1 parent d968b49 commit 4793d83
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 182 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ jobs:
fail-fast: false
matrix:
es_stack:
- 8.14.2
- 8.15.0
- 8.16.0-SNAPSHOT
- 8.15.3
- 8.16.0
- 8.17.0-SNAPSHOT
runs-on: ubuntu-latest
services:
elasticsearch:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,30 @@


def get_client_es():
    """Build an Elasticsearch client from the credentials in ``../config.yml``.

    The YAML file is read on every call and must provide the ``cloud_id``
    and ``api_key`` keys; a missing key raises ``KeyError``.

    Returns:
        Elasticsearch: a client created with the loaded cloud ID and API key.
    """
    # NOTE: the path is relative to the process working directory.
    with open("../config.yml", "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)
    return Elasticsearch(cloud_id=config["cloud_id"], api_key=config["api_key"])


def get_text_vector(sentences):
    """Encode *sentences* with the ``all-MiniLM-L6-v2`` sentence-transformer.

    The model is loaded once and cached on the function object, so repeated
    calls do not pay the model-loading cost again.

    Args:
        sentences: value forwarded unchanged to ``SentenceTransformer.encode``.

    Returns:
        Whatever ``model.encode(sentences)`` returns.
    """
    model = getattr(get_text_vector, "_model", None)
    if model is None:
        model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        get_text_vector._model = model
    return model.encode(sentences)


def build_query(term=None, categories=None, product_types=None, brands=None):
must_query = [{"match_all": {}}] if not term else [{
"multi_match": {
"query": term,
"fields": ["name", "category", "description"]
}
}]
must_query = (
[{"match_all": {}}]
if not term
else [
{
"multi_match": {
"query": term,
"fields": ["name", "category", "description"],
}
}
]
)

filters = []
if categories:
Expand All @@ -42,17 +45,23 @@ def build_query(term=None, categories=None, product_types=None, brands=None):
filters.append({"terms": {"brand.keyword": brands}})

return {
"_source": ["id", "brand", "name", "price", "currency", "image_link", "category", "tag_list"],
"query": {
"bool": {
"must": must_query,
"filter": filters
}
}
"_source": [
"id",
"brand",
"name",
"price",
"currency",
"image_link",
"category",
"tag_list",
],
"query": {"bool": {"must": must_query, "filter": filters}},
}


def build_hybrid_query(term=None, categories=None, product_types=None, brands=None, hybrid=False):
def build_hybrid_query(
term=None, categories=None, product_types=None, brands=None, hybrid=False
):
# Standard query
organic_query = build_query(term, categories, product_types, brands)

Expand All @@ -65,81 +74,79 @@ def build_hybrid_query(term=None, categories=None, product_types=None, brands=No
"retriever": {
"rrf": {
"retrievers": [
{
"standard": {
"query": organic_query['query']
}
},
{"standard": {"query": organic_query["query"]}},
{
"knn": {
"field": "description_embeddings",
"query_vector": vector,
"k": 5,
"num_candidates": 20,
"filter": {
"bool": {
"filter": []
}
}
"filter": {"bool": {"filter": []}},
}
}
},
],
"rank_window_size": 20,
"rank_constant": 5
"rank_constant": 5,
}
},
"_source": organic_query['_source']
"_source": organic_query["_source"],
}

if categories:
query['retriever']['rrf']['retrievers'][1]['knn']['filter']['bool']['filter'].append({
"terms": {"category": categories}
})
query["retriever"]["rrf"]["retrievers"][1]["knn"]["filter"]["bool"][
"filter"
].append({"terms": {"category": categories}})
if product_types:
query['retriever']['rrf']['retrievers'][1]['knn']['filter']['bool']['filter'].append({
"terms": {"product_type": product_types}
})
query["retriever"]["rrf"]["retrievers"][1]["knn"]["filter"]["bool"][
"filter"
].append({"terms": {"product_type": product_types}})
if brands:
query['retriever']['rrf']['retrievers'][1]['knn']['filter']['bool']['filter'].append({
"terms": {"brand.keyword": brands}
})
query["retriever"]["rrf"]["retrievers"][1]["knn"]["filter"]["bool"][
"filter"
].append({"terms": {"brand.keyword": brands}})
else:
query = organic_query

return query


def search_products(
    term,
    categories=None,
    product_types=None,
    brands=None,
    promote_products=None,
    hybrid=False,
):
    """Search the ``products-catalog`` index and return simplified hits.

    Args:
        term: search text passed to ``build_hybrid_query``.
        categories: optional category filter values.
        product_types: optional product-type filter values.
        brands: optional brand filter values.
        promote_products: optional list of document ids to pin ahead of the
            organic results. Ignored when ``hybrid`` is true.
        hybrid: forwarded to ``build_hybrid_query``; when true, no pinned
            query is built.

    Returns:
        list[dict]: one dict per hit (at most 20) with the keys ``id``,
        ``brand``, ``name``, ``price``, ``currency``, ``image_link``,
        ``category`` and ``tags``.
    """
    query = build_hybrid_query(term, categories, product_types, brands, hybrid)

    # Pinned results wrap the organic query; only applied to non-hybrid search.
    if promote_products and not hybrid:
        query = {
            "query": {"pinned": {"ids": promote_products, "organic": query["query"]}},
            "_source": query["_source"],
        }

    print(query)
    response = get_client_es().search(index="products-catalog", body=query, size=20)

    results = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        print(f"Product Name: {source['name']}, Score: {hit['_score']}")

        results.append(
            {
                "id": source["id"],
                "brand": source["brand"],
                "name": source["name"],
                "price": source["price"],
                # Fall back to USD when the currency is missing or empty.
                "currency": source.get("currency") or "USD",
                "image_link": source["image_link"],
                "category": source["category"],
                "tags": source.get("tag_list", []),
            }
        )

    return results

Expand All @@ -149,51 +156,55 @@ def get_facets_data(term, categories=None, product_types=None, brands=None):
query["aggs"] = {
"product_types": {"terms": {"field": "product_type"}},
"categories": {"terms": {"field": "category"}},
"brands": {"terms": {"field": "brand.keyword"}}
"brands": {"terms": {"field": "brand.keyword"}},
}
response = get_client_es().search(index="products-catalog", body=query, size=0)

return {
"product_types": [
{"product_type": bucket['key'], "count": bucket['doc_count']}
for bucket in response['aggregations']['product_types']['buckets']
{"product_type": bucket["key"], "count": bucket["doc_count"]}
for bucket in response["aggregations"]["product_types"]["buckets"]
],
"categories": [
{"category": bucket['key'], "count": bucket['doc_count']}
for bucket in response['aggregations']['categories']['buckets']
{"category": bucket["key"], "count": bucket["doc_count"]}
for bucket in response["aggregations"]["categories"]["buckets"]
],
"brands": [
{"brand": bucket['key'], "count": bucket['doc_count']}
for bucket in response['aggregations']['brands']['buckets']
]
{"brand": bucket["key"], "count": bucket["doc_count"]}
for bucket in response["aggregations"]["brands"]["buckets"]
],
}


@app.route("/api/products/search", methods=["GET"])
def search():
    """Handle ``GET /api/products/search`` and return matching products as JSON.

    Query-string parameters read from the request:
        query: the search text.
        selectedCategories[], selectedProductTypes[], selectedBrands[]:
            repeatable filter values.
        hybrid: hybrid search is enabled only when the value equals "true",
            compared case-insensitively; it defaults to off.
    """
    query = request.args.get("query")
    categories = request.args.getlist("selectedCategories[]")
    product_types = request.args.getlist("selectedProductTypes[]")
    brands = request.args.getlist("selectedBrands[]")
    hybrid = request.args.get("hybrid", "False").lower() == "true"
    results = search_products(
        query,
        categories=categories,
        product_types=product_types,
        brands=brands,
        promote_products=promote_products_free_gluten,
        hybrid=hybrid,
    )
    return jsonify(results)


@app.route("/api/products/facets", methods=["GET"])
def facets():
    """Handle ``GET /api/products/facets`` and return facet data as JSON.

    Reads ``query`` plus the repeatable ``selectedCategories[]``,
    ``selectedProductTypes[]`` and ``selectedBrands[]`` parameters from the
    query string and forwards them to ``get_facets_data``.
    """
    query = request.args.get("query")
    categories = request.args.getlist("selectedCategories[]")
    product_types = request.args.getlist("selectedProductTypes[]")
    brands = request.args.getlist("selectedBrands[]")
    results = get_facets_data(
        query, categories=categories, product_types=product_types, brands=brands
    )
    return jsonify(results)


if __name__ == "__main__":
    # NOTE(review): debug=True enables Flask's debug mode; keep it for local
    # development only and do not use it for a publicly reachable deployment.
    app.run(debug=True)
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
import csv
import json

desired_fields = ["id", "brand", "name", "price", "price_sign", "currency",
"image_link", "description", "rating", "category",
"product_type", "tag_list"]
desired_fields = [
"id",
"brand",
"name",
"price",
"price_sign",
"currency",
"image_link",
"description",
"rating",
"category",
"product_type",
"tag_list",
]

input_file = "dataset_products.csv" # Replace with your actual filename
output_file = "products.json"

# Open CSV file
with open(input_file, 'r') as csvfile:
with open(input_file, "r") as csvfile:
# Read CSV data using DictReader
csv_reader = csv.DictReader(csvfile)

Expand Down Expand Up @@ -37,8 +48,8 @@
json_data.append(product_data)

# Open JSON file for writing
with open(output_file, 'w') as jsonfile:
with open(output_file, "w") as jsonfile:
# Write JSON data to file with indentation
json.dump(json_data, jsonfile, indent=4)

print(f"Converted CSV data to JSON and saved to {output_file}")
print(f"Converted CSV data to JSON and saved to {output_file}")
Loading

0 comments on commit 4793d83

Please sign in to comment.