Skip to content

Commit

Permalink
Beit-Hatfutsot#174 make persons search case insensitive (+refactor an…
Browse files Browse the repository at this point in the history
…d improve tests)
  • Loading branch information
OriHoch committed May 21, 2017
1 parent 698d1af commit 6ebdc25
Show file tree
Hide file tree
Showing 6 changed files with 1,269 additions and 1,298 deletions.
11 changes: 6 additions & 5 deletions bhs_api/persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@
("yod", "death_year"),
("yom", "marriage_years"))

PERSONS_SEARCH_TEXT_PARAMS = (("first", "first_name_lc"),
("last", "last_name_lc"),
("pob", "BIRT_PLAC_lc"),
("pom", "MARR_PLAC_lc"),
("pod", "DEAT_PLAC_lc"),)
# these are updated in bhs_api.item.update_es functions
PERSONS_SEARCH_TEXT_PARAMS_LOWERCASE = (("first", "first_name_lc"),
("last", "last_name_lc"),
("pob", "BIRT_PLAC_lc"),
("pom", "MARR_PLAC_lc"),
("pod", "DEAT_PLAC_lc"),)

PERSONS_SEARCH_EXACT_PARAMS = (("sex", "gender"),
("treenum", "tree_num"))
Expand Down
12 changes: 7 additions & 5 deletions bhs_api/v1_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

from bhs_api import phonetic
from bhs_api.persons import (PERSONS_SEARCH_DEFAULT_PARAMETERS, PERSONS_SEARCH_REQUIRES_ONE_OF,
PERSONS_SEARCH_YEAR_PARAMS, PERSONS_SEARCH_TEXT_PARAMS, PERSONS_SEARCH_EXACT_PARAMS)
PERSONS_SEARCH_YEAR_PARAMS, PERSONS_SEARCH_TEXT_PARAMS_LOWERCASE, PERSONS_SEARCH_EXACT_PARAMS)

v1_endpoints = Blueprint('v1', __name__)

Expand Down Expand Up @@ -94,9 +94,9 @@ def es_search(q, size, collection=None, from_=0, sort=None, with_persons=False,
must_queries.append({"term": {year_attr: year_value}})
else:
raise Exception("invalid value for {} ({}): {}".format(year_type_param, year_attr, year_type))
for text_param, text_attr in PERSONS_SEARCH_TEXT_PARAMS:
for text_param, text_attr in PERSONS_SEARCH_TEXT_PARAMS_LOWERCASE:
if kwargs[text_param]:
text_value = kwargs[text_param]
text_value = kwargs[text_param].lower()
text_type_param = "{}_t".format(text_param)
text_type = kwargs[text_type_param]
if text_type == "exact":
Expand All @@ -111,8 +111,10 @@ def es_search(q, size, collection=None, from_=0, sort=None, with_persons=False,
for exact_param, exact_attr in PERSONS_SEARCH_EXACT_PARAMS:
if kwargs[exact_param]:
exact_value = kwargs[exact_param]
if exact_param == "sex" and exact_value not in ("F", "M", "U"):
raise Exception ("invalid value for {} ({}): {}".format(exact_param, exact_attr, exact_value))
if exact_param == "sex":
exact_value = exact_value.upper()
if exact_value not in ["F", "M", "U"]:
raise Exception("invalid value for {} ({}): {}".format(exact_param, exact_attr, exact_value))
elif exact_param == "treenum":
try:
exact_value = int(exact_value)
Expand Down
1 change: 1 addition & 0 deletions scripts/elasticsearch_create_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def _get_index_body(self):
"birth_year": {"type": "integer"},
"death_year": {"type": "integer"},
"marriage_years": {"type": "integer"},
# these are updated in bhs_api.item.update_es functions
"first_name_lc": {"type": "text"},
"last_name_lc": {"type": "text"},
"BIRT_PLAC_lc": {"type": "text"},
Expand Down
98 changes: 98 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from elasticsearch import Elasticsearch
from scripts.elasticsearch_create_index import ElasticsearchCreateIndexCommand
from copy import deepcopy
import os
from bhs_api.item import get_doc_id
from mocks import *


def given_invalid_elasticsearch_client(app):
    """Replace ``app.es`` with an Elasticsearch client set up to be unusable.

    The client points at 192.0.2.0 (an address inside the RFC 5737
    documentation range) and is given a near-zero timeout. Presumably every
    request made through it fails, which lets tests exercise error handling
    -- confirm against the tests that call this.
    """
    unreachable_host = "192.0.2.0"
    near_zero_timeout = 0.000000001
    app.es = Elasticsearch(unreachable_host, timeout=near_zero_timeout)

def index_doc(app, collection, doc):
    """Index a single document into the app's Elasticsearch data index.

    The document is deep-copied first, so the caller's ``doc`` is never
    modified. If the copy has a "Header" dict, lower-cased variants of its
    "He" and "En" values are stored under "He_lc" / "En_lc" unless those keys
    already exist.

    :param app: object exposing ``es`` (the client) and
        ``es_data_db_index_name``
    :param collection: collection name, used as the Elasticsearch doc_type
    :param doc: the document to index
    """
    indexed = deepcopy(doc)
    for lang in ("He", "En"):
        # When there is no "Header", this is a throwaway dict and the
        # setdefault below has no effect on the indexed document.
        header = indexed.get("Header", {})
        header.setdefault(lang + "_lc", header.get(lang, "").lower())
    # Persons get a composite id; every other collection delegates to get_doc_id.
    doc_id = (
        "{}_{}_{}".format(indexed["tree_num"], indexed["tree_version"], indexed["person_id"])
        if collection == "persons"
        else get_doc_id(collection, indexed)
    )
    app.es.index(index=app.es_data_db_index_name, doc_type=collection, body=indexed, id=doc_id)

def index_docs(app, collections, reuse_db=False):
    """(Re)create the test index and fill it with the given documents.

    When ``reuse_db`` is true and the index already exists, nothing is done.
    Otherwise the index is created (deleting any existing one), every
    document is indexed through ``index_doc`` and the index is refreshed.

    :param app: object exposing ``es`` and ``es_data_db_index_name``
    :param collections: dict mapping collection name to a list of documents
    :param reuse_db: keep an already existing index instead of rebuilding it
    """
    es = app.es
    index_name = app.es_data_db_index_name
    if reuse_db and es.indices.exists(index_name):
        return
    ElasticsearchCreateIndexCommand().create_es_index(es=es, es_index_name=index_name, delete_existing=True)
    for collection, docs in collections.items():
        for doc in docs:
            index_doc(app, collection, doc)
    es.indices.refresh(index_name)

def given_local_elasticsearch_client_with_test_data(app, session_id=None):
    """
    Point ``app`` at the Elasticsearch on localhost and use the dedicated
    testing index "bh_dbs_back_pytest".

    The test documents are indexed unless a truthy ``session_id`` is given
    and it equals the ``session_id`` of the previous call; in that case the
    docs are not reindexed. Setting the REUSE_DB environment variable to "1"
    asks ``index_docs`` to reuse the index.
    """
    app.es = Elasticsearch("localhost")
    app.es_data_db_index_name = "bh_dbs_back_pytest"
    # The last seen session id is remembered as an attribute on this function.
    this_function = given_local_elasticsearch_client_with_test_data
    if session_id and session_id == getattr(this_function, "_session_id", None):
        return
    this_function._session_id = session_id
    reuse_db = os.environ.get("REUSE_DB", "") == "1"
    test_data = {
        "places": [PLACES_BOURGES, PLACES_BOZZOLO],
        "photoUnits": [PHOTO_BRICKS, PHOTOS_BOYS_PRAYING],
        "familyNames": [FAMILY_NAMES_DERI, FAMILY_NAMES_EDREHY],
        "personalities": [PERSONALITIES_FERDINAND, PERSONALITIES_DAVIDOV],
        "movies": [MOVIES_MIDAGES, MOVIES_SPAIN],
        "persons": [PERSON_EINSTEIN, PERSON_LIVING],
    }
    index_docs(app, test_data, reuse_db)


def assert_error_response(res, expected_status_code, expected_error_startswith):
    """Assert that ``res`` is an error response of the expected kind.

    :param res: response exposing ``status_code`` and a ``json`` mapping
    :param expected_status_code: status code the response must have
    :param expected_error_startswith: prefix the "error" value of the json
        body must start with
    """
    actual_status = res.status_code
    assert actual_status == expected_status_code
    error_text = res.json["error"]
    assert error_text.startswith(expected_error_startswith)

def assert_common_elasticsearch_search_results(res):
    """Run the checks shared by every search response and return its hits.

    Asserts a 200 status, that at least one shard succeeded, that none
    failed, that all shards were successful, a positive "took" value and a
    boolean "timed_out" flag.

    :param res: response exposing ``status_code`` and a ``json`` mapping
    :return: the "hits" object of the json body
    """
    assert res.status_code == 200, "invalid status, json response: {}".format(res.json)
    body = res.json
    hits = body["hits"]
    shard_stats = body["_shards"]
    successful = shard_stats["successful"]
    assert successful > 0
    assert shard_stats["failed"] < 1
    assert shard_stats["total"] == successful
    assert body["took"] > 0
    assert isinstance(body["timed_out"], bool)
    return hits


def assert_no_results(res):
    """Assert that ``res`` is a valid search response containing no hits.

    The common search-response checks are applied first; then the hit list
    must be empty, the total must be 0 and the max score must be None.

    :param res: response exposing ``status_code`` and a ``json`` mapping
    """
    hits = assert_common_elasticsearch_search_results(res)
    # Separate asserts so that a failure points at the exact condition.
    assert hits["hits"] == []
    assert hits["total"] == 0
    assert hits["max_score"] is None

def assert_search_results(res, num_expected):
    """Validate a search response and yield its hits one by one.

    This is a generator: none of the assertions run until it is iterated.
    Besides the common search-response checks, both the number of returned
    hits and the reported total must equal ``num_expected``, and every hit
    must come from the "bh_dbs_back_pytest" index.

    :param res: response exposing ``status_code`` and a ``json`` mapping
    :param num_expected: exact number of hits expected
    """
    result = assert_common_elasticsearch_search_results(res)
    found = result["hits"]
    assert len(found) == num_expected
    assert result["total"] == num_expected
    for item in found:
        assert item["_index"] == "bh_dbs_back_pytest"
        yield item

def assert_search_hit_ids(client, search_params, expected_ids, ignore_order=False):
    """Search through the v1 API and assert which document ids come back.

    :param client: test client whose ``get`` performs the request
    :param search_params: query string appended to "/v1/search?"
    :param expected_ids: ids expected in the hits; the id of a hit is its
        source "Id" value, falling back to "id"
    :param ignore_order: when true only the set of distinct ids is compared,
        otherwise the order must match as well
    """
    response = client.get(u"/v1/search?{}".format(search_params))
    hit_ids = []
    for hit in assert_search_results(response, len(expected_ids)):
        source = hit["_source"]
        hit_ids.append(source.get("Id", source.get("id")))
    if ignore_order:
        assert set(hit_ids) == set(expected_ids)
    else:
        assert hit_ids == expected_ids

def assert_suggest_response(client, collection, string,
                            expected_http_status_code=200, expected_error_message=None, expected_json=None):
    """Call the v1 suggest endpoint and check its response.

    :param client: test client whose ``get`` performs the request
    :param collection: collection part of the suggest url
    :param string: text part of the suggest url
    :param expected_http_status_code: status code the response must have
    :param expected_error_message: if not None, must be contained in the
        response data
    :param expected_json: if not None, must equal the response json (the
        actual json is printed before comparing)
    """
    url = u"/v1/suggest/{}/{}".format(collection, string)
    response = client.get(url)
    assert response.status_code == expected_http_status_code
    if expected_error_message is not None:
        assert expected_error_message in response.data
    if expected_json is None:
        return
    print(response.json)
    assert expected_json == response.json

def dump_res(res):
    """Print ``res.status_code`` and ``res.data`` to stdout."""
    print(res.status_code, res.data)
Loading

0 comments on commit 6ebdc25

Please sign in to comment.