elasticsearch 7.x compatibility (#576)

* es_mapping: update how dynamic mappings are turned off; this changed in 6.x. See https://www.elastic.co/guide/en/elasticsearch/reference/current/dynamic.html and elastic/elasticsearch#25734
* es_mapping: remove the _all field, which was deprecated in 6.0 anyway
* es_mapping.yml: fix deprecated mapping type (https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html#_schedule_for_removal_of_mapping_types); it gives a really unhelpful error otherwise, oof.
* es: fix remaining 7.x-isms: "enabled: false" apparently only applies to "object" fields now, so we need "index: false" instead, and the _type got removed everywhere. Seems to work now.
* Fix weird offset error with word_delimiter_graph; yet another es7-ism, I guess
* Fix warning and some app stuff for ES 7.x

Co-authored-by: Arylide <[email protected]>
nyaadevs · Jul 12, 2020 · 4fcef92 · 4fcef92 · Arylide · Jul 12, 2020
1 parent 72087dd
commit 4fcef92
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 73 deletions.
diff --git a/create_es.sh b/create_es.sh
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+set -e
 
 # create indices named "nyaa" and "sukebei", these are hardcoded
 curl -v -XPUT 'localhost:9200/nyaa?pretty' -H"Content-Type: application/yaml" --data-binary @es_mapping.yml

diff --git a/es_mapping.yml b/es_mapping.yml
@@ -10,7 +10,6 @@ settings:
         char_filter:
           - my_char_filter
         filter:
-          - standard
           - lowercase
       my_index_analyzer:
         type: custom
@@ -52,7 +51,7 @@ settings:
 
     filter:
       my_ngram:
-        type: edgeNGram
+        type: edge_ngram
         min_gram: 1
         max_gram: 15
       fullword_min:
@@ -66,9 +65,13 @@ settings:
         type: pattern_capture
         patterns: ["0*([0-9]*)"]
       word_delimit:
-        type: word_delimiter
+        type: word_delimiter_graph
         preserve_original: true
         split_on_numerics: false
+        # https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-word-delimiter-graph-tokenfilter.html#word-delimiter-graph-tokenfilter-configure-parms
+        # needed since we're using "trim" filters downstream; otherwise
+        # you get weird Lucene errors about startOffset
+        adjust_offsets: false
     char_filter:
       my_char_filter:
         type: mapping
@@ -78,70 +81,65 @@ settings:
     # plus replicas don't really help either.
     number_of_shards: 1
     number_of_replicas : 0
-    mapper:
-      # disable elasticsearch's "helpful" autoschema
-      dynamic: false
-    # since we disabled the _all field, default query the
-    # name of the torrent.
     query:
       default_field: display_name
 mappings:
-  torrent:
-    # don't want everything concatenated
-    _all:
-      enabled: false
-    properties:
-      id:
-        type: long
-      display_name:
-        # TODO could do a fancier tokenizer here to parse out the
-        # the scene convention of stuff in brackets, plus stuff like k-on
-        type: text
-        analyzer: my_index_analyzer
-        fielddata: true # Is this required?
-        fields:
-          # Multi-field for full-word matching (when going over ngram limits)
-          # Note: will have to be queried for, not automatic
-          fullword:
-            type: text
-            analyzer: my_fullword_index_analyzer
-          # Stored for exact phrase matching
-          exact:
-            type: text
-            analyzer: exact_analyzer
-      created_time:
-        type: date
-        # Only in the ES index for generating magnet links
-      info_hash:
-        enabled: false
-      filesize:
-        type: long
-      anonymous:
-        type: boolean
-      trusted:
-        type: boolean
-      remake:
-        type: boolean
-      complete:
-        type: boolean
-      hidden:
-        type: boolean
-      deleted:
-        type: boolean
-      has_torrent:
-        type: boolean
-      download_count:
-        type: long
-      leech_count:
-        type: long
-      seed_count:
-        type: long
-      comment_count:
-        type: long
-      # these ids are really only for filtering, thus keyword
-      uploader_id:
-        type: keyword
-      main_category_id:
-        type: keyword
-      sub_category_id:
-        type: keyword
+  # disable elasticsearch's "helpful" autoschema
+  dynamic: false
+  properties:
+    id:
+      type: long
+    display_name:
+      # TODO could do a fancier tokenizer here to parse out
+      # the scene convention of stuff in brackets, plus stuff like k-on
+      type: text
+      analyzer: my_index_analyzer
+      fielddata: true # Is this required?
+      fields:
+        # Multi-field for full-word matching (when going over ngram limits)
+        # Note: will have to be queried for, not automatic
+        fullword:
+          type: text
+          analyzer: my_fullword_index_analyzer
+        # Stored for exact phrase matching
+        exact:
+          type: text
+          analyzer: exact_analyzer
+    created_time:
+      type: date
+      #
+    # Only in the ES index for generating magnet links
+    info_hash:
+      type: keyword
+      index: false
+    filesize:
+      type: long
+    anonymous:
+      type: boolean
+    trusted:
+      type: boolean
+    remake:
+      type: boolean
+    complete:
+      type: boolean
+    hidden:
+      type: boolean
+    deleted:
+      type: boolean
+    has_torrent:
+      type: boolean
+    download_count:
+      type: long
+    leech_count:
+      type: long
+    seed_count:
+      type: long
+    comment_count:
+      type: long
+    # these ids are really only for filtering, thus keyword
+    uploader_id:
+      type: keyword
+    main_category_id:
+      type: keyword
+    sub_category_id:
+      type: keyword
diff --git a/import_to_es.py b/import_to_es.py
@@ -34,7 +34,6 @@ def pad_bytes(in_bytes, size):
 def mk_es(t, index_name):
     return {
         "_id": t.id,
-        "_type": "torrent",
         "_index": index_name,
         "_source": {
             # we're also indexing the id as a number so you can

diff --git a/nyaa/templates/search_results.html b/nyaa/templates/search_results.html
@@ -17,7 +17,7 @@
 {% endif %}
 {% endif %}
 
-{% if (use_elastic and torrent_query.hits.total > 0) or (torrent_query.items) %}
+{% if (use_elastic and torrent_query.hits.total.value > 0) or (torrent_query.items) %}
 <div class="table-responsive">
 	<table class="table table-bordered table-hover table-striped torrent-list">
 		<thead>

diff --git a/nyaa/views/main.py b/nyaa/views/main.py
@@ -167,7 +167,7 @@ def home(rss):
         else:
             rss_query_string = _generate_query_string(
                 search_term, category, quality_filter, user_name)
-            max_results = min(max_search_results, query_results['hits']['total'])
+            max_results = min(max_search_results, query_results['hits']['total']['value'])
             # change p= argument to whatever you change page_parameter to or pagination breaks
             pagination = Pagination(p=query_args['page'], per_page=results_per_page,
                                     total=max_results, bs_version=3, page_parameter='p',

diff --git a/sync_es.py b/sync_es.py
@@ -114,7 +114,6 @@ def reindex_torrent(t, index_name):
     return {
         '_op_type': 'update',
         '_index': index_name,
-        '_type': 'torrent',
         '_id': str(t['id']),
         "doc": doc,
         "doc_as_upsert": True
@@ -128,7 +127,6 @@ def reindex_stats(s, index_name):
     return {
         '_op_type': 'update',
         '_index': index_name,
-        '_type': 'torrent',
         '_id': str(s['torrent_id']),
         "doc": {
             "stats_last_updated": s["last_updated"],
@@ -141,7 +139,6 @@ def delet_this(row, index_name):
     return {
         "_op_type": 'delete',
         '_index': index_name,
-        '_type': 'torrent',
         '_id': str(row['values']['id'])}
 
 # we could try to make this script robust to errors from es or mysql, but since