From abb91e27b586a33e17e61632b0ef8e2b02cce6bd Mon Sep 17 00:00:00 2001 From: elz Date: Sat, 13 Sep 2014 10:46:48 -0400 Subject: [PATCH 01/10] Add Elasticsearch client --- Gemfile | 1 + Gemfile.lock | 12 ++++++++++++ config/config.yml | 1 + .../initializers/gem-plugin_config/elasticsearch.rb | 1 + 4 files changed, 15 insertions(+) create mode 100644 config/initializers/gem-plugin_config/elasticsearch.rb diff --git a/Gemfile b/Gemfile index 869ec4f81fc..a993a35f664 100644 --- a/Gemfile +++ b/Gemfile @@ -36,6 +36,7 @@ gem 'resque_mailer' gem 'resque-scheduler', :require => 'resque_scheduler' #gem 'daemon-spawn', :require => 'daemon_spawn' gem 'tire' +gem 'elasticsearch' gem 'aws-sdk' gem 'css_parser' diff --git a/Gemfile.lock b/Gemfile.lock index 925a23ce52d..a18673707dc 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -97,6 +97,14 @@ GEM docile (1.1.1) domain_name (0.5.12) unf (>= 0.0.5, < 1.0.0) + elasticsearch (1.0.5) + elasticsearch-api (= 1.0.5) + elasticsearch-transport (= 1.0.5) + elasticsearch-api (1.0.5) + multi_json + elasticsearch-transport (1.0.5) + faraday + multi_json email_spec (1.5.0) launchy (~> 2.1) mail (~> 2.2) @@ -106,6 +114,8 @@ GEM activesupport (>= 3.0.0) faker (1.1.2) i18n (~> 0.5) + faraday (0.9.0) + multipart-post (>= 1.2, < 3) fastimage (1.4.0) gherkin (2.12.0) multi_json (~> 1.3) @@ -149,6 +159,7 @@ GEM mono_logger (1.1.0) multi_json (1.10.0) multi_xml (0.5.5) + multipart-post (2.0.0) mysql2 (0.3.11) mysql2 (0.3.11-x86-mingw32) net-http-digest_auth (1.3) @@ -350,6 +361,7 @@ DEPENDENCIES cucumber-rails database_cleaner delorean + elasticsearch email_spec escape_utils factory_girl diff --git a/config/config.yml b/config/config.yml index 398410a1a6b..ac4964b689b 100644 --- a/config/config.yml +++ b/config/config.yml @@ -50,6 +50,7 @@ OTWLOGO: 'OTWLogo.png' OTWALT_LOGO: 'OTW Logo:closing the circle of the copyright symbol, it symbolizes our creative engagement with media: participating and not just consuming.' REVISION: '' ELASTICSEARCH_URL: 'http://localhost:9200' +ELASTICSEARCH_1_URL: 'http://localhost:9200' MEMCACHED_URL: '127.0.0.1' # tag settings diff --git a/config/initializers/gem-plugin_config/elasticsearch.rb b/config/initializers/gem-plugin_config/elasticsearch.rb new file mode 100644 index 00000000000..d973e4956e7 --- /dev/null +++ b/config/initializers/gem-plugin_config/elasticsearch.rb @@ -0,0 +1 @@ +$elasticsearch = Elasticsearch::Client.new host: ArchiveConfig.ELASTICSEARCH_1_URL \ No newline at end of file From 23e53250d26c6b6b5d919f299ac0d8186131e2c5 Mon Sep 17 00:00:00 2001 From: elz Date: Sat, 13 Sep 2014 15:34:32 -0400 Subject: [PATCH 02/10] Groundwork for new search indexing --- app/models/es/bookmark_indexer.rb | 64 ++++++++++ app/models/es/bookmark_search.rb | 0 app/models/es/bookmarkable_indexer.rb | 13 ++ .../es/bookmarked_external_work_indexer.rb | 15 +++ app/models/es/bookmarked_series_indexer.rb | 15 +++ app/models/es/bookmarked_work_indexer.rb | 15 +++ app/models/es/indexer.rb | 116 ++++++++++++++++++ app/models/es/pseud_indexer.rb | 14 +++ app/models/es/pseud_search.rb | 0 app/models/es/tag_indexer.rb | 33 +++++ app/models/es/tag_search.rb | 0 app/models/es/work_indexer.rb | 15 +++ app/models/es/work_search.rb | 0 config/application.rb | 6 +- lib/tasks/search.rake | 6 + 15 files changed, 309 insertions(+), 3 deletions(-) create mode 100644 app/models/es/bookmark_indexer.rb create mode 100644 app/models/es/bookmark_search.rb create mode 100644 app/models/es/bookmarkable_indexer.rb create mode 100644 app/models/es/bookmarked_external_work_indexer.rb create mode 100644 app/models/es/bookmarked_series_indexer.rb create mode 100644 app/models/es/bookmarked_work_indexer.rb create mode 100644 app/models/es/indexer.rb create mode 100644 app/models/es/pseud_indexer.rb create mode 100644 app/models/es/pseud_search.rb create mode 100644 app/models/es/tag_indexer.rb create mode 100644 app/models/es/tag_search.rb create mode 100644 app/models/es/work_indexer.rb create mode 100644 app/models/es/work_search.rb create mode 100644 lib/tasks/search.rake diff --git a/app/models/es/bookmark_indexer.rb b/app/models/es/bookmark_indexer.rb new file mode 100644 index 00000000000..7cdd75e8dd9 --- /dev/null +++ b/app/models/es/bookmark_indexer.rb @@ -0,0 +1,64 @@ +module ES + class BookmarkIndexer < ES::Indexer + + def self.klass + 'Bookmark' + end + + def self.mapping + { + "bookmark" => { + "_parent" => { + type: 'bookmarkable' + }, + properties: { + bookmarkable_type: { + type: 'string', + index: 'not_analyzed' + }, + bookmarker: { + type: 'string', + analyzer: 'simple' + }, + notes: { + type: 'string', + analyzer: 'snowball' + }, + tag: { + type: 'string', + analyzer: 'simple' + } + } + } + } + end + + #################### + # INSTANCE METHODS + #################### + + def routing(id) + { + '_index' => index_name, + '_type' => document_type, + '_id' => id, + '_parent' => objects[id.to_i].bookmarkable_id + } + end + + def document(object) + tags = object.tags + filters = tags.map{ |t| t.filter }.compact + + object.as_json( + root: false, + except: [:notes_sanitizer_version, :delta], + methods: [:bookmarker, :collection_ids, :with_notes] + ).merge( + tag: (tags + filters).map(&:name).uniq, + tag_ids: tags.map(&:id), + filter_ids: filters.map(&:id) + ) + end + end +end \ No newline at end of file diff --git a/app/models/es/bookmark_search.rb b/app/models/es/bookmark_search.rb new file mode 100644 index 00000000000..e69de29bb2d diff --git a/app/models/es/bookmarkable_indexer.rb b/app/models/es/bookmarkable_indexer.rb new file mode 100644 index 00000000000..1ee26941e7b --- /dev/null +++ b/app/models/es/bookmarkable_indexer.rb @@ -0,0 +1,13 @@ +module ES + class BookmarkableIndexer < ES::Indexer + + def self.index_name + "ao3_#{Rails.env}_bookmarks" + end + + def self.document_type + 'bookmarkable' + end + + end +end \ No newline at end of file diff --git a/app/models/es/bookmarked_external_work_indexer.rb b/app/models/es/bookmarked_external_work_indexer.rb new file mode 100644 index 00000000000..bd915828fa9 --- /dev/null +++ b/app/models/es/bookmarked_external_work_indexer.rb @@ -0,0 +1,15 @@ +module ES + class BookmarkedExternalWorkIndexer < ES::BookmarkableIndexer + + def self.klass + "ExternalWork" + end + + def self.mapping + end + + def document(object) + end + + end +end \ No newline at end of file diff --git a/app/models/es/bookmarked_series_indexer.rb b/app/models/es/bookmarked_series_indexer.rb new file mode 100644 index 00000000000..29ef3468893 --- /dev/null +++ b/app/models/es/bookmarked_series_indexer.rb @@ -0,0 +1,15 @@ +module ES + class BookmarkedSeriesIndexer < ES::BookmarkableIndexer + + def self.klass + "Series" + end + + def self.mapping + end + + def document(object) + end + + end +end \ No newline at end of file diff --git a/app/models/es/bookmarked_work_indexer.rb b/app/models/es/bookmarked_work_indexer.rb new file mode 100644 index 00000000000..829430d4384 --- /dev/null +++ b/app/models/es/bookmarked_work_indexer.rb @@ -0,0 +1,15 @@ +module ES + class BookmarkedWorkIndexer < ES::BookmarkableIndexer + + def self.klass + "Work" + end + + def self.mapping + end + + def document(object) + end + + end +end \ No newline at end of file diff --git a/app/models/es/indexer.rb b/app/models/es/indexer.rb new file mode 100644 index 00000000000..1f45609c0d8 --- /dev/null +++ b/app/models/es/indexer.rb @@ -0,0 +1,116 @@ +module ES + class Indexer + + ################## + # CLASS METHODS + ################## + + def self.klass + raise "Must be defined in subclass" + end + + def self.delete_index + if $elasticsearch.indices.exists(index: index_name) + $elasticsearch.indices.delete(index: index_name) + end + end + + def self.create_index + $elasticsearch.indices.create( + index: index_name, + type: document_type, + body: { + mapping: mapping + } + ) + end + + def self.mapping + { + document_type => { + properties: { + #add properties in subclasses + } + } + } + end + + def self.index_all + delete_index + create_index + total = (klass.constantize.count / 1000) + 1 + i = 1 + klass.constantize.find_in_batches do |group| + puts "Reindexing batch #{i} of #{total}" + self.new(group.map(&:id)).index_documents + i += 1 + end + end + + def self.index_name + "ao3_#{Rails.env}_#{klass.underscore.pluralize}" + end + + def self.document_type + klass.underscore + end + + #################### + # INSTANCE METHODS + #################### + + attr_reader :ids + + def initialize(ids) + @ids = ids + end + + def klass + self.class.klass + end + + def index_name + self.class.index_name + end + + def document_type + self.class.document_type + end + + def objects + @objects ||= klass.constantize.where(id: ids).inject({}) do |h, obj| + h.merge(obj.id => obj) + end + end + + def batch + @batch = [] + ids.each do |id| + object = objects[id.to_i] + if object.present? + @batch << { index: routing_info(id) } + @batch << document(object) + else + @batch << { delete: routing_info(id) } + end + end + @batch + end + + def index_documents + $elasticsearch.bulk(body: batch) + end + + def routing_info(id) + { + '_index' => index_name, + '_type' => document_type, + '_id' => id + } + end + + def document(object) + object.as_json(root: false) + end + end +end diff --git a/app/models/es/pseud_indexer.rb b/app/models/es/pseud_indexer.rb new file mode 100644 index 00000000000..f672f54a79d --- /dev/null +++ b/app/models/es/pseud_indexer.rb @@ -0,0 +1,14 @@ +module ES + class PseudIndexer < ES::Indexer + + def self.klass + 'Pseud' + end + + def self.mapping + end + + def document(object) + end + end +end \ No newline at end of file diff --git a/app/models/es/pseud_search.rb b/app/models/es/pseud_search.rb new file mode 100644 index 00000000000..e69de29bb2d diff --git a/app/models/es/tag_indexer.rb b/app/models/es/tag_indexer.rb new file mode 100644 index 00000000000..5b78e1d6fa7 --- /dev/null +++ b/app/models/es/tag_indexer.rb @@ -0,0 +1,33 @@ +module ES + class TagIndexer < ES::Indexer + + def self.klass + 'Tag' + end + + def self.mapping + { + tag: { + properties: { + name: { + type: 'string', + analyzer: 'simple' + }, + tag_type: { + type: 'string', + index: 'not_analyzed' + } + } + } + } + end + + def document(object) + object.as_json( + root: false, + only: [:id, :name, :merger_id, :canonical] + ).merge(tag_type: object.type) + end + + end +end \ No newline at end of file diff --git a/app/models/es/tag_search.rb b/app/models/es/tag_search.rb new file mode 100644 index 00000000000..e69de29bb2d diff --git a/app/models/es/work_indexer.rb b/app/models/es/work_indexer.rb new file mode 100644 index 00000000000..34c00e7d9f1 --- /dev/null +++ b/app/models/es/work_indexer.rb @@ -0,0 +1,15 @@ +module ES + class WorkIndexer < ES::Indexer + + def self.klass + 'Work' + end + + def self.mapping + end + + def document(object) + end + + end +end \ No newline at end of file diff --git a/app/models/es/work_search.rb b/app/models/es/work_search.rb new file mode 100644 index 00000000000..e69de29bb2d diff --git a/config/application.rb b/config/application.rb index 0098a1dc0f8..fab595b2a2f 100644 --- a/config/application.rb +++ b/config/application.rb @@ -16,9 +16,9 @@ class Application < Rails::Application # config.autoload_paths += %W(#{config.root}/extras) config.autoload_paths += %W(#{Rails.root}/lib) config.autoload_paths += %W(#{Rails.root}/app/sweepers) - config.autoload_paths += %W(#{Rails.root}/app/models/challenge_models) - config.autoload_paths += %W(#{Rails.root}/app/models/tagset_models) - config.autoload_paths += %W(#{Rails.root}/app/models/indexing) + %w(challenge_models tagset_models indexing es).each do |dir| + config.autoload_paths << "#{Rails.root}/app/models/#{dir}" + end # Only load the plugins named here, in the order given (default is alphabetical). # :all can be used as a placeholder for all plugins not explicitly named. diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake new file mode 100644 index 00000000000..5611765b932 --- /dev/null +++ b/lib/tasks/search.rake @@ -0,0 +1,6 @@ +namespace :search do + desc "Reindex tags" + task(:index_tags => :environment) do + ES::TagIndexer.index_all + end +end \ No newline at end of file From 0b65d71462d2461ca182775e0f17a8e6abc0bc86 Mon Sep 17 00:00:00 2001 From: elz Date: Sat, 13 Sep 2014 21:50:10 -0400 Subject: [PATCH 03/10] Pseud indexer basics --- app/models/es/pseud_indexer.rb | 19 +++++++++++++++++++ lib/tasks/search.rake | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/app/models/es/pseud_indexer.rb b/app/models/es/pseud_indexer.rb index f672f54a79d..1e0cd70fdd6 100644 --- a/app/models/es/pseud_indexer.rb +++ b/app/models/es/pseud_indexer.rb @@ -6,9 +6,28 @@ def self.klass end def self.mapping + { + 'pseud' => { + properties: { + name: { + type: 'string', + analyzer: 'simple' + }, + user_login: { + type: 'string', + analyzer: 'simple' + } + } + } + } end def document(object) + object.as_json( + root: false, + only: [:id, :user_id, :name, :description], + methods: [:user_login] + ) end end end \ No newline at end of file diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake index 5611765b932..595729eb826 100644 --- a/lib/tasks/search.rake +++ b/lib/tasks/search.rake @@ -3,4 +3,8 @@ namespace :search do task(:index_tags => :environment) do ES::TagIndexer.index_all end + desc "Reindex pseuds" + task(:index_pseuds => :environment) do + ES::PseudIndexer.index_all + end end \ No newline at end of file From 8c058d083f8d9a63b45283e828f99689a110aa8c Mon Sep 17 00:00:00 2001 From: elz Date: Sat, 13 Sep 2014 21:52:29 -0400 Subject: [PATCH 04/10] More tweaks to bookmark indexers --- app/models/es/bookmark_indexer.rb | 1 + app/models/es/bookmarkable_indexer.rb | 33 +++++++++++++++++++ .../es/bookmarked_external_work_indexer.rb | 8 ----- app/models/es/bookmarked_series_indexer.rb | 8 ----- app/models/es/bookmarked_work_indexer.rb | 8 ----- 5 files changed, 34 insertions(+), 24 deletions(-) diff --git a/app/models/es/bookmark_indexer.rb b/app/models/es/bookmark_indexer.rb index 7cdd75e8dd9..f16ff50a9fb 100644 --- a/app/models/es/bookmark_indexer.rb +++ b/app/models/es/bookmark_indexer.rb @@ -37,6 +37,7 @@ def self.mapping # INSTANCE METHODS #################### + # TODO: Make this work for deleted bookmarks def routing(id) { '_index' => index_name, diff --git a/app/models/es/bookmarkable_indexer.rb b/app/models/es/bookmarkable_indexer.rb index 1ee26941e7b..50a477820d4 100644 --- a/app/models/es/bookmarkable_indexer.rb +++ b/app/models/es/bookmarkable_indexer.rb @@ -9,5 +9,38 @@ def self.document_type 'bookmarkable' end + def self.mapping + { + 'bookmarkable' => { + properties: { + title: { + type: 'string', + analyzer: 'simple' + }, + creators: { + type: 'string', + analyzer: 'simple' + }, + tag: { + type: 'string', + analyzer: 'simple' + } + } + } + } + end + + def routing_info(id) + { + '_index' => index_name, + '_type' => document_type, + '_id' => "#{id}-#{klass.underscore}" + } + end + + def document(object) + object.bookmarkable_json + end + end end \ No newline at end of file diff --git a/app/models/es/bookmarked_external_work_indexer.rb b/app/models/es/bookmarked_external_work_indexer.rb index bd915828fa9..e7694067e2e 100644 --- a/app/models/es/bookmarked_external_work_indexer.rb +++ b/app/models/es/bookmarked_external_work_indexer.rb @@ -1,15 +1,7 @@ module ES class BookmarkedExternalWorkIndexer < ES::BookmarkableIndexer - def self.klass "ExternalWork" end - - def self.mapping - end - - def document(object) - end - end end \ No newline at end of file diff --git a/app/models/es/bookmarked_series_indexer.rb b/app/models/es/bookmarked_series_indexer.rb index 29ef3468893..b4e6dd66cb3 100644 --- a/app/models/es/bookmarked_series_indexer.rb +++ b/app/models/es/bookmarked_series_indexer.rb @@ -1,15 +1,7 @@ module ES class BookmarkedSeriesIndexer < ES::BookmarkableIndexer - def self.klass "Series" end - - def self.mapping - end - - def document(object) - end - end end \ No newline at end of file diff --git a/app/models/es/bookmarked_work_indexer.rb b/app/models/es/bookmarked_work_indexer.rb index 829430d4384..997778eae75 100644 --- a/app/models/es/bookmarked_work_indexer.rb +++ b/app/models/es/bookmarked_work_indexer.rb @@ -1,15 +1,7 @@ module ES class BookmarkedWorkIndexer < ES::BookmarkableIndexer - def self.klass "Work" end - - def self.mapping - end - - def document(object) - end - end end \ No newline at end of file From 7fb5c3282a48672d9919252090a90c60b8169643 Mon Sep 17 00:00:00 2001 From: elz Date: Sat, 13 Sep 2014 22:08:30 -0400 Subject: [PATCH 05/10] Set up bookmarkable json for series --- app/models/series.rb | 86 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/app/models/series.rb b/app/models/series.rb index 3e8087c51e9..083f7432bec 100644 --- a/app/models/series.rb +++ b/app/models/series.rb @@ -67,9 +67,19 @@ def posted_works # Get the filters for the works in this series def filters - Tag.joins("JOIN filter_taggings ON tags.id = filter_taggings.filter_id JOIN works ON works.id = filter_taggings.filterable_id JOIN serial_works ON serial_works.work_id = works.id").where("serial_works.series_id = #{self.id} AND works.posted = 1 AND filter_taggings.filterable_type = 'Work'").group("tags.id") + Tag.joins("JOIN filter_taggings ON tags.id = filter_taggings.filter_id + JOIN works ON works.id = filter_taggings.filterable_id + JOIN serial_works ON serial_works.work_id = works.id"). + where("serial_works.series_id = #{self.id} AND + works.posted = 1 AND + filter_taggings.filterable_type = 'Work'"). + group("tags.id") end - + + def direct_filters + filters.where("filter_taggings.inherited = 0") + end + # visibility aped from the work model def visible(current_user=User.current_user) if current_user.is_a?(Admin) || (current_user.is_a?(User) && current_user.is_author_of?(self)) @@ -198,4 +208,76 @@ def revised_at Work.in_series(self).visible.collect(&:revised_at).compact.uniq.sort.last end end + + ###################### + # SEARCH + ###################### + + def bookmarkable_json + as_json( + root: false, + only: [:id, :title, :summary, :hidden_by_admin, :restricted], + methods: [:revised_at, :posted, :tag, :filter_ids, :rating_ids, + :warning_ids, :category_ids, :fandom_ids, :character_ids, + :relationship_ids, :freeform_ids, :pseud_ids, :creators, :language_id, + :word_count] + ).merge( + anonymous: anonymous?, + unrevealed: unrevealed?, + bookmarkable_type: 'Series' + ) + end + + # FIXME: should series have their own language? + def language_id + works.first.language_id + end + + def posted + !posted_works.empty? + end + + # Simple name to make it easier for people to use in full-text search + def tag + (work_tags + filters).uniq.map{ |t| t.name } + end + + # Index all the filters for pulling works + def filter_ids + filters.value_of :id + end + + # Index only direct filters (non meta-tags) for facets + def filters_for_facets + @filters_for_facets ||= direct_filters + end + def rating_ids + filters_for_facets.select{ |t| t.type.to_s == 'Rating' }.map{ |t| t.id } + end + def warning_ids + filters_for_facets.select{ |t| t.type.to_s == 'Warning' }.map{ |t| t.id } + end + def category_ids + filters_for_facets.select{ |t| t.type.to_s == 'Category' }.map{ |t| t.id } + end + def fandom_ids + filters_for_facets.select{ |t| t.type.to_s == 'Fandom' }.map{ |t| t.id } + end + def character_ids + filters_for_facets.select{ |t| t.type.to_s == 'Character' }.map{ |t| t.id } + end + def relationship_ids + filters_for_facets.select{ |t| t.type.to_s == 'Relationship' }.map{ |t| t.id } + end + def freeform_ids + filters_for_facets.select{ |t| t.type.to_s == 'Freeform' }.map{ |t| t.id } + end + + def pseud_ids + creatorships.value_of :pseud_id + end + + def creators + anonymous? ? ['Anonymous'] : pseuds.map(&:byline) + end end From 6e402bb02e8b86bf4220d3115a760e669938a6df Mon Sep 17 00:00:00 2001 From: elz Date: Sun, 14 Sep 2014 12:57:19 -0400 Subject: [PATCH 06/10] Adding more fields to indexes and putting duplicate code in taggable module --- app/models/es/bookmark_indexer.rb | 15 ++- app/models/es/bookmarkable_indexer.rb | 8 +- app/models/es/bookmarked_work_indexer.rb | 11 ++ app/models/es/indexer.rb | 19 ++- app/models/es/pseud_indexer.rb | 2 +- app/models/es/tag_indexer.rb | 2 +- app/models/es/work_indexer.rb | 63 ++++++++++ app/models/external_work.rb | 83 ++++++-------- app/models/series.rb | 10 +- app/models/work.rb | 140 +++++++++-------------- lib/taggable.rb | 95 ++++++++++++++- lib/tasks/search.rake | 10 +- 12 files changed, 310 insertions(+), 148 deletions(-) diff --git a/app/models/es/bookmark_indexer.rb b/app/models/es/bookmark_indexer.rb index f16ff50a9fb..0c6599e7b1a 100644 --- a/app/models/es/bookmark_indexer.rb +++ b/app/models/es/bookmark_indexer.rb @@ -5,6 +5,17 @@ def self.klass 'Bookmark' end + def self.index_all(options={}) + options[:skip_delete] = true + ES::BookmarkableIndexer.delete_index + ES::BookmarkableIndexer.create_index + create_mapping + ES::BookmarkedExternalWorkIndexer.index_all(skip_delete: true) + ES::BookmarkedSeriesIndexer.index_all(skip_delete: true) + ES::BookmarkedWorkIndexer.index_all(skip_delete: true) + super + end + def self.mapping { "bookmark" => { @@ -38,12 +49,12 @@ def self.mapping #################### # TODO: Make this work for deleted bookmarks - def routing(id) + def routing_info(id) { '_index' => index_name, '_type' => document_type, '_id' => id, - '_parent' => objects[id.to_i].bookmarkable_id + 'parent' => objects[id.to_i].bookmarkable_id } end diff --git a/app/models/es/bookmarkable_indexer.rb b/app/models/es/bookmarkable_indexer.rb index 50a477820d4..e2c8c03716d 100644 --- a/app/models/es/bookmarkable_indexer.rb +++ b/app/models/es/bookmarkable_indexer.rb @@ -19,11 +19,17 @@ def self.mapping }, creators: { type: 'string', - analyzer: 'simple' + analyzer: 'simple', + index_name: 'creator' }, tag: { type: 'string', analyzer: 'simple' + }, + work_types: { + type: 'string', + index: 'not_analyzed', + index_name: 'work_type' } } } diff --git a/app/models/es/bookmarked_work_indexer.rb b/app/models/es/bookmarked_work_indexer.rb index 997778eae75..7931ec105f8 100644 --- a/app/models/es/bookmarked_work_indexer.rb +++ b/app/models/es/bookmarked_work_indexer.rb @@ -3,5 +3,16 @@ class BookmarkedWorkIndexer < ES::BookmarkableIndexer def self.klass "Work" end + + def self.index_all(options={}) + works = Work.joins(:stat_counter).where("bookmarks_count > 0") + total = (works.count / 1000) + 1 + i = 1 + works.find_in_batches do |group| + puts "Reindexing #{klass} batch #{i} of #{total}" + self.new(group.map(&:id)).index_documents + i += 1 + end + end end end \ No newline at end of file diff --git a/app/models/es/indexer.rb b/app/models/es/indexer.rb index 1f45609c0d8..06145d89b1f 100644 --- a/app/models/es/indexer.rb +++ b/app/models/es/indexer.rb @@ -25,6 +25,15 @@ def self.create_index ) end + # Note that the index must exist before you can set the mapping + def self.create_mapping + $elasticsearch.indices.put_mapping( + index: index_name, + type: document_type, + body: mapping + ) + end + def self.mapping { document_type => { @@ -35,13 +44,15 @@ def self.mapping } end - def self.index_all - delete_index - create_index + def self.index_all(options={}) + unless options[:skip_delete] + delete_index + create_index + end total = (klass.constantize.count / 1000) + 1 i = 1 klass.constantize.find_in_batches do |group| - puts "Reindexing batch #{i} of #{total}" + puts "Reindexing #{klass} batch #{i} of #{total}" self.new(group.map(&:id)).index_documents i += 1 end diff --git a/app/models/es/pseud_indexer.rb b/app/models/es/pseud_indexer.rb index 1e0cd70fdd6..9c5710fe04f 100644 --- a/app/models/es/pseud_indexer.rb +++ b/app/models/es/pseud_indexer.rb @@ -25,7 +25,7 @@ def self.mapping def document(object) object.as_json( root: false, - only: [:id, :user_id, :name, :description], + only: [:id, :user_id, :name, :description, :created_at], methods: [:user_login] ) end diff --git a/app/models/es/tag_indexer.rb b/app/models/es/tag_indexer.rb index 5b78e1d6fa7..6a9d54771a1 100644 --- a/app/models/es/tag_indexer.rb +++ b/app/models/es/tag_indexer.rb @@ -25,7 +25,7 @@ def self.mapping def document(object) object.as_json( root: false, - only: [:id, :name, :merger_id, :canonical] + only: [:id, :name, :merger_id, :canonical, :created_at] ).merge(tag_type: object.type) end diff --git a/app/models/es/work_indexer.rb b/app/models/es/work_indexer.rb index 34c00e7d9f1..3abe2968768 100644 --- a/app/models/es/work_indexer.rb +++ b/app/models/es/work_indexer.rb @@ -6,9 +6,72 @@ def self.klass end def self.mapping + { + 'work' => { + properties: { + title: { + type: 'string', + analyzer: 'simple', + }, + creators: { + type: 'string', + analyzer: 'simple', + index_name: 'creator' + }, + tag: { + type: 'string', + analyzer: 'simple' + }, + authors_to_sort_on: { + type: 'string', + index: 'not_analyzed' + }, + title_to_sort_on: { + type: 'string', + index: 'not_analyzed' + }, + imported_from_url: { + type: 'string', + index: 'not_analyzed' + }, + work_types: { + type: 'string', + index_name: 'work_type', + index: 'not_analyzed', + } + } + } + } end def document(object) + object.as_json( + root: false, + except: [ + :delta, :summary_sanitizer_version, :notes_sanitizer_version, + :endnotes_sanitizer_version, :hit_count_old, :last_visitor_old], + methods: [ + :rating_ids, + :warning_ids, + :category_ids, + :fandom_ids, + :character_ids, + :relationship_ids, + :freeform_ids, + :filter_ids, + :tag, + :pseud_ids, + :collection_ids, + :hits, + :comments_count, + :kudos_count, + :bookmarks_count, + :creators, + :crossover, + :work_types, + :nonfiction + ] + ) end end diff --git a/app/models/external_work.rb b/app/models/external_work.rb index f3daba90cb0..bd5632ce559 100644 --- a/app/models/external_work.rb +++ b/app/models/external_work.rb @@ -7,55 +7,6 @@ class ExternalWork < ActiveRecord::Base attr_protected :summary_sanitizer_version has_many :related_works, :as => :parent - - has_many :taggings, :as => :taggable, :dependent => :destroy - has_many :tags, :through => :taggings, :source => :tagger, :source_type => 'Tag' - - has_many :filter_taggings, :as => :filterable, :dependent => :destroy - has_many :filters, :through => :filter_taggings - - has_many :ratings, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Rating'" - has_many :categories, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Category'" - has_many :warnings, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Warning'" - has_many :fandoms, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Fandom'" - has_many :relationships, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Relationship'" - has_many :characters, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Character'" - has_many :freeforms, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Freeform'" belongs_to :language @@ -189,4 +140,38 @@ def tag_groups self.tags.group_by { |t| t.type.to_s } end + ###################### + # SEARCH + ###################### + + def bookmarkable_json + as_json( + root: false, + only: [ + :title, :summary, :hidden_by_admin, :created_at, :language_id + ], + methods: [ + :posted, :restricted, :tag, :filter_ids, :rating_ids, + :warning_ids, :category_ids, :fandom_ids, :character_ids, + :relationship_ids, :freeform_ids, :creators, :revised_at + ] + ).merge(bookmarkable_type: "ExternalWork") + end + + def posted + true + end + + def restricted + false + end + + def creators + [author] + end + + def revised_at + created_at + end + end diff --git a/app/models/series.rb b/app/models/series.rb index 083f7432bec..78857b4f755 100644 --- a/app/models/series.rb +++ b/app/models/series.rb @@ -216,11 +216,11 @@ def revised_at def bookmarkable_json as_json( root: false, - only: [:id, :title, :summary, :hidden_by_admin, :restricted], + only: [:id, :title, :summary, :hidden_by_admin, :restricted, :created_at], methods: [:revised_at, :posted, :tag, :filter_ids, :rating_ids, :warning_ids, :category_ids, :fandom_ids, :character_ids, :relationship_ids, :freeform_ids, :pseud_ids, :creators, :language_id, - :word_count] + :word_count, :work_types] ).merge( anonymous: anonymous?, unrevealed: unrevealed?, @@ -230,7 +230,7 @@ def bookmarkable_json # FIXME: should series have their own language? def language_id - works.first.language_id + works.first.language_id if works.present? end def posted @@ -280,4 +280,8 @@ def pseud_ids def creators anonymous? ? ['Anonymous'] : pseuds.map(&:byline) end + + def work_types + works.map(&:work_types).flatten.uniq + end end diff --git a/app/models/work.rb b/app/models/work.rb index 4b313797033..af24af5560a 100755 --- a/app/models/work.rb +++ b/app/models/work.rb @@ -48,57 +48,6 @@ class Work < ActiveRecord::Base has_many :challenge_claims, :as => :creation accepts_nested_attributes_for :challenge_claims - has_many :filter_taggings, :as => :filterable - has_many :filters, :through => :filter_taggings - has_many :direct_filter_taggings, :class_name => "FilterTagging", :as => :filterable, :conditions => "inherited = 0" - has_many :direct_filters, :source => :filter, :through => :direct_filter_taggings - - has_many :taggings, :as => :taggable, :dependent => :destroy - has_many :tags, :through => :taggings, :source => :tagger, :source_type => 'Tag' - - has_many :ratings, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Rating'" - has_many :categories, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Category'" - has_many :warnings, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Warning'" - has_many :fandoms, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Fandom'" - has_many :relationships, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Relationship'" - has_many :characters, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Character'" - has_many :freeforms, - :through => :taggings, - :source => :tagger, - :source_type => 'Tag', - :before_remove => :remove_filter_tagging, - :conditions => "tags.type = 'Freeform'" - acts_as_commentable has_many :total_comments, :class_name => 'Comment', :through => :chapters has_many :kudos, :as => :commentable, :dependent => :destroy @@ -1308,45 +1257,25 @@ def to_indexed_json ]) end - # Simple name to make it easier for people to use in full-text search - def tag - (tags + filters).uniq.map{ |t| t.name } - end - - # Index all the filters for pulling works - def filter_ids - filters.value_of :id - end - - # Index only direct filters (non meta-tags) for facets - def filters_for_facets - @filters_for_facets ||= filters.where("filter_taggings.inherited = 0") - end - def rating_ids - filters_for_facets.select{ |t| t.type.to_s == 'Rating' }.map{ |t| t.id } - end - def warning_ids - filters_for_facets.select{ |t| t.type.to_s == 'Warning' }.map{ |t| t.id } - end - def category_ids - filters_for_facets.select{ |t| t.type.to_s == 'Category' }.map{ |t| t.id } - end - def fandom_ids - filters_for_facets.select{ |t| t.type.to_s == 'Fandom' }.map{ |t| t.id } - end - def character_ids - filters_for_facets.select{ |t| t.type.to_s == 'Character' }.map{ |t| t.id } - end - def relationship_ids - filters_for_facets.select{ |t| t.type.to_s == 'Relationship' }.map{ |t| t.id } - end - def freeform_ids - filters_for_facets.select{ |t| t.type.to_s == 'Freeform' }.map{ |t| t.id } + def bookmarkable_json + as_json( + root: false, + only: [:id, :title, :summary, :hidden_by_admin, :restricted, :posted, + :created_at, :revised_at, :language_id, :word_count], + methods: [:tag, :filter_ids, :rating_ids, :warning_ids, :category_ids, + :fandom_ids, :character_ids, :relationship_ids, :freeform_ids, + :pseud_ids, :creators, :collection_ids, :work_types] + ).merge( + anonymous: anonymous?, + unrevealed: unrevealed?, + bookmarkable_type: 'Work' + ) end def pseud_ids creatorships.value_of :pseud_id end + def collection_ids approved_collections.value_of(:id, :parent_id).flatten.uniq.compact end @@ -1361,6 +1290,7 @@ def bookmarks_count self.stat_counter.bookmarks_count end + # Deprecated: old search def creator names = "" if anonymous? @@ -1376,4 +1306,44 @@ def creator names end + # New version + def creators + if anonymous? + ["Anonymous"] + else + pseuds.map(&:byline) + external_author_names.value_of(:name) + end + end + + # A work with multiple fandoms which are not related + # to one another can be considered a crossover + def crossover + filters.by_type('Fandom').first_class.count > 1 + end + + # Quick and dirty categorization of the most obvious stuff + # To be replaced by actual categories + def work_types + types = [] + video_ids = [44011] # Video + audio_ids = [70308] # Podfic + art_ids = [7844, 125758] # Fanart, Arts + types << "Video" if (filter_ids & video_ids).present? + types << "Audio" if (filter_ids & audio_ids).present? + types << "Art" if (filter_ids & art_ids).present? + # Very arbitrary cut off here, but wanted to make sure we + # got fic + art/podfic/video tagged as text as well + if types.empty? || word_count > 200 + types << "Text" + end + types + end + + # To be replaced by actual category + # Can't use the 'Meta' tag since that has too many different uses + def nonfiction + nonfiction_tags = [125773, 66586, 123921] # Essays, Nonfiction, Reviews + (filter_ids & nonfiction_tags).present? + end + end diff --git a/lib/taggable.rb b/lib/taggable.rb index e0a6175bbbc..bcd8e974747 100644 --- a/lib/taggable.rb +++ b/lib/taggable.rb @@ -5,6 +5,57 @@ def self.included(taggable) attr_accessor :invalid_tags attr_accessor :preview_mode, :placeholder_tags after_update :reset_placeholders + + has_many :filter_taggings, :as => :filterable + has_many :filters, :through => :filter_taggings + has_many :direct_filter_taggings, :class_name => "FilterTagging", :as => :filterable, :conditions => "inherited = 0" + has_many :direct_filters, :source => :filter, :through => :direct_filter_taggings + + has_many :taggings, :as => :taggable, :dependent => :destroy + has_many :tags, :through => :taggings, :source => :tagger, :source_type => 'Tag' + + has_many :ratings, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Rating'" + has_many :categories, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Category'" + has_many :warnings, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Warning'" + has_many :fandoms, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Fandom'" + has_many :relationships, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Relationship'" + has_many :characters, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Character'" + has_many :freeforms, + :through => :taggings, + :source => :tagger, + :source_type => 'Tag', + :before_remove => :remove_filter_tagging, + :conditions => "tags.type = 'Freeform'" end end @@ -205,4 +256,46 @@ def parse_tags(klass, incoming_tags) end end -end + ################ + # SEARCH + ################ + + public + + # Simple name to make it easier for people to use in full-text search + def tag + (tags + filters).uniq.map{ |t| t.name } + end + + # Index all the filters for pulling works + def filter_ids + filters.value_of :id + end + + # Index only direct filters (non meta-tags) for facets + def filters_for_facets + @filters_for_facets ||= filters.where("filter_taggings.inherited = 0") + end + def rating_ids + filters_for_facets.select{ |t| t.type.to_s == 'Rating' }.map{ |t| t.id } + end + def warning_ids + filters_for_facets.select{ |t| t.type.to_s == 'Warning' }.map{ |t| t.id } + end + def category_ids + filters_for_facets.select{ |t| t.type.to_s == 'Category' }.map{ |t| t.id } + end + def fandom_ids + filters_for_facets.select{ |t| t.type.to_s == 'Fandom' }.map{ |t| t.id } + end + def character_ids + filters_for_facets.select{ |t| t.type.to_s == 'Character' }.map{ |t| t.id } + end + def relationship_ids + filters_for_facets.select{ |t| t.type.to_s == 'Relationship' }.map{ |t| t.id } + end + def freeform_ids + filters_for_facets.select{ |t| t.type.to_s == 'Freeform' }.map{ |t| t.id } + end + +end \ No newline at end of file diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake index 595729eb826..6d58796e4e6 100644 --- a/lib/tasks/search.rake +++ b/lib/tasks/search.rake @@ -6,5 +6,13 @@ namespace :search do desc "Reindex pseuds" task(:index_pseuds => :environment) do ES::PseudIndexer.index_all - end + end + desc "Reindex works" + task(:index_works => :environment) do + ES::WorkIndexer.index_all + end + desc "Reindex bookmarks" + task(:index_bookmarks => :environment) do + ES::BookmarkIndexer.index_all + end end \ No newline at end of file From 9a0ffc9f60eac9fc619af7fb3ccf68825a040c4c Mon Sep 17 00:00:00 2001 From: elz Date: Sun, 14 Sep 2014 14:41:03 -0400 Subject: [PATCH 07/10] Rename search classes for now and remove some duplication --- app/models/es/bookmark_indexer.rb | 76 ---------- app/models/es/bookmark_search.rb | 0 app/models/es/bookmarkable_indexer.rb | 52 ------- .../es/bookmarked_external_work_indexer.rb | 7 - app/models/es/bookmarked_series_indexer.rb | 7 - app/models/es/bookmarked_work_indexer.rb | 18 --- app/models/es/indexer.rb | 127 ----------------- app/models/es/pseud_indexer.rb | 33 ----- app/models/es/pseud_search.rb | 0 app/models/es/tag_indexer.rb | 33 ----- app/models/es/tag_search.rb | 0 app/models/es/work_indexer.rb | 78 ---------- app/models/es/work_search.rb | 0 app/models/search/bookmark_indexer.rb | 74 ++++++++++ app/models/search/bookmark_query.rb | 2 + app/models/search/bookmarkable_indexer.rb | 50 +++++++ .../bookmarked_external_work_indexer.rb | 5 + .../search/bookmarked_series_indexer.rb | 5 + app/models/search/bookmarked_work_indexer.rb | 10 ++ app/models/search/indexer.rb | 134 ++++++++++++++++++ app/models/search/pseud_indexer.rb | 31 ++++ app/models/search/pseud_query.rb | 2 + app/models/search/query.rb | 2 + app/models/search/tag_indexer.rb | 31 ++++ app/models/search/tag_query.rb | 2 + app/models/search/work_indexer.rb | 76 ++++++++++ app/models/search/work_query.rb | 2 + config/application.rb | 2 +- lib/tasks/search.rake | 8 +- 29 files changed, 431 insertions(+), 436 deletions(-) delete mode 100644 app/models/es/bookmark_indexer.rb delete mode 100644 app/models/es/bookmark_search.rb delete mode 100644 app/models/es/bookmarkable_indexer.rb delete mode 100644 app/models/es/bookmarked_external_work_indexer.rb delete mode 100644 app/models/es/bookmarked_series_indexer.rb delete mode 100644 app/models/es/bookmarked_work_indexer.rb delete mode 100644 app/models/es/indexer.rb delete mode 100644 app/models/es/pseud_indexer.rb delete mode 100644 app/models/es/pseud_search.rb delete mode 100644 app/models/es/tag_indexer.rb delete mode 100644 app/models/es/tag_search.rb delete mode 100644 app/models/es/work_indexer.rb delete mode 100644 app/models/es/work_search.rb create mode 100644 app/models/search/bookmark_indexer.rb create mode 100644 app/models/search/bookmark_query.rb create mode 100644 app/models/search/bookmarkable_indexer.rb create mode 100644 app/models/search/bookmarked_external_work_indexer.rb create mode 100644 app/models/search/bookmarked_series_indexer.rb create mode 100644 app/models/search/bookmarked_work_indexer.rb create mode 100644 app/models/search/indexer.rb create mode 100644 app/models/search/pseud_indexer.rb create mode 100644 app/models/search/pseud_query.rb create mode 100644 app/models/search/query.rb create mode 100644 app/models/search/tag_indexer.rb create mode 100644 app/models/search/tag_query.rb create mode 100644 app/models/search/work_indexer.rb create mode 100644 app/models/search/work_query.rb diff --git a/app/models/es/bookmark_indexer.rb b/app/models/es/bookmark_indexer.rb deleted file mode 100644 index 0c6599e7b1a..00000000000 --- a/app/models/es/bookmark_indexer.rb +++ /dev/null @@ -1,76 +0,0 @@ -module ES - class BookmarkIndexer < ES::Indexer - - def self.klass - 'Bookmark' - end - - def self.index_all(options={}) - options[:skip_delete] = true - ES::BookmarkableIndexer.delete_index - ES::BookmarkableIndexer.create_index - create_mapping - ES::BookmarkedExternalWorkIndexer.index_all(skip_delete: true) - ES::BookmarkedSeriesIndexer.index_all(skip_delete: true) - ES::BookmarkedWorkIndexer.index_all(skip_delete: true) - super - end - - def self.mapping - { - "bookmark" => { - "_parent" => { - type: 'bookmarkable' - }, - properties: { - bookmarkable_type: { - type: 'string', - index: 'not_analyzed' - }, - bookmarker: { - type: 'string', - analyzer: 'simple' - }, - notes: { - type: 'string', - analyzer: 'snowball' - }, - tag: { - type: 'string', - analyzer: 'simple' - } - } - } - } - end - - #################### - # INSTANCE METHODS - #################### - - # TODO: Make this work for deleted bookmarks - def routing_info(id) - { - '_index' => index_name, - '_type' => document_type, - '_id' => id, - 'parent' => objects[id.to_i].bookmarkable_id - } - end - - def document(object) - tags = object.tags - filters = tags.map{ |t| t.filter }.compact - - object.as_json( - root: false, - except: [:notes_sanitizer_version, :delta], - methods: [:bookmarker, :collection_ids, :with_notes] - ).merge( - tag: (tags + filters).map(&:name).uniq, - tag_ids: tags.map(&:id), - filter_ids: filters.map(&:id) - ) - end - end -end \ No newline at end of file diff --git a/app/models/es/bookmark_search.rb b/app/models/es/bookmark_search.rb deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/app/models/es/bookmarkable_indexer.rb b/app/models/es/bookmarkable_indexer.rb deleted file mode 100644 index e2c8c03716d..00000000000 --- a/app/models/es/bookmarkable_indexer.rb +++ /dev/null @@ -1,52 +0,0 @@ -module ES - class BookmarkableIndexer < ES::Indexer - - def self.index_name - "ao3_#{Rails.env}_bookmarks" - end - - def self.document_type - 'bookmarkable' - end - - def self.mapping - { - 'bookmarkable' => { - properties: { - title: { - type: 'string', - analyzer: 'simple' - }, - creators: { - type: 'string', - analyzer: 'simple', - index_name: 'creator' - }, - tag: { - type: 'string', - analyzer: 'simple' - }, - work_types: { - type: 'string', - index: 'not_analyzed', - index_name: 'work_type' - } - } - } - } - end - - def routing_info(id) - { - '_index' => index_name, - '_type' => document_type, - '_id' => "#{id}-#{klass.underscore}" - } - end - - def document(object) - object.bookmarkable_json - end - - end -end \ No newline at end of file diff --git a/app/models/es/bookmarked_external_work_indexer.rb b/app/models/es/bookmarked_external_work_indexer.rb deleted file mode 100644 index e7694067e2e..00000000000 --- a/app/models/es/bookmarked_external_work_indexer.rb +++ /dev/null @@ -1,7 +0,0 @@ -module ES - class BookmarkedExternalWorkIndexer < ES::BookmarkableIndexer - def self.klass - "ExternalWork" - end - end -end \ No newline at end of file diff --git a/app/models/es/bookmarked_series_indexer.rb b/app/models/es/bookmarked_series_indexer.rb deleted file mode 100644 index b4e6dd66cb3..00000000000 --- a/app/models/es/bookmarked_series_indexer.rb +++ /dev/null @@ -1,7 +0,0 @@ -module ES - class BookmarkedSeriesIndexer < ES::BookmarkableIndexer - def self.klass - "Series" - end - end -end \ No newline at end of file diff --git a/app/models/es/bookmarked_work_indexer.rb b/app/models/es/bookmarked_work_indexer.rb deleted file mode 100644 index 7931ec105f8..00000000000 --- a/app/models/es/bookmarked_work_indexer.rb +++ /dev/null @@ -1,18 +0,0 @@ -module ES - class BookmarkedWorkIndexer < ES::BookmarkableIndexer - def self.klass - "Work" - end - - def self.index_all(options={}) - works = Work.joins(:stat_counter).where("bookmarks_count > 0") - total = (works.count / 1000) + 1 - i = 1 - works.find_in_batches do |group| - puts "Reindexing #{klass} batch #{i} of #{total}" - self.new(group.map(&:id)).index_documents - i += 1 - end - end - end -end \ No newline at end of file diff --git a/app/models/es/indexer.rb b/app/models/es/indexer.rb deleted file mode 100644 index 06145d89b1f..00000000000 --- a/app/models/es/indexer.rb +++ /dev/null @@ -1,127 +0,0 @@ -module ES - class Indexer - - ################## - # CLASS METHODS - ################## - - def self.klass - raise "Must be defined in subclass" - end - - def self.delete_index - if $elasticsearch.indices.exists(index: index_name) - $elasticsearch.indices.delete(index: index_name) - end - end - - def self.create_index - $elasticsearch.indices.create( - index: index_name, - type: document_type, - body: { - mapping: mapping - } - ) - end - - # Note that the index must exist before you can set the mapping - def self.create_mapping - $elasticsearch.indices.put_mapping( - index: index_name, - type: document_type, - body: mapping - ) - end - - def self.mapping - { - document_type => { - properties: { - #add properties in subclasses - } - } - } - end - - def self.index_all(options={}) - unless options[:skip_delete] - delete_index - create_index - end - total = (klass.constantize.count / 1000) + 1 - i = 1 - klass.constantize.find_in_batches do |group| - puts "Reindexing #{klass} batch #{i} of #{total}" - self.new(group.map(&:id)).index_documents - i += 1 - end - end - - def self.index_name - "ao3_#{Rails.env}_#{klass.underscore.pluralize}" - end - - def self.document_type - klass.underscore - end - - #################### - # INSTANCE METHODS - #################### - - attr_reader :ids - - def initialize(ids) - @ids = ids - end - - def klass - self.class.klass - end - - def index_name - self.class.index_name - end - - def document_type - self.class.document_type - end - - def objects - @objects ||= klass.constantize.where(id: ids).inject({}) do |h, obj| - h.merge(obj.id => obj) - end - end - - def batch - @batch = [] - ids.each do |id| - object = objects[id.to_i] - if object.present? - @batch << { index: routing_info(id) } - @batch << document(object) - else - @batch << { delete: routing_info(id) } - end - end - @batch - end - - def index_documents - $elasticsearch.bulk(body: batch) - end - - def routing_info(id) - { - '_index' => index_name, - '_type' => document_type, - '_id' => id - } - end - - def document(object) - object.as_json(root: false) - end - end -end diff --git a/app/models/es/pseud_indexer.rb b/app/models/es/pseud_indexer.rb deleted file mode 100644 index 9c5710fe04f..00000000000 --- a/app/models/es/pseud_indexer.rb +++ /dev/null @@ -1,33 +0,0 @@ -module ES - class PseudIndexer < ES::Indexer - - def self.klass - 'Pseud' - end - - def self.mapping - { - 'pseud' => { - properties: { - name: { - type: 'string', - analyzer: 'simple' - }, - user_login: { - type: 'string', - analyzer: 'simple' - } - } - } - } - end - - def document(object) - object.as_json( - root: false, - only: [:id, :user_id, :name, :description, :created_at], - methods: [:user_login] - ) - end - end -end \ No newline at end of file diff --git a/app/models/es/pseud_search.rb b/app/models/es/pseud_search.rb deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/app/models/es/tag_indexer.rb b/app/models/es/tag_indexer.rb deleted file mode 100644 index 6a9d54771a1..00000000000 --- a/app/models/es/tag_indexer.rb +++ /dev/null @@ -1,33 +0,0 @@ -module ES - class TagIndexer < ES::Indexer - - def self.klass - 'Tag' - end - - def self.mapping - { - tag: { - properties: { - name: { - type: 'string', - analyzer: 'simple' - }, - tag_type: { - type: 'string', - index: 'not_analyzed' - } - } - } - } - end - - def document(object) - object.as_json( - root: false, - only: [:id, :name, :merger_id, :canonical, :created_at] - ).merge(tag_type: object.type) - end - - end -end \ No newline at end of file diff --git a/app/models/es/tag_search.rb b/app/models/es/tag_search.rb deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/app/models/es/work_indexer.rb b/app/models/es/work_indexer.rb deleted file mode 100644 index 3abe2968768..00000000000 --- a/app/models/es/work_indexer.rb +++ /dev/null @@ -1,78 +0,0 @@ -module ES - class WorkIndexer < ES::Indexer - - def self.klass - 'Work' - end - - def self.mapping - { - 'work' => { - properties: { - title: { - type: 'string', - analyzer: 'simple', - }, - creators: { - type: 'string', - analyzer: 'simple', - index_name: 'creator' - }, - tag: { - type: 'string', - analyzer: 'simple' - }, - authors_to_sort_on: { - type: 'string', - index: 'not_analyzed' - }, - title_to_sort_on: { - type: 'string', - index: 'not_analyzed' - }, - imported_from_url: { - type: 'string', - index: 'not_analyzed' - }, - work_types: { - type: 'string', - index_name: 'work_type', - index: 'not_analyzed', - } - } - } - } - end - - def document(object) - object.as_json( - root: false, - except: [ - :delta, :summary_sanitizer_version, :notes_sanitizer_version, - :endnotes_sanitizer_version, :hit_count_old, :last_visitor_old], - methods: [ - :rating_ids, - :warning_ids, - :category_ids, - :fandom_ids, - :character_ids, - :relationship_ids, - :freeform_ids, - :filter_ids, - :tag, - :pseud_ids, - :collection_ids, - :hits, - :comments_count, - :kudos_count, - :bookmarks_count, - :creators, - :crossover, - :work_types, - :nonfiction - ] - ) - end - - end -end \ No newline at end of file diff --git a/app/models/es/work_search.rb b/app/models/es/work_search.rb deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/app/models/search/bookmark_indexer.rb b/app/models/search/bookmark_indexer.rb new file mode 100644 index 00000000000..24d545f1315 --- /dev/null +++ b/app/models/search/bookmark_indexer.rb @@ -0,0 +1,74 @@ +class BookmarkIndexer < Indexer + + def self.klass + 'Bookmark' + end + + def self.index_all(options={}) + options[:skip_delete] = true + BookmarkableIndexer.delete_index + BookmarkableIndexer.create_index + create_mapping + BookmarkedExternalWorkIndexer.index_all(skip_delete: true) + BookmarkedSeriesIndexer.index_all(skip_delete: true) + BookmarkedWorkIndexer.index_all(skip_delete: true) + super + end + + def self.mapping + { + "bookmark" => { + "_parent" => { + type: 'bookmarkable' + }, + properties: { + bookmarkable_type: { + type: 'string', + index: 'not_analyzed' + }, + bookmarker: { + type: 'string', + analyzer: 'simple' + }, + notes: { + type: 'string', + analyzer: 'snowball' + }, + tag: { + type: 'string', + analyzer: 'simple' + } + } + } + } + end + + #################### + # INSTANCE METHODS + #################### + + # TODO: Make this work for deleted bookmarks + def routing_info(id) + { + '_index' => index_name, + '_type' => document_type, + '_id' => id, + 'parent' => objects[id.to_i].bookmarkable_id + } + end + + def document(object) + tags = object.tags + filters = tags.map{ |t| t.filter }.compact + + object.as_json( + root: false, + except: [:notes_sanitizer_version, :delta], + methods: [:bookmarker, :collection_ids, :with_notes] + ).merge( + tag: (tags + filters).map(&:name).uniq, + tag_ids: tags.map(&:id), + filter_ids: filters.map(&:id) + ) + end +end diff --git a/app/models/search/bookmark_query.rb b/app/models/search/bookmark_query.rb new file mode 100644 index 00000000000..bf5a6a4b0d4 --- /dev/null +++ b/app/models/search/bookmark_query.rb @@ -0,0 +1,2 @@ +class BookmarkQuery < Query +end diff --git a/app/models/search/bookmarkable_indexer.rb b/app/models/search/bookmarkable_indexer.rb new file mode 100644 index 00000000000..3e222e3106c --- /dev/null +++ b/app/models/search/bookmarkable_indexer.rb @@ -0,0 +1,50 @@ +class BookmarkableIndexer < Indexer + + def self.index_name + "ao3_#{Rails.env}_bookmarks" + end + + def self.document_type + 'bookmarkable' + end + + def self.mapping + { + 'bookmarkable' => { + properties: { + title: { + type: 'string', + analyzer: 'simple' + }, + creators: { + type: 'string', + analyzer: 'simple', + index_name: 'creator' + }, + tag: { + type: 'string', + analyzer: 'simple' + }, + work_types: { + type: 'string', + index: 'not_analyzed', + index_name: 'work_type' + } + } + } + } + end + + def routing_info(id) + { + '_index' => index_name, + '_type' => document_type, + '_id' => "#{id}-#{klass.underscore}" + } + end + + def document(object) + object.bookmarkable_json + end + +end diff --git a/app/models/search/bookmarked_external_work_indexer.rb b/app/models/search/bookmarked_external_work_indexer.rb new file mode 100644 index 00000000000..39ea7599b70 --- /dev/null +++ b/app/models/search/bookmarked_external_work_indexer.rb @@ -0,0 +1,5 @@ +class BookmarkedExternalWorkIndexer < BookmarkableIndexer + def self.klass + "ExternalWork" + end +end diff --git a/app/models/search/bookmarked_series_indexer.rb b/app/models/search/bookmarked_series_indexer.rb new file mode 100644 index 00000000000..0b86d4bddd8 --- /dev/null +++ b/app/models/search/bookmarked_series_indexer.rb @@ -0,0 +1,5 @@ +class BookmarkedSeriesIndexer < BookmarkableIndexer + def self.klass + "Series" + end +end diff --git a/app/models/search/bookmarked_work_indexer.rb b/app/models/search/bookmarked_work_indexer.rb new file mode 100644 index 00000000000..b2154781695 --- /dev/null +++ b/app/models/search/bookmarked_work_indexer.rb @@ -0,0 +1,10 @@ +class BookmarkedWorkIndexer < BookmarkableIndexer + def self.klass + "Work" + end + + # Only index works with bookmarks + def self.indexables + Work.joins(:stat_counter).where("bookmarks_count > 0") + end +end diff --git a/app/models/search/indexer.rb b/app/models/search/indexer.rb new file mode 100644 index 00000000000..52aa1d68e4b --- /dev/null +++ b/app/models/search/indexer.rb @@ -0,0 +1,134 @@ +class Indexer + + ################## + # CLASS METHODS + ################## + + def self.klass + raise "Must be defined in subclass" + end + + def self.delete_index + if $elasticsearch.indices.exists(index: index_name) + $elasticsearch.indices.delete(index: index_name) + end + end + + def self.create_index + $elasticsearch.indices.create( + index: index_name, + type: document_type, + body: { + mapping: mapping + } + ) + end + + # Note that the index must exist before you can set the mapping + def self.create_mapping + $elasticsearch.indices.put_mapping( + index: index_name, + type: document_type, + body: mapping + ) + end + + def self.mapping + { + document_type => { + properties: { + #add properties in subclasses + } + } + } + end + + def self.index_all(options={}) + unless options[:skip_delete] + delete_index + create_index + end + index_from_db + end + + def self.index_from_db + total = (indexables.count / 1000) + 1 + i = 1 + indexables.find_in_batches do |group| + puts "Reindexing #{klass} batch #{i} of #{total}" + self.new(group.map(&:id)).index_documents + i += 1 + end + end + + # Add conditions here + def self.indexables + klass.constantize + end + + def self.index_name + "ao3_#{Rails.env}_#{klass.underscore.pluralize}" + end + + def self.document_type + klass.underscore + end + + #################### + # INSTANCE METHODS + #################### + + attr_reader :ids + + def initialize(ids) + @ids = ids + end + + def klass + self.class.klass + end + + def index_name + self.class.index_name + end + + def document_type + self.class.document_type + end + + def objects + @objects ||= klass.constantize.where(id: ids).inject({}) do |h, obj| + h.merge(obj.id => obj) + end + end + + def batch + @batch = [] + ids.each do |id| + object = objects[id.to_i] + if object.present? + @batch << { index: routing_info(id) } + @batch << document(object) + else + @batch << { delete: routing_info(id) } + end + end + @batch + end + + def index_documents + $elasticsearch.bulk(body: batch) + end + + def routing_info(id) + { + '_index' => index_name, + '_type' => document_type, + '_id' => id + } + end + + def document(object) + object.as_json(root: false) + end +end diff --git a/app/models/search/pseud_indexer.rb b/app/models/search/pseud_indexer.rb new file mode 100644 index 00000000000..e7ae573a857 --- /dev/null +++ b/app/models/search/pseud_indexer.rb @@ -0,0 +1,31 @@ +class PseudIndexer < Indexer + + def self.klass + 'Pseud' + end + + def self.mapping + { + 'pseud' => { + properties: { + name: { + type: 'string', + analyzer: 'simple' + }, + user_login: { + type: 'string', + analyzer: 'simple' + } + } + } + } + end + + def document(object) + object.as_json( + root: false, + only: [:id, :user_id, :name, :description, :created_at], + methods: [:user_login] + ) + end +end diff --git a/app/models/search/pseud_query.rb b/app/models/search/pseud_query.rb new file mode 100644 index 00000000000..3dd09412368 --- /dev/null +++ b/app/models/search/pseud_query.rb @@ -0,0 +1,2 @@ +class PseudQuery < Query +end diff --git a/app/models/search/query.rb b/app/models/search/query.rb new file mode 100644 index 00000000000..c3bb2f0e165 --- /dev/null +++ b/app/models/search/query.rb @@ -0,0 +1,2 @@ +class Query +end diff --git a/app/models/search/tag_indexer.rb b/app/models/search/tag_indexer.rb new file mode 100644 index 00000000000..d9f06e245b6 --- /dev/null +++ b/app/models/search/tag_indexer.rb @@ -0,0 +1,31 @@ +class TagIndexer < Indexer + + def self.klass + 'Tag' + end + + def self.mapping + { + tag: { + properties: { + name: { + type: 'string', + analyzer: 'simple' + }, + tag_type: { + type: 'string', + index: 'not_analyzed' + } + } + } + } + end + + def document(object) + object.as_json( + root: false, + only: [:id, :name, :merger_id, :canonical, :created_at] + ).merge(tag_type: object.type) + end + +end diff --git a/app/models/search/tag_query.rb b/app/models/search/tag_query.rb new file mode 100644 index 00000000000..30cd139dbf5 --- /dev/null +++ b/app/models/search/tag_query.rb @@ -0,0 +1,2 @@ +class TagQuery < Query +end diff --git a/app/models/search/work_indexer.rb b/app/models/search/work_indexer.rb new file mode 100644 index 00000000000..87288fcfc95 --- /dev/null +++ b/app/models/search/work_indexer.rb @@ -0,0 +1,76 @@ +class WorkIndexer < Indexer + + def self.klass + 'Work' + end + + def self.mapping + { + 'work' => { + properties: { + title: { + type: 'string', + analyzer: 'simple', + }, + creators: { + type: 'string', + analyzer: 'simple', + index_name: 'creator' + }, + tag: { + type: 'string', + analyzer: 'simple' + }, + authors_to_sort_on: { + type: 'string', + index: 'not_analyzed' + }, + title_to_sort_on: { + type: 'string', + index: 'not_analyzed' + }, + imported_from_url: { + type: 'string', + index: 'not_analyzed' + }, + work_types: { + type: 'string', + index_name: 'work_type', + index: 'not_analyzed', + } + } + } + } + end + + def document(object) + object.as_json( + root: false, + except: [ + :delta, :summary_sanitizer_version, :notes_sanitizer_version, + :endnotes_sanitizer_version, :hit_count_old, :last_visitor_old], + methods: [ + :rating_ids, + :warning_ids, + :category_ids, + :fandom_ids, + :character_ids, + :relationship_ids, + :freeform_ids, + :filter_ids, + :tag, + :pseud_ids, + :collection_ids, + :hits, + :comments_count, + :kudos_count, + :bookmarks_count, + :creators, + :crossover, + :work_types, + :nonfiction + ] + ) + end + +end diff --git a/app/models/search/work_query.rb b/app/models/search/work_query.rb new file mode 100644 index 00000000000..e12a7461a94 --- /dev/null +++ b/app/models/search/work_query.rb @@ -0,0 +1,2 @@ +class WorkQuery < Query +end diff --git a/config/application.rb b/config/application.rb index fab595b2a2f..59d7d03a1fb 100644 --- a/config/application.rb +++ b/config/application.rb @@ -16,7 +16,7 @@ class Application < Rails::Application # config.autoload_paths += %W(#{config.root}/extras) config.autoload_paths += %W(#{Rails.root}/lib) config.autoload_paths += %W(#{Rails.root}/app/sweepers) - %w(challenge_models tagset_models indexing es).each do |dir| + %w(challenge_models tagset_models indexing search).each do |dir| config.autoload_paths << "#{Rails.root}/app/models/#{dir}" end diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake index 6d58796e4e6..c1c51f088ea 100644 --- a/lib/tasks/search.rake +++ b/lib/tasks/search.rake @@ -1,18 +1,18 @@ namespace :search do desc "Reindex tags" task(:index_tags => :environment) do - ES::TagIndexer.index_all + TagIndexer.index_all end desc "Reindex pseuds" task(:index_pseuds => :environment) do - ES::PseudIndexer.index_all + PseudIndexer.index_all end desc "Reindex works" task(:index_works => :environment) do - ES::WorkIndexer.index_all + WorkIndexer.index_all end desc "Reindex bookmarks" task(:index_bookmarks => :environment) do - ES::BookmarkIndexer.index_all + BookmarkIndexer.index_all end end \ No newline at end of file From f5497ca99101a9267b14fa4bc6e0af03bea10072 Mon Sep 17 00:00:00 2001 From: elz Date: Sun, 14 Sep 2014 15:03:41 -0400 Subject: [PATCH 08/10] Fix for bookmark error on boolean method names --- app/models/external_work.rb | 2 ++ app/models/series.rb | 1 + 2 files changed, 3 insertions(+) diff --git a/app/models/external_work.rb b/app/models/external_work.rb index bd5632ce559..861df7a94b1 100644 --- a/app/models/external_work.rb +++ b/app/models/external_work.rb @@ -161,10 +161,12 @@ def bookmarkable_json def posted true end + alias_method :posted?, :posted def restricted false end + alias_method :restricted?, :restricted def creators [author] diff --git a/app/models/series.rb b/app/models/series.rb index 78857b4f755..bc58274412c 100644 --- a/app/models/series.rb +++ b/app/models/series.rb @@ -236,6 +236,7 @@ def language_id def posted !posted_works.empty? end + alias_method :posted?, :posted # Simple name to make it easier for people to use in full-text search def tag From ae0ea64e438bdbd503d4ae50a0d98c8364d5f44b Mon Sep 17 00:00:00 2001 From: elz Date: Sun, 14 Sep 2014 18:59:35 -0400 Subject: [PATCH 09/10] Give the batch size a name --- app/models/search/indexer.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/models/search/indexer.rb b/app/models/search/indexer.rb index 52aa1d68e4b..90aaddb49f9 100644 --- a/app/models/search/indexer.rb +++ b/app/models/search/indexer.rb @@ -1,5 +1,7 @@ class Indexer + BATCH_SIZE = 1000 + ################## # CLASS METHODS ################## @@ -52,9 +54,9 @@ def self.index_all(options={}) end def self.index_from_db - total = (indexables.count / 1000) + 1 + total = (indexables.count / BATCH_SIZE) + 1 i = 1 - indexables.find_in_batches do |group| + indexables.find_in_batches(batch_size: BATCH_SIZE) do |group| puts "Reindexing #{klass} batch #{i} of #{total}" self.new(group.map(&:id)).index_documents i += 1 From ab87444fccec3506b6936ea80dd251e11f128ee8 Mon Sep 17 00:00:00 2001 From: elz Date: Mon, 15 Sep 2014 09:48:01 -0400 Subject: [PATCH 10/10] Fix syntax on index creation --- app/models/search/indexer.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/models/search/indexer.rb b/app/models/search/indexer.rb index 90aaddb49f9..50e1299a999 100644 --- a/app/models/search/indexer.rb +++ b/app/models/search/indexer.rb @@ -21,7 +21,12 @@ def self.create_index index: index_name, type: document_type, body: { - mapping: mapping + settings: { + index: { + number_of_shards: 5 + } + }, + mappings: mapping } ) end