Skip to content

Commit

Permalink
Public status index (#11)
Browse files Browse the repository at this point in the history
* Add a new public status index

* Create an importer for the new index

* Try to add a vacuum thing

* quick lint

* make sure that the settings are correct. TODO: add a test making sure the settings are set up correctly

* One more easy one...

* Hunt down all the status index usage except for the actual searching and the tests

* consolidate and clean up a bit

* forgot the end

* Add new test

* Move test to the right spot and try again

* clean up the test

* use described_class instead and clean up the chewy fields a bit

* What happens if you change your discoverable attribute

* lint

* rename the file

* Just trying to make the build happy again...

* it helps when you know what your methods are called

* add an if here cause it seems a bit smarter

* too many blank lines...

* Separate out into more than a single file

* Make the public index really public

* Add some new lines...

* Try to dumb down the tests again so that I can get them to pass....

* gosh so many rules

* annoying contradictory rules...make a new method

* I don't think this was working correctly

* Add one more test and then I think I duplicated everything that I need to before adding any new tests (if I am doing that...) and actually implementing the search part of this

* Update the public status importer

* Try to do a better join...not sure that I really know what I'm doing at this point

* Get this working. see if we can make it better...

* Okay. I think this is the best I am going to do

* add some new tests for the workers

* Lint the tests and the files with them...

* I think this is actually more correct

* Add new test file for the new concern

* lint

* We need to override the chewy's strategy's callback so that if we have a status also try to write it to the public status. The chewy index will take care of the rest for us and figure out what should actually happen

* Single quotes

* Update the index the right way

* Get searching working...I think

* I think this is a more correct query
  • Loading branch information
jsgoldstein authored Aug 4, 2023
1 parent 8c53339 commit b7d4c84
Show file tree
Hide file tree
Showing 20 changed files with 559 additions and 35 deletions.
79 changes: 79 additions & 0 deletions app/chewy/public_statuses_index.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# frozen_string_literal: true

# Chewy index of public statuses authored by discoverable accounts.
#
# Each document exposes the status id, the author's account id and the
# status's searchable text, the latter with an extra `stemmed` subfield that
# is analyzed with the `content` analyzer declared below.
class PublicStatusesIndex < Chewy::Index
  include FormattingHelper

  # Token filters plus the `content` analyzer referenced by `text.stemmed`.
  analysis_settings = {
    filter: {
      english_stop: { type: 'stop', stopwords: '_english_' },
      english_stemmer: { type: 'stemmer', language: 'english' },
      english_possessive_stemmer: { type: 'stemmer', language: 'possessive_english' },
    },
    analyzer: {
      content: {
        tokenizer: 'uax_url_email',
        filter: %w(
          english_possessive_stemmer
          lowercase
          asciifolding
          cjk_width
          english_stop
          english_stemmer
        ),
      },
    },
  }

  settings index: { refresh_interval: '30s' }, analysis: analysis_settings

  # The delete_if option is deliberately not used here: it would call a method
  # that expects crutches without passing them, causing n+1 queries.
  index_scope(
    ::Status.unscoped.
      kept.
      without_reblogs.
      includes(:media_attachments, :preloadable_poll).
      joins(:account).
      where(accounts: { discoverable: true }).
      where(visibility: :public)
  )

  # Turns [[status_id, account_id], ...] into { status_id => [account_id, ...] }.
  group_by_status = lambda do |pairs|
    pairs.each_with_object({}) do |(status_id, account_id), grouped|
      (grouped[status_id] ||= []) << account_id
    end
  end

  # NOTE(review): none of the fields in the root mapping below reads these
  # crutches; confirm whether they are still needed for this index.

  # Account ids (from Account.local) non-silently mentioned in each status.
  crutch :mentions do |collection|
    status_ids = collection.map(&:id)
    pairs = ::Mention.where(status_id: status_ids).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
    group_by_status.call(pairs)
  end

  # Account ids (from Account.local) that favourited each status.
  crutch :favourites do |collection|
    status_ids = collection.map(&:id)
    pairs = ::Favourite.where(status_id: status_ids).where(account: Account.local).pluck(:status_id, :account_id)
    group_by_status.call(pairs)
  end

  # Account ids (from Account.local) that reblogged each status.
  crutch :reblogs do |collection|
    status_ids = collection.map(&:id)
    pairs = ::Status.where(reblog_of_id: status_ids).where(account: Account.local).pluck(:reblog_of_id, :account_id)
    group_by_status.call(pairs)
  end

  # Account ids (from Account.local) that bookmarked each status.
  crutch :bookmarks do |collection|
    status_ids = collection.map(&:id)
    pairs = ::Bookmark.where(status_id: status_ids).where(account: Account.local).pluck(:status_id, :account_id)
    group_by_status.call(pairs)
  end

  # Account ids (from Account.local) that voted in the poll of each status.
  crutch :votes do |collection|
    status_ids = collection.map(&:id)
    pairs = ::PollVote.joins(:poll).where(poll: { status_id: status_ids }).where(account: Account.local).pluck(:status_id, :account_id)
    group_by_status.call(pairs)
  end

  root date_detection: false do
    field(:id, type: 'long')
    field(:account_id, type: 'long')

    # Full text of the status, with a stemmed variant for English-aware matching.
    field(:text, type: 'text', value: ->(status) { status.searchable_text }) do
      field(:stemmed, type: 'text', analyzer: 'content')
    end
  end
end
10 changes: 5 additions & 5 deletions app/chewy/statuses_index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ class StatusesIndex < Chewy::Index
end

root date_detection: false do
field :id, type: 'long'
field :account_id, type: 'long'
field(:id, type: 'long')
field(:account_id, type: 'long')

field :text, type: 'text', value: ->(status) { status.searchable_text } do
field :stemmed, type: 'text', analyzer: 'content'
field(:text, type: 'text', value: ->(status) { status.searchable_text }) do
field(:stemmed, type: 'text', analyzer: 'content')
end

field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
field(:searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) })
end
end
94 changes: 94 additions & 0 deletions app/lib/importer/public_statuses_index_importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# frozen_string_literal: true

# Bulk-imports statuses into PublicStatusesIndex.
#
# Candidate status ids are collected from several scopes (see #scopes), loaded
# in batches and written to the index through Chewy's bulk builder/request.
# Batching and scheduling helpers (@batch_size, in_work_unit, wait!) are
# presumably provided by Importer::BaseImporter, which is not visible here.
class Importer::PublicStatusesIndexImporter < Importer::BaseImporter
  # Walks every scope in batches of @batch_size, builds a bulk body for the
  # matching statuses and sends it to the index, then calls wait!.
  # Each work unit evaluates to an [indexed, deleted] pair of entry counts.
  def import!
    scopes.each do |scope|
      scope.find_in_batches(batch_size: @batch_size) do |batch|
        in_work_unit(batch.map(&:status_id)) do |status_ids|
          bulk = ActiveRecord::Base.connection_pool.with_connection do
            Chewy::Index::Import::BulkBuilder.new(index, to_index: indexable_statuses(status_ids)).bulk_body
          end

          # Entries carrying an :index key are writes; anything else is
          # counted as a deletion.
          indexed = bulk.count { |entry| entry[:index] }
          deleted = bulk.size - indexed

          Chewy::Index::Import::BulkRequest.new(index).perform(bulk)

          [indexed, deleted]
        end
      end
    end

    wait!
  end

  private

  # The Chewy index this importer writes to.
  def index
    PublicStatusesIndex
  end

  # Statuses among status_ids that may be written to the public index:
  # authored by a discoverable account and themselves public.
  #
  # Visibility is re-checked here because local_statuses_scope maps a reblog
  # to the id of the status it reblogs, and that status's own visibility is
  # not tested by the scope.
  def indexable_statuses(status_ids)
    Status.includes(:media_attachments, :preloadable_poll)
          .joins(:account)
          .where(accounts: { discoverable: true })
          .where(visibility: :public)
          .where(id: status_ids)
  end

  # Relations whose rows respond to #status_id; processed in this order.
  def scopes
    [
      local_statuses_scope,
      local_mentions_scope,
      local_favourites_scope,
      local_votes_scope,
      local_bookmarks_scope,
    ]
  end

  # Non-silent mentions of Account.local accounts in public statuses written
  # by discoverable accounts.
  def local_mentions_scope
    Mention.where(account: Account.local, silent: false)
           .joins(status: :account)
           .where(accounts: { discoverable: true })
           .where(statuses: { visibility: :public })
           .select('mentions.id, statuses.id AS status_id')
  end

  # Favourites made by Account.local accounts on public statuses written by
  # discoverable accounts.
  def local_favourites_scope
    Favourite.where(account: Account.local)
             .joins(status: :account)
             .where(accounts: { discoverable: true })
             .where(statuses: { visibility: :public })
             .select('favourites.id, statuses.id AS status_id')
  end

  # Bookmarks on public statuses written by discoverable accounts.
  def local_bookmarks_scope
    Bookmark.joins(status: :account)
            .where(accounts: { discoverable: true })
            .where(statuses: { visibility: :public })
            .select('bookmarks.id, statuses.id AS status_id')
  end

  # Polls attached to public statuses that received a vote from a
  # discoverable account. The account filter is a subquery so the id list is
  # never materialised in Ruby.
  #
  # NOTE(review): unlike the sibling scopes this filters on the *voter* being
  # discoverable rather than on Account.local voters and a discoverable
  # author - confirm this is intended.
  def local_votes_scope
    Poll.joins(:votes)
        .where(poll_votes: { account_id: Account.where(discoverable: true).select(:id) })
        .where(status_id: Status.where(visibility: :public).select(:id))
  end

  # Public Status.local statuses by discoverable accounts; for a reblog,
  # status_id is the id of the reblogged status.
  def local_statuses_scope
    Status.local
          .select('"statuses"."id", COALESCE("statuses"."reblog_of_id", "statuses"."id") AS status_id')
          .joins(:account)
          .where(accounts: { discoverable: true })
          .where(visibility: :public)
  end
end
9 changes: 6 additions & 3 deletions app/lib/vacuum/statuses_vacuum.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ def vacuum_statuses!
statuses.direct_visibility
.includes(mentions: :account)
.find_each(&:unlink_from_conversations!)
remove_from_search_index(statuses.ids) if Chewy.enabled?
if Chewy.enabled?
remove_from_index(statuses.ids, 'chewy:queue:StatusesIndex')
remove_from_index(statuses.ids, 'chewy:queue:PublicStatusesIndex')
end

# Foreign keys take care of most associated records for us.
# Media attachments will be orphaned.
Expand All @@ -38,7 +41,7 @@ def retention_period_as_id
Mastodon::Snowflake.id_at(@retention_period.ago, with_random: false)
end

def remove_from_search_index(status_ids)
with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', status_ids) }
# Adds status_ids to the Redis set stored under the key given as `index`
# (callers pass a Chewy queue key such as 'chewy:queue:StatusesIndex').
def remove_from_index(status_ids, index)
  with_redis do |connection|
    connection.sadd(index, status_ids)
  end
end
end
7 changes: 7 additions & 0 deletions app/models/account.rb
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class Account < ApplicationRecord
include DomainMaterializable
include AccountMerging
include AccountSearch
include AccountStatusesSearch

enum protocol: { ostatus: 0, activitypub: 1 }
enum suspension_origin: { local: 0, remote: 1 }, _prefix: true
Expand Down Expand Up @@ -131,6 +132,7 @@ class Account < ApplicationRecord
scope :not_domain_blocked_by_account, ->(account) { where(arel_table[:domain].eq(nil).or(arel_table[:domain].not_in(account.excluded_from_timeline_domains))) }

after_update_commit :trigger_update_webhooks
# Re-sync this account's statuses with the public index whenever
# `discoverable` changes. Both checks live inside one lambda: with
# `if: :saved_change_to_discoverable? and Chewy.enabled?` the low-precedence
# `and` binds to the whole callback declaration, so the Chewy check never
# becomes part of the condition. Registered as an after-commit callback so the
# worker is not enqueued before the change has been persisted.
after_update_commit :enqueue_update_public_statuses_index, if: -> { saved_change_to_discoverable? && Chewy.enabled? }

delegate :email,
:unconfirmed_email,
Expand Down Expand Up @@ -168,6 +170,10 @@ def bot?
%w(Application Service).include? actor_type
end

# Inverse of discoverable?: true exactly when discoverable? is falsy.
def undiscoverable?
  return false if discoverable?

  true
end

# True when this record's id is -99, the id this predicate uses to recognise
# the instance actor.
def instance_actor?
  reserved_id = -99
  id == reserved_id
end
Expand Down Expand Up @@ -451,6 +457,7 @@ def emojis
before_validation :prepare_username, on: :create
before_create :generate_keys
before_destroy :clean_feed_manager
after_commit :enqueue_remove_from_public_statuses_index, on: :destroy, if: -> { Chewy.enabled? && discoverable? }

def ensure_keys!
return unless local? && private_key.blank? && public_key.blank?
Expand Down
50 changes: 50 additions & 0 deletions app/models/concerns/account_statuses_search.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# frozen_string_literal: true

# Keeps PublicStatusesIndex in sync with an account's `discoverable` flag.
#
# The enqueue_* methods schedule background workers; the bang methods do the
# indexing work directly. Every method is a no-op when Chewy is disabled.
module AccountStatusesSearch
  extend ActiveSupport::Concern

  # Enqueues the worker matching the current value of discoverable?:
  # the add worker when discoverable, the remove worker otherwise.
  def enqueue_update_public_statuses_index
    if discoverable?
      enqueue_add_to_public_statuses_index
    else
      enqueue_remove_from_public_statuses_index
    end
  end

  # Schedules AddToPublicStatusesIndexWorker for this account's id.
  def enqueue_add_to_public_statuses_index
    return unless Chewy.enabled?

    AddToPublicStatusesIndexWorker.perform_async(id)
  end

  # Schedules RemoveFromPublicStatusesIndexWorker for this account's id.
  def enqueue_remove_from_public_statuses_index
    return unless Chewy.enabled?

    RemoveFromPublicStatusesIndexWorker.perform_async(id)
  end

  # Imports this account's public, non-reblog statuses into
  # PublicStatusesIndex in batches of 1000.
  #
  # find_in_batches pages by primary key, so no row is skipped or visited
  # twice the way unordered offset/limit paging can; reorder(nil) drops any
  # inherited ordering, which batch iteration would ignore anyway. Each batch
  # is handed to `import` positionally, which is how Chewy accepts the
  # collection to index.
  def add_to_public_statuses_index!
    return unless Chewy.enabled?

    Status.where(account_id: id)
          .without_reblogs
          .where(visibility: :public)
          .reorder(nil)
          .find_in_batches(batch_size: 1000) do |batch|
      PublicStatusesIndex.import(batch)
    end
  end

  # Deletes every PublicStatusesIndex document whose account_id is this
  # account's id.
  def remove_from_public_statuses_index!
    return unless Chewy.enabled?

    PublicStatusesIndex.filter(term: { account_id: id }).delete_all
  end
end
1 change: 1 addition & 0 deletions app/models/status.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class Status < ApplicationRecord
attr_accessor :override_timestamps

update_index('statuses', :proper)
update_index('publicStatuses', :proper)

enum visibility: { public: 0, unlisted: 1, private: 2, direct: 3, limited: 4 }, _suffix: :visibility

Expand Down
2 changes: 1 addition & 1 deletion app/services/batched_remove_status_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def call(statuses, **options)

# Since we skipped all callbacks, we also need to manually
# deindex the statuses
Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
Chewy::Index.update([StatusesIndex, PublicStatusesIndex], statuses_and_reblogs) if Chewy.enabled?

return if options[:skip_side_effects]

Expand Down
4 changes: 2 additions & 2 deletions app/services/delete_account_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,15 @@ def purge_favourites!
@account.favourites.in_batches do |favourites|
ids = favourites.pluck(:status_id)
StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
Chewy::Index.update([StatusesIndex, PublicStatusesIndex], ids) if Chewy.enabled?
Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
favourites.delete_all
end
end

def purge_bookmarks!
@account.bookmarks.in_batches do |bookmarks|
Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
Chewy::Index.update([StatusesIndex, PublicStatusesIndex], bookmarks.pluck(:status_id)) if Chewy.enabled?
bookmarks.delete_all
end
end
Expand Down
32 changes: 9 additions & 23 deletions app/services/search_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,15 @@ def perform_accounts_search!
end

def perform_statuses_search!
definition = parsed_query.apply(StatusesIndex.filter(term: { searchable_by: @account.id }))

definition = definition.filter(term: { account_id: @options[:account_id] }) if @options[:account_id].present?

if @options[:min_id].present? || @options[:max_id].present?
range = {}
range[:gt] = @options[:min_id].to_i if @options[:min_id].present?
range[:lt] = @options[:max_id].to_i if @options[:max_id].present?
definition = definition.filter(range: { id: range })
end

results = definition.limit(@limit).offset(@offset).objects.compact
account_ids = results.map(&:account_id)
account_domains = results.map(&:account_domain)
preloaded_relations = @account.relations_map(account_ids, account_domains)

results.reject { |status| StatusFilter.new(status, @account, preloaded_relations).filtered? }
rescue Faraday::ConnectionFailed, Parslet::ParseFailed
[]
StatusesSearchService.new.call(
@query,
@account,
limit: @limit,
offset: @offset,
account_id: @options[:account_id],
min_id: @options[:min_id],
max_id: @options[:max_id]
)
end

def perform_hashtags_search!
Expand Down Expand Up @@ -114,8 +104,4 @@ def hashtag_search?
# True when no search type was requested, or the requested type is 'statuses'.
def statuses_search?
  requested_type = @options[:type]
  return true if requested_type.blank?

  requested_type == 'statuses'
end

# Parses @query with SearchQueryParser and passes the parse result through
# SearchQueryTransformer#apply, returning whatever the transformer produces.
def parsed_query
SearchQueryTransformer.new.apply(SearchQueryParser.new.parse(@query))
end
end
Loading

0 comments on commit b7d4c84

Please sign in to comment.