From 941dfe2160ebc05cdd89ad122a1177dbd6c9e1e5 Mon Sep 17 00:00:00 2001 From: Claire Date: Mon, 27 Nov 2023 14:25:54 +0100 Subject: [PATCH] Clamp dates when serializing to Elasticsearch API (#28081) --- app/chewy/accounts_index.rb | 4 +++- app/chewy/concerns/datetime_clamping_concern.rb | 14 ++++++++++++++ app/chewy/public_statuses_index.rb | 4 +++- app/chewy/statuses_index.rb | 4 +++- app/chewy/tags_index.rb | 4 +++- 5 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 app/chewy/concerns/datetime_clamping_concern.rb diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index 00db257ac711db..59f2f991f2bda0 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class AccountsIndex < Chewy::Index + include DatetimeClampingConcern + settings index: index_preset(refresh_interval: '30s'), analysis: { filter: { english_stop: { @@ -60,7 +62,7 @@ class AccountsIndex < Chewy::Index field(:following_count, type: 'long') field(:followers_count, type: 'long') field(:properties, type: 'keyword', value: ->(account) { account.searchable_properties }) - field(:last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }) + field(:last_status_at, type: 'date', value: ->(account) { clamp_date(account.last_status_at || account.created_at) }) field(:display_name, type: 'text', analyzer: 'verbatim') { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:username, type: 'text', analyzer: 'verbatim', value: ->(account) { [account.username, account.domain].compact.join('@') }) { field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'verbatim' } field(:text, type: 'text', analyzer: 'verbatim', value: ->(account) { account.searchable_text }) { field :stemmed, type: 'text', analyzer: 'natural' } diff --git a/app/chewy/concerns/datetime_clamping_concern.rb b/app/chewy/concerns/datetime_clamping_concern.rb new file mode 100644 index 00000000000000..7f176b6e5489f4 --- /dev/null +++ b/app/chewy/concerns/datetime_clamping_concern.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module DatetimeClampingConcern + extend ActiveSupport::Concern + + MIN_ISO8601_DATETIME = '0000-01-01T00:00:00Z'.to_datetime.freeze + MAX_ISO8601_DATETIME = '9999-12-31T23:59:59Z'.to_datetime.freeze + + class_methods do + def clamp_date(datetime) + datetime.clamp(MIN_ISO8601_DATETIME, MAX_ISO8601_DATETIME) + end + end +end diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb index b5f0be5e5ce16b..09a4dfc09320a3 100644 --- a/app/chewy/public_statuses_index.rb +++ b/app/chewy/public_statuses_index.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class PublicStatusesIndex < Chewy::Index + include DatetimeClampingConcern + settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: { filter: { english_stop: { @@ -62,6 +64,6 @@ class PublicStatusesIndex < Chewy::Index field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) }) field(:language, type: 'keyword') field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) - field(:created_at, type: 'date') + field(:created_at, type: 'date', value: ->(status) { clamp_date(status.created_at) }) end end diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index e315a2030f2363..e739ccecb41b1a 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class StatusesIndex < Chewy::Index + include DatetimeClampingConcern + settings index: index_preset(refresh_interval: '30s', number_of_shards: 5), analysis: { filter: { english_stop: { @@ -60,6 +62,6 @@ class StatusesIndex < Chewy::Index field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by }) field(:language, type: 'keyword') field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties }) - field(:created_at, type: 'date') + field(:created_at, type: 'date', value: ->(status) { clamp_date(status.created_at) }) end end diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb index 5b6349a964963a..c99218a47fcdcb 100644 --- a/app/chewy/tags_index.rb +++ b/app/chewy/tags_index.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class TagsIndex < Chewy::Index + include DatetimeClampingConcern + settings index: index_preset(refresh_interval: '30s'), analysis: { analyzer: { content: { @@ -42,6 +44,6 @@ class TagsIndex < Chewy::Index field(:name, type: 'text', analyzer: 'content', value: :display_name) { field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content') } field(:reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }) field(:usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts }) - field(:last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }) + field(:last_status_at, type: 'date', value: ->(tag) { clamp_date(tag.last_status_at || tag.created_at) }) end end