Merge commit '6c4c72497a5722870e4432ef41dd4c9ec36a8928' into glitch-soc/merge-upstream
Conflicts: - `.github/workflows/build-releases.yml`: Upstream changed comments close to a line we modified to account for different container image repositories. Updated the comments as upstream did.
This commit is contained in:
@@ -4,10 +4,10 @@ class Importer::AccountsIndexImporter < Importer::BaseImporter
|
||||
def import!
|
||||
scope.includes(:account_stat).find_in_batches(batch_size: @batch_size) do |tmp|
|
||||
in_work_unit(tmp) do |accounts|
|
||||
bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: accounts).bulk_body
|
||||
bulk = build_bulk_body(accounts)
|
||||
|
||||
indexed = bulk.count { |entry| entry[:index] }
|
||||
deleted = bulk.count { |entry| entry[:delete] }
|
||||
indexed = bulk.size
|
||||
deleted = 0
|
||||
|
||||
Chewy::Index::Import::BulkRequest.new(index).perform(bulk)
|
||||
|
||||
|
||||
@@ -68,6 +68,14 @@ class Importer::BaseImporter
|
||||
|
||||
protected
|
||||
|
||||
def build_bulk_body(to_import)
|
||||
# Specialize `Chewy::Index::Import::BulkBuilder#bulk_body` to avoid a few
|
||||
# inefficiencies, as none of our fields or join fields and we do not need
|
||||
# `BulkBuilder`'s versatility.
|
||||
crutches = Chewy::Index::Crutch::Crutches.new index, to_import
|
||||
to_import.map { |object| { index: { _id: object.id, data: index.compose(object, crutches, fields: []) } } }
|
||||
end
|
||||
|
||||
def in_work_unit(...)
|
||||
work_unit = Concurrent::Promises.future_on(@executor, ...)
|
||||
|
||||
|
||||
@@ -4,10 +4,10 @@ class Importer::InstancesIndexImporter < Importer::BaseImporter
|
||||
def import!
|
||||
index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp|
|
||||
in_work_unit(tmp) do |instances|
|
||||
bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: instances).bulk_body
|
||||
bulk = build_bulk_body(instances)
|
||||
|
||||
indexed = bulk.count { |entry| entry[:index] }
|
||||
deleted = bulk.count { |entry| entry[:delete] }
|
||||
indexed = bulk.size
|
||||
deleted = 0
|
||||
|
||||
Chewy::Index::Import::BulkRequest.new(index).perform(bulk)
|
||||
|
||||
|
||||
@@ -5,11 +5,11 @@ class Importer::PublicStatusesIndexImporter < Importer::BaseImporter
|
||||
scope.select(:id).find_in_batches(batch_size: @batch_size) do |batch|
|
||||
in_work_unit(batch.pluck(:id)) do |status_ids|
|
||||
bulk = ActiveRecord::Base.connection_pool.with_connection do
|
||||
Chewy::Index::Import::BulkBuilder.new(index, to_index: Status.includes(:media_attachments, :preloadable_poll, :preview_cards).where(id: status_ids)).bulk_body
|
||||
build_bulk_body(index.adapter.default_scope.where(id: status_ids))
|
||||
end
|
||||
|
||||
indexed = bulk.count { |entry| entry[:index] }
|
||||
deleted = bulk.count { |entry| entry[:delete] }
|
||||
indexed = bulk.size
|
||||
deleted = 0
|
||||
|
||||
Chewy::Index::Import::BulkRequest.new(index).perform(bulk)
|
||||
|
||||
|
||||
@@ -13,32 +13,25 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter
|
||||
|
||||
scope.find_in_batches(batch_size: @batch_size) do |tmp|
|
||||
in_work_unit(tmp.map(&:status_id)) do |status_ids|
|
||||
bulk = ActiveRecord::Base.connection_pool.with_connection do
|
||||
Chewy::Index::Import::BulkBuilder.new(index, to_index: index.adapter.default_scope.where(id: status_ids)).bulk_body
|
||||
end
|
||||
|
||||
indexed = 0
|
||||
deleted = 0
|
||||
|
||||
# We can't use the delete_if proc to do the filtering because delete_if
|
||||
# is called before rendering the data and we need to filter based
|
||||
# on the results of the filter, so this filtering happens here instead
|
||||
bulk.map! do |entry|
|
||||
new_entry = if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
|
||||
{ delete: entry[:index].except(:data) }
|
||||
else
|
||||
entry
|
||||
end
|
||||
|
||||
if new_entry[:index]
|
||||
indexed += 1
|
||||
else
|
||||
deleted += 1
|
||||
bulk = ActiveRecord::Base.connection_pool.with_connection do
|
||||
to_index = index.adapter.default_scope.where(id: status_ids)
|
||||
crutches = Chewy::Index::Crutch::Crutches.new index, to_index
|
||||
to_index.map do |object|
|
||||
# This is unlikely to happen, but the post may have been
|
||||
# un-interacted with since it was queued for indexing
|
||||
if object.searchable_by.empty?
|
||||
deleted += 1
|
||||
{ delete: { _id: object.id } }
|
||||
else
|
||||
{ index: { _id: object.id, data: index.compose(object, crutches, fields: []) } }
|
||||
end
|
||||
end
|
||||
|
||||
new_entry
|
||||
end
|
||||
|
||||
indexed = bulk.size - deleted
|
||||
|
||||
Chewy::Index::Import::BulkRequest.new(index).perform(bulk)
|
||||
|
||||
[indexed, deleted]
|
||||
|
||||
@@ -4,10 +4,10 @@ class Importer::TagsIndexImporter < Importer::BaseImporter
|
||||
def import!
|
||||
index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp|
|
||||
in_work_unit(tmp) do |tags|
|
||||
bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: tags).bulk_body
|
||||
bulk = build_bulk_body(tags)
|
||||
|
||||
indexed = bulk.count { |entry| entry[:index] }
|
||||
deleted = bulk.count { |entry| entry[:delete] }
|
||||
indexed = bulk.size
|
||||
deleted = 0
|
||||
|
||||
Chewy::Index::Import::BulkRequest.new(index).perform(bulk)
|
||||
|
||||
|
||||
@@ -6,10 +6,10 @@ class SearchQueryParser < Parslet::Parser
|
||||
rule(:colon) { str(':') }
|
||||
rule(:space) { match('\s').repeat(1) }
|
||||
rule(:operator) { (str('+') | str('-')).as(:operator) }
|
||||
rule(:prefix) { (term >> colon).as(:prefix) }
|
||||
rule(:prefix) { term >> colon }
|
||||
rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
|
||||
rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
|
||||
rule(:clause) { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) }
|
||||
rule(:clause) { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) }
|
||||
rule(:query) { (clause >> space.maybe).repeat.as(:query) }
|
||||
root(:query)
|
||||
end
|
||||
|
||||
@@ -1,50 +1,32 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class SearchQueryTransformer < Parslet::Transform
|
||||
SUPPORTED_PREFIXES = %w(
|
||||
has
|
||||
is
|
||||
language
|
||||
from
|
||||
before
|
||||
after
|
||||
during
|
||||
).freeze
|
||||
|
||||
class Query
|
||||
attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses
|
||||
attr_reader :must_not_clauses, :must_clauses, :filter_clauses
|
||||
|
||||
def initialize(clauses)
|
||||
grouped = clauses.chunk(&:operator).to_h
|
||||
@should_clauses = grouped.fetch(:should, [])
|
||||
grouped = clauses.compact.chunk(&:operator).to_h
|
||||
@must_not_clauses = grouped.fetch(:must_not, [])
|
||||
@must_clauses = grouped.fetch(:must, [])
|
||||
@filter_clauses = grouped.fetch(:filter, [])
|
||||
end
|
||||
|
||||
def apply(search)
|
||||
should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
|
||||
must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
|
||||
must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
|
||||
filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) }
|
||||
must_clauses.each { |clause| search = search.query.must(clause.to_query) }
|
||||
must_not_clauses.each { |clause| search = search.query.must_not(clause.to_query) }
|
||||
filter_clauses.each { |clause| search = search.filter(**clause.to_query) }
|
||||
search.query.minimum_should_match(1)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def clause_to_query(clause)
|
||||
case clause
|
||||
when TermClause
|
||||
{ multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
|
||||
when PhraseClause
|
||||
{ match_phrase: { text: { query: clause.phrase } } }
|
||||
else
|
||||
raise "Unexpected clause type: #{clause}"
|
||||
end
|
||||
end
|
||||
|
||||
def clause_to_filter(clause)
|
||||
case clause
|
||||
when PrefixClause
|
||||
if clause.negated?
|
||||
{ bool: { must_not: { clause.type => { clause.filter => clause.term } } } }
|
||||
else
|
||||
{ clause.type => { clause.filter => clause.term } }
|
||||
end
|
||||
else
|
||||
raise "Unexpected clause type: #{clause}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class Operator
|
||||
@@ -63,31 +45,38 @@ class SearchQueryTransformer < Parslet::Transform
|
||||
end
|
||||
|
||||
class TermClause
|
||||
attr_reader :prefix, :operator, :term
|
||||
attr_reader :operator, :term
|
||||
|
||||
def initialize(prefix, operator, term)
|
||||
@prefix = prefix
|
||||
def initialize(operator, term)
|
||||
@operator = Operator.symbol(operator)
|
||||
@term = term
|
||||
end
|
||||
|
||||
def to_query
|
||||
{ multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
|
||||
end
|
||||
end
|
||||
|
||||
class PhraseClause
|
||||
attr_reader :prefix, :operator, :phrase
|
||||
attr_reader :operator, :phrase
|
||||
|
||||
def initialize(prefix, operator, phrase)
|
||||
@prefix = prefix
|
||||
def initialize(operator, phrase)
|
||||
@operator = Operator.symbol(operator)
|
||||
@phrase = phrase
|
||||
end
|
||||
|
||||
def to_query
|
||||
{ match_phrase: { text: { query: @phrase } } }
|
||||
end
|
||||
end
|
||||
|
||||
class PrefixClause
|
||||
attr_reader :type, :filter, :operator, :term
|
||||
attr_reader :operator, :prefix, :term
|
||||
|
||||
def initialize(prefix, operator, term, options = {})
|
||||
@negated = operator == '-'
|
||||
@options = options
|
||||
@prefix = prefix
|
||||
@negated = operator == '-'
|
||||
@options = options
|
||||
@operator = :filter
|
||||
|
||||
case prefix
|
||||
@@ -116,12 +105,16 @@ class SearchQueryTransformer < Parslet::Transform
|
||||
@type = :range
|
||||
@term = { gte: term, lte: term, time_zone: @options[:current_account]&.user_time_zone || 'UTC' }
|
||||
else
|
||||
raise Mastodon::SyntaxError
|
||||
raise "Unknown prefix: #{prefix}"
|
||||
end
|
||||
end
|
||||
|
||||
def negated?
|
||||
@negated
|
||||
def to_query
|
||||
if @negated
|
||||
{ bool: { must_not: { @type => { @filter => @term } } } }
|
||||
else
|
||||
{ @type => { @filter => @term } }
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
@@ -159,18 +152,26 @@ class SearchQueryTransformer < Parslet::Transform
|
||||
prefix = clause[:prefix][:term].to_s if clause[:prefix]
|
||||
operator = clause[:operator]&.to_s
|
||||
|
||||
if clause[:prefix]
|
||||
if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix)
|
||||
PrefixClause.new(prefix, operator, clause[:term].to_s, current_account: current_account)
|
||||
elsif clause[:prefix]
|
||||
TermClause.new(operator, "#{prefix} #{clause[:term]}")
|
||||
elsif clause[:term]
|
||||
TermClause.new(prefix, operator, clause[:term].to_s)
|
||||
TermClause.new(operator, clause[:term].to_s)
|
||||
elsif clause[:shortcode]
|
||||
TermClause.new(prefix, operator, ":#{clause[:term]}:")
|
||||
TermClause.new(operator, ":#{clause[:term]}:")
|
||||
elsif clause[:phrase]
|
||||
PhraseClause.new(prefix, operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
|
||||
PhraseClause.new(operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
|
||||
else
|
||||
raise "Unexpected clause type: #{clause}"
|
||||
end
|
||||
end
|
||||
|
||||
rule(query: sequence(:clauses)) { Query.new(clauses) }
|
||||
rule(junk: subtree(:junk)) do
|
||||
nil
|
||||
end
|
||||
|
||||
rule(query: sequence(:clauses)) do
|
||||
Query.new(clauses)
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user