Merge commit '6c4c72497a5722870e4432ef41dd4c9ec36a8928' into glitch-soc/merge-upstream

Conflicts: - `.github/workflows/build-releases.yml`: Upstream changed comments close to a line we modified to account for different container image repositories. Updated the comments as upstream did.
2023-09-02 13:50:16 +02:00
parent f5bd2014e2 6c4c72497a
commit ac2dae0d11
27 changed files with 443 additions and 147 deletions
@@ -4,10 +4,10 @@ class Importer::AccountsIndexImporter < Importer::BaseImporter
  def import!
    scope.includes(:account_stat).find_in_batches(batch_size: @batch_size) do |tmp|
      in_work_unit(tmp) do |accounts|
-        bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: accounts).bulk_body
+        bulk = build_bulk_body(accounts)

-        indexed = bulk.count { |entry| entry[:index] }
-        deleted = bulk.count { |entry| entry[:delete] }
+        indexed = bulk.size
+        deleted = 0

        Chewy::Index::Import::BulkRequest.new(index).perform(bulk)

@@ -68,6 +68,14 @@ class Importer::BaseImporter

  protected

+  def build_bulk_body(to_import)
+    # Specialize `Chewy::Index::Import::BulkBuilder#bulk_body` to avoid a few
+    # inefficiencies, as none of our fields are join fields and we do not need
+    # `BulkBuilder`'s versatility.
+    crutches = Chewy::Index::Crutch::Crutches.new index, to_import
+    to_import.map { |object| { index: { _id: object.id, data: index.compose(object, crutches, fields: []) } } }
+  end
+
  def in_work_unit(...)
    work_unit = Concurrent::Promises.future_on(@executor, ...)

@@ -4,10 +4,10 @@ class Importer::InstancesIndexImporter < Importer::BaseImporter
  def import!
    index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp|
      in_work_unit(tmp) do |instances|
-        bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: instances).bulk_body
+        bulk = build_bulk_body(instances)

-        indexed = bulk.count { |entry| entry[:index] }
-        deleted = bulk.count { |entry| entry[:delete] }
+        indexed = bulk.size
+        deleted = 0

        Chewy::Index::Import::BulkRequest.new(index).perform(bulk)

@@ -5,11 +5,11 @@ class Importer::PublicStatusesIndexImporter < Importer::BaseImporter
    scope.select(:id).find_in_batches(batch_size: @batch_size) do |batch|
      in_work_unit(batch.pluck(:id)) do |status_ids|
        bulk = ActiveRecord::Base.connection_pool.with_connection do
-          Chewy::Index::Import::BulkBuilder.new(index, to_index: Status.includes(:media_attachments, :preloadable_poll, :preview_cards).where(id: status_ids)).bulk_body
+          build_bulk_body(index.adapter.default_scope.where(id: status_ids))
        end

-        indexed = bulk.count { |entry| entry[:index] }
-        deleted = bulk.count { |entry| entry[:delete] }
+        indexed = bulk.size
+        deleted = 0

        Chewy::Index::Import::BulkRequest.new(index).perform(bulk)

@@ -13,32 +13,25 @@ class Importer::StatusesIndexImporter < Importer::BaseImporter

      scope.find_in_batches(batch_size: @batch_size) do |tmp|
        in_work_unit(tmp.map(&:status_id)) do |status_ids|
-          bulk = ActiveRecord::Base.connection_pool.with_connection do
-            Chewy::Index::Import::BulkBuilder.new(index, to_index: index.adapter.default_scope.where(id: status_ids)).bulk_body
-          end
-
-          indexed = 0
          deleted = 0

-          # We can't use the delete_if proc to do the filtering because delete_if
-          # is called before rendering the data and we need to filter based
-          # on the results of the filter, so this filtering happens here instead
-          bulk.map! do |entry|
-            new_entry = if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
-                          { delete: entry[:index].except(:data) }
-                        else
-                          entry
-                        end
-
-            if new_entry[:index]
-              indexed += 1
-            else
-              deleted += 1
+          bulk = ActiveRecord::Base.connection_pool.with_connection do
+            to_index = index.adapter.default_scope.where(id: status_ids)
+            crutches = Chewy::Index::Crutch::Crutches.new index, to_index
+            to_index.map do |object|
+              # This is unlikely to happen, but the post may have been
+              # un-interacted with since it was queued for indexing
+              if object.searchable_by.empty?
+                deleted += 1
+                { delete: { _id: object.id } }
+              else
+                { index: { _id: object.id, data: index.compose(object, crutches, fields: []) } }
+              end
            end
-
-            new_entry
          end

+          indexed = bulk.size - deleted
+
          Chewy::Index::Import::BulkRequest.new(index).perform(bulk)

          [indexed, deleted]
@@ -4,10 +4,10 @@ class Importer::TagsIndexImporter < Importer::BaseImporter
  def import!
    index.adapter.default_scope.find_in_batches(batch_size: @batch_size) do |tmp|
      in_work_unit(tmp) do |tags|
-        bulk = Chewy::Index::Import::BulkBuilder.new(index, to_index: tags).bulk_body
+        bulk = build_bulk_body(tags)

-        indexed = bulk.count { |entry| entry[:index] }
-        deleted = bulk.count { |entry| entry[:delete] }
+        indexed = bulk.size
+        deleted = 0

        Chewy::Index::Import::BulkRequest.new(index).perform(bulk)

@@ -6,10 +6,10 @@ class SearchQueryParser < Parslet::Parser
  rule(:colon)     { str(':') }
  rule(:space)     { match('\s').repeat(1) }
  rule(:operator)  { (str('+') | str('-')).as(:operator) }
-  rule(:prefix)    { (term >> colon).as(:prefix) }
+  rule(:prefix)    { term >> colon }
  rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
  rule(:phrase)    { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
-  rule(:clause)    { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) }
+  rule(:clause)    { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) }
  rule(:query)     { (clause >> space.maybe).repeat.as(:query) }
  root(:query)
 end
@@ -1,50 +1,32 @@
 # frozen_string_literal: true

 class SearchQueryTransformer < Parslet::Transform
+  SUPPORTED_PREFIXES = %w(
+    has
+    is
+    language
+    from
+    before
+    after
+    during
+  ).freeze
+
  class Query
-    attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses
+    attr_reader :must_not_clauses, :must_clauses, :filter_clauses

    def initialize(clauses)
-      grouped = clauses.chunk(&:operator).to_h
-      @should_clauses = grouped.fetch(:should, [])
+      grouped = clauses.compact.chunk(&:operator).to_h
      @must_not_clauses = grouped.fetch(:must_not, [])
      @must_clauses = grouped.fetch(:must, [])
      @filter_clauses = grouped.fetch(:filter, [])
    end

    def apply(search)
-      should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
-      must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
-      must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
-      filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) }
+      must_clauses.each { |clause| search = search.query.must(clause.to_query) }
+      must_not_clauses.each { |clause| search = search.query.must_not(clause.to_query) }
+      filter_clauses.each { |clause| search = search.filter(**clause.to_query) }
      search.query.minimum_should_match(1)
    end
-
-    private
-
-    def clause_to_query(clause)
-      case clause
-      when TermClause
-        { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } }
-      when PhraseClause
-        { match_phrase: { text: { query: clause.phrase } } }
-      else
-        raise "Unexpected clause type: #{clause}"
-      end
-    end
-
-    def clause_to_filter(clause)
-      case clause
-      when PrefixClause
-        if clause.negated?
-          { bool: { must_not: { clause.type => { clause.filter => clause.term } } } }
-        else
-          { clause.type => { clause.filter => clause.term } }
-        end
-      else
-        raise "Unexpected clause type: #{clause}"
-      end
-    end
  end

  class Operator
@@ -63,31 +45,38 @@ class SearchQueryTransformer < Parslet::Transform
  end

  class TermClause
-    attr_reader :prefix, :operator, :term
+    attr_reader :operator, :term

-    def initialize(prefix, operator, term)
-      @prefix = prefix
+    def initialize(operator, term)
      @operator = Operator.symbol(operator)
      @term = term
    end
+
+    def to_query
+      { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
+    end
  end

  class PhraseClause
-    attr_reader :prefix, :operator, :phrase
+    attr_reader :operator, :phrase

-    def initialize(prefix, operator, phrase)
-      @prefix = prefix
+    def initialize(operator, phrase)
      @operator = Operator.symbol(operator)
      @phrase = phrase
    end
+
+    def to_query
+      { match_phrase: { text: { query: @phrase } } }
+    end
  end

  class PrefixClause
-    attr_reader :type, :filter, :operator, :term
+    attr_reader :operator, :prefix, :term

    def initialize(prefix, operator, term, options = {})
-      @negated  = operator == '-'
-      @options  = options
+      @prefix = prefix
+      @negated = operator == '-'
+      @options = options
      @operator = :filter

      case prefix
@@ -116,12 +105,16 @@ class SearchQueryTransformer < Parslet::Transform
        @type = :range
        @term = { gte: term, lte: term, time_zone: @options[:current_account]&.user_time_zone || 'UTC' }
      else
-        raise Mastodon::SyntaxError
+        raise "Unknown prefix: #{prefix}"
      end
    end

-    def negated?
-      @negated
+    def to_query
+      if @negated
+        { bool: { must_not: { @type => { @filter => @term } } } }
+      else
+        { @type => { @filter => @term } }
+      end
    end

    private
@@ -159,18 +152,26 @@ class SearchQueryTransformer < Parslet::Transform
    prefix   = clause[:prefix][:term].to_s if clause[:prefix]
    operator = clause[:operator]&.to_s

-    if clause[:prefix]
+    if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix)
      PrefixClause.new(prefix, operator, clause[:term].to_s, current_account: current_account)
+    elsif clause[:prefix]
+      TermClause.new(operator, "#{prefix} #{clause[:term]}")
    elsif clause[:term]
-      TermClause.new(prefix, operator, clause[:term].to_s)
+      TermClause.new(operator, clause[:term].to_s)
    elsif clause[:shortcode]
-      TermClause.new(prefix, operator, ":#{clause[:term]}:")
+      TermClause.new(operator, ":#{clause[:term]}:")
    elsif clause[:phrase]
-      PhraseClause.new(prefix, operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
+      PhraseClause.new(operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
    else
      raise "Unexpected clause type: #{clause}"
    end
  end

-  rule(query: sequence(:clauses)) { Query.new(clauses) }
+  rule(junk: subtree(:junk)) do
+    nil
+  end
+
+  rule(query: sequence(:clauses)) do
+    Query.new(clauses)
+  end
 end