Skip to content

Commit

Permalink
Improve searching of petitions
Browse files Browse the repository at this point in the history
By using the btree_gin extension and merging the indexes on the various
columns into a single index combined with state, debate_state and
parliament_id for archived petitions we can achieve a significant
performance gain of more than 10x when searching arbitrary text.

This also adds in the petition id to the index so people can now search
by that on the front end - it will even return cases where people refer
to the petition by id in other petition text.
  • Loading branch information
pixeltrix committed Apr 26, 2021
1 parent d49d8ed commit 36b7a69
Show file tree
Hide file tree
Showing 18 changed files with 453 additions and 68 deletions.
1 change: 0 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ gem 'faraday'
gem 'faraday_middleware'
gem 'net-http-persistent'
gem 'sass-rails', '~> 5.0'
gem 'textacular'
gem 'uglifier'
gem 'bcrypt'
gem 'faker', require: false
Expand Down
3 changes: 0 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,6 @@ GEM
actionpack (>= 4.0)
activesupport (>= 4.0)
sprockets (>= 3.0.0)
textacular (5.4.0)
activerecord (>= 5.0, < 6.2)
thor (1.1.0)
tilt (2.0.10)
tzinfo (2.0.4)
Expand Down Expand Up @@ -413,7 +411,6 @@ DEPENDENCIES
shoulda-matchers
simplecov
slack-notifier
textacular
uglifier
webdrivers (~> 3.8.1)
webmock
Expand Down
5 changes: 3 additions & 2 deletions app/models/archived/petition.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
require 'textacular/searchable'
require_dependency 'archived'

module Archived
Expand Down Expand Up @@ -48,9 +47,11 @@ class Petition < ActiveRecord::Base

before_save :update_debate_state, if: :scheduled_debate_date_changed?

extend Searchable(:action, :background, :additional_details)
include Browseable, Taggable, Departments, Topics, Anonymization

query :id, :action, :background
query :additional_details, null: true

facet :all, -> { visible.by_most_signatures }
facet :awaiting_response, -> { awaiting_response.by_waiting_for_response_longest }
facet :awaiting_debate_date, -> { awaiting_debate_date.by_waiting_for_debate_longest }
Expand Down
68 changes: 64 additions & 4 deletions app/models/concerns/browseable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ module Browseable
class_attribute :filter_definitions, instance_writer: false
self.filter_definitions = {}

class_attribute :query_columns, instance_writer: false
self.query_columns = []

class_attribute :default_page_size, instance_writer: false
self.default_page_size = 50

Expand Down Expand Up @@ -108,6 +111,60 @@ def filter_params
end
end

class Query
Column = Struct.new(:name, :config, :null)

attr_reader :klass, :param

delegate :query_columns, to: :klass
delegate :connection, to: :klass
delegate :quoted_table_name, to: :klass
delegate :quote_column_name, to: :connection
delegate :inspect, :present?, :==, to: :to_s

def initialize(klass, param)
@klass, @param = klass, param.to_s
end

def build
[sql, query: param]
end

def to_s
param
end

private

def sql
"((#{search_conditions.join(" || ")}) @@ #{to_tsquery})"
end

def search_conditions
query_columns.map(&method(:search_condition))
end

def search_condition(column)
"#{to_tsvector(column)}"
end

def quoted_column(column)
quoted_table_name + "." + quote_column_name(column)
end

def to_tsvector(column)
if column.null
"to_tsvector('#{column.config}', COALESCE(#{quoted_column(column.name)}, '')::text)"
else
"to_tsvector('#{column.config}', #{quoted_column(column.name)}::text)"
end
end

def to_tsquery
"plainto_tsquery('english', :query)"
end
end

class Search
include Enumerable

Expand All @@ -119,6 +176,7 @@ class Search
delegate :total_entries, :total_pages, to: :results
delegate :to_a, :to_ary, to: :results
delegate :each, :map, :size, to: :to_a
delegate :explain, to: :execute_search_with_pagination

def initialize(klass, params = {})
@klass, @params = klass, params
Expand Down Expand Up @@ -157,7 +215,7 @@ def last_page?
end

def query
@query ||= params[:q].to_s
@query ||= Query.new(klass, params[:q])
end

def page_size
Expand Down Expand Up @@ -239,9 +297,7 @@ def execute_search_with_pagination

def execute_search
if search?
relation = klass.basic_search(query)
relation = relation.except(:select).select(star)
relation = relation.except(:order)
relation = klass.where(query.build)
else
relation = klass
end
Expand Down Expand Up @@ -276,6 +332,10 @@ def filter(key, transformer)
self.filter_definitions[key] = transformer
end

def query(*columns, config: "english", null: false)
self.query_columns += columns.map { |column| Query::Column.new(column.to_s, config, null) }
end

def search(params)
Search.new(all, params)
end
Expand Down
6 changes: 3 additions & 3 deletions app/models/department.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
require 'textacular/searchable'

class Department < ActiveRecord::Base
extend Searchable(:name, :acronym)
include Browseable

query :name
query :acronym, null: true

validates :external_id, length: { maximum: 30 }
validates :name, presence: true, length: { maximum: 100 }
validates :acronym, length: { maximum: 10 }
Expand Down
6 changes: 3 additions & 3 deletions app/models/invalidation.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
require 'textacular/searchable'

class Invalidation < ActiveRecord::Base
extend Searchable(:id, :summary, :details, :petition_id)
include Browseable

query :summary
query :details, :petition_id, null: true

belongs_to :petition, optional: true
has_many :signatures

Expand Down
6 changes: 3 additions & 3 deletions app/models/petition.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
require 'textacular/searchable'

class Petition < ActiveRecord::Base
include PerishableTokenGenerator

Expand Down Expand Up @@ -46,9 +44,11 @@ class Petition < ActiveRecord::Base
before_save :update_moderation_lag, unless: :moderation_lag?
after_create :update_last_petition_created_at

extend Searchable(:action, :background, :additional_details)
include Browseable, Taggable, Departments, Topics, Anonymization

query :id, :action, :background
query :additional_details, null: true

facet :all, -> { by_most_popular }
facet :open, -> { open_state.by_most_popular }
facet :rejected, -> { rejected_state.by_most_recent }
Expand Down
6 changes: 3 additions & 3 deletions app/models/tag.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
require 'textacular/searchable'

class Tag < ActiveRecord::Base
extend Searchable(:name, :description)
include Browseable

query :name
query :description, null: true

validates_presence_of :name
validates_uniqueness_of :name
validates_length_of :name, maximum: 50
Expand Down
5 changes: 2 additions & 3 deletions app/models/topic.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
require 'textacular/searchable'

class Topic < ActiveRecord::Base
extend Searchable(:code, :name)
include Browseable

query :code, :name

with_options presence: true, uniqueness: true do
validates :code, length: { maximum: 100 }
validates :name, length: { maximum: 100 }
Expand Down
2 changes: 1 addition & 1 deletion app/views/admin/topics/index.html.erb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<%= render "admin/shared/tag_tabs" %>

<div class="grid-row">
<%= form_tag admin_tags_path, method: :get, class: 'search-topics' do %>
<%= form_tag admin_topics_path, method: :get, class: 'search-topics' do %>
<div class="column-half">
<div class="search-inline">
<%= label_tag :q, "Search topics", class: "visuallyhidden" %>
Expand Down
99 changes: 99 additions & 0 deletions db/migrate/20210422053757_optimize_free_text_search_indexes.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
class OptimizeFreeTextSearchIndexes < ActiveRecord::Migration[6.1]
disable_ddl_transaction!

def up
execute "CREATE EXTENSION IF NOT EXISTS btree_gin"

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_petitions_for_search
ON petitions USING gin((
to_tsvector('english', id::text) ||
to_tsvector('english', action::text) ||
to_tsvector('english', background::text) ||
to_tsvector('english', COALESCE(additional_details, '')::text)),
state, debate_state
);
SQL

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_archived_petitions_for_search
ON archived_petitions USING gin((
to_tsvector('english', id::text) ||
to_tsvector('english', action::text) ||
to_tsvector('english', background::text) ||
to_tsvector('english', COALESCE(additional_details, '')::text)),
state, parliament_id, debate_state
);
SQL

execute "DROP INDEX CONCURRENTLY IF EXISTS index_petitions_on_action"
execute "DROP INDEX CONCURRENTLY IF EXISTS index_petitions_on_background"
execute "DROP INDEX CONCURRENTLY IF EXISTS index_petitions_on_additional_details"
execute "ANALYZE petitions"

execute "DROP INDEX CONCURRENTLY IF EXISTS index_archived_petitions_on_action"
execute "DROP INDEX CONCURRENTLY IF EXISTS index_archived_petitions_on_background"
execute "DROP INDEX CONCURRENTLY IF EXISTS index_archived_petitions_on_additional_details"
execute "ANALYZE archived_petitions"
end

def down
execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_petitions_on_action
ON petitions USING gin(
to_tsvector('english', action)
);
SQL

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_petitions_on_background
ON petitions USING gin(
to_tsvector('english', background)
);
SQL

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_petitions_on_additional_details
ON petitions USING gin(
to_tsvector('english', additional_details)
);
SQL

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_archived_petitions_on_action
ON archived_petitions USING gin(
to_tsvector('english', action)
);
SQL

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_archived_petitions_on_background
ON archived_petitions USING gin(
to_tsvector('english', background)
);
SQL

execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_archived_petitions_on_additional_details
ON archived_petitions USING gin(
to_tsvector('english', additional_details)
);
SQL

execute "DROP INDEX CONCURRENTLY IF EXISTS index_petitions_for_search"
execute "ANALYZE petitions"

execute "DROP INDEX CONCURRENTLY IF EXISTS index_archived_petitions_for_search"
execute "ANALYZE archived_petitions"

execute "DROP EXTENSION IF EXISTS btree_gin"
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
class OptimizeSearchIndexesForDepartments < ActiveRecord::Migration[6.1]
disable_ddl_transaction!

def up
execute <<~SQL
CREATE INDEX CONCURRENTLY IF NOT EXISTS
index_departments_for_search
ON departments USING gin((
to_tsvector('english', name::text) ||
to_tsvector('english', COALESCE(acronym, '')::text)
));
SQL

execute "ANALYZE departments"
end

def down
execute "DROP INDEX CONCURRENTLY IF EXISTS index_departments_for_search"
execute "ANALYZE departments"
end
end
Loading

0 comments on commit 36b7a69

Please sign in to comment.