diff --git a/.travis/docker-compose-travis.yml b/.travis/docker-compose-travis.yml index e94a4b167c3..c1a0c13d9a1 100644 --- a/.travis/docker-compose-travis.yml +++ b/.travis/docker-compose-travis.yml @@ -3,8 +3,15 @@ version: "2" services: elasticsearch: - image: elasticsearch:1.5.2 + image: elasticsearch:7.8.0 container_name: "es.edx" + environment: + - discovery.type=single-node + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + volumes: + - data01:/usr/share/elasticsearch/data + - ./elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml mongo: image: mongo:3.2.21 container_name: "mongo.edx" @@ -15,6 +22,7 @@ services: - ..:/edx/app/forum/cs_comments_service environment: MONGOID_AUTH_MECH: "" + SEARCH_SERVER_ES7: "http://elasticsearch:9200" forum: extends: forum-base command: tail -f /dev/null @@ -27,3 +35,6 @@ services: depends_on: - "elasticsearch" - "mongo" + +volumes: + data01: diff --git a/.travis/elasticsearch.yml b/.travis/elasticsearch.yml new file mode 100644 index 00000000000..d8b35f56c3a --- /dev/null +++ b/.travis/elasticsearch.yml @@ -0,0 +1,4 @@ +network.host: 0.0.0.0 +cluster.routing.allocation.disk.watermark.low: 150mb +cluster.routing.allocation.disk.watermark.high: 100mb +cluster.routing.allocation.disk.watermark.flood_stage: 50mb diff --git a/Gemfile b/Gemfile index 3058cfdf029..feb359a6d05 100644 --- a/Gemfile +++ b/Gemfile @@ -38,8 +38,8 @@ gem 'will_paginate_mongoid', "~>2.0" gem 'rdiscount' gem 'nokogiri', "~>1.8.1" -gem 'elasticsearch', '~> 1.1.2' -gem 'elasticsearch-model', '~> 0.1.9' +gem 'elasticsearch', '~> 7.8.0' +gem 'elasticsearch-model', '~> 7.1.0' gem 'dalli' diff --git a/Gemfile.lock b/Gemfile.lock index 1ad07303236..171b6243a31 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -50,17 +50,17 @@ GEM docile (1.3.2) domain_name (0.5.20170404) unf (>= 0.0.5, < 1.0.0) - elasticsearch (1.1.2) - elasticsearch-api (= 1.1.2) - elasticsearch-transport (= 1.1.2) - elasticsearch-api (1.1.2) + elasticsearch (7.8.0) + elasticsearch-api (= 7.8.0) + elasticsearch-transport (= 7.8.0) + elasticsearch-api (7.8.0) multi_json - elasticsearch-model (0.1.9) + elasticsearch-model (7.1.0) activesupport (> 3) - elasticsearch (> 0.4) + elasticsearch (> 1) hashie - elasticsearch-transport (1.1.2) - faraday + elasticsearch-transport (7.8.0) + faraday (~> 1) multi_json enumerize (2.1.2) activesupport (>= 3.2) @@ -68,7 +68,7 @@ GEM activesupport (>= 3.0.0) faker (1.7.3) i18n (~> 0.5) - faraday (0.12.1) + faraday (1.0.1) multipart-post (>= 1.2, < 3) ffi (1.9.18) formatador (0.2.5) @@ -84,7 +84,7 @@ GEM guard-unicorn (0.2.0) guard (>= 1.1) hashdiff (0.3.4) - hashie (3.5.5) + hashie (4.1.0) http-cookie (1.0.3) domain_name (~> 0.5) i18n (0.9.5) @@ -117,8 +117,8 @@ GEM mongoid_magic_counter_cache (1.1.1) mongoid rake - multi_json (1.12.1) - multipart-post (2.0.0) + multi_json (1.15.0) + multipart-post (2.1.1) nenv (0.3.0) netrc (0.11.0) newrelic_rpm (5.6.0.349) @@ -221,8 +221,8 @@ DEPENDENCIES dalli delayed_job delayed_job_mongoid - elasticsearch (~> 1.1.2) - elasticsearch-model (~> 0.1.9) + elasticsearch (~> 7.8.0) + elasticsearch-model (~> 7.1.0) enumerize factory_girl (~> 4.0) faker (~> 1.6) diff --git a/README.rst b/README.rst index 6efbd23c04e..3e5f7ffbe23 100644 --- a/README.rst +++ b/README.rst @@ -35,45 +35,38 @@ Install the requisite gems: $ bundle install -To initialize the index: +To initialize indices: -Setup the search index. Note that the command below creates an alias with a unique name (e.g. -content_20161220185820323), and assigns it a known alias: content. If you choose not to use the command below, you -should still opt to reference your index by an alias rather than the actual index name. This will enable you to swap out -indices (e.g. rebuild_index) without having to take downtime or modify code with a new index name. +Setup search indices. Note that the command below creates `comments_20161220185820323` and +`comment_threads_20161220185820323` indices and assigns `comments` and `comment_threads` aliases. This will enable you +to swap out indices (e.g. rebuild_index) without having to take downtime or modify code with a new index name. .. code-block:: bash $ bin/rake search:initialize -To validate the 'content' alias exists and contains the proper mappings: +To validate indices exist and contain the proper mappings: .. code-block:: bash - $ bin/rake search:validate_index + $ bin/rake search:validate_indices -To rebuild the index: +To rebuild indices: -To rebuild a new index from the database and then point the alias 'content' to it, you can use the -rebuild_index task. This task will also run catchup before and after the alias is moved, to minimize time where the -alias does not contain all documents. +To rebuild new indices from the database and then point the aliases `comments` and `comment_threads` to each index +which has equivalent index prefix, you can use the rebuild_indices task. This task will also run catch up before +and after aliases are moved, to minimize time where aliases do not contain all documents. .. code-block:: bash - $ bin/rake search:rebuild_index - -To rebuild a new index without moving the alias and without running catchup, use the following: - -.. code-block:: bash - - $ bin/rake search:rebuild_index[false] + $ bin/rake search:rebuild_indices You can also adjust the batch size (e.g. 200) and the sleep time (e.g. 2 seconds) between batches to lighten the load on MongoDB. .. code-block:: bash - $ bin/rake search:rebuild_index[true,200,2] + $ bin/rake search:rebuild_indices[200,2] Run the server: diff --git a/api/search.rb b/api/search.rb index e8a363ab15a..2a0eda0b5f5 100644 --- a/api/search.rb +++ b/api/search.rb @@ -1,62 +1,63 @@ def get_thread_ids(context, group_ids, local_params, search_text) - filters = [] - filters.push({term: {commentable_id: local_params['commentable_id']}}) if local_params['commentable_id'] - filters.push({terms: {commentable_id: local_params['commentable_ids'].split(',')}}) if local_params['commentable_ids'] - filters.push({term: {course_id: local_params['course_id']}}) if local_params['course_id'] + must = [] + filter = [] + must.push({term: {commentable_id: local_params['commentable_id']}}) if local_params['commentable_id'] + must.push({terms: {commentable_id: local_params['commentable_ids'].split(',')}}) if local_params['commentable_ids'] + must.push({term: {course_id: local_params['course_id']}}) if local_params['course_id'] + must.push( + { + multi_match: { + query: search_text, + fields: [:title, :body], + operator: :AND + } + } + ) + group_id = local_params['group_id'] + + if group_id + filter.push( + {:bool => {:must_not => {:exists => {:field => :group_id}}}}, + {:term => {:group_id => group_id}} + ) + end - filters.push({or: [ - {not: {exists: {field: :context}}}, - {term: {context: context}} - ]}) + filter.push( + {:bool => {:must_not => {:exists => {:field => :context}}}}, + {:term => {:context => context}} + ) unless group_ids.empty? - filters.push( - { - bool: { - should: [ - {:not => {:exists => {:field => :group_id}}}, - {:terms => {:group_id => group_ids}} - ] - } - } + filter.push( + {:bool => {:must_not => {:exists => {:field => :group_id}}}}, + {:terms => {:group_id => group_ids}} ) end body = { - size: CommentService.config['max_deep_search_comment_count'].to_i, - sort: [ - {updated_at: :desc} - ], - query: { - filtered: { - query: { - multi_match: { - query: search_text, - fields: [:title, :body], - operator: :AND - } - }, - filter: { - bool: { - must: filters - } - } - } + size: CommentService.config['max_deep_search_comment_count'].to_i, + sort: [ + {updated_at: :desc} + ], + query: { + bool: { + must: must, + should: filter } + } } - response = Elasticsearch::Model.client.search(index: Content::ES_INDEX_NAME, body: body) + response = Elasticsearch::Model.client.search(index: TaskHelpers::ElasticsearchHelper::INDEX_NAMES, body: body) thread_ids = Set.new response['hits']['hits'].each do |hit| - case hit['_type'] - when CommentThread.document_type - thread_ids.add(hit['_id']) - when Comment.document_type - thread_ids.add(hit['_source']['comment_thread_id']) - else - # There shouldn't be any other document types. Nevertheless, ignore them, if they are present. - next + if hit['_index'].include? CommentThread.index_name + thread_ids.add(hit['_id']) + elsif hit['_index'].include? Comment.index_name + thread_ids.add(hit['_source']['comment_thread_id']) + else + # There shouldn't be any other indices. Nevertheless, ignore them, if they are present. + next end end thread_ids @@ -64,19 +65,30 @@ def get_thread_ids(context, group_ids, local_params, search_text) def get_suggested_text(search_text) body = { - suggestions: { - text: search_text, - phrase: { - field: :_all - } + suggest: { + body_suggestions: { + text: search_text, + phrase: { + field: :body + } + }, + title_suggestions: { + text: search_text, + phrase: { + field: :title + } } + } } - response = Elasticsearch::Model.client.suggest(index: Content::ES_INDEX_NAME, body: body) - suggestions = response.fetch('suggestions', []) - if suggestions.length > 0 - options = suggestions[0]['options'] - if options.length > 0 - return options[0]['text'] + + response = Elasticsearch::Model.client.search(index: TaskHelpers::ElasticsearchHelper::INDEX_NAMES, body: body) + body_suggestions = response['suggest'].fetch('body_suggestions', []) + title_suggestions = response['suggest'].fetch('title_suggestions', []) + + [body_suggestions, title_suggestions].each do |suggestion| + if suggestion.length > 0 + options = suggestion[0]['options'] + return options[0]['text'] if options.length > 0 end end diff --git a/config/application.yml b/config/application.yml index 8894d9bd5cb..4ce627d47b4 100644 --- a/config/application.yml +++ b/config/application.yml @@ -1,6 +1,6 @@ level_limit: 3 api_key: <%= ENV['API_KEY'] || 'PUT_YOUR_API_KEY_HERE' %> -elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %> +elasticsearch_server: <%= ENV['SEARCH_SERVER_ES7'] || 'http://localhost:9200' %> max_deep_search_comment_count: 5000 enable_search: true default_locale: <%= ENV['SERVICE_LANGUAGE'] || 'en-US' %> diff --git a/lib/task_helpers.rb b/lib/task_helpers.rb index d773d19403c..9f067959a74 100644 --- a/lib/task_helpers.rb +++ b/lib/task_helpers.rb @@ -1,86 +1,126 @@ require 'elasticsearch' +require_relative '../models/comment' +require_relative '../models/comment_thread' module TaskHelpers module ElasticsearchHelper LOG = Logger.new(STDERR) + INDEX_MODELS = [Comment, CommentThread].freeze + INDEX_NAMES = [Comment.index_name, CommentThread.index_name].freeze + # local variable which store actual indices for future deletion + @@temporary_index_names = [] - # Creates a new index and loads data from the database. If an alias name - # is supplied, it will be pointed to the new index and catch up will be - # called both before and after the alias switch.. - # - # Returns the name of the newly created index. + def self.temporary_index_names + @@temporary_index_names + end + + def self.add_temporary_index_names(index_names) + # clone list of new index names which have been already created + @@temporary_index_names = index_names + end + + # Creates new indices and loads data from the database. # # Params: - # +alias_name+:: (optional) The alias to point to the new index. # +batch_size+:: (optional) The number of elements to index at a time. Defaults to 500. - # +sleep_time+:: (optional) The number of seconds to sleep between batches. Defaults to 0. # +extra_catchup_minutes+:: (optional) The number of extra minutes to catchup. Defaults to 5. - def self.rebuild_index(alias_name=nil, batch_size=500, sleep_time=0, extra_catchup_minutes=5) + def self.rebuild_indices(batch_size=500, extra_catchup_minutes=5) initial_start_time = Time.now - index_name = create_index() - [Comment, CommentThread].each do |model| + index_names = create_indices + index_names.each do |index_name| current_batch = 1 + model = get_index_model_rel(index_name) model.import(index: index_name, batch_size: batch_size) do |response| - batch_import_post_process(response, current_batch, sleep_time) - current_batch += 1 + batch_import_post_process(response, current_batch) + current_batch += 1 end end - if alias_name - # Just in case initial rebuild took days and first catch up takes hours, - # we catch up once before the alias move and once afterwards. - first_catchup_start_time = Time.now - adjusted_start_time = initial_start_time - (extra_catchup_minutes * 60) - catchup_index(adjusted_start_time, index_name, batch_size, sleep_time) + # Just in case initial rebuild took days and first catch up takes hours, + # we catch up once before the alias move and once afterwards. + first_catchup_start_time = Time.now + adjusted_start_time = initial_start_time - (extra_catchup_minutes * 60) + catchup_indices(index_names, adjusted_start_time, batch_size) - move_alias(alias_name, index_name, force_delete: true) - adjusted_start_time = first_catchup_start_time - (extra_catchup_minutes * 60) - catchup_index(adjusted_start_time, alias_name, batch_size, sleep_time) + alias_names = [] + index_names.each do |index_name| + current_batch = 1 + model = get_index_model_rel(index_name) + model_index_name = model.index_name + alias_names.push(model_index_name) + move_alias(model_index_name, index_name, force_delete: true) end - LOG.info "Rebuild index complete." - index_name + adjusted_start_time = first_catchup_start_time - (extra_catchup_minutes * 60) + catchup_indices(alias_names, adjusted_start_time, batch_size) + + add_temporary_index_names(index_names) + LOG.info "Rebuild indices complete." + end + + # Get index name which corresponds to the model + def self.get_index_model_rel(index_name) + model = nil + if index_name.include? Comment.index_name + model = Comment + elsif index_name.include? CommentThread.index_name + model = CommentThread + end + model end - def self.catchup_index(start_time, index_name, batch_size=100, sleep_time=0) - [Comment, CommentThread].each do |model| + def self.catchup_indices(index_names, start_time, batch_size=100) + index_names.each do |index_name| current_batch = 1 + model = get_index_model_rel(index_name) model.where(:updated_at.gte => start_time).import(index: index_name, batch_size: batch_size) do |response| - batch_import_post_process(response, current_batch, sleep_time) - current_batch += 1 + batch_import_post_process(response, current_batch) + current_batch += 1 end end LOG.info "Catch up from #{start_time} complete." end - def self.create_index(name=nil) - name ||= "#{Content::ES_INDEX_NAME}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}" + def self.create_indices + index_names = [] + time_now = Time.now.strftime('%Y%m%d%H%M%S%L') - Elasticsearch::Model.client.indices.create(index: name) - put_mappings(name) - - LOG.info "Created new index: #{name}." - name + INDEX_MODELS.each do |model| + index_name = "#{model.index_name}_#{time_now}" + index_names.push(index_name) + Elasticsearch::Model.client.indices.create( + index: index_name, + body: {"mappings": model.mapping.to_hash} + ) + end + LOG.info "New indices #{index_names} are created." + index_names end def self.delete_index(name) - begin - Elasticsearch::Model.client.indices.delete(index: name) - LOG.info "Deleted index: #{name}." - rescue Elasticsearch::Transport::Transport::Errors::NotFound - # NOTE (CCB): Future versions of the Elasticsearch client support the ignore parameter, - # that can be used to ignore 404 errors. - LOG.info "Unable to delete non-existent index: #{name}." + Elasticsearch::Model.client.indices.delete(index: name, ignore_unavailable: true) + LOG.info "Deleted index: #{name}." + end + + # Deletes current indices if they used by forum app + def self.delete_indices + # NOTE: elasticsearch cannot delete index by alias, so forum store names + # of current indices in the temporary_index_names variable. If it is empty + # forum indices cannot be deleted by forum + if temporary_index_names.length > 0 + Elasticsearch::Model.client.indices.delete(index: temporary_index_names, ignore_unavailable: true) + LOG.info "Indices #{temporary_index_names} are deleted." + else + LOG.info "No Indices to delete." end end - def self.batch_import_post_process(response, batch_number, sleep_time) - response['items'].select { |i| i['index']['error'] }.each do |item| - LOG.error "Error indexing. Response was: #{response}" - end - LOG.info "Imported batch #{batch_number} into the index" - sleep(sleep_time) + def self.batch_import_post_process(response, batch_number) + response['items'].select { |i| i['index']['error'] }.each do |item| + LOG.error "Error indexing. Response was: #{response}" + end + LOG.info "Imported batch #{batch_number} into the index" end def self.get_index_shard_count(name) @@ -92,6 +132,14 @@ def self.exists_alias(alias_name) Elasticsearch::Model.client.indices.exists_alias(name: alias_name) end + def self.exists_indices + Elasticsearch::Model.client.indices.exists(index: temporary_index_names) + end + + def self.exists_aliases(aliases) + Elasticsearch::Model.client.indices.exists_alias(name: aliases) + end + def self.exists_index(index_name) Elasticsearch::Model.client.indices.exists(index: index_name) end @@ -136,66 +184,57 @@ def self.move_alias(alias_name, index_name, force_delete=false) LOG.info "Alias [#{alias_name}] now points to index [#{index_name}]." end - def self.refresh_index(name) - Elasticsearch::Model.client.indices.refresh(index: name) - end - - def self.initialize_index(alias_name, force_new_index) - # When force_new_index is true, a fresh index will be created for the alias, - # even if it already exists. - if force_new_index or not exists_alias(alias_name) - index_name = create_index() - # WARNING: if an index exists with the same name as the intended alias, it - # will be deleted. - move_alias(alias_name, index_name, force_delete: true) + def self.refresh_indices + if temporary_index_names.length > 0 + Elasticsearch::Model.client.indices.refresh(index: INDEX_NAMES) else - LOG.info "Skipping initialization. The 'content' alias already exists. If 'rake search:validate_index' indicates "\ - "a problem with the mappings, you could either use 'rake search:rebuild_index' to reload from the db or 'rake "\ - "search:initialize[true]' to force initialization with an empty index." + fail "No indices to refresh" end end - def self.put_mappings(name) - # As of ES 0.9, the order that these mappings are created matters. Unit test failures - # appear with a different order. It is unclear if this is a defect in ES, the test, or - # neither. - [CommentThread, Comment].each do |model| - Elasticsearch::Model.client.indices.put_mapping(index: name, type: model.document_type, body: model.mappings.to_hash) + def self.initialize_indices(force_new_index = false) + # When force_new_index is true, fresh indices will be created even if it already exists. + if force_new_index or not exists_aliases(INDEX_NAMES) + index_names = create_indices + index_names.each do |index_name| + model = get_index_model_rel(index_name) + move_alias(model.index_name, index_name, force_delete: true) + end + add_temporary_index_names(index_names) + else + LOG.info "Skipping initialization. Indices already exist. If 'rake search:validate_indices' indicates "\ + "a problem with the mappings, you could either use 'rake search:rebuild_indices' to reload from the db or 'rake "\ + "search:initialize[true]' to force initialization with an empty index." end - LOG.info "Added mappings to index: #{name}." end - # Validates that the alias exists and its index includes the proper mappings. - # There is no return value, but an exception is raised if the alias is invalid. - # - # Params: - # +alias_name+:: The alias name to be validated. - def self.validate_index(alias_name) - if exists_alias(alias_name) === false - fail "Alias '#{alias_name}' does not exist." - end + # Validates that each index includes the proper mappings. + # There is no return value, but an exception is raised if the index is invalid. + def self.validate_indices + actual_mappings = Elasticsearch::Model.client.indices.get_mapping(index: INDEX_NAMES) - actual_mapping = Elasticsearch::Model.client.indices.get_mapping(index: alias_name).values[0]['mappings'] - expected_mapping = {} - [CommentThread, Comment].each do |model| - expected_mapping.merge! model.mappings.to_hash + if actual_mappings.length == 0 + fail "Indices are not exist!" end - # As of ES 0.9, the order the mappings are created in matters. See put_mappings. - # Compare document types and order - expected_mapping_keys = expected_mapping.keys.map { |x| x.to_s } - if actual_mapping.keys != expected_mapping_keys - fail "Actual mapping types [#{actual_mapping.keys}] does not match expected mapping types (including order) [#{expected_mapping.keys}]." - end + actual_mappings.keys.each do |index_name| + model = get_index_model_rel(index_name) + expected_mapping = model.mappings.to_hash + actual_mapping = actual_mappings[index_name]['mappings'] + expected_mapping_keys = expected_mapping.keys.map { |x| x.to_s } + if actual_mapping.keys != expected_mapping_keys + fail "Actual mapping [#{actual_mapping.keys}] does not match expected mapping (including order) [#{expected_mapping.keys}]." + end - # Check that expected field mappings of the correct type exist - expected_mapping.keys.each do |doc_type| + actual_mapping_properties = actual_mapping['properties'] + expected_mapping_properties = expected_mapping[:properties] missing_fields = Array.new invalid_field_types = Array.new - expected_mapping[doc_type][:properties].keys.each do |property| - if actual_mapping[doc_type.to_s]['properties'].key?(property.to_s) - expected_type = expected_mapping[doc_type][:properties][property][:type].to_s - actual_type = actual_mapping[doc_type.to_s]['properties'][property.to_s]['type'] + + expected_mapping_properties.keys.each do |property| + if actual_mapping_properties.key?(property.to_s) + expected_type = expected_mapping_properties[property][:type].to_s + actual_type = actual_mapping_properties[property.to_s]['type'] if actual_type != expected_type invalid_field_types.push("'#{property}' type '#{actual_type}' should be '#{expected_type}'") end @@ -204,10 +243,13 @@ def self.validate_index(alias_name) end end if missing_fields.any? or invalid_field_types.any? - fail "Document type '#{doc_type}' has missing or invalid field mappings. Missing fields: #{missing_fields}. Invalid types: #{invalid_field_types}." + fail "Index '#{model.index_name}' has missing or invalid field mappings. Missing fields: #{missing_fields}. Invalid types: #{invalid_field_types}." end + + # Check that expected field mappings of the correct type exist + LOG.info "Passed: Index '#{model.index_name}' exists with up-to-date mappings." end - LOG.info "Passed: Alias '#{alias_name}' exists with up-to-date mappings." + end end diff --git a/lib/tasks/search.rake b/lib/tasks/search.rake index 28ff762aece..89d3740eb86 100644 --- a/lib/tasks/search.rake +++ b/lib/tasks/search.rake @@ -2,61 +2,37 @@ require_relative '../task_helpers' namespace :search do desc 'Indexes content updated in the last N minutes.' - task :catchup, [:minutes, :index_name, :batch_size, :sleep_time] => :environment do |t, args| + task :catchup, [:comments_index_name, :comment_threads_index_name, :minutes, :batch_size] => :environment do |t, args| start_time = Time.now - (args[:minutes].to_i * 60) - args.with_defaults(:index_name => Content::ES_INDEX_NAME) args.with_defaults(:batch_size => 500) - args.with_defaults(:sleep_time => 0) - TaskHelpers::ElasticsearchHelper.catchup_index(start_time, args[:index_name], args[:batch_size].to_i, args[:sleep_time].to_i) + indices = [args[:comments_index_name].to_s, args[:comment_threads_index_name].to_s] + TaskHelpers::ElasticsearchHelper.catchup_indices(indices, start_time, args[:batch_size].to_i) end - desc 'Rebuilds a new index of all data from the database and then updates alias.' - task :rebuild_index, [:call_move_alias, :batch_size, :sleep_time, :extra_catchup_minutes] => :environment do |t, args| - args.with_defaults(:call_move_alias => true) + desc 'Rebuilds new indices of all data from the database.' + task :rebuild_indices, [:batch_size, :extra_catchup_minutes] => :environment do |t, args| args.with_defaults(:batch_size => 500) - args.with_defaults(:sleep_time => 0) # sleep time between batches in seconds args.with_defaults(:extra_catchup_minutes => 5) # additional catchup time in minutes - alias_name = args[:call_move_alias] === true ? Content::ES_INDEX_NAME : nil - TaskHelpers::ElasticsearchHelper.rebuild_index( - alias_name, + + TaskHelpers::ElasticsearchHelper.rebuild_indices( args[:batch_size].to_i, - args[:sleep_time].to_i, args[:extra_catchup_minutes].to_i ) end - desc 'Generate a new, empty physical index, without bringing it online.' - task :create_index => :environment do - TaskHelpers::ElasticsearchHelper.create_index - end - - desc 'Creates a new search index and points the "content" alias to it' + desc 'Creates a new search indices' task :initialize, [:force_new_index] => :environment do |t, args| # When force_new_index is true, a fresh index for "content" alias is created even if the # "content" alias already exists. args.with_defaults(:force_new_index => false) # WARNING: if "content" is an index and not an alias, it will be deleted and recreated # no matter what is supplied for the force argument - TaskHelpers::ElasticsearchHelper.initialize_index(Content::ES_INDEX_NAME, args[:force_new_index]) - end - - desc 'Updates field mappings for the given index.' - task :put_mappings, [:index] => :environment do |t, args| - args.with_defaults(:index => Content::ES_INDEX_NAME) - TaskHelpers::ElasticsearchHelper.put_mappings(args[:index]) - end - - desc 'Sets/moves an alias to the specified index' - task :move_alias, [:index, :force_delete] => :environment do |t, args| - # Forces delete of an index with same name as alias if it exists. - args.with_defaults(:force_delete => false) - alias_name = Content::ES_INDEX_NAME - TaskHelpers::ElasticsearchHelper.move_alias(alias_name, args[:index], args[:force_delete]) + TaskHelpers::ElasticsearchHelper.initialize_indices(args[:force_new_index]) end desc 'Validates that the "content" alias exists with expected field mappings and types.' - task :validate_index => :environment do - TaskHelpers::ElasticsearchHelper.validate_index(Content::ES_INDEX_NAME) + task :validate_indices => :environment do + TaskHelpers::ElasticsearchHelper.validate_indices end end diff --git a/lib/unicorn_helpers.rb b/lib/unicorn_helpers.rb index 734de466623..476d0e353a2 100644 --- a/lib/unicorn_helpers.rb +++ b/lib/unicorn_helpers.rb @@ -3,12 +3,12 @@ module UnicornHelpers # Make sure elasticsearch is configured correctly def self.exit_on_invalid_index begin - TaskHelpers::ElasticsearchHelper.validate_index(Content::ES_INDEX_NAME) + TaskHelpers::ElasticsearchHelper.validate_indices rescue => e # Magic exit code expected by forum-supervisor.sh for when - # rake search:validate_index fails + # rake search:validate_indices fails STDERR.puts "ERROR: ElasticSearch configuration validation failed. "\ - "\"rake search:validate_index\" failed with the following message: #{e.message}" + "\"rake search:validate_indices\" failed with the following message: #{e.message}" exit(101) end end diff --git a/models/comment.rb b/models/comment.rb index 954256e7660..437faeb4618 100644 --- a/models/comment.rb +++ b/models/comment.rb @@ -11,6 +11,7 @@ class Comment < Content include Mongoid::Timestamps include Mongoid::MagicCounterCache include ActiveModel::MassAssignmentSecurity + include Elasticsearch::Model include Searchable voteable self, :up => +1, :down => -1 @@ -31,17 +32,23 @@ class Comment < Content index({_type: 1, comment_thread_id: 1, author_id: 1, updated_at: 1}) index({comment_thread_id: 1, author_id: 1, created_at: 1}) - index_name Content::ES_INDEX_NAME - - mapping do - indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets - indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false - indexes :comment_thread_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'comment_thread_id' - indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'commentable_id' - indexes :group_id, type: :string, index: :not_analyzed, included_in_all: false, as: 'group_id' - indexes :context, type: :string, index: :not_analyzed, included_in_all: false, as: 'context' - indexes :created_at, type: :date, included_in_all: false - indexes :updated_at, type: :date, included_in_all: false + index_name = "comment" + + mapping dynamic: 'false' do + indexes :body, type: :text, store: true, term_vector: :with_positions_offsets + indexes :course_id, type: :keyword + indexes :comment_thread_id, type: :keyword + indexes :commentable_id, type: :keyword + indexes :group_id, type: :keyword + indexes :context, type: :keyword + indexes :created_at, type: :date + indexes :updated_at, type: :date + # NOTE: this field needs only for testing + indexes :title, type: :keyword + end + + def as_indexed_json(options={}) + as_json(except: [:id, :_id]) end belongs_to :comment_thread, index: true diff --git a/models/comment_thread.rb b/models/comment_thread.rb index 1c46f39dc46..61fa44d68cc 100644 --- a/models/comment_thread.rb +++ b/models/comment_thread.rb @@ -11,6 +11,7 @@ class CommentThread < Content include Mongoid::Timestamps include Mongoid::Attributes::Dynamic include ActiveModel::MassAssignmentSecurity + include Elasticsearch::Model include Searchable extend Enumerize @@ -36,23 +37,27 @@ class CommentThread < Content index({author_id: 1, course_id: 1}) - index_name Content::ES_INDEX_NAME - - mapping do - indexes :title, type: :string, analyzer: :english, boost: 5.0, stored: true, term_vector: :with_positions_offsets - indexes :body, type: :string, analyzer: :english, stored: true, term_vector: :with_positions_offsets - indexes :created_at, type: :date, included_in_all: false - indexes :updated_at, type: :date, included_in_all: false - indexes :last_activity_at, type: :date, included_in_all: false - indexes :comment_count, type: :integer, included_in_all: false - indexes :votes_point, type: :integer, as: 'votes_point', included_in_all: false - indexes :context, type: :string, index: :not_analyzed, included_in_all: false - indexes :course_id, type: :string, index: :not_analyzed, included_in_all: false - indexes :commentable_id, type: :string, index: :not_analyzed, included_in_all: false - indexes :author_id, type: :string, as: 'author_id', index: :not_analyzed, included_in_all: false - indexes :group_id, type: :integer, as: 'group_id', index: :not_analyzed, included_in_all: false - indexes :id, :index => :not_analyzed - indexes :thread_id, :analyzer => :keyword, :as => '_id' + index_name = "comment_thread" + + mapping dynamic: 'false' do + indexes :title, type: :text, boost: 5.0, store: true, term_vector: :with_positions_offsets + indexes :body, type: :text, store: true, term_vector: :with_positions_offsets + indexes :created_at, type: :date + indexes :updated_at, type: :date + indexes :last_activity_at, type: :date + indexes :comment_count, type: :integer + indexes :votes_point, type: :integer + indexes :context, type: :keyword + indexes :course_id, type: :keyword + indexes :commentable_id, type: :keyword + indexes :author_id, type: :keyword + indexes :group_id, type: :integer + indexes :id, type: :keyword + indexes :thread_id, type: :keyword + end + + def as_indexed_json(options={}) + as_json(except: [:thread_id, :_id]) end belongs_to :author, class_name: 'User', inverse_of: :comment_threads, index: true diff --git a/models/content.rb b/models/content.rb index 9d8a21573b4..2bd89cd44b8 100644 --- a/models/content.rb +++ b/models/content.rb @@ -2,9 +2,6 @@ class Content include Mongoid::Document include Mongo::Voteable - ES_INDEX_NAME = 'content' - - field :visible, type: Boolean, default: true field :abuse_flaggers, type: Array, default: [] field :historical_abuse_flaggers, type: Array, default: [] #preserve abuse flaggers after a moderator unflags diff --git a/models/user.rb b/models/user.rb index f94ad3419b2..aa155cb37bd 100644 --- a/models/user.rb +++ b/models/user.rb @@ -174,8 +174,7 @@ def retire_comment(comment, retired_username) data[:author_username] = retired_username { update: { - _index: Content::ES_INDEX_NAME, - _type: comment.__elasticsearch__.document_type, + _index: Comment.index_name, _id: comment._id, data: { doc: data } } @@ -212,8 +211,7 @@ def replace_comment_username(comment, new_username) end { update: { - _index: Content::ES_INDEX_NAME, - _type: comment.__elasticsearch__.document_type, + _index: Comment.index_name, _id: comment._id, data: { doc: data } } diff --git a/spec/api/query_spec.rb b/spec/api/query_spec.rb index b4f33c5b76b..51fc13b2356 100644 --- a/spec/api/query_spec.rb +++ b/spec/api/query_spec.rb @@ -11,7 +11,7 @@ shared_examples_for 'a search endpoint' do subject do - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices get '/api/v1/search/threads', text: body end diff --git a/spec/api/search_spec.rb b/spec/api/search_spec.rb index 2c58dc76174..8afa894a25b 100644 --- a/spec/api/search_spec.rb +++ b/spec/api/search_spec.rb @@ -15,7 +15,7 @@ def get_result_ids(result) describe "GET /api/v1/search/threads" do shared_examples_for 'response for invalid request' do - before (:each) { refresh_es_index } + before (:each) { TaskHelpers::ElasticsearchHelper.refresh_indices } subject { get '/api/v1/search/threads', {course_id: course_id}.merge!(parameters) } it { should be_ok } @@ -44,14 +44,14 @@ def assert_result_total(expected_total) def create_and_delete_comment_or_thread(factory_name, text) comment_or_thread = create(factory_name, course_id: course_id, body: text) comment_or_thread.destroy - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices end def update_comment_or_thread(factory_name, original_text, new_text) comment_or_thread = create(factory_name, course_id: course_id, body: original_text) comment_or_thread.body = new_text comment_or_thread.save! - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices end it 'returns an empty result if thread is deleted' do @@ -119,7 +119,7 @@ def update_comment_or_thread(factory_name, original_text, new_text) end thread end - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices threads end @@ -213,7 +213,7 @@ def assert_response_contains(expected_thread_indexes) threads[i].save! end threads[4].save! - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices threads end @@ -250,7 +250,7 @@ def check_sort(sort_key, expected_thread_indexes) describe "pagination" do let!(:threads) do threads = (1..50).map { |i| make_thread(author, "text", course_id, "dummy") } - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices threads end @@ -292,7 +292,7 @@ def check_correction(original_text, corrected_text) before(:each) do thread = make_thread(author, "a thread about green artichokes", course_id, commentable_id) make_comment(author, thread, "a comment about greed pineapples") - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices end it "can correct a word appearing only in a comment" do @@ -334,7 +334,7 @@ def check_correction(original_text, corrected_text) thread.group_id = 1 thread.save! end - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices get "/api/v1/search/threads", text: "abot", course_id: course_id last_response.should be_ok @@ -358,7 +358,7 @@ def check_correction(original_text, corrected_text) end # Elasticsearch does not necessarily make newly indexed content # available immediately, so we must explicitly refresh the index - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices test_text = lambda do |text, expected_total_results, expected_num_pages| get '/api/v1/search/threads', course_id: course_id, text: text, per_page: '10' @@ -382,7 +382,7 @@ def test_unicode_data(text) thread = create(:comment_thread, body: "#{search_term} #{text}") create(:comment, comment_thread: thread, body: text) - refresh_es_index + TaskHelpers::ElasticsearchHelper.refresh_indices get '/api/v1/search/threads', course_id: thread.course_id, text: search_term diff --git a/spec/lib/task_helpers_spec.rb b/spec/lib/task_helpers_spec.rb index 46acfde001a..a0342ecda3e 100644 --- a/spec/lib/task_helpers_spec.rb +++ b/spec/lib/task_helpers_spec.rb @@ -3,10 +3,11 @@ describe TaskHelpers do describe TaskHelpers::ElasticsearchHelper do - let(:alias_name) { 'test_alias' } + let(:alias_name) { 'test_alias' } - after(:each) do - TaskHelpers::ElasticsearchHelper.delete_index(alias_name) + before(:each) do + TaskHelpers::ElasticsearchHelper.delete_indices + TaskHelpers::ElasticsearchHelper.rebuild_indices end def assert_alias_points_to_index(alias_name, index_name) @@ -16,11 +17,12 @@ def assert_alias_points_to_index(alias_name, index_name) context("#move_alias") do before(:each) do - @index_name = TaskHelpers::ElasticsearchHelper.create_index() + @index_names = TaskHelpers::ElasticsearchHelper.create_indices + @index_name = @index_names[0] end after(:each) do - TaskHelpers::ElasticsearchHelper.delete_index(@index_name) + Elasticsearch::Model.client.indices.delete(index: @index_names, ignore_unavailable: true) end it "points alias to index" do @@ -36,76 +38,34 @@ def assert_alias_points_to_index(alias_name, index_name) expect { TaskHelpers::ElasticsearchHelper.move_alias(alias_name, 'missing_index') }.to raise_error end - it "fails when index of same name as alias exists" do - TaskHelpers::ElasticsearchHelper.create_index(alias_name) - expect { TaskHelpers::ElasticsearchHelper.move_alias(alias_name, @index_name) }.to raise_error - end - - it "points alias to index when index of same name as alias is deleted" do - TaskHelpers::ElasticsearchHelper.create_index(alias_name) - force_delete = true - TaskHelpers::ElasticsearchHelper.move_alias(alias_name, @index_name, force_delete) - assert_alias_points_to_index(alias_name, @index_name) - end - end - context("#rebuild_index") do + context("#rebuild_indices") do include_context 'search_enabled' - def create_thread_and_delete_index() - thread = create(:comment_thread, body: 'the best test body', course_id: 'test_course_id') - refresh_es_index - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) + it "builds new index with content" do + create(:comment_thread, body: 'the best test body', course_id: 'test_course_id') + TaskHelpers::ElasticsearchHelper.refresh_indices + Elasticsearch::Model.client.search( + index: TaskHelpers::ElasticsearchHelper::INDEX_NAMES + )['hits']['total']['value'].should be > 0 end - it "builds new index without switching alias" do - create_thread_and_delete_index - - index_name = TaskHelpers::ElasticsearchHelper.rebuild_index() - refresh_es_index(index_name) - - Elasticsearch::Model.client.search(index: index_name)['hits']['total'].should be > 0 - end - - it "builds new index and points alias to it" do - create_thread_and_delete_index - - index_name = TaskHelpers::ElasticsearchHelper.rebuild_index(alias_name) - refresh_es_index(alias_name) - - Elasticsearch::Model.client.search(index: alias_name)['hits']['total'].should be > 0 - end - - it "builds new index and points alias to it, first deleting index with same name as alias" do - create_thread_and_delete_index - TaskHelpers::ElasticsearchHelper.create_index(alias_name) - - index_name = TaskHelpers::ElasticsearchHelper.rebuild_index(alias_name) - refresh_es_index(alias_name) - - Elasticsearch::Model.client.search(index: alias_name)['hits']['total'].should be > 0 - end end - context("#validate_index") do - include_context 'search_enabled' - - subject { TaskHelpers::ElasticsearchHelper.validate_index(Content::ES_INDEX_NAME) } + context("#validate_indices") do + subject { TaskHelpers::ElasticsearchHelper.validate_indices} it "validates the 'content' alias exists with proper mappings" do subject end - it "fails if the alias doesn't exist" do - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) - expect{subject}.to raise_error(RuntimeError) + it "fails if one of the index doesn't exist" do + Elasticsearch::Model.client.indices.delete(index: TaskHelpers::ElasticsearchHelper::temporary_index_names[0]) + expect{subject}.to raise_error(Elasticsearch::Transport::Transport::Errors::NotFound) + Elasticsearch::Model.client.indices.delete(index: TaskHelpers::ElasticsearchHelper::temporary_index_names[1]) end - it "fails if the alias has the wrong mappings" do - Elasticsearch::Model.client.indices.delete_mapping(index: Content::ES_INDEX_NAME, type: Comment.document_type) - expect{subject}.to raise_error(RuntimeError) - end end end diff --git a/spec/lib/tasks/search_rake_spec.rb b/spec/lib/tasks/search_rake_spec.rb index cf3ab17ac84..7d4e4c9c673 100644 --- a/spec/lib/tasks/search_rake_spec.rb +++ b/spec/lib/tasks/search_rake_spec.rb @@ -1,66 +1,57 @@ require 'spec_helper' require 'elasticsearch' -describe "search:rebuild_index" do +describe "search:rebuild_indices" do include_context "rake" before do - TaskHelpers::ElasticsearchHelper.stub(:rebuild_index) + TaskHelpers::ElasticsearchHelper.stub(:rebuild_indices) end its(:prerequisites) { should include("environment") } - it "calls rebuild_index with defaults" do - TaskHelpers::ElasticsearchHelper.should_receive(:rebuild_index).with(Content::ES_INDEX_NAME, 500, 0, 5) + it "calls rebuild_indices with defaults" do + TaskHelpers::ElasticsearchHelper.should_receive(:rebuild_indices).with(500, 5) subject.invoke end - it "calls rebuild_index with arguments" do + it "calls rebuild_indices with arguments" do # Rake calls receive arguments as strings. - call_move_alias = 'false' batch_size = '100' - sleep_time = '2' extra_catchup_minutes = '10' - TaskHelpers::ElasticsearchHelper.should_receive(:rebuild_index).with( - nil, batch_size.to_i, sleep_time.to_i, extra_catchup_minutes.to_i + TaskHelpers::ElasticsearchHelper.should_receive(:rebuild_indices).with( + batch_size.to_i, extra_catchup_minutes.to_i ) - subject.invoke(call_move_alias, batch_size, sleep_time, extra_catchup_minutes) + subject.invoke(batch_size, extra_catchup_minutes) end end describe "search:catchup" do include_context "rake" + let(:indices) { TaskHelpers::ElasticsearchHelper::INDEX_NAMES } + let(:comments_index_name) { Comment.index_name } + let(:comment_threads_index_name) { CommentThread.index_name } before do - TaskHelpers::ElasticsearchHelper.stub(:catchup_index) + TaskHelpers::ElasticsearchHelper.stub(:catchup_indices) end its(:prerequisites) { should include("environment") } it "calls catchup with defaults" do - TaskHelpers::ElasticsearchHelper.should_receive(:catchup_index).with( - anything, Content::ES_INDEX_NAME, 500, 0 - ) do |start_time_arg| - start_time_arg.should be_within(1.second).of Time.now - end + TaskHelpers::ElasticsearchHelper.should_receive(:catchup_indices).with(indices, anything, 500) - subject.invoke + subject.invoke(comments_index_name, comment_threads_index_name) end it "calls catchup with arguments" do # Rake calls receive arguments as strings. minutes = '2' - index_name = 'some_index' batch_size = '100' - sleep_time = '2' - TaskHelpers::ElasticsearchHelper.should_receive(:catchup_index).with( - anything, index_name, batch_size.to_i, sleep_time.to_i - ) do |start_time_arg| - start_time_arg.should be_within((60 * minutes.to_i + 1).second).of Time.now - end + TaskHelpers::ElasticsearchHelper.should_receive(:catchup_indices).with(indices, anything, batch_size.to_i) - subject.invoke(minutes, index_name, batch_size, sleep_time) + subject.invoke(comments_index_name, comment_threads_index_name, minutes, batch_size) end end diff --git a/spec/lib/unicorn_helpers_spec.rb b/spec/lib/unicorn_helpers_spec.rb index 0d75c41932c..6be742cd429 100644 --- a/spec/lib/unicorn_helpers_spec.rb +++ b/spec/lib/unicorn_helpers_spec.rb @@ -13,7 +13,7 @@ end it "exits when index is invalid" do - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) + TaskHelpers::ElasticsearchHelper.delete_indices # code 101 is special code recongnized by forum-supervisor.sh lambda{subject}.should exit_with_code(101) end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 62077e125ad..cc23a6db3bc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -49,7 +49,6 @@ def set_api_key_header RSpec.configure do |config| config.include Rack::Test::Methods - config.treat_symbols_as_metadata_keys_with_true_values = true config.filter_run focus: true config.run_all_when_everything_filtered = true end diff --git a/spec/support/elasticsearch.rb b/spec/support/elasticsearch.rb index 5ad60fb03f0..9ab11755758 100644 --- a/spec/support/elasticsearch.rb +++ b/spec/support/elasticsearch.rb @@ -1,10 +1,5 @@ require 'task_helpers' -def refresh_es_index(index_name=nil) - index_name = index_name ? index_name : Content::ES_INDEX_NAME - TaskHelpers::ElasticsearchHelper.refresh_index(index_name) -end - RSpec.shared_context 'search_enabled' do @@ -13,12 +8,12 @@ def refresh_es_index(index_name=nil) # Delete any previously created index to ensure our search tests start # with a clean slate. Each test will recreate the index. - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) + TaskHelpers::ElasticsearchHelper.delete_indices end after(:each) do # Delete the index after each test so it will be re-created. - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) + TaskHelpers::ElasticsearchHelper.delete_indices end after(:all) do @@ -26,7 +21,7 @@ def refresh_es_index(index_name=nil) CommentService.config[:enable_search] = false # Ensure (once more) the index was deleted. - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) + TaskHelpers::ElasticsearchHelper.delete_indices end end @@ -39,14 +34,11 @@ def refresh_es_index(index_name=nil) config.before(:each) do # Create the index before each test if it doesn't exist. - if not TaskHelpers::ElasticsearchHelper.exists_alias(Content::ES_INDEX_NAME) - test_index = TaskHelpers::ElasticsearchHelper.create_index - TaskHelpers::ElasticsearchHelper.move_alias(Content::ES_INDEX_NAME, test_index) - end + TaskHelpers::ElasticsearchHelper.initialize_indices(true) end - config.after(:suite) do - TaskHelpers::ElasticsearchHelper.delete_index(Content::ES_INDEX_NAME) + config.after(:each) do + Elasticsearch::Model.client.indices.delete(index: "_all", ignore_unavailable: true) end end