Merge pull request #327 from raccoongang/golub-sergey/OeX_ElasticSearch/feature/transition-from-ES1.5-to-ES7

[BD-19] Transition to the new Elasticsearch libs version for cs_comments_service
dianakhuang committed Oct 14, 2020
2 parents 609eef0 + af1dbf1 commit 573567e
Showing 21 changed files with 361 additions and 374 deletions.
13 changes: 12 additions & 1 deletion .travis/docker-compose-travis.yml
@@ -3,8 +3,15 @@ version: "2"

services:
elasticsearch:
image: elasticsearch:1.5.2
image: elasticsearch:7.8.0
container_name: "es.edx"
environment:
- discovery.type=single-node
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
volumes:
- data01:/usr/share/elasticsearch/data
- ./elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
mongo:
image: mongo:3.2.21
container_name: "mongo.edx"
@@ -15,6 +22,7 @@ services:
- ..:/edx/app/forum/cs_comments_service
environment:
MONGOID_AUTH_MECH: ""
SEARCH_SERVER_ES7: "http://elasticsearch:9200"
forum:
extends: forum-base
command: tail -f /dev/null
@@ -27,3 +35,6 @@ services:
depends_on:
- "elasticsearch"
- "mongo"

volumes:
data01:
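
The compose change above is the whole of the CI-side move: a single-node ES 7.8 container with memory locking and a named data volume, its address handed to forum-base via SEARCH_SERVER_ES7. As a quick sanity check that the container is reachable (a sketch for local debugging, not part of this PR), the same elasticsearch gem the service uses can ping the cluster:

```ruby
# Sketch: confirm the single-node Elasticsearch 7 container is up before running tests.
# SEARCH_SERVER_ES7 comes from docker-compose-travis.yml; the fallback URL is an assumption.
require 'elasticsearch'

client = Elasticsearch::Client.new(url: ENV.fetch('SEARCH_SERVER_ES7', 'http://elasticsearch:9200'))
health = client.cluster.health
puts "cluster status: #{health['status']}"  # a single node reports "green" or "yellow"
```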
4 changes: 4 additions & 0 deletions .travis/elasticsearch.yml
@@ -0,0 +1,4 @@
network.host: 0.0.0.0
cluster.routing.allocation.disk.watermark.low: 150mb
cluster.routing.allocation.disk.watermark.high: 100mb
cluster.routing.allocation.disk.watermark.flood_stage: 50mb
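
These deliberately low watermarks stop a nearly full CI disk from flipping the single node's indices into read-only mode. If you need to confirm the mounted file was picked up (illustrative only, and assuming yml-level settings surface in the defaults section of the cluster settings API), they can be read back like so:

```ruby
# Sketch: read back the disk watermark settings to verify elasticsearch.yml was applied.
require 'elasticsearch'

client = Elasticsearch::Client.new(url: ENV.fetch('SEARCH_SERVER_ES7', 'http://localhost:9200'))
settings = client.cluster.get_settings(include_defaults: true, flat_settings: true)
%w[low high flood_stage].each do |level|
  key = "cluster.routing.allocation.disk.watermark.#{level}"
  value = settings.dig('persistent', key) || settings.dig('defaults', key)
  puts "#{key}: #{value}"
end
```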
4 changes: 2 additions & 2 deletions Gemfile
@@ -38,8 +38,8 @@ gem 'will_paginate_mongoid', "~>2.0"
gem 'rdiscount'
gem 'nokogiri', "~>1.8.1"

gem 'elasticsearch', '~> 1.1.2'
gem 'elasticsearch-model', '~> 0.1.9'
gem 'elasticsearch', '~> 7.8.0'
gem 'elasticsearch-model', '~> 7.1.0'

gem 'dalli'

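The bump to elasticsearch-model 7.x pairs with the move, made elsewhere in this PR, from one shared content index to per-model comments and comment_threads indices. In 7.x each searchable model carries its own index name, roughly like this (a sketch; the real Comment and CommentThread model definitions are outside this hunk):

```ruby
# Sketch: per-model index naming with elasticsearch-model 7.x. The actual models in
# cs_comments_service carry mappings, callbacks, and Mongoid fields not shown here.
require 'elasticsearch/model'

class Comment
  include Elasticsearch::Model

  index_name 'comments'          # aliased to a timestamped index such as comments_20161220185820323
end

class CommentThread
  include Elasticsearch::Model

  index_name 'comment_threads'   # aliased to comment_threads_<timestamp>
end
```
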
28 changes: 14 additions & 14 deletions Gemfile.lock
@@ -50,25 +50,25 @@ GEM
docile (1.3.2)
domain_name (0.5.20170404)
unf (>= 0.0.5, < 1.0.0)
elasticsearch (1.1.2)
elasticsearch-api (= 1.1.2)
elasticsearch-transport (= 1.1.2)
elasticsearch-api (1.1.2)
elasticsearch (7.8.0)
elasticsearch-api (= 7.8.0)
elasticsearch-transport (= 7.8.0)
elasticsearch-api (7.8.0)
multi_json
elasticsearch-model (0.1.9)
elasticsearch-model (7.1.0)
activesupport (> 3)
elasticsearch (> 0.4)
elasticsearch (> 1)
hashie
elasticsearch-transport (1.1.2)
faraday
elasticsearch-transport (7.8.0)
faraday (~> 1)
multi_json
enumerize (2.1.2)
activesupport (>= 3.2)
factory_girl (4.8.0)
activesupport (>= 3.0.0)
faker (1.7.3)
i18n (~> 0.5)
faraday (0.12.1)
faraday (1.0.1)
multipart-post (>= 1.2, < 3)
ffi (1.9.18)
formatador (0.2.5)
@@ -84,7 +84,7 @@ GEM
guard-unicorn (0.2.0)
guard (>= 1.1)
hashdiff (0.3.4)
hashie (3.5.5)
hashie (4.1.0)
http-cookie (1.0.3)
domain_name (~> 0.5)
i18n (0.9.5)
@@ -117,8 +117,8 @@ GEM
mongoid_magic_counter_cache (1.1.1)
mongoid
rake
multi_json (1.12.1)
multipart-post (2.0.0)
multi_json (1.15.0)
multipart-post (2.1.1)
nenv (0.3.0)
netrc (0.11.0)
newrelic_rpm (5.6.0.349)
@@ -221,8 +221,8 @@ DEPENDENCIES
dalli
delayed_job
delayed_job_mongoid
elasticsearch (~> 1.1.2)
elasticsearch-model (~> 0.1.9)
elasticsearch (~> 7.8.0)
elasticsearch-model (~> 7.1.0)
enumerize
factory_girl (~> 4.0)
faker (~> 1.6)
31 changes: 12 additions & 19 deletions README.rst
@@ -35,45 +35,38 @@ Install the requisite gems:
$ bundle install
To initialize the index:
To initialize indices:

Setup the search index. Note that the command below creates an alias with a unique name (e.g.
content_20161220185820323), and assigns it a known alias: content. If you choose not to use the command below, you
should still opt to reference your index by an alias rather than the actual index name. This will enable you to swap out
indices (e.g. rebuild_index) without having to take downtime or modify code with a new index name.
Set up the search indices. Note that the command below creates timestamped indices (e.g. `comments_20161220185820323` and
`comment_threads_20161220185820323`) and assigns them the `comments` and `comment_threads` aliases. This lets you
swap out indices (e.g. via rebuild_indices) without taking downtime or modifying code to point at a new index name.

.. code-block:: bash
$ bin/rake search:initialize
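
To see what initialize produced, the alias-to-index mapping can be listed with the Elasticsearch client (a quick sketch, not one of the shipped rake tasks):

```ruby
# Sketch: show which timestamped index each alias points to after search:initialize.
require 'elasticsearch'

client = Elasticsearch::Client.new(url: ENV.fetch('SEARCH_SERVER_ES7', 'http://localhost:9200'))
%w[comments comment_threads].each do |alias_name|
  client.indices.get_alias(name: alias_name).each_key do |index|
    puts "#{alias_name} -> #{index}"   # e.g. comments -> comments_20161220185820323
  end
end
```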
To validate the 'content' alias exists and contains the proper mappings:
To validate indices exist and contain the proper mappings:

.. code-block:: bash
$ bin/rake search:validate_index
$ bin/rake search:validate_indices
To rebuild the index:
To rebuild indices:

To rebuild a new index from the database and then point the alias 'content' to it, you can use the
rebuild_index task. This task will also run catchup before and after the alias is moved, to minimize time where the
alias does not contain all documents.
To rebuild new indices from the database and then point the `comments` and `comment_threads` aliases at the
rebuilt indices with the matching prefixes, use the rebuild_indices task. This task also runs a catch-up before and
after the aliases are moved, to minimize the time during which the aliases do not contain all documents.

.. code-block:: bash
$ bin/rake search:rebuild_index
To rebuild a new index without moving the alias and without running catchup, use the following:

.. code-block:: bash
$ bin/rake search:rebuild_index[false]
$ bin/rake search:rebuild_indices
You can also adjust the batch size (e.g. 200) and the sleep time (e.g. 2 seconds) between batches to lighten the load
on MongoDB.

.. code-block:: bash
$ bin/rake search:rebuild_index[true,200,2]
$ bin/rake search:rebuild_indices[200,2]
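
The two arguments map onto a batched bulk import with a pause between batches. In spirit it is something like the following (a sketch of the idea only; the real task also creates the new indices, moves the aliases, and runs the catch-up):

```ruby
# Sketch: batched re-import approximating rebuild_indices[200,2].
[Comment, CommentThread].each do |model|
  model.import(batch_size: 200) do |response|
    failed = response['items'].count { |item| item.values.first['error'] }
    puts "#{model.name}: bulk batch sent, #{failed} failures"
    sleep 2  # ease the read load on MongoDB between batches
  end
end
```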
Run the server:

124 changes: 68 additions & 56 deletions api/search.rb
@@ -1,82 +1,94 @@
def get_thread_ids(context, group_ids, local_params, search_text)
filters = []
filters.push({term: {commentable_id: local_params['commentable_id']}}) if local_params['commentable_id']
filters.push({terms: {commentable_id: local_params['commentable_ids'].split(',')}}) if local_params['commentable_ids']
filters.push({term: {course_id: local_params['course_id']}}) if local_params['course_id']
must = []
filter = []
must.push({term: {commentable_id: local_params['commentable_id']}}) if local_params['commentable_id']
must.push({terms: {commentable_id: local_params['commentable_ids'].split(',')}}) if local_params['commentable_ids']
must.push({term: {course_id: local_params['course_id']}}) if local_params['course_id']
must.push(
{
multi_match: {
query: search_text,
fields: [:title, :body],
operator: :AND
}
}
)
group_id = local_params['group_id']

if group_id
filter.push(
{:bool => {:must_not => {:exists => {:field => :group_id}}}},
{:term => {:group_id => group_id}}
)
end

filters.push({or: [
{not: {exists: {field: :context}}},
{term: {context: context}}
]})
filter.push(
{:bool => {:must_not => {:exists => {:field => :context}}}},
{:term => {:context => context}}
)

unless group_ids.empty?
filters.push(
{
bool: {
should: [
{:not => {:exists => {:field => :group_id}}},
{:terms => {:group_id => group_ids}}
]
}
}
filter.push(
{:bool => {:must_not => {:exists => {:field => :group_id}}}},
{:terms => {:group_id => group_ids}}
)
end

body = {
size: CommentService.config['max_deep_search_comment_count'].to_i,
sort: [
{updated_at: :desc}
],
query: {
filtered: {
query: {
multi_match: {
query: search_text,
fields: [:title, :body],
operator: :AND
}
},
filter: {
bool: {
must: filters
}
}
}
size: CommentService.config['max_deep_search_comment_count'].to_i,
sort: [
{updated_at: :desc}
],
query: {
bool: {
must: must,
should: filter
}
}
}

response = Elasticsearch::Model.client.search(index: Content::ES_INDEX_NAME, body: body)
response = Elasticsearch::Model.client.search(index: TaskHelpers::ElasticsearchHelper::INDEX_NAMES, body: body)

thread_ids = Set.new
response['hits']['hits'].each do |hit|
case hit['_type']
when CommentThread.document_type
thread_ids.add(hit['_id'])
when Comment.document_type
thread_ids.add(hit['_source']['comment_thread_id'])
else
# There shouldn't be any other document types. Nevertheless, ignore them, if they are present.
next
if hit['_index'].include? CommentThread.index_name
thread_ids.add(hit['_id'])
elsif hit['_index'].include? Comment.index_name
thread_ids.add(hit['_source']['comment_thread_id'])
else
# There shouldn't be any other indices. Nevertheless, ignore them, if they are present.
next
end
end
thread_ids
end
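
This is the heart of the query change: Elasticsearch 7 removed the `filtered` query, so the full-text `multi_match` now sits in `bool.must` alongside the term filters, while the context and group alternatives are expressed as `bool.should` clauses. With placeholder values, the request body the new code builds looks like:

```ruby
# Sketch: the ES 7 request body assembled by get_thread_ids, with example values.
# Size mirrors max_deep_search_comment_count (5000 in config/application.yml).
body = {
  size: 5000,
  sort: [{ updated_at: :desc }],
  query: {
    bool: {
      must: [
        { term: { course_id: 'course-v1:edX+DemoX+Demo' } },
        { multi_match: { query: 'proof by induction', fields: [:title, :body], operator: :AND } }
      ],
      should: [
        { bool: { must_not: { exists: { field: :context } } } },
        { term: { context: 'course' } }
      ]
    }
  }
}

# Searched across both per-model indices, as in the code above.
Elasticsearch::Model.client.search(index: TaskHelpers::ElasticsearchHelper::INDEX_NAMES, body: body)
```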

def get_suggested_text(search_text)
body = {
suggestions: {
text: search_text,
phrase: {
field: :_all
}
suggest: {
body_suggestions: {
text: search_text,
phrase: {
field: :body
}
},
title_suggestions: {
text: search_text,
phrase: {
field: :title
}
}
}
}
response = Elasticsearch::Model.client.suggest(index: Content::ES_INDEX_NAME, body: body)
suggestions = response.fetch('suggestions', [])
if suggestions.length > 0
options = suggestions[0]['options']
if options.length > 0
return options[0]['text']

response = Elasticsearch::Model.client.search(index: TaskHelpers::ElasticsearchHelper::INDEX_NAMES, body: body)
body_suggestions = response['suggest'].fetch('body_suggestions', [])
title_suggestions = response['suggest'].fetch('title_suggestions', [])

[body_suggestions, title_suggestions].each do |suggestion|
if suggestion.length > 0
options = suggestion[0]['options']
return options[0]['text'] if options.length > 0
end
end
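
The suggester is the other behavioural change: ES 7 has no `_all` field, so the single phrase suggestion over `_all` becomes two named phrase suggesters over `body` and `title`, read back from `response['suggest']` instead of a top-level `suggestions` key. The response the loop above walks is shaped roughly like this (values invented for illustration):

```ruby
# Sketch: shape of the ES 7 suggest response; only the nesting matters.
response = {
  'suggest' => {
    'body_suggestions'  => [
      { 'text' => 'inductoin', 'options' => [{ 'text' => 'induction', 'score' => 0.8 }] }
    ],
    'title_suggestions' => [
      { 'text' => 'inductoin', 'options' => [] }
    ]
  }
}

response['suggest']['body_suggestions'][0]['options'][0]['text']  # => "induction"
```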

2 changes: 1 addition & 1 deletion config/application.yml
@@ -1,6 +1,6 @@
level_limit: 3
api_key: <%= ENV['API_KEY'] || 'PUT_YOUR_API_KEY_HERE' %>
elasticsearch_server: <%= ENV['SEARCH_SERVER'] || 'http://localhost:9200' %>
elasticsearch_server: <%= ENV['SEARCH_SERVER_ES7'] || 'http://localhost:9200' %>
max_deep_search_comment_count: 5000
enable_search: true
default_locale: <%= ENV['SERVICE_LANGUAGE'] || 'en-US' %>
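
Only the environment variable name changes here; the service presumably still builds its client from elasticsearch_server, along these lines (an assumption for illustration, since the initializer is not part of this diff):

```ruby
# Sketch: how elasticsearch_server is presumably consumed; the real initializer is elsewhere.
Elasticsearch::Model.client = Elasticsearch::Client.new(
  url: CommentService.config['elasticsearch_server']
)
```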
