A detailed comparison between autocompletion strategies in Elasticsearch

Mourjo Sen
Aug 30, 2018 · 25 min read

Search != Autocompletion

Understanding our Use-Case

Our use-case: Autocompletions that assist users to build a search query
status:is:new AND (platform:is:ios OR dt:is_before:1-1-2016) AND email:is:harry@helpshift.com
{
"helpshift_idx": {
"mappings": {
"issue": {
"properties": {
"app": {
"index": "not_analyzed",
"type": "string"
},
"email": {
"index": "not_analyzed",
"type": "string"
},
"author": {
"analyzer": "lowercase_and_split_words",
"type": "string"
},
"dt": {
"format": "dd-MM-yyyy",
"include_in_all": false,
"store": true,
"type": "date"
},
"description": {
"analyzer": "lowercase_and_split_words",
"type": "string"
},
"status": {
"index": "not_analyzed",
"type": "string"
},
"tags": {
"analyzer": "lowercase",
"store": true,
"type": "string"
},
"title": {
"analyzer": "lowercase_and_split_words",
"store": true,
"type": "string"
}
...
...
}
}
}
}
}

Ways to provide autocompletions

Approach #1: Prefix Query + Aggregations

{
"aggregations": {
"autocomplete": {
"aggs": {
"autocomplete": {
"terms": {
"field": "app",
"size": 25
}
}
}
}
},
"query": {
"match_phrase_prefix": {
"app": {
"max_expansions": 25,
"query": "che"
}
}
}
}

Advantages of using prefix-like queries

Drawbacks of using prefix-like queries

Approach #2: NGram + Aggregations

h
he
hel
hell
hello

The NGram mapping

{
"doc_values": true,
"fields": {
"autocomplete": {
"include_in_all": false,
"index_analyzer": "edge_ngram",
"search_analyzer": "standard",
"type": "string"
}
},
"index": "not_analyzed",
"type": "string"
}
{
"analysis": {
"analyzer": {
"edge_ngram": {
"filter": [
"lowercase",
"edge_ngram_filter"
],
"tokenizer": "keyword",
"type": "custom"
}
},
"filter": {
"edge_ngram_filter": {
"max_gram": "15",
"min_gram": "1",
"side": "front",
"type": "edgeNGram"
}
}
}
}

A word about multi-fields (renamed to fields)

Querying the NGram analyzed field

GET /helpshift_idx/issue/_search?search_type=count&query_cache=true
{
"aggregations": {
"fld-suggestions": {
"terms": {
"field": "app",
"size": 25
}
}
},
"query": {
"match": {
"app.autocomplete": "hell"
}
}
}

Advantages of using NGrams for autocompletions

Drawbacks of using NGrams for autocompletions

Approach #3: Completion Suggester

The Mapping using Completion Suggester

PUT music
{
"mappings": {
"song": {
"properties": {
"genre": {
"analyzer": "standard",
"type": "string"
},
"name": {
"analyzer": "standard",
"fields": {
"autocomplete": {
"analyzer": "stopword_analyzer",
"context": {
"genre": {
"default": [
"unknown"
],
"path": "genre",
"type": "category"
}
},
"max_input_length": 10,
"payloads": false,
"preserve_position_increments": false,
"preserve_separators": false,
"type": "completion"
}
},
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"stopword_analyzer": {
"stopwords": [
"and",
"the"
],
"type": "standard"
}
}
}
}
}

Understanding the Mapping

Indexing and Querying the Completion Suggester

PUT /music/song/1
{
"name": "Petit Papa Noel",
"genre": "Carols"
}
PUT /music/song/2
{
"name": "Little Drummer Boy",
"genre": "Traditional"
}
PUT /music/song/3?refresh=true
{
"name": "With or Without You",
"genre": "Rock"
}
POST /music/_suggest?pretty
{
"suggestion": {
"completion": {
"context": {
"genre": "Carols"
},
"field": "name.autocomplete",
"size": "100"
},
"text": "Pe"
}
}
{
"_shards": {
"failed": 0,
"successful": 1,
"total": 1
},
"suggestion": [
{
"length": 2,
"offset": 0,
"options": [
{
"score": 1,
"text": "Petit Papa Noel"
}
],
"text": "Pe"
}
]
}

Advantages of using the Completion Suggester

Drawbacks of using the Completion Suggester

Approach #4: Using a Separate Index

Conclusion

Welcome to a place where words matter. On Medium, smart voices and original ideas take center stage - with no ads in sight. Watch
Follow all the topics you care about, and we’ll deliver the best stories for you to your homepage and inbox. Explore
Get unlimited access to the best stories on Medium — and support writers while you’re at it. Just $5/month. Upgrade