A gentle introduction to basic Elasticsearch constructs that boost search: n-grams, shingles, stemmers, suggesters and fuzzy queries.
workshop
and answers
are in the workshop directory
pillar
p
, i
, l
, l
, a
, r
pi
, il
, ll
, la
, ar
pil
, ill
, lla
, lar
pi
, il
, ll
, la
, ar
pi
, il
, la
, ar
pillar
p
, pi
, pil
, pill
, pilla
, pillar
length > 8
- full text search instead of terms
PUT /index-name
{
  "settings": {
    "analysis": {
      "filter": {
        // Token filter that emits the leading prefixes (edge n-grams) of every token.
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 2, // shortest prefix emitted (default: 1)
          "max_gram": 5  // longest prefix emitted (default: 2)
        }
      },
      "analyzer": {
        // Index-time analyzer: standard tokenizer, then lowercasing,
        // stop-word removal and finally the edge n-gram filter above.
        "autocomplete_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "stop",
            "autocomplete_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      // The field that receives the autocomplete feature.
      "description": {
        "type": "text", // regular full-text mapping of the field itself
        // Multi-fields: the same source value indexed in additional ways.
        "fields": {
          // Sub-field queried as "description.autocomplete".
          "autocomplete": {
            "type": "text",
            "analyzer": "autocomplete_analyzer",
            // Without this override the query text would also be run through
            // autocomplete_analyzer and be split into prefixes itself.
            // NOTE: a query term longer than max_gram is not truncated by the
            // standard analyzer, so it will not match the stored prefixes.
            "search_analyzer": "standard"
          },
          // Further sub-field for the same value, analyzed with the built-in english analyzer.
          "english": {
            "type": "text",
            "analyzer": "english"
          }
        }
      }
    }
  }
}
GET /index-name/_search
{
  "query": {
    "bool": {
      // The same clause can be placed under "filter" or "must" instead of "should".
      "should": {
        // Query the edge n-gram sub-field; the last word may be typed only partially.
        "match": { "description.autocomplete": "search for descr" }
      }
    }
  }
}
please divide
, divide this
, this sentence
, sentence into
, and into shingles
index-phrases
option on a text field
search_as_you_type
my_field
my_field._2gram
my_field
with a shingle token filter of shingle size 2
my_field._3gram
my_field
with a shingle token filter of shingle size 3
my_field._index_prefix
my_field._3gram
with an edge ngram token filter
multi_match
query of type bool_prefix
bool_prefix
- constructs a bool query from the terms
please divide this sen
please
, divide
, this
, sen
sen
-> sentence
PUT /index-type
{
  "mappings": {
    "properties": {
      // search_as_you_type adds the ._2gram, ._3gram and ._index_prefix
      // sub-fields to this field.
      "description": {
        "type": "search_as_you_type"
      }
      ...
    }
  }
}
GET /index-type/_search
{
  "query": {
    "multi_match": {
      "query": "...",
      // bool_prefix: every term is matched as a term query, the last one as a prefix query.
      "type": "bool_prefix",
      // Target the root search_as_you_type field together with its shingle
      // sub-fields; the root field carries the single (unshingled) terms.
      "fields": [
        "description",
        "description._2gram",
        "description._3gram"
      ]
    }
  }
}
-s
and -es
from the end of plural words
stemmer
- porter stemming algorithm, several languages
kstem
- algorithmic stemming + built-in dictionary
porter_stem
- porter stemming algorithm, recommended for English
snowball
- Snowball-based stemming rules, several languages
hunspell
GET index-search/_search
{
  "query": {
    "bool": {
      // Two alternative clauses on the same field.
      "should": [
        {
          // Terms that start with the given text.
          "prefix": {
            "field-name": "..."
          }
        },
        {
          // Terms within an edit distance of 2 from the given text.
          "fuzzy": {
            "field-name": {
              "value": "...",
              "fuzziness": 2
            }
          }
        }
      ]
    }
  }
}
PUT index-name
{
  "mappings": {
    "properties": {
      // Field of type "completion", the field type read by the completion suggester.
      "search_associations": {
        "type": "completion"
      },
      ...
    }
  }
}
POST index-name/_doc
{
  // Values to store in the completion field of this document.
  "search_associations": [ ... ]
}
POST index-name/_search
{
  "suggest": {
    // "suggest-name" is the key chosen for this suggestion in the request.
    "suggest-name": {
      "prefix": "...", // text typed so far
      "completion": {
        "field": "search_associations" // completion field to read suggestions from
      }
    }
  }
}