Elasticsearch：如何实现对 emoji 表情符号进行搜索

2021-03-03 07:00 712人阅读评论(0)

Elasticsearch 是一个应用非常广泛的搜索引擎。它可以对文字进行分词，从而实现全文搜索。在实际的使用中，我们会发现有一些文字中包含一些表情符号，比如笑脸，动物等等，那么我们该如何对这些表情符号来进行搜索呢？


  
   
    
     
    
    
     
      🏻 => 🏻, light skin tone, skin tone, type 1–2
     
    
   
    
     
    
    
     
      🏼 => 🏼, medium-light skin tone, skin tone, type 3
     
    
   
    
     
    
    
     
      🏽 => 🏽, medium skin tone, skin tone, type 4
     
    
   
    
     
    
    
     
      🏾 => 🏾, medium-dark skin tone, skin tone, type 5
     
    
   
    
     
    
    
     
      🏿 => 🏿, dark skin tone, skin tone, type 6
     
    
   
    
     
    
    
     
      ♪ => ♪, eighth, music, note
     
    
   
    
     
    
    
     
      ♭ => ♭, bemolle, flat, music, note
     
    
   
    
     
    
    
     
      ♯ => ♯, dièse, diesis, music, note, sharp
     
    
   
    
     
    
    
     
      😀 => 😀, face, grin, grinning face
     
    
   
    
     
    
    
     
      😃 => 😃, face, grinning face with big eyes, mouth, open, smile
     
    
   
    
     
    
    
     
      😄 => 😄, eye, face, grinning face with smiling eyes, mouth, open, smile
     
    
   
    
     
    
    
     
      😁 => 😁, beaming face with smiling eyes, eye, face, grin, smile
     
    
   
    
     
    
    
     
      😆 => 😆, face, grinning squinting face, laugh, mouth, satisfied, smile
     
    
   
    
     
    
    
     
      😅 => 😅, cold, face, grinning face with sweat, open, smile, sweat
     
    
   
    
     
    
    
     
      🤣 => 🤣, face, floor, laugh, rofl, rolling, rolling on the floor laughing, rotfl
     
    
   
    
     
    
    
     
      😂 => 😂, face, face with tears of joy, joy, laugh, tear
     
    
   
    
     
    
    
     
      🙂 => 🙂, face, slightly smiling face, smile
     
    
   
    
     
    
    
     
      🙃 => 🙃, face, upside-down
     
    
   
    
     
    
    
     
      😉 => 😉, face, wink, winking face
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      🐅 => 🐅, tiger
     
    
   
    
     
    
    
     
      🐆 => 🐆, leopard
     
    
   
    
     
    
    
     
      🐴 => 🐴, face, horse
     
    
   
    
     
    
    
     
      🐎 => 🐎, equestrian, horse, racehorse, racing
     
    
   
    
     
    
    
     
      🦄 => 🦄, face, unicorn
     
    
   
    
     
    
    
     
      🦓 => 🦓, stripe, zebra
     
    
   
    
     
    
    
     
      🦌 => 🦌, deer

在上面，我们可以看到各种各样的 emoji 符号及其对应的同义词。比如我们想搜索 grin，那么含有 😀 emoji 符号的文档也应该被找出来。在今天的文章中，我们来展示如何实现对 emoji 符号进行搜索。

安装

如果你还没有对 Elasticsearch 及 Kibana 进行安装的话，请参阅之前的文章 “Elastic：菜鸟上手指南” 进行安装。另外，我们必须安装 ICU analyzer。关于 ICU analyzer 的安装，请参阅之前的文章 “Elasticsearch：ICU 分词器介绍”。我们在 Elasticsearch 的安装根目录中，输入如下的命令：

./bin/elasticsearch-plugin install analysis-icu

等安装好后，我们需要重新启动 Elasticsearch 让它起作用。运行：

./bin/elasticsearch-plugin list

上面的命令显示：


  
   
    
     
    
    
     
      $ ./bin/elasticsearch-plugin install analysis-icu
     
    
   
    
     
    
    
     
      -> Installing analysis-icu
     
    
   
    
     
    
    
     
      -> Downloading analysis-icu from elastic
     
    
   
    
     
    
    
     
      [=================================================] 100%
     
    
   
    
     
    
    
     
      -> Installed analysis-icu
     
    
   
    
     
    
    
     
      $ ./bin/elasticsearch-plugin list
     
    
   
    
     
    
    
     
      analysis-icu

安装完 ICU analyzer 后，我们必须重新启动 Elasticsearch。

搜索 emoji 符号

我们先做一个简单的实验：


  
   
    
     
    
    
     
      GET /_analyze
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "tokenizer": 
      "icu_tokenizer",
     
    
   
    
     
    
    
       
      "text": 
      "I live in 🇨🇳 and I'm 👩‍🚀"
     
    
   
    
     
    
    
     
      }

上面使用 icu_tokenizer 来对 “I live in 🇨🇳 and I'm 👩‍🚀” 进行分词。👩‍🚀 表情符号非常独特，因为它是由更经典的 👩 和 🚀 两个表情符号（通过零宽连接符 ZWJ）组合而成的。中国的国旗也很特别，它是区域指示符 🇨 和 🇳 的组合。因此，这里要做的不仅仅是正确地切分 Unicode 代码点，而是分词器必须真正理解表情符号，把这些组合序列作为一个整体来处理。

上面的请求的返回结果为：


  
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "tokens" : [
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "I",
     
    
   
    
     
    
    
           
      "start_offset" : 
      0,
     
    
   
    
     
    
    
           
      "end_offset" : 
      1,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      0
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "live",
     
    
   
    
     
    
    
           
      "start_offset" : 
      2,
     
    
   
    
     
    
    
           
      "end_offset" : 
      6,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      1
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "in",
     
    
   
    
     
    
    
           
      "start_offset" : 
      7,
     
    
   
    
     
    
    
           
      "end_offset" : 
      9,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      2
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : """🇨🇳""",
     
    
   
    
     
    
    
           
      "start_offset" : 
      10,
     
    
   
    
     
    
    
           
      "end_offset" : 
      14,
     
    
   
    
     
    
    
           
      "type" : 
      "<EMOJI>",
     
    
   
    
     
    
    
           
      "position" : 
      3
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "and",
     
    
   
    
     
    
    
           
      "start_offset" : 
      16,
     
    
   
    
     
    
    
           
      "end_offset" : 
      19,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      4
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "I'm",
     
    
   
    
     
    
    
           
      "start_offset" : 
      20,
     
    
   
    
     
    
    
           
      "end_offset" : 
      23,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      5
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : """👩‍🚀""",
     
    
   
    
     
    
    
           
      "start_offset" : 
      24,
     
    
   
    
     
    
    
           
      "end_offset" : 
      29,
     
    
   
    
     
    
    
           
      "type" : 
      "<EMOJI>",
     
    
   
    
     
    
    
           
      "position" : 
      6
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        ]
     
    
   
    
     
    
    
     
      }

显然 emoji 的符号被正确地分词，并能被搜索。

在实际的使用中，我们可能并不局限于对这些 emoji 符号本身的搜索。比如我们想对如下的文档进行搜索：


  
   
    
     
    
    
     
      PUT emoji-capable/_doc/1
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "content": 
      "I like 🐅"
     
    
   
    
     
    
    
     
      }

上面的文档中含有一个 🐅，也就是老虎。针对上面的文档，我们想搜索 tiger 的时候，也能正确地搜索到文档，那么我们该如何去做呢？

在 github 上面，有一个项目叫做 https://github.com/jolicode/emoji-search/。在它的项目中，有一个目录 https://github.com/jolicode/emoji-search/tree/master/synonyms。这里其实就是同义词的目录。我们现在下载其中的一个文件 https://github.com/jolicode/emoji-search/blob/master/synonyms/cldr-emoji-annotation-synonyms-en.txt 到 Elasticsearch 的本地安装目录：


  
   
    
     
    
    
     
      config
     
    
   
    
     
    
    
     
      ├── analysis
     
    
   
    
     
    
    
     
      │   ├── cldr-emoji-annotation-synonyms-en.txt
     
    
   
    
     
    
    
     
      │   └── emoticons.txt
     
    
   
    
     
    
    
     
      ├── elasticsearch.yml
     
    
   
    
     
    
    
     
      ...

在我的电脑上：


  
   
    
     
    
    
     
      $ pwd
     
    
   
    
     
    
    
     
      /Users/liuxg/elastic1/elasticsearch-7.11.0/config
     
    
   
    
     
    
    
     
      $ tree -L 3
     
    
   
    
     
    
    
     
      .
     
    
   
    
     
    
    
     
      ├── analysis
     
    
   
    
     
    
    
     
      │   └── cldr-emoji-annotation-synonyms-en.txt
     
    
   
    
     
    
    
     
      ├── elasticsearch.keystore
     
    
   
    
     
    
    
     
      ├── elasticsearch.yml
     
    
   
    
     
    
    
     
      ├── jvm.options
     
    
   
    
     
    
    
     
      ├── jvm.options.d
     
    
   
    
     
    
    
     
      ├── log4j2.properties
     
    
   
    
     
    
    
     
      ├── role_mapping.yml
     
    
   
    
     
    
    
     
      ├── roles.yml
     
    
   
    
     
    
    
     
      ├── users
     
    
   
    
     
    
    
     
      └── users_roles

在上面的 cldr-emoji-annotation-synonyms-en.txt 的文件中，它包含了常见 emoji 的符号的同义词。比如：


  
   
    
     
    
    
     
      😀 => 😀, face, grin, grinning face
     
    
   
    
     
    
    
     
      😃 => 😃, face, grinning face with big eyes, mouth, open, smile
     
    
   
    
     
    
    
     
      😄 => 😄, eye, face, grinning face with smiling eyes, mouth, open, smile
     
    
   
    
     
    
    
     
      😁 => 😁, beaming face with smiling eyes, eye, face, grin, smile
     
    
   
    
     
    
    
     
      😆 => 😆, face, grinning squinting face, laugh, mouth, satisfied, smile
     
    
   
    
     
    
    
     
      😅 => 😅, cold, face, grinning face with sweat, open, smile, sweat
     
    
   
    
     
    
    
     
      ....

为此，我们来进行如下的实验（如果你之前已经写入过上面的文档，emoji-capable 索引会被自动创建，请先执行 DELETE emoji-capable 将其删除，否则下面的创建索引请求会因索引已存在而失败）：


  
   
    
     
    
    
     
      PUT /emoji-capable
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "settings": {
     
    
   
    
     
    
    
         
      "analysis": {
     
    
   
    
     
    
    
           
      "filter": {
     
    
   
    
     
    
    
             
      "english_emoji": {
     
    
   
    
     
    
    
               
      "type": 
      "synonym",
     
    
   
    
     
    
    
               
      "synonyms_path": 
      "analysis/cldr-emoji-annotation-synonyms-en.txt" 
     
    
   
    
     
    
    
     
              }
     
    
   
    
     
    
    
     
            },
     
    
   
    
     
    
    
           
      "analyzer": {
     
    
   
    
     
    
    
             
      "english_with_emoji": {
     
    
   
    
     
    
    
               
      "tokenizer": 
      "icu_tokenizer",
     
    
   
    
     
    
    
               
      "filter": [
     
    
   
    
     
    
    
                 
      "english_emoji"
     
    
   
    
     
    
    
     
                ]
     
    
   
    
     
    
    
     
              }
     
    
   
    
     
    
    
     
            }
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        },
     
    
   
    
     
    
    
       
      "mappings": {
     
    
   
    
     
    
    
         
      "properties": {
     
    
   
    
     
    
    
           
      "content": {
     
    
   
    
     
    
    
             
      "type": 
      "text",
     
    
   
    
     
    
    
             
      "analyzer": 
      "english_with_emoji"
     
    
   
    
     
    
    
     
            }
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

在上面，我们定义了 english_with_emoji 分词器，同时我们在定义 content 字段时也使用相同的分词器 english_with_emoji。我们使用 _analyze API 来进行如下的测试：


  
   
    
     
    
    
     
      GET emoji-capable/_analyze
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "analyzer": 
      "english_with_emoji",
     
    
   
    
     
    
    
       
      "text": 
      "I like 🐅"
     
    
   
    
     
    
    
     
      }

上面的命令返回：


  
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "tokens" : [
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "I",
     
    
   
    
     
    
    
           
      "start_offset" : 
      0,
     
    
   
    
     
    
    
           
      "end_offset" : 
      1,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      0
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "like",
     
    
   
    
     
    
    
           
      "start_offset" : 
      2,
     
    
   
    
     
    
    
           
      "end_offset" : 
      6,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      1
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      """🐅""",
     
    
   
    
     
    
    
           
      "start_offset" : 
      7,
     
    
   
    
     
    
    
           
      "end_offset" : 
      9,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      2
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "tiger",
     
    
   
    
     
    
    
           
      "start_offset" : 
      7,
     
    
   
    
     
    
    
           
      "end_offset" : 
      9,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      2
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        ]
     
    
   
    
     
    
    
     
      }

显然它除了返回 🐅，也同时返回了 tiger 这样的 token。也就是说我们可以同时搜索这两种，都可以搜索到这个文档。同样地：


  
   
    
     
    
    
     
      GET emoji-capable/_analyze
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "analyzer": 
      "english_with_emoji",
     
    
   
    
     
    
    
       
      "text": 
      "😀 means happy"
     
    
   
    
     
    
    
     
      }

它返回：


  
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "tokens" : [
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      """😀""",
     
    
   
    
     
    
    
           
      "start_offset" : 
      0,
     
    
   
    
     
    
    
           
      "end_offset" : 
      2,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      0
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "face",
     
    
   
    
     
    
    
           
      "start_offset" : 
      0,
     
    
   
    
     
    
    
           
      "end_offset" : 
      2,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      0
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "grin",
     
    
   
    
     
    
    
           
      "start_offset" : 
      0,
     
    
   
    
     
    
    
           
      "end_offset" : 
      2,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      0
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "grinning",
     
    
   
    
     
    
    
           
      "start_offset" : 
      0,
     
    
   
    
     
    
    
           
      "end_offset" : 
      2,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      0
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "means",
     
    
   
    
     
    
    
           
      "start_offset" : 
      3,
     
    
   
    
     
    
    
           
      "end_offset" : 
      8,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      1
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "face",
     
    
   
    
     
    
    
           
      "start_offset" : 
      3,
     
    
   
    
     
    
    
           
      "end_offset" : 
      8,
     
    
   
    
     
    
    
           
      "type" : 
      "SYNONYM",
     
    
   
    
     
    
    
           
      "position" : 
      1
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
     
          {
     
    
   
    
     
    
    
           
      "token" : 
      "happy",
     
    
   
    
     
    
    
           
      "start_offset" : 
      9,
     
    
   
    
     
    
    
           
      "end_offset" : 
      14,
     
    
   
    
     
    
    
           
      "type" : 
      "<ALPHANUM>",
     
    
   
    
     
    
    
           
      "position" : 
      2
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        ]
     
    
   
    
     
    
    
     
      }

它表明，如果我们搜索 face、grinning 或 grin，该文档也会被正确地返回。

现在，我们输入如下的两个文档：


  
   
    
     
    
    
     
      PUT emoji-capable/_doc/1
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "content": 
      "I like 🐅"
     
    
   
    
     
    
    
     
      }
     
    
   
    
     
    
    
      
     
    
   
    
     
    
    
     
      PUT emoji-capable/_doc/2
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "content": 
      "😀 means happy"
     
    
   
    
     
    
    
     
      }

我们对文档进行如下的搜索：


  
   
    
     
    
    
     
      GET emoji-capable/_search
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "query": {
     
    
   
    
     
    
    
         
      "match": {
     
    
   
    
     
    
    
           
      "content": 
      "🐅"
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

或：


  
   
    
     
    
    
     
      GET emoji-capable/_search
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "query": {
     
    
   
    
     
    
    
         
      "match": {
     
    
   
    
     
    
    
           
      "content": 
      "tiger"
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

它们都将返回第一个文档：


  
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "took" : 
      2,
     
    
   
    
     
    
    
       
      "timed_out" : 
      false,
     
    
   
    
     
    
    
       
      "_shards" : {
     
    
   
    
     
    
    
         
      "total" : 
      1,
     
    
   
    
     
    
    
         
      "successful" : 
      1,
     
    
   
    
     
    
    
         
      "skipped" : 
      0,
     
    
   
    
     
    
    
         
      "failed" : 
      0
     
    
   
    
     
    
    
     
        },
     
    
   
    
     
    
    
       
      "hits" : {
     
    
   
    
     
    
    
         
      "total" : {
     
    
   
    
     
    
    
           
      "value" : 
      1,
     
    
   
    
     
    
    
           
      "relation" : 
      "eq"
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
         
      "max_score" : 
      0.8514803,
     
    
   
    
     
    
    
         
      "hits" : [
     
    
   
    
     
    
    
     
            {
     
    
   
    
     
    
    
             
      "_index" : 
      "emoji-capable",
     
    
   
    
     
    
    
             
      "_type" : 
      "_doc",
     
    
   
    
     
    
    
             
      "_id" : 
      "1",
     
    
   
    
     
    
    
             
      "_score" : 
      0.8514803,
     
    
   
    
     
    
    
             
      "_source" : {
     
    
   
    
     
    
    
               
      "content" : 
      """I like 🐅"""
     
    
   
    
     
    
    
     
              }
     
    
   
    
     
    
    
     
            }
     
    
   
    
     
    
    
     
          ]
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

同样地，我们进行如下的搜索：


  
   
    
     
    
    
     
      GET emoji-capable/_search
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "query": {
     
    
   
    
     
    
    
         
      "match": {
     
    
   
    
     
    
    
           
      "content": 
      "😀"
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

或者：


  
   
    
     
    
    
     
      GET emoji-capable/_search
     
    
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "query": {
     
    
   
    
     
    
    
         
      "match": {
     
    
   
    
     
    
    
           
      "content": 
      "grin"
     
    
   
    
     
    
    
     
          }
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

它们都将返回第二个文档：


  
   
    
     
    
    
     
      {
     
    
   
    
     
    
    
       
      "took" : 
      1,
     
    
   
    
     
    
    
       
      "timed_out" : 
      false,
     
    
   
    
     
    
    
       
      "_shards" : {
     
    
   
    
     
    
    
         
      "total" : 
      1,
     
    
   
    
     
    
    
         
      "successful" : 
      1,
     
    
   
    
     
    
    
         
      "skipped" : 
      0,
     
    
   
    
     
    
    
         
      "failed" : 
      0
     
    
   
    
     
    
    
     
        },
     
    
   
    
     
    
    
       
      "hits" : {
     
    
   
    
     
    
    
         
      "total" : {
     
    
   
    
     
    
    
           
      "value" : 
      1,
     
    
   
    
     
    
    
           
      "relation" : 
      "eq"
     
    
   
    
     
    
    
     
          },
     
    
   
    
     
    
    
         
      "max_score" : 
      0.8514803,
     
    
   
    
     
    
    
         
      "hits" : [
     
    
   
    
     
    
    
     
            {
     
    
   
    
     
    
    
             
      "_index" : 
      "emoji-capable",
     
    
   
    
     
    
    
             
      "_type" : 
      "_doc",
     
    
   
    
     
    
    
             
      "_id" : 
      "2",
     
    
   
    
     
    
    
             
      "_score" : 
      0.8514803,
     
    
   
    
     
    
    
             
      "_source" : {
     
    
   
    
     
    
    
               
      "content" : 
      """😀 means happy"""
     
    
   
    
     
    
    
     
              }
     
    
   
    
     
    
    
     
            }
     
    
   
    
     
    
    
     
          ]
     
    
   
    
     
    
    
     
        }
     
    
   
    
     
    
    
     
      }

转载：https://blog.csdn.net/UbuntuTouch/article/details/114261636

查看评论

飞道的博客

飞道的博客

个人资料

文章分类

文章存档

阅读排行

评论排行

推荐文章

Elasticsearch：如何实现对 emoji 表情符号进行搜索

安装

搜索 emoji 符号

* 以上用户言论只代表其个人观点，不代表本网站的观点或立场