Elasticsearch自定义分词

实现单个字符分词(ngram)

  • -u elastic:uates12345 为权限验证,没有设置权限验证的直接去掉即可.

初始化

开始操作之前,先确认 Elasticsearch 中是否已经存在测试用的 template 和 index,如已存在请先删除.

  • 删除template
    1
    # Remove the trade_test_0 index template if a previous run created it.
    curl -u elastic:uates12345 -X DELETE 'http://localhost:9200/_template/trade_test_0'
    
  • 删除index
    1
    # Remove the trade-test_1 index if a previous run created it.
    curl -u elastic:uates12345 -X DELETE 'http://localhost:9200/trade-test_1'
    

创建template和index

  • 创建template
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    # Register the index template trade_test_0, which applies to every index whose
    # name matches "trade-test_*".
    #   - settings.analysis: defines the token filter sp_no_ngram_filter (ngram,
    #     min_gram 1, max_gram 30) and the custom analyzer sp_str_analyzer, which
    #     runs the standard tokenizer followed by that filter.
    #   - mappings: under the mapping type "type", orderType is an integer,
    #     smallRegionId is a keyword, and serialNumber is a text field analyzed
    #     with sp_str_analyzer.
    # The request body is a single-quoted JSON literal and is sent as written.
    curl -u elastic:uates12345  -H "Content-Type:application/json" -XPUT http://localhost:9200/_template/trade_test_0 -d ' {
    "template": "trade-test_*",
    "order": 0,
    "settings": {
      "analysis": {
        "filter": {
          "sp_no_ngram_filter": {
            "type": "ngram",
            "min_gram": 1,
            "max_gram": 30
          }
        },
        "analyzer": {
          "sp_str_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "sp_no_ngram_filter"
            ]
          }
        }
      }
    },
    "mappings": {
      "type": {
        "_source": {
          "enabled": true
        },
        "properties": {
          "orderType": {
            "type": "integer"
          },
          "smallRegionId": {
            "type": "keyword"
          },
          "serialNumber": {
            "analyzer": "sp_str_analyzer",
            "type": "text"
          }
        }
      }
    }
    }'
    
  • 创建索引
    1
    # Create the trade-test_1 index; its name matches the "trade-test_*" template pattern.
    curl -u elastic:uates12345 -X PUT 'http://localhost:9200/trade-test_1'
    

添加数据

  • 添加数据
    1
    2
    3
    4
    5
    # Index one sample document into trade-test_1 under the mapping type "type".
    # The target has to be the index created from the "trade-test_*" template.
    # trade_test_0 is the template's name, not an index: posting there would (with
    # default settings) auto-create a separate index that does not match the
    # template pattern and therefore lacks the sp_str_analyzer analyzer.
    curl -u elastic:uates12345 -H "Content-Type:application/json" -XPOST 'http://localhost:9200/trade-test_1/type' -d '{
      "orderType": "1",
      "smallRegionId": "1213",
      "serialNumber": "我爱我的祖国"
    }'
    

分词测试

1
2
3
4
# Run the sp_str_analyzer analyzer defined on index trade-test_1 against a sample
# string via the _analyze endpoint, to inspect the tokens it produces.
curl -u elastic:uates12345  -H "Content-Type:application/json" -XPOST http://localhost:9200/trade-test_1/_analyze -d' {
    "analyzer": "sp_str_analyzer",
    "text": "我爱我的祖国"
}'

查询测试

  • match测试
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    # match query on serialNumber against the trade-test_1 index.
    # The search targets trade-test_1 because that is the index created from the
    # "trade-test_*" template; trade_test_0 is only the template's name.
    curl -u elastic:uates12345 -H "Content-Type:application/json" -XPOST 'http://localhost:9200/trade-test_1/_search' -d '{
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "serialNumber": "我的"
              }
            }
          ],
          "must_not": [],
          "should": []
        }
      },
      "from": 0,
      "size": 10,
      "sort": [],
      "aggs": {}
    }'
    
  • match_phrase测试
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    # match_phrase query (slop 3) on serialNumber against the trade-test_1 index.
    # The search targets trade-test_1 because that is the index created from the
    # "trade-test_*" template; trade_test_0 is only the template's name.
    curl -u elastic:uates12345 -H "Content-Type:application/json" -XPOST 'http://localhost:9200/trade-test_1/_search' -d '{
      "query": {
        "bool": {
          "must": [
            {
              "match_phrase": {
                "serialNumber": {
                  "query": "爱国",
                  "slop": 3
                }
              }
            }
          ],
          "must_not": [],
          "should": []
        }
      },
      "from": 0,
      "size": 10,
      "sort": [],
      "aggs": {}
    }'
    

reference