本文介绍 Elasticsearch 在 Python 中的应用。通过 Elasticsearch,我们可以轻松实现复杂的全文检索、数据分析等功能。
1. 环境准备
# 安装 elasticsearch 包
pip install elasticsearch
2. 基础连接设置
from elasticsearch import Elasticsearch
# Create the ES client. The node is given as a full URL so the scheme is
# explicit: elasticsearch-py 8.x rejects a host dict that has no 'scheme'
# key, while a URL string is accepted by both 7.x and 8.x clients.
es = Elasticsearch("http://localhost:9200")
# Check the connection; the result of ping() is used as a boolean.
if es.ping():
    print("连接成功!")
else:
    print("连接失败!")
3. 创建索引和文档
# Index definition: shard/replica settings plus explicit field mappings.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 1,
    },
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "content": {"type": "text"},
            "tags": {"type": "keyword"},
        }
    },
}
# Create the 'blog_posts' index only when it does not exist yet. Creating an
# index that already exists is rejected by Elasticsearch, which would make
# this script fail on every run after the first.
if not es.indices.exists(index='blog_posts'):
    es.indices.create(index='blog_posts', body=index_settings)
# Sample document matching the 'blog_posts' mapping.
doc = {
    'title': 'Python 编程入门',
    'content': '这是一篇关于 Python 基础的博客文章',
    'tags': ['python', '编程', '教程'],
}
# Insert the document. A newly indexed document only becomes searchable after
# the next index refresh, so wait for it here; otherwise a search issued
# right after this call can miss the document.
es.index(index='blog_posts', body=doc, refresh='wait_for')
说明:上面映射中的 text 类型会进行分词,适合全文搜索;
keyword 类型不分词,适合精确匹配和聚合分析。
4. 基本搜索操作
# Simple search
def simple_search(keyword):
    """Run a ``multi_match`` query for *keyword* over title and content.

    Args:
        keyword: Text to look for in the ``title`` and ``content`` fields.

    Returns:
        The list found under ``hits.hits`` in the search response for the
        ``blog_posts`` index.
    """
    match_clause = {"query": keyword, "fields": ["title", "content"]}
    request_body = {"query": {"multi_match": match_clause}}
    response = es.search(index='blog_posts', body=request_body)
    return response['hits']['hits']
# Try the search and print the score and title of every hit.
results = simple_search("Python")
for hit in results:
    # One print call, two lines of output per hit.
    print(f"得分: {hit['_score']}", f"标题: {hit['_source']['title']}", sep="\n")
5. 高级搜索示例
def advanced_search(keyword, tags=None, min_score=0.5):
    """Search ``blog_posts`` for *keyword*, optionally restricted by tags.

    Args:
        keyword: Text matched against ``title`` (boosted x2) and ``content``.
        tags: Optional tag, or collection of tags, used as a ``terms``
            filter on the ``tags`` field. ``None`` or an empty collection
            means "no tag filter".
        min_score: Passed as the request's ``min_score``.

    Returns:
        The complete response returned by ``es.search`` (not only the hits).
    """
    if isinstance(tags, str):
        # A ``terms`` query expects an array of values; wrap a lone tag.
        tags = [tags]

    bool_query = {
        "must": [
            {
                "multi_match": {
                    "query": keyword,
                    "fields": ["title^2", "content"],  # title weighs double
                }
            }
        ],
        # Only add the filter when there is something to filter on: a
        # ``terms`` clause with an empty array would match no documents.
        # list() also makes tuples/sets JSON-serialisable.
        "filter": [{"terms": {"tags": list(tags)}}] if tags else [],
    }
    query = {"query": {"bool": bool_query}, "min_score": min_score}
    return es.search(index='blog_posts', body=query)
# Search for documents that match "Python" and carry the given tag.
# Note: advanced_search returns the whole es.search response, so the hit
# list is at results['hits']['hits'] (simple_search returns that list itself).
results = advanced_search("Python", tags=['教程'])
6. 批量操作示例
from elasticsearch.helpers import bulk
def bulk_index_documents(documents):
    """Index *documents* into ``blog_posts`` in bulk and print a summary.

    Args:
        documents: Iterable of dicts; each one becomes the ``_source`` of a
            bulk action targeting the ``blog_posts`` index.
    """
    actions = [{"_index": "blog_posts", "_source": doc} for doc in documents]
    # helpers.bulk raises on failed documents by default, so the failure
    # count printed below could never be non-zero. With raise_on_error=False
    # the per-document errors are collected and returned instead.
    success, failed = bulk(es, actions, raise_on_error=False)
    print(f"成功索引: {success}条")
    print(f"失败数量: {len(failed)}条")
# Index a couple of sample documents in one bulk call.
sample_docs = [
    {
        "title": "ES 入门",
        "content": "ES 基础教程",
        "tags": ["elasticsearch"],
    },
    {
        "title": "搜索优化",
        "content": "提高搜索质量",
        "tags": ["搜索", "优化"],
    },
]
bulk_index_documents(sample_docs)
7. 聚合分析示例
def analyze_tags():
    """Return the most frequent tags in ``blog_posts`` (at most 10).

    Returns:
        The buckets of the ``popular_tags`` terms aggregation on the
        ``tags`` field, as found in the search response.
    """
    query = {
        # Only the aggregation result is read below, so do not fetch hits.
        "size": 0,
        "aggs": {
            "popular_tags": {
                "terms": {
                    "field": "tags",
                    "size": 10,
                }
            }
        },
    }
    result = es.search(index='blog_posts', body=query)
    return result['aggregations']['popular_tags']['buckets']
# Fetch the most popular tags, then print each bucket's key and its
# document count on one line.
popular_tags = analyze_tags()
for tag in popular_tags:
    print(f"标签: {tag['key']}, 数量: {tag['doc_count']}")
© 版权声明
博主的文章没有高度、深度和广度,只是凑字数。利用读书、参考、引用、抄袭、复制和粘贴等多种方式打造成自己的纯镀 24k 文章!如若有侵权,请联系博主删除。
喜欢就点个赞吧