Python 搜索引擎 Elasticsearch-py

本文介绍 Elasticsearch 在 Python 中的应用。通过 Elasticsearch,我们可以轻松实现复杂的全文检索、数据分析等功能。

1. 环境准备


# 安装 elasticsearch 包
pip install elasticsearch

2. 基础连接设置


from elasticsearch import Elasticsearch

# Create the ES client.  A full URL (scheme, host, port) is passed because
# elasticsearch-py 8.x rejects host dicts that lack a 'scheme' key; the URL
# string form is accepted by 7.x clients as well.
es = Elasticsearch("http://localhost:9200")

# Check whether the node answers before going any further.
if es.ping():
    print("连接成功!")
else:
    print("连接失败!")

3. 创建索引和文档


# Index definition: one primary shard with one replica, plus explicit field
# mappings ('text' fields are analyzed for full-text search, 'keyword'
# fields are kept as-is for exact matching and aggregations).
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 1
    },
    "mappings": {
        "properties": {
            "title": {"type": "text"},
            "content": {"type": "text"},
            "tags": {"type": "keyword"}
        }
    }
}

# Create the 'blog_posts' index only when it is missing, so that running the
# script a second time does not fail with resource_already_exists_exception.
if not es.indices.exists(index='blog_posts'):
    es.indices.create(index='blog_posts', body=index_settings)

# Sample document to add
doc = {
    'title': 'Python 编程入门',
    'content': '这是一篇关于 Python 基础的博客文章',
    'tags': ['python', '编程', '教程']
}

# Index the document.  refresh='wait_for' makes the call return only once
# the document is visible to search, so the queries that follow can find it
# immediately (by default a new document only shows up after the index's
# next periodic refresh).
es.index(index='blog_posts', body=doc, refresh='wait_for')


text 类型会进行分词,适合全文搜索;
keyword 类型不分词,适合精确匹配和聚合分析。

4. 基本搜索操作


# Simple search
def simple_search(keyword):
    """Run a full-text query for *keyword* against the 'blog_posts' index.

    The keyword is matched against both the ``title`` and ``content``
    fields through a ``multi_match`` query.

    Returns:
        The list found under ``hits.hits`` in the search response.
    """
    match_clause = {"query": keyword, "fields": ["title", "content"]}
    request = {"query": {"multi_match": match_clause}}

    response = es.search(index='blog_posts', body=request)
    return response['hits']['hits']

# Try the search and show the score and title of every hit
results = simple_search("Python")
for hit in results:
    print(f"得分: {hit['_score']}")
    source = hit['_source']
    print(f"标题: {source['title']}")

5. 高级搜索示例


def advanced_search(keyword, tags=None, min_score=0.5):
    """Search 'blog_posts' for *keyword*, optionally restricted by tags.

    Args:
        keyword: Text matched against ``title`` and ``content``.
        tags: Optional list of tag values; when given (not ``None``) a
            ``terms`` filter on the ``tags`` field is added.
        min_score: Value sent as the request's ``min_score``.

    Returns:
        The search response returned by ``es.search``.
    """
    text_clause = {
        "multi_match": {
            "query": keyword,
            # "title^2" boosts matches in the title field by a factor of 2
            "fields": ["title^2", "content"],
        }
    }

    # The filter list stays empty unless the caller supplied tags.
    tag_filters = []
    if tags is not None:
        tag_filters.append({"terms": {"tags": tags}})

    request = {
        "query": {"bool": {"must": [text_clause], "filter": tag_filters}},
        "min_score": min_score,
    }
    return es.search(index='blog_posts', body=request)

# Search for documents that carry a specific tag
results = advanced_search("Python", tags=['教程'])

6. 批量操作示例


from elasticsearch.helpers import bulk

def bulk_index_documents(documents):
    """Index *documents* into 'blog_posts' using the bulk helper.

    Args:
        documents: Iterable of dicts; each dict becomes the ``_source`` of
            one new document.

    Prints how many documents were indexed and how many failed.
    """
    actions = [
        {
            "_index": "blog_posts",
            "_source": doc
        }
        for doc in documents
    ]

    # raise_on_error=False: with the default (True) the helper raises
    # BulkIndexError as soon as a chunk contains a failure, so the failure
    # count below could never be reported.  With it disabled the helper
    # returns the error items as a list instead.
    success, failed = bulk(es, actions, raise_on_error=False)
    print(f"成功索引: {success}条")
    print(f"失败数量: {len(failed)}条")

# Sample documents for the bulk request
sample_docs = [
    {"title": "ES 入门", "content": "ES 基础教程", "tags": ["elasticsearch"]},
    {"title": "搜索优化", "content": "提高搜索质量", "tags": ["搜索", "优化"]}
]

bulk_index_documents(sample_docs)

# Make the newly indexed documents searchable right away; without an explicit
# refresh they only become visible after the index's next periodic refresh,
# so queries issued immediately afterwards could miss them.
es.indices.refresh(index='blog_posts')

7. 聚合分析示例


def analyze_tags():
    """Return the ten most frequent values of the ``tags`` field.

    Runs a ``terms`` aggregation named ``popular_tags`` over 'blog_posts'.

    Returns:
        The aggregation's ``buckets`` list; each bucket carries the tag
        under ``key`` and its document count under ``doc_count``.
    """
    query = {
        # Only the aggregation is used, so skip fetching search hits.
        "size": 0,
        "aggs": {
            "popular_tags": {
                "terms": {
                    "field": "tags",
                    "size": 10
                }
            }
        }
    }

    result = es.search(index='blog_posts', body=query)
    return result['aggregations']['popular_tags']['buckets']

# Fetch the most popular tags and print each one with its document count
popular_tags = analyze_tags()
for bucket in popular_tags:
    name, count = bucket['key'], bucket['doc_count']
    print(f"标签: {name}, 数量: {count}")
© 版权声明

☆ END ☆
喜欢就点个赞吧
点赞0 分享
图片正在生成中,请稍后...