python elasticsearch_dsl 分頁,聚合,多關鍵字查詢

這裏直接利用代碼講述,代碼中有詳細的註釋

這裏利用的是 elasticsearch_dsl.FacetedSearch(分面搜索),通過重寫其中特定的方法來實現分頁、聚合與多關鍵字查詢

第一種方法,代碼如下

from django.conf import settings
from elasticsearch_dsl.query import Query
from elasticsearch_dsl import Document, Keyword, Text, FacetedSearch, Short, Q, TermsFacet


class ArticleDocument(Document):
    """Mapping of an article as stored in the ``articles`` index."""

    # Exact-value fields (not analyzed).
    kind = Short()
    author = Keyword()
    keyword = Keyword(multi=True)

    # Full-text fields analyzed with ik_smart at both index and search time;
    # each also exposes a ``name`` sub-field of type Keyword.
    title = Text(
        analyzer="ik_smart",
        search_analyzer="ik_smart",
        fields={"name": Keyword()},
    )
    context = Text(
        analyzer="ik_smart",
        search_analyzer="ik_smart",
        fields={"name": Keyword()},
    )

    class Index:
        name = "articles"


class ArticleSearch(FacetedSearch):
    """
    Faceted search over the ``articles`` index with paging and an optional
    extra query that is AND-ed with the main ``multi_match`` query.
    """
    index = 'articles'
    doc_types = ArticleDocument
    fields = ['title', 'keyword', 'context']

    # Aggregation setup: aggregate on the ``author`` field.
    # ``author`` must not be a Text field, otherwise Elasticsearch answers
    # with HTTP 400 (illegal_argument_exception).
    # More than one facet may be declared here.
    facets = {'author': TermsFacet(field='author')}

    def __init__(self, query=None, filters=None, sort=(), extra_query=None, page=1):
        """
        :param query: search text matched against ``fields``
        :param filters: mapping of facet name to filter value(s); ``None``
            means no filters
        :param sort: sort specification forwarded to ``FacetedSearch``
        :param extra_query: additional ``Query`` combined (AND) with the
            ``multi_match`` query; only applied when ``query`` is non-empty
        :param page: 1-based page number
        """
        # These attributes must be set before calling the parent initializer,
        # because it builds the search and thereby calls build_search()/query().
        self.page = page
        self.extra_query = extra_query
        # Avoid a shared mutable default; the parent expects a mapping.
        super().__init__(query, {} if filters is None else filters, sort)

    def query(self, search, query):
        """
        Attach the full-text query (and ``extra_query``, if any) to ``search``.

        Returns ``search`` unchanged when ``query`` is empty.
        """
        if not query:
            return search
        if self.fields:
            query_ = Q('multi_match', fields=self.fields, query=query)
        else:
            # No fields configured: omit ``fields`` instead of sending an
            # empty list, so Elasticsearch falls back to its default fields.
            query_ = Q('multi_match', query=query)
        if isinstance(self.extra_query, Query):
            query_ = query_ & self.extra_query
        return search.query(query_)

    def build_search(self):
        """Build the search and slice it down to the requested page."""
        s = super().build_search()
        # Page size comes from settings.PAGE (the original author notes it is
        # 10, the same as the Elasticsearch default).
        return s[settings.PAGE * (self.page - 1):self.page * settings.PAGE]

使用方法(返回數據結構比較複雜,可自行查看)

from elasticsearch_dsl import Q as Q_ELA

def article_syn_e(pk, kind, source, title, keyword, context):
    """
    Sync an article to the Elasticsearch server (create).

    :param pk: primary key, used as the document ``_id``
    :param kind: value stored in the ``kind`` field
    :param source: value stored in the ``source`` field
    :param title: article title
    :param keyword: comma-separated keyword string
    :param context: article body as HTML; only its text content is indexed
    """
    # NOTE(review): ArticleDocument declares ``author`` but not ``source`` -
    # confirm that ``source`` is the intended field name.
    # Trim every keyword and drop empty entries: a bare split(',') turns ''
    # into [''] and 'a, b' into ['a', ' b'], which would be indexed as bogus
    # exact-match keywords.
    keyword = [word.strip() for word in keyword.split(',') if word.strip()]
    context = BeautifulSoup(context, 'html.parser').text
    obj = ArticleDocument(_id=pk, kind=kind, source=source, title=title, keyword=keyword, context=context)
    obj.save()


def article_syn_e_del(pk):
    """
    Sync a deletion to the Elasticsearch server.

    Fetches the document whose id is ``pk`` and deletes it.
    """
    document = ArticleDocument.get(id=pk)
    document.delete()


# Query: match `keyword` against the configured fields, restricted to a
# single `kind`, and fetch the requested page.
obj_search = ArticleSearch(
    keyword,
    extra_query=Q_ELA('term', kind=int(kind_word)),
    page=page,
)
data = obj_search.execute()

第二種方法

這種寫法要求篩選字段與聚合字段保持一致:filters 中的鍵必須是 facets 中已聲明的名稱。某種程度上來說,如果直接繼承 FacetedSearch 類,這種寫法非常合適。搜索時會先按照 fields 指定的字段執行 multi_match 查詢,繼而根據 facets 中的字段進行聚合與篩選(這就涉及到查詢和過濾的區別,此處不再展開)。

from django.conf import settings
from elasticsearch_dsl.query import Query
from elasticsearch_dsl import Document, Keyword, Text, FacetedSearch, Short, Q, TermsFacet


class ArticleDocument(Document):
    """Mapping of an article as stored in the ``articles`` index."""

    # Exact-value fields (not analyzed).
    kind = Short()
    author = Keyword()
    keyword = Keyword(multi=True)

    # Full-text fields analyzed with ik_smart at both index and search time;
    # each also exposes a ``name`` sub-field of type Keyword.
    title = Text(
        analyzer="ik_smart",
        search_analyzer="ik_smart",
        fields={"name": Keyword()},
    )
    context = Text(
        analyzer="ik_smart",
        search_analyzer="ik_smart",
        fields={"name": Keyword()},
    )

    class Index:
        name = "articles"
        # NOTE(review): the shard count reuses settings.PAGE, which the search
        # class uses as the page size - confirm this coupling is intended.
        settings = {
            "number_of_shards": settings.PAGE,
        }


class ArticleSearch(FacetedSearch):
    """
    Faceted search over the ``articles`` index with paging.

    The search text is matched against ``fields``; results are aggregated
    and optionally filtered by the facets declared in ``facets``.
    """
    index = 'articles'
    doc_types = ArticleDocument
    fields = ['title', 'keyword', 'context']

    # Aggregation setup.
    # A faceted field must not be a Text field, otherwise Elasticsearch
    # answers with HTTP 400 (illegal_argument_exception); some other field
    # types cannot be aggregated either.
    # Several facets may be declared; each one is executed and contributes
    # its own aggregation result.
    # ``kind`` is declared because filter names are looked up in this mapping:
    # filtering on a name that is not declared here raises KeyError.
    facets = {
        'author': TermsFacet(field='author'),
        'kind': TermsFacet(field='kind'),
    }

    def __init__(self, query=None, filters=None, sort=(), page=1):
        """
        :param query: search text matched against ``fields``
        :param filters: mapping of facet name to filter value(s); the names
            must be declared in ``facets`` (this restriction can be lifted by
            overriding). Both aggregation and filtering are executed.
            ``None`` means no filters.
        :param sort: sort specification forwarded to ``FacetedSearch``
        :param page: 1-based page number
        """
        # Must be set before calling the parent initializer, because it
        # builds the search and thereby calls build_search().
        self.page = page
        # Avoid a shared mutable default; the parent expects a mapping.
        super().__init__(query, {} if filters is None else filters, sort)

    def build_search(self):
        """Build the search and slice it down to the requested page."""
        s = super().build_search()
        # Page size comes from settings.PAGE (the original author notes it is
        # 10, the same as the Elasticsearch default).
        return s[settings.PAGE * (self.page - 1):self.page * settings.PAGE]

使用方法(返回數據結構比較複雜,可自行查看)

# NOTE: every key in `filters` is looked up in ArticleSearch.facets;
# a name that is not declared there raises KeyError.
obj_search_2 = ArticleSearch('play', filters={'kind': 0})
data1 = obj_search_2.execute()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章