多关键词检索

2025-09-15 12:58:34 +08:00 · 2022-06-28 16:49:44 +08:00 · 2022-06-28 16:49:44 +08:00 · f17a0ad45f
commit f17a0ad45f
parent 1df8f3da7c
5 changed files with 180 additions and 45 deletions
--- a/glc/ldb/engine.go
+++ b/glc/ldb/engine.go
@ -38,7 +38,7 @@ func (e *Engine) AddTextLog(logText string) {
 	e.logStorage.AddTextLog(logText)
 }

-func (e *Engine) Search(searchKey string, pageSize int, currentId uint64, forward bool) *search.SearchResult {
+func (e *Engine) Search(searchKey string, pageSize int, currentDocId uint64, forward bool) *search.SearchResult {

 	// 检查修正pageSize
 	if pageSize < 1 {
@ -59,11 +59,15 @@ func (e *Engine) Search(searchKey string, pageSize int, currentId uint64, forwar
 		return new(search.SearchResult)
 	}

-	// 无条件浏览模式
 	if len(kws) == 0 {
-		return search.Search(e.storeName, "", pageSize, currentId, forward)
+		// 无条件浏览模式
+		return search.SearchLogData(e.storeName, pageSize, currentDocId, forward)
+	} else if len(kws) == 1 {
+		// 单关键词查询模式
+		return search.SearchWordIndex(e.storeName, kws[0], pageSize, currentDocId, forward)
+	} else {
+		// 多关键词查询模式
+		return search.Search(e.storeName, kws, pageSize, currentDocId, forward)
 	}

-	// 单关键词查询模式
-	return search.Search(e.storeName, kws[0], pageSize, currentId, forward)
 }
--- a/glc/ldb/search/search_keys.go
+++ b/glc/ldb/search/search_keys.go
@ -12,22 +12,22 @@ import (
 )

 type SearchResult struct {
-	Total       string                  `json:"total,omitempty"`       // 总件数（用10进制字符串形式以避免出现科学计数法）
-	PageFirstId string                  `json:"pageFirstId,omitempty"` // 当前页第一条的文档ID或索引ID
-	PageLastId  string                  `json:"pageLastId,omitempty"`  // 当前页最后一条的文档ID或索引ID
-	Data        []*storage.LogDataModel `json:"data,omitempty"`        // 检索结果数据（日志文档数组）
+	Total string                  `json:"total,omitempty"` // Total count, kept as a base-10 string to avoid scientific notation
+	Data  []*storage.LogDataModel `json:"data,omitempty"`  // Search result data (slice of log documents)
 }

-// 单关键词浏览日志
-func Search(storeName string, word string, pageSize int, currentId uint64, forward bool) *SearchResult {
-	if word == "" {
-		return searchLogData(storeName, pageSize, currentId, forward)
+// Search runs a multi-keyword lookup: it opens the word index of every keyword in kws and lets findSame intersect them.
+func Search(storeName string, kws []string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
+	storeLogData := storage.NewLogDataStorageHandle(storeName) // log data store handle
+	var widxs []*storage.WordIndexStorage
+	for _, word := range kws {
+		widxs = append(widxs, storage.NewWordIndexStorage(storeName, word))
 	}
-	return searchWordIndex(storeName, word, pageSize, currentId, forward)
+	return findSame(pageSize, currentDocId, forward, storeLogData, widxs...)
 }

 // 无关键词时走全量检索
-func searchLogData(storeName string, pageSize int, currentId uint64, forward bool) *SearchResult {
+func SearchLogData(storeName string, pageSize int, currentDocId uint64, forward bool) *SearchResult {

 	var rs = new(SearchResult)                                 // 检索结果
 	storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
@ -38,7 +38,7 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
 		return rs
 	}

-	if currentId == 0 {
+	if currentDocId == 0 {
 		// 第一页
 		var min, max uint64
 		max = totalCount
@ -51,16 +51,14 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
 		for i := max; i >= min; i-- {
 			rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel()) // 件数等同日志文档ID
 		}
-		rs.PageFirstId = cmn.Uint64ToString(max, 36)
-		rs.PageLastId = cmn.Uint64ToString(min, 36)
 	} else if forward {
 		// 后一页
-		if currentId > 1 {
+		if currentDocId > 1 {
 			var min, max uint64
-			if currentId > totalCount {
+			if currentDocId > totalCount {
 				max = totalCount
 			} else {
-				max = currentId - 1
+				max = currentDocId - 1
 			}
 			if max > uint64(pageSize) {
 				min = max - uint64(pageSize) + 1
@ -71,14 +69,12 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
 			for i := max; i >= min; i-- {
 				rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel())
 			}
-			rs.PageFirstId = cmn.Uint64ToString(max, 36)
-			rs.PageLastId = cmn.Uint64ToString(min, 36)
 		}
 	} else {
 		// 前一页
-		if totalCount > currentId {
+		if totalCount > currentDocId {
 			var min, max uint64
-			min = currentId + 1
+			min = currentDocId + 1
 			max = min + uint64(pageSize) - 1
 			if max > totalCount {
 				max = totalCount
@ -87,8 +83,6 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
 			for i := max; i >= min; i-- {
 				rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel())
 			}
-			rs.PageFirstId = cmn.Uint64ToString(max, 36)
-			rs.PageLastId = cmn.Uint64ToString(min, 36)
 		}
 	}

@ -96,7 +90,7 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
 }

 // 有关键词时走索引检索
-func searchWordIndex(storeName string, word string, pageSize int, currentId uint64, forward bool) *SearchResult {
+func SearchWordIndex(storeName string, word string, pageSize int, currentDocId uint64, forward bool) *SearchResult {

 	var rs = new(SearchResult)                                 // 检索结果
 	storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
@ -108,7 +102,7 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
 		return rs
 	}

-	if currentId == 0 {
+	if currentDocId == 0 {
 		// 第一页
 		var min, max uint64
 		max = totalCount
@ -121,16 +115,14 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
 		for i := max; i >= min; i-- {
 			rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel()) // 经索引取日志文档ID
 		}
-		rs.PageFirstId = cmn.Uint64ToString(max, 36)
-		rs.PageLastId = cmn.Uint64ToString(min, 36)
 	} else if forward {
 		// 后一页
-		if currentId > 1 {
+		if currentDocId > 1 {
 			var min, max uint64
-			if currentId > totalCount {
+			if currentDocId > totalCount {
 				max = totalCount
 			} else {
-				max = currentId - 1
+				max = currentDocId - 1
 			}
 			if max > uint64(pageSize) {
 				min = max - uint64(pageSize) + 1
@ -141,14 +133,12 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
 			for i := max; i >= min; i-- {
 				rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel())
 			}
-			rs.PageFirstId = cmn.Uint64ToString(max, 36)
-			rs.PageLastId = cmn.Uint64ToString(min, 36)
 		}
 	} else {
 		// 前一页
-		if totalCount > currentId {
+		if totalCount > currentDocId {
 			var min, max uint64
-			min = currentId + 1
+			min = currentDocId + 1
 			max = min + uint64(pageSize) - 1
 			if max > totalCount {
 				max = totalCount
@ -157,8 +147,6 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
 			for i := max; i >= min; i-- {
 				rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel())
 			}
-			rs.PageFirstId = cmn.Uint64ToString(max, 36)
-			rs.PageLastId = cmn.Uint64ToString(min, 36)
 		}
 	}

--- a/glc/ldb/search/search_same.go
+++ b/glc/ldb/search/search_same.go
@ -0,0 +1,108 @@
+/**
+ * 反向索引求交集
+ */
+package search
+
+import (
+	"fmt"
+	"glc/ldb/storage"
+)
+
+// findSame intersects several inverted word indexes and returns one page of
+// log documents whose IDs are present in every index.
+//
+// The index with the fewest entries drives the scan; each candidate document
+// ID read from it is probed in the other indexes with GetPosByDocId.
+//
+// Parameters:
+//   - pageSize: maximum number of documents to return.
+//   - currentDocId: anchor document ID; 0 requests the first (newest) page.
+//   - forward: with an anchor, true pages towards lower index positions
+//     (next page) and false towards higher ones (previous page).
+//   - storeLogData: handle used to load the matching log documents.
+//   - widxs: indexes to intersect; callers are expected to pass at least two.
+//
+// The result lists documents from higher to lower index position. rs.Total
+// holds the number of documents in this page, not the overall match count.
+func findSame(pageSize int, currentDocId uint64, forward bool, storeLogData *storage.LogDataStorageHandle, widxs ...*storage.WordIndexStorage) *SearchResult {
+	rs := new(SearchResult)
+	if len(widxs) == 0 {
+		return rs // nothing to intersect; also protects the widxs[0] access below
+	}
+
+	// Use the shortest index as the driver so the fewest candidates are probed.
+	minIdx := widxs[0]
+	for _, widx := range widxs[1:] {
+		if widx.TotalCount() < minIdx.TotalCount() {
+			minIdx = widx
+		}
+	}
+
+	// Quick rejection of cases that cannot produce a result.
+	totalCount := minIdx.TotalCount()
+	if totalCount == 0 || (totalCount == 1 && currentDocId > 0) {
+		return rs // empty index, or a single entry that would have to be skipped
+	}
+
+	// Locate the starting position inside the driver index.
+	pos := totalCount // default: first page starts at the newest entry
+	if currentDocId > 0 {
+		pos = minIdx.GetPosByDocId(currentDocId) // position of the anchor document
+		if pos == 0 || (pos == 1 && forward) || (pos == totalCount && !forward) {
+			return rs // anchor not indexed, or nothing left in the requested direction
+		}
+	}
+
+	// inAll reports whether docId is present in every index except the driver.
+	inAll := func(docId uint64) bool {
+		for _, widx := range widxs {
+			if widx != minIdx && widx.GetPosByDocId(docId) == 0 {
+				return false // missing from one index, so not in the intersection
+			}
+		}
+		return true
+	}
+
+	rsCnt := 0
+	if currentDocId == 0 || forward {
+		// First page, or next page relative to the anchor: scan downwards.
+		if currentDocId > 0 {
+			pos-- // step past the anchor document itself
+		}
+		for i := pos; i > 0; i-- {
+			docId := minIdx.Get(i)
+			if !inAll(docId) {
+				continue
+			}
+			rsCnt++
+			rs.Data = append(rs.Data, storeLogData.GetLogDataModel(docId))
+			if rsCnt >= pageSize {
+				break // at most one page
+			}
+		}
+	} else {
+		// Previous page relative to the anchor: scan upwards, then reverse.
+		var ary []*storage.LogDataModel
+		for i := pos + 1; i <= totalCount; i++ {
+			// Read the entry at the loop position i; reading at the fixed start
+			// position would test the same document on every iteration.
+			docId := minIdx.Get(i)
+			if !inAll(docId) {
+				continue
+			}
+			rsCnt++
+			ary = append(ary, storeLogData.GetLogDataModel(docId))
+			if rsCnt >= pageSize {
+				break // at most one page
+			}
+		}
+
+		// Append in reverse so this page is ordered like the downward scan.
+		for i := len(ary) - 1; i >= 0; i-- {
+			rs.Data = append(rs.Data, ary[i])
+		}
+	}
+
+	rs.Total = fmt.Sprintf("%d", rsCnt)
+	return rs
+}
--- a/glc/ldb/storage/logdata_storage_handle.go
+++ b/glc/ldb/storage/logdata_storage_handle.go
@ -8,6 +8,7 @@ package storage

 import (
 	"glc/cmn"
+	"log"
 	"strings"
 )

@ -56,7 +57,14 @@ func (s *LogDataStorageHandle) AddTextLog(logText string) {
 	if s.storage.IsClose() {
 		s.storage = NewLogDataStorage(s.storage.storeName, "data")
 	}
-	s.storage.Add(d)
+	err := s.storage.Add(d)
+	if err != nil {
+		log.Println("竟然失败，再来一次", s.storage.IsClose(), err)
+		if s.storage.IsClose() {
+			s.storage = NewLogDataStorage(s.storage.storeName, "data")
+		}
+		s.storage.Add(d)
+	}
 }

 // // 添加日志（参数是LogDataModel形式的json字符串）
--- a/glc/ldb/storage/word_index_storage.go
+++ b/glc/ldb/storage/word_index_storage.go
@ -6,6 +6,7 @@
 package storage

 import (
+	"fmt"
 	"glc/cmn"
 	"glc/ldb/conf"
 	"glc/onexit"
@ -49,7 +50,7 @@ func getWidxStorage(cacheName string) *WordIndexStorage {
 func NewWordIndexStorage(storeName string, word string) *WordIndexStorage { // 存储器，文档，自定义对象

 	// 缓存有则取用
-	subPath := "inverted" + cmn.PathSeparator() + cmn.HashAndMod(word, 100) + cmn.PathSeparator() + "k_" + cmn.HashAndMod(word, math.MaxUint32)
+	subPath := getIndexSubPath(word)
 	cacheName := storeName + cmn.PathSeparator() + subPath
 	cacheStore := getWidxStorage(cacheName)
 	if cacheStore != nil && !cacheStore.IsClose() {
@ -103,12 +104,20 @@ func autoCloseWordIndexStorageWhenMaxIdle(store *WordIndexStorage) {
 }

 // 日志ID添加到索引
-func (s *WordIndexStorage) Add(id uint64) error {
+func (s *WordIndexStorage) Add(docId uint64) error {

 	// 加索引
 	s.lastTime = time.Now().Unix()
 	s.currentCount++ // ID递增
-	err := s.leveldb.Put(cmn.Uint64ToBytes(s.currentCount), cmn.Uint64ToBytes(id), nil)
+	err := s.leveldb.Put(cmn.Uint64ToBytes(s.currentCount), cmn.Uint64ToBytes(docId), nil)
+	if err != nil {
+		log.Println("保存索引失败", err)
+		return err
+	}
+
+	// docId加盐为键保存索引位置（反向索引再建反向索引之意）
+	keyDocId := fmt.Sprintf("d%d", docId)
+	err = s.leveldb.Put(cmn.StringToBytes(keyDocId), cmn.Uint64ToBytes(s.currentCount), nil)
 	if err != nil {
 		log.Println("保存索引失败", err)
 		return err
@ -120,10 +129,20 @@ func (s *WordIndexStorage) Add(id uint64) error {
 		log.Println("保存索引件数失败", err)
 		return err // 忽略事务问题，可下回重建
 	}
-	log.Println("创建日志索引：", id, "，关键词：", s.word)
+	log.Println("创建日志索引：", docId, "，关键词：", s.word)
 	return nil
 }

+// GetPosByDocId returns the index position stored under the document ID key
+// ("d" followed by the decimal ID); it returns 0 when the lookup fails.
+func (s *WordIndexStorage) GetPosByDocId(id uint64) uint64 {
+	key := cmn.StringToBytes(fmt.Sprintf("d%d", id))
+	if raw, err := s.leveldb.Get(key, nil); err == nil {
+		return cmn.BytesToUint64(raw)
+	}
+	return 0
+}
+
 // 通过索引ID取日志ID（返回0表示有问题）
 func (s *WordIndexStorage) Get(id uint64) uint64 {
 	if s.closing {
@ -187,3 +206,11 @@ func onExit4WordIndexStorage() {
 	}
 	log.Println("退出WordIndexStorage")
 }
+
+// getIndexSubPath builds the inverted-index subdirectory for word, spreading entries over several hashed directory levels to avoid collisions.
+func getIndexSubPath(word string) string {
+	dir1 := cmn.HashAndMod(word, 100, "添油")
+	dir2 := cmn.HashAndMod(word, 100, "加醋")
+	leaf := "k_" + cmn.HashAndMod(word, math.MaxUint32, "原味")
+	return "inverted" + cmn.PathSeparator() + dir1 + cmn.PathSeparator() + dir2 + cmn.PathSeparator() + leaf
+}