多关键词检索

This commit is contained in:
gotoeasy 2022-06-28 16:49:44 +08:00
parent 1df8f3da7c
commit f17a0ad45f
5 changed files with 180 additions and 45 deletions

View File

@ -38,7 +38,7 @@ func (e *Engine) AddTextLog(logText string) {
e.logStorage.AddTextLog(logText)
}
func (e *Engine) Search(searchKey string, pageSize int, currentId uint64, forward bool) *search.SearchResult {
func (e *Engine) Search(searchKey string, pageSize int, currentDocId uint64, forward bool) *search.SearchResult {
// 检查修正pageSize
if pageSize < 1 {
@ -59,11 +59,15 @@ func (e *Engine) Search(searchKey string, pageSize int, currentId uint64, forwar
return new(search.SearchResult)
}
// 无条件浏览模式
if len(kws) == 0 {
return search.Search(e.storeName, "", pageSize, currentId, forward)
// 无条件浏览模式
return search.SearchLogData(e.storeName, pageSize, currentDocId, forward)
} else if len(kws) == 1 {
// 单关键词查询模式
return search.SearchWordIndex(e.storeName, kws[0], pageSize, currentDocId, forward)
} else {
// 多关键词查询模式
return search.Search(e.storeName, kws, pageSize, currentDocId, forward)
}
// 单关键词查询模式
return search.Search(e.storeName, kws[0], pageSize, currentId, forward)
}

View File

@ -12,22 +12,22 @@ import (
)
type SearchResult struct {
Total string `json:"total,omitempty"` // 总件数用10进制字符串形式以避免出现科学计数法
PageFirstId string `json:"pageFirstId,omitempty"` // 当前页第一条的文档ID或索引ID
PageLastId string `json:"pageLastId,omitempty"` // 当前页最后一条的文档ID或索引ID
Data []*storage.LogDataModel `json:"data,omitempty"` // 检索结果数据(日志文档数组)
Total string `json:"total,omitempty"` // 总件数用10进制字符串形式以避免出现科学计数法
Data []*storage.LogDataModel `json:"data,omitempty"` // 检索结果数据(日志文档数组)
}
// 单关键词浏览日志
func Search(storeName string, word string, pageSize int, currentId uint64, forward bool) *SearchResult {
if word == "" {
return searchLogData(storeName, pageSize, currentId, forward)
// 多关键词浏览日志
func Search(storeName string, kws []string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
var widxs []*storage.WordIndexStorage
for _, word := range kws {
widxs = append(widxs, storage.NewWordIndexStorage(storeName, word))
}
return searchWordIndex(storeName, word, pageSize, currentId, forward)
return findSame(pageSize, currentDocId, forward, storeLogData, widxs...)
}
// 无关键词时走全量检索
func searchLogData(storeName string, pageSize int, currentId uint64, forward bool) *SearchResult {
func SearchLogData(storeName string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
var rs = new(SearchResult) // 检索结果
storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
@ -38,7 +38,7 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
return rs
}
if currentId == 0 {
if currentDocId == 0 {
// 第一页
var min, max uint64
max = totalCount
@ -51,16 +51,14 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
for i := max; i >= min; i-- {
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel()) // 件数等同日志文档ID
}
rs.PageFirstId = cmn.Uint64ToString(max, 36)
rs.PageLastId = cmn.Uint64ToString(min, 36)
} else if forward {
// 后一页
if currentId > 1 {
if currentDocId > 1 {
var min, max uint64
if currentId > totalCount {
if currentDocId > totalCount {
max = totalCount
} else {
max = currentId - 1
max = currentDocId - 1
}
if max > uint64(pageSize) {
min = max - uint64(pageSize) + 1
@ -71,14 +69,12 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
for i := max; i >= min; i-- {
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel())
}
rs.PageFirstId = cmn.Uint64ToString(max, 36)
rs.PageLastId = cmn.Uint64ToString(min, 36)
}
} else {
// 前一页
if totalCount > currentId {
if totalCount > currentDocId {
var min, max uint64
min = currentId + 1
min = currentDocId + 1
max = min + uint64(pageSize) - 1
if max > totalCount {
max = totalCount
@ -87,8 +83,6 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
for i := max; i >= min; i-- {
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel())
}
rs.PageFirstId = cmn.Uint64ToString(max, 36)
rs.PageLastId = cmn.Uint64ToString(min, 36)
}
}
@ -96,7 +90,7 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
}
// 有关键词时走索引检索
func searchWordIndex(storeName string, word string, pageSize int, currentId uint64, forward bool) *SearchResult {
func SearchWordIndex(storeName string, word string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
var rs = new(SearchResult) // 检索结果
storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
@ -108,7 +102,7 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
return rs
}
if currentId == 0 {
if currentDocId == 0 {
// 第一页
var min, max uint64
max = totalCount
@ -121,16 +115,14 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
for i := max; i >= min; i-- {
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel()) // 经索引取日志文档ID
}
rs.PageFirstId = cmn.Uint64ToString(max, 36)
rs.PageLastId = cmn.Uint64ToString(min, 36)
} else if forward {
// 后一页
if currentId > 1 {
if currentDocId > 1 {
var min, max uint64
if currentId > totalCount {
if currentDocId > totalCount {
max = totalCount
} else {
max = currentId - 1
max = currentDocId - 1
}
if max > uint64(pageSize) {
min = max - uint64(pageSize) + 1
@ -141,14 +133,12 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
for i := max; i >= min; i-- {
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel())
}
rs.PageFirstId = cmn.Uint64ToString(max, 36)
rs.PageLastId = cmn.Uint64ToString(min, 36)
}
} else {
// 前一页
if totalCount > currentId {
if totalCount > currentDocId {
var min, max uint64
min = currentId + 1
min = currentDocId + 1
max = min + uint64(pageSize) - 1
if max > totalCount {
max = totalCount
@ -157,8 +147,6 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
for i := max; i >= min; i-- {
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel())
}
rs.PageFirstId = cmn.Uint64ToString(max, 36)
rs.PageLastId = cmn.Uint64ToString(min, 36)
}
}

View File

@ -0,0 +1,108 @@
/**
* 反向索引求交集
*/
package search
import (
"fmt"
"glc/ldb/storage"
)
// findSame intersects several word indexes and returns one page of the log
// documents whose IDs are present in every index.
//
// Parameters:
//   - pageSize: maximum number of documents to return.
//   - currentDocId: document ID the page is relative to; 0 means "first page",
//     which starts from the last position of the shortest index.
//   - forward: when true, scan towards lower index positions ("next page");
//     when false, scan towards higher index positions ("previous page").
//     Ignored when currentDocId is 0.
//   - storeLogData: storage used to load the matched documents.
//   - widxs: the word indexes to intersect. Callers are expected to pass at
//     least two; an empty list yields an empty result.
//
// The returned result lists documents from higher to lower index position in
// both directions. Total holds the number of documents returned in this page
// (it is capped by pageSize), not the overall number of matches.
func findSame(pageSize int, currentDocId uint64, forward bool, storeLogData *storage.LogDataStorageHandle, widxs ...*storage.WordIndexStorage) *SearchResult {
	rs := new(SearchResult)
	if len(widxs) == 0 {
		return rs // nothing to intersect; also avoids indexing an empty slice below
	}

	// Drive the scan from the shortest index so the fewest candidates are probed.
	minIdx := widxs[0]
	for _, widx := range widxs[1:] {
		if widx.TotalCount() < minIdx.TotalCount() {
			minIdx = widx
		}
	}

	// Cheap rejection: an empty index, or a single entry that must be skipped.
	totalCount := minIdx.TotalCount()
	if totalCount == 0 || (totalCount == 1 && currentDocId > 0) {
		return rs
	}

	// Locate the starting position; by default start at the last position.
	pos := totalCount
	if currentDocId > 0 {
		pos = minIdx.GetPosByDocId(currentDocId)
		// Not found, or already at the edge in the requested direction.
		if pos == 0 || (pos == 1 && forward) || (pos == totalCount && !forward) {
			return rs
		}
	}

	rsCnt := 0
	if currentDocId == 0 || forward {
		// First page, or "next page": walk towards position 1.
		if currentDocId > 0 {
			pos-- // skip the reference document itself
		}
		for i := pos; i > 0; i-- {
			docId := minIdx.Get(i)
			if !inAllIndexes(docId, minIdx, widxs) {
				continue
			}
			rsCnt++
			rs.Data = append(rs.Data, storeLogData.GetLogDataModel(docId))
			if rsCnt >= pageSize {
				break // at most one page
			}
		}
	} else {
		// "Previous page": walk towards the last position, then reverse so the
		// output order matches the other direction.
		pos++ // skip the reference document itself
		var ary []*storage.LogDataModel
		for i := pos; i <= totalCount; i++ {
			// Read the entry at the loop position i. The original read the fixed
			// start position on every iteration and so never advanced.
			docId := minIdx.Get(i)
			if !inAllIndexes(docId, minIdx, widxs) {
				continue
			}
			rsCnt++
			ary = append(ary, storeLogData.GetLogDataModel(docId))
			if rsCnt >= pageSize {
				break // at most one page
			}
		}
		for i := len(ary) - 1; i >= 0; i-- {
			rs.Data = append(rs.Data, ary[i])
		}
	}

	rs.Total = fmt.Sprintf("%d", rsCnt)
	return rs
}

// inAllIndexes reports whether docId has a position in every index of widxs
// other than skip (the index the candidate was read from).
func inAllIndexes(docId uint64, skip *storage.WordIndexStorage, widxs []*storage.WordIndexStorage) bool {
	for _, widx := range widxs {
		if widx == skip {
			continue // the candidate came from this index
		}
		if widx.GetPosByDocId(docId) == 0 {
			return false
		}
	}
	return true
}

View File

@ -8,6 +8,7 @@ package storage
import (
"glc/cmn"
"log"
"strings"
)
@ -56,7 +57,14 @@ func (s *LogDataStorageHandle) AddTextLog(logText string) {
if s.storage.IsClose() {
s.storage = NewLogDataStorage(s.storage.storeName, "data")
}
s.storage.Add(d)
err := s.storage.Add(d)
if err != nil {
log.Println("竟然失败,再来一次", s.storage.IsClose(), err)
if s.storage.IsClose() {
s.storage = NewLogDataStorage(s.storage.storeName, "data")
}
s.storage.Add(d)
}
}
// // 添加日志,参数是LogDataModel形式的json字符串

View File

@ -6,6 +6,7 @@
package storage
import (
"fmt"
"glc/cmn"
"glc/ldb/conf"
"glc/onexit"
@ -49,7 +50,7 @@ func getWidxStorage(cacheName string) *WordIndexStorage {
func NewWordIndexStorage(storeName string, word string) *WordIndexStorage { // 存储器,文档,自定义对象
// 缓存有则取用
subPath := "inverted" + cmn.PathSeparator() + cmn.HashAndMod(word, 100) + cmn.PathSeparator() + "k_" + cmn.HashAndMod(word, math.MaxUint32)
subPath := getIndexSubPath(word)
cacheName := storeName + cmn.PathSeparator() + subPath
cacheStore := getWidxStorage(cacheName)
if cacheStore != nil && !cacheStore.IsClose() {
@ -103,12 +104,20 @@ func autoCloseWordIndexStorageWhenMaxIdle(store *WordIndexStorage) {
}
// 日志ID添加到索引
func (s *WordIndexStorage) Add(id uint64) error {
func (s *WordIndexStorage) Add(docId uint64) error {
// 加索引
s.lastTime = time.Now().Unix()
s.currentCount++ // ID递增
err := s.leveldb.Put(cmn.Uint64ToBytes(s.currentCount), cmn.Uint64ToBytes(id), nil)
err := s.leveldb.Put(cmn.Uint64ToBytes(s.currentCount), cmn.Uint64ToBytes(docId), nil)
if err != nil {
log.Println("保存索引失败", err)
return err
}
// docId加盐为键保存索引位置反向索引再建反向索引之意
keyDocId := fmt.Sprintf("d%d", docId)
err = s.leveldb.Put(cmn.StringToBytes(keyDocId), cmn.Uint64ToBytes(s.currentCount), nil)
if err != nil {
log.Println("保存索引失败", err)
return err
@ -120,10 +129,20 @@ func (s *WordIndexStorage) Add(id uint64) error {
log.Println("保存索引件数失败", err)
return err // 忽略事务问题,可下回重建
}
log.Println("创建日志索引:", id, ",关键词:", s.word)
log.Println("创建日志索引:", docId, ",关键词:", s.word)
return nil
}
// GetPosByDocId looks up the index position stored under the key "d<id>" for
// the given log document ID. It returns 0 when the lookup reports an error.
func (s *WordIndexStorage) GetPosByDocId(id uint64) uint64 {
	key := cmn.StringToBytes(fmt.Sprintf("d%d", id))
	if raw, err := s.leveldb.Get(key, nil); err == nil {
		return cmn.BytesToUint64(raw)
	}
	return 0
}
// 通过索引ID取日志ID,返回0表示有问题
func (s *WordIndexStorage) Get(id uint64) uint64 {
if s.closing {
@ -187,3 +206,11 @@ func onExit4WordIndexStorage() {
}
log.Println("退出WordIndexStorage")
}
// getIndexSubPath builds the sub-directory path of the inverted index for word:
// "inverted", then two directory levels derived from cmn.HashAndMod(word, 100, ...)
// with different third arguments, then a "k_"-prefixed leaf derived from
// cmn.HashAndMod(word, math.MaxUint32, ...). The extra levels spread the
// indexes over several directories.
func getIndexSubPath(word string) string {
	sep := cmn.PathSeparator()
	level1 := cmn.HashAndMod(word, 100, "添油")
	level2 := cmn.HashAndMod(word, 100, "加醋")
	leaf := "k_" + cmn.HashAndMod(word, math.MaxUint32, "原味")
	return "inverted" + sep + level1 + sep + level2 + sep + leaf
}