mirror of
https://github.com/gotoeasy/glogcenter.git
synced 2025-09-15 12:58:34 +08:00
多关键词检索
This commit is contained in:
parent
1df8f3da7c
commit
f17a0ad45f
@ -38,7 +38,7 @@ func (e *Engine) AddTextLog(logText string) {
|
||||
e.logStorage.AddTextLog(logText)
|
||||
}
|
||||
|
||||
func (e *Engine) Search(searchKey string, pageSize int, currentId uint64, forward bool) *search.SearchResult {
|
||||
func (e *Engine) Search(searchKey string, pageSize int, currentDocId uint64, forward bool) *search.SearchResult {
|
||||
|
||||
// 检查修正pageSize
|
||||
if pageSize < 1 {
|
||||
@ -59,11 +59,15 @@ func (e *Engine) Search(searchKey string, pageSize int, currentId uint64, forwar
|
||||
return new(search.SearchResult)
|
||||
}
|
||||
|
||||
// 无条件浏览模式
|
||||
if len(kws) == 0 {
|
||||
return search.Search(e.storeName, "", pageSize, currentId, forward)
|
||||
// 无条件浏览模式
|
||||
return search.SearchLogData(e.storeName, pageSize, currentDocId, forward)
|
||||
} else if len(kws) == 1 {
|
||||
// 单关键词查询模式
|
||||
return search.SearchWordIndex(e.storeName, kws[0], pageSize, currentDocId, forward)
|
||||
} else {
|
||||
// 多关键词查询模式
|
||||
return search.Search(e.storeName, kws, pageSize, currentDocId, forward)
|
||||
}
|
||||
|
||||
// 单关键词查询模式
|
||||
return search.Search(e.storeName, kws[0], pageSize, currentId, forward)
|
||||
}
|
||||
|
||||
@ -12,22 +12,22 @@ import (
|
||||
)
|
||||
|
||||
type SearchResult struct {
|
||||
Total string `json:"total,omitempty"` // 总件数(用10进制字符串形式以避免出现科学计数法)
|
||||
PageFirstId string `json:"pageFirstId,omitempty"` // 当前页第一条的文档ID或索引ID
|
||||
PageLastId string `json:"pageLastId,omitempty"` // 当前页最后一条的文档ID或索引ID
|
||||
Data []*storage.LogDataModel `json:"data,omitempty"` // 检索结果数据(日志文档数组)
|
||||
Total string `json:"total,omitempty"` // 总件数(用10进制字符串形式以避免出现科学计数法)
|
||||
Data []*storage.LogDataModel `json:"data,omitempty"` // 检索结果数据(日志文档数组)
|
||||
}
|
||||
|
||||
// 单关键词浏览日志
|
||||
func Search(storeName string, word string, pageSize int, currentId uint64, forward bool) *SearchResult {
|
||||
if word == "" {
|
||||
return searchLogData(storeName, pageSize, currentId, forward)
|
||||
// 多关键词浏览日志
|
||||
func Search(storeName string, kws []string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
|
||||
storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
|
||||
var widxs []*storage.WordIndexStorage
|
||||
for _, word := range kws {
|
||||
widxs = append(widxs, storage.NewWordIndexStorage(storeName, word))
|
||||
}
|
||||
return searchWordIndex(storeName, word, pageSize, currentId, forward)
|
||||
return findSame(pageSize, currentDocId, forward, storeLogData, widxs...)
|
||||
}
|
||||
|
||||
// 无关键词时走全量检索
|
||||
func searchLogData(storeName string, pageSize int, currentId uint64, forward bool) *SearchResult {
|
||||
func SearchLogData(storeName string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
|
||||
|
||||
var rs = new(SearchResult) // 检索结果
|
||||
storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
|
||||
@ -38,7 +38,7 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
|
||||
return rs
|
||||
}
|
||||
|
||||
if currentId == 0 {
|
||||
if currentDocId == 0 {
|
||||
// 第一页
|
||||
var min, max uint64
|
||||
max = totalCount
|
||||
@ -51,16 +51,14 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
|
||||
for i := max; i >= min; i-- {
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel()) // 件数等同日志文档ID
|
||||
}
|
||||
rs.PageFirstId = cmn.Uint64ToString(max, 36)
|
||||
rs.PageLastId = cmn.Uint64ToString(min, 36)
|
||||
} else if forward {
|
||||
// 后一页
|
||||
if currentId > 1 {
|
||||
if currentDocId > 1 {
|
||||
var min, max uint64
|
||||
if currentId > totalCount {
|
||||
if currentDocId > totalCount {
|
||||
max = totalCount
|
||||
} else {
|
||||
max = currentId - 1
|
||||
max = currentDocId - 1
|
||||
}
|
||||
if max > uint64(pageSize) {
|
||||
min = max - uint64(pageSize) + 1
|
||||
@ -71,14 +69,12 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
|
||||
for i := max; i >= min; i-- {
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel())
|
||||
}
|
||||
rs.PageFirstId = cmn.Uint64ToString(max, 36)
|
||||
rs.PageLastId = cmn.Uint64ToString(min, 36)
|
||||
}
|
||||
} else {
|
||||
// 前一页
|
||||
if totalCount > currentId {
|
||||
if totalCount > currentDocId {
|
||||
var min, max uint64
|
||||
min = currentId + 1
|
||||
min = currentDocId + 1
|
||||
max = min + uint64(pageSize) - 1
|
||||
if max > totalCount {
|
||||
max = totalCount
|
||||
@ -87,8 +83,6 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
|
||||
for i := max; i >= min; i-- {
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel())
|
||||
}
|
||||
rs.PageFirstId = cmn.Uint64ToString(max, 36)
|
||||
rs.PageLastId = cmn.Uint64ToString(min, 36)
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,7 +90,7 @@ func searchLogData(storeName string, pageSize int, currentId uint64, forward boo
|
||||
}
|
||||
|
||||
// 有关键词时走索引检索
|
||||
func searchWordIndex(storeName string, word string, pageSize int, currentId uint64, forward bool) *SearchResult {
|
||||
func SearchWordIndex(storeName string, word string, pageSize int, currentDocId uint64, forward bool) *SearchResult {
|
||||
|
||||
var rs = new(SearchResult) // 检索结果
|
||||
storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据
|
||||
@ -108,7 +102,7 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
|
||||
return rs
|
||||
}
|
||||
|
||||
if currentId == 0 {
|
||||
if currentDocId == 0 {
|
||||
// 第一页
|
||||
var min, max uint64
|
||||
max = totalCount
|
||||
@ -121,16 +115,14 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
|
||||
for i := max; i >= min; i-- {
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel()) // 经索引取日志文档ID
|
||||
}
|
||||
rs.PageFirstId = cmn.Uint64ToString(max, 36)
|
||||
rs.PageLastId = cmn.Uint64ToString(min, 36)
|
||||
} else if forward {
|
||||
// 后一页
|
||||
if currentId > 1 {
|
||||
if currentDocId > 1 {
|
||||
var min, max uint64
|
||||
if currentId > totalCount {
|
||||
if currentDocId > totalCount {
|
||||
max = totalCount
|
||||
} else {
|
||||
max = currentId - 1
|
||||
max = currentDocId - 1
|
||||
}
|
||||
if max > uint64(pageSize) {
|
||||
min = max - uint64(pageSize) + 1
|
||||
@ -141,14 +133,12 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
|
||||
for i := max; i >= min; i-- {
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel())
|
||||
}
|
||||
rs.PageFirstId = cmn.Uint64ToString(max, 36)
|
||||
rs.PageLastId = cmn.Uint64ToString(min, 36)
|
||||
}
|
||||
} else {
|
||||
// 前一页
|
||||
if totalCount > currentId {
|
||||
if totalCount > currentDocId {
|
||||
var min, max uint64
|
||||
min = currentId + 1
|
||||
min = currentDocId + 1
|
||||
max = min + uint64(pageSize) - 1
|
||||
if max > totalCount {
|
||||
max = totalCount
|
||||
@ -157,8 +147,6 @@ func searchWordIndex(storeName string, word string, pageSize int, currentId uint
|
||||
for i := max; i >= min; i-- {
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(storeIndex.Get(i)).ToLogDataModel())
|
||||
}
|
||||
rs.PageFirstId = cmn.Uint64ToString(max, 36)
|
||||
rs.PageLastId = cmn.Uint64ToString(min, 36)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
108
glc/ldb/search/search_same.go
Normal file
108
glc/ldb/search/search_same.go
Normal file
@ -0,0 +1,108 @@
|
||||
/**
|
||||
* 反向索引求交集
|
||||
*/
|
||||
package search
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"glc/ldb/storage"
|
||||
)
|
||||
|
||||
// 参数widxs长度要求大于1,currentDocId不传就是查第一页
|
||||
func findSame(pageSize int, currentDocId uint64, forward bool, storeLogData *storage.LogDataStorageHandle, widxs ...*storage.WordIndexStorage) *SearchResult {
|
||||
|
||||
var rs = new(SearchResult)
|
||||
|
||||
// 选个最短的索引
|
||||
cnt := len(widxs)
|
||||
minIdx := widxs[0]
|
||||
for i := 1; i < cnt; i++ {
|
||||
if widxs[i].TotalCount() < minIdx.TotalCount() {
|
||||
minIdx = widxs[i]
|
||||
}
|
||||
}
|
||||
|
||||
// 简单检查排除没结果的情景
|
||||
totalCount := minIdx.TotalCount()
|
||||
if totalCount == 0 || (totalCount == 1 && currentDocId > 0) {
|
||||
return rs // 索引件数0、或只有1条又还要跳过,都是找不到
|
||||
}
|
||||
|
||||
// 找匹配位置并排除没结果的情景
|
||||
pos := totalCount // 默认检索最新第一页
|
||||
if currentDocId > 0 {
|
||||
pos = minIdx.GetPosByDocId(currentDocId) // 有相对文档ID时找相对位置
|
||||
if pos == 0 || (pos == 1 && forward) || (pos == totalCount && !forward) {
|
||||
return rs // 找不到、或最后条还要向后、或最前条还要向前,都是找不到
|
||||
}
|
||||
}
|
||||
|
||||
// 位置就绪
|
||||
var rsCnt int = 0
|
||||
var flg bool
|
||||
if currentDocId == 0 || currentDocId > 0 && forward {
|
||||
// 无相对文档ID、或有且是后一页方向
|
||||
if currentDocId > 0 {
|
||||
pos-- // 相对文档ID有的话才顺移
|
||||
}
|
||||
|
||||
for i := pos; i > 0; i-- {
|
||||
// 取值
|
||||
docId := minIdx.Get(i)
|
||||
// 比较
|
||||
flg = true
|
||||
for i := 0; i < cnt; i++ {
|
||||
if widxs[i] == minIdx {
|
||||
continue // 跳过比较自己
|
||||
}
|
||||
if widxs[i].GetPosByDocId(docId) == 0 {
|
||||
flg = false // 没找到
|
||||
break
|
||||
}
|
||||
}
|
||||
// 找到则加入结果
|
||||
if flg {
|
||||
rsCnt++
|
||||
rs.Data = append(rs.Data, storeLogData.GetLogDataModel(docId))
|
||||
if rsCnt >= pageSize {
|
||||
break // 最多找一页
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// 有相对文档ID且是前一页方向
|
||||
pos++
|
||||
var ary []*storage.LogDataModel
|
||||
for i := pos; i <= totalCount; i++ {
|
||||
// 取值
|
||||
docId := minIdx.Get(pos)
|
||||
// 比较
|
||||
flg = true
|
||||
for i := 0; i < cnt; i++ {
|
||||
if widxs[i] == minIdx {
|
||||
continue // 跳过比较自己
|
||||
}
|
||||
if widxs[i].GetPosByDocId(docId) == 0 {
|
||||
flg = false // 没找到
|
||||
break
|
||||
}
|
||||
}
|
||||
// 找到则加入结果
|
||||
if flg {
|
||||
rsCnt++
|
||||
ary = append(ary, storeLogData.GetLogDataModel(docId))
|
||||
if rsCnt >= pageSize {
|
||||
break // 最多找一页
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 倒序放入结果
|
||||
for i := len(ary) - 1; i >= 0; i-- {
|
||||
rs.Data = append(rs.Data, ary[i])
|
||||
}
|
||||
}
|
||||
|
||||
rs.Total = fmt.Sprintf("%d", rsCnt)
|
||||
return rs
|
||||
}
|
||||
@ -8,6 +8,7 @@ package storage
|
||||
|
||||
import (
|
||||
"glc/cmn"
|
||||
"log"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@ -56,7 +57,14 @@ func (s *LogDataStorageHandle) AddTextLog(logText string) {
|
||||
if s.storage.IsClose() {
|
||||
s.storage = NewLogDataStorage(s.storage.storeName, "data")
|
||||
}
|
||||
s.storage.Add(d)
|
||||
err := s.storage.Add(d)
|
||||
if err != nil {
|
||||
log.Println("竟然失败,再来一次", s.storage.IsClose(), err)
|
||||
if s.storage.IsClose() {
|
||||
s.storage = NewLogDataStorage(s.storage.storeName, "data")
|
||||
}
|
||||
s.storage.Add(d)
|
||||
}
|
||||
}
|
||||
|
||||
// // 添加日志(参数是LogDataModel形式的json字符串)
|
||||
|
||||
@ -6,6 +6,7 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"glc/cmn"
|
||||
"glc/ldb/conf"
|
||||
"glc/onexit"
|
||||
@ -49,7 +50,7 @@ func getWidxStorage(cacheName string) *WordIndexStorage {
|
||||
func NewWordIndexStorage(storeName string, word string) *WordIndexStorage { // 存储器,文档,自定义对象
|
||||
|
||||
// 缓存有则取用
|
||||
subPath := "inverted" + cmn.PathSeparator() + cmn.HashAndMod(word, 100) + cmn.PathSeparator() + "k_" + cmn.HashAndMod(word, math.MaxUint32)
|
||||
subPath := getIndexSubPath(word)
|
||||
cacheName := storeName + cmn.PathSeparator() + subPath
|
||||
cacheStore := getWidxStorage(cacheName)
|
||||
if cacheStore != nil && !cacheStore.IsClose() {
|
||||
@ -103,12 +104,20 @@ func autoCloseWordIndexStorageWhenMaxIdle(store *WordIndexStorage) {
|
||||
}
|
||||
|
||||
// 日志ID添加到索引
|
||||
func (s *WordIndexStorage) Add(id uint64) error {
|
||||
func (s *WordIndexStorage) Add(docId uint64) error {
|
||||
|
||||
// 加索引
|
||||
s.lastTime = time.Now().Unix()
|
||||
s.currentCount++ // ID递增
|
||||
err := s.leveldb.Put(cmn.Uint64ToBytes(s.currentCount), cmn.Uint64ToBytes(id), nil)
|
||||
err := s.leveldb.Put(cmn.Uint64ToBytes(s.currentCount), cmn.Uint64ToBytes(docId), nil)
|
||||
if err != nil {
|
||||
log.Println("保存索引失败", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// docId加盐为键保存索引位置(反向索引再建反向索引之意)
|
||||
keyDocId := fmt.Sprintf("d%d", docId)
|
||||
err = s.leveldb.Put(cmn.StringToBytes(keyDocId), cmn.Uint64ToBytes(s.currentCount), nil)
|
||||
if err != nil {
|
||||
log.Println("保存索引失败", err)
|
||||
return err
|
||||
@ -120,10 +129,20 @@ func (s *WordIndexStorage) Add(id uint64) error {
|
||||
log.Println("保存索引件数失败", err)
|
||||
return err // 忽略事务问题,可下回重建
|
||||
}
|
||||
log.Println("创建日志索引:", id, ",关键词:", s.word)
|
||||
log.Println("创建日志索引:", docId, ",关键词:", s.word)
|
||||
return nil
|
||||
}
|
||||
|
||||
// 按日志文档ID找索引位置(找不到返回0)
|
||||
func (s *WordIndexStorage) GetPosByDocId(id uint64) uint64 {
|
||||
keyDocId := fmt.Sprintf("d%d", id)
|
||||
idx, err := s.leveldb.Get(cmn.StringToBytes(keyDocId), nil)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return cmn.BytesToUint64(idx)
|
||||
}
|
||||
|
||||
// 通过索引ID取日志ID(返回0表示有问题)
|
||||
func (s *WordIndexStorage) Get(id uint64) uint64 {
|
||||
if s.closing {
|
||||
@ -187,3 +206,11 @@ func onExit4WordIndexStorage() {
|
||||
}
|
||||
log.Println("退出WordIndexStorage")
|
||||
}
|
||||
|
||||
// 反向索引的子目录(多级目录散列处理避免冲突)
|
||||
func getIndexSubPath(word string) string {
|
||||
return "inverted" + cmn.PathSeparator() +
|
||||
cmn.HashAndMod(word, 100, "添油") + cmn.PathSeparator() +
|
||||
cmn.HashAndMod(word, 100, "加醋") + cmn.PathSeparator() +
|
||||
"k_" + cmn.HashAndMod(word, math.MaxUint32, "原味")
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user