diff --git a/README.md b/README.md index 4b9b547..8553eb8 100644 --- a/README.md +++ b/README.md @@ -173,14 +173,17 @@ func main() { ### 开发版`latest` -- [ ] 增加时间范围检索条件 -- [ ] 界面优化 - [ ] 多语言 - [ ] 分词优化 - [ ] 日志审计 - [ ] 集群支持动态删减节点(或是页面管理删除) +### 版本`0.8.8` + +- [x] 增加时间范围检索条件 +- [x] 界面进一步简化优化 + ### 版本`0.8.7` - [x] 修复:增加特殊字符转换处理,避免日志中的html标签字样无法显示 diff --git a/glc/ldb/engine.go b/glc/ldb/engine.go index 0127614..58c0060 100644 --- a/glc/ldb/engine.go +++ b/glc/ldb/engine.go @@ -43,7 +43,7 @@ func (e *Engine) AddTextLog(date string, logText string, system string) { e.logStorage.AddTextLog(date, logText, system) } -func (e *Engine) Search(searchKey string, pageSize int, currentDocId uint32, forward bool) *search.SearchResult { +func (e *Engine) Search(searchKey string, minDatetime string, maxDatetime string, pageSize int, currentDocId uint32, forward bool) *search.SearchResult { // 检查修正pageSize if pageSize < 1 { @@ -76,11 +76,11 @@ func (e *Engine) Search(searchKey string, pageSize int, currentDocId uint32, for if len(kws) == 0 { // 无条件浏览模式 - return search.SearchLogData(e.storeName, pageSize, currentDocId, forward) + return search.SearchLogData(e.storeName, pageSize, currentDocId, forward, minDatetime, maxDatetime) } // 多关键词查询模式 - return search.SearchWordIndex(e.storeName, kws, pageSize, currentDocId, forward) + return search.SearchWordIndex(e.storeName, kws, pageSize, currentDocId, forward, minDatetime, maxDatetime) } // 添加日志 diff --git a/glc/ldb/engine_test.go b/glc/ldb/engine_test.go index da06d87..766d685 100644 --- a/glc/ldb/engine_test.go +++ b/glc/ldb/engine_test.go @@ -46,7 +46,7 @@ func Test_all(t *testing.T) { // } // time.Sleep(time.Duration(5) * time.Second) - rs := engine.Search(` them java `, 5, 0, true) + rs := engine.Search(` them java `, "", "", 5, 0, true) cmn.Println("共查到", rs.Total, "件") for _, v := range rs.Data { cmn.Println(v.Id, v.Text) diff --git a/glc/ldb/search/search_keys.go b/glc/ldb/search/search_keys.go index 63f1e4e..8fecec4 100644 --- a/glc/ldb/search/search_keys.go +++ b/glc/ldb/search/search_keys.go @@ -11,6 +11,7 @@ import ( "glc/ldb/storage/indexdoc" "glc/ldb/storage/indexword" "glc/ldb/storage/logdata" + "strings" "github.com/gotoeasy/glang/cmn" ) @@ -28,8 +29,41 @@ type WidxStorage struct { } // 多关键词时计算关键词索引交集 -func SearchWordIndex(storeName string, kws []string, pageSize int, currentDocId uint32, forward bool) *SearchResult { +func SearchWordIndex(storeName string, kws []string, pageSize int, currentDocId uint32, forward bool, minDatetime string, maxDatetime string) *SearchResult { storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据 + + // 时间条件范围判断,默认全部,有检索条件时调整范围 + maxDocumentId := storeLogData.TotalCount() // 时间范围条件内的最大文档ID + minDocumentId := cmn.StringToUint32("1", 1) // 时间范围条件内的最小文档ID + if !cmn.IsBlank(minDatetime) { + minDocumentId = findMinDocumentIdByDatetime(storeLogData, minDocumentId, maxDocumentId, minDatetime) // 时间范围条件内的最小文档ID,找不到时返回0 + if minDocumentId == 0 { + // 简单判断,无匹配时直接返回 + var rs = new(SearchResult) + rs.Total = cmn.Uint32ToString(storeLogData.TotalCount()) + rs.Count = "0" + return rs + } + } + if !cmn.IsBlank(maxDatetime) { + maxDocumentId = findMaxDocumentIdByDatetime(storeLogData, minDocumentId, maxDocumentId, maxDatetime) // 时间范围条件内的最大文档ID,找不到时返回0 + if maxDocumentId == 0 { + // 简单判断,无匹配时直接返回 + var rs = new(SearchResult) + rs.Total = cmn.Uint32ToString(storeLogData.TotalCount()) + rs.Count = "0" + return rs + } + } + if minDocumentId > maxDocumentId { + // 简单判断,无匹配时直接返回 + var rs = new(SearchResult) + rs.Total = cmn.Uint32ToString(storeLogData.TotalCount()) + rs.Count = "0" + return rs + } + + // 汇总索引进行关联查找 var widxs []*WidxStorage for _, word := range kws { widxStorage := &WidxStorage{ @@ -39,32 +73,76 @@ func SearchWordIndex(storeName string, kws []string, pageSize int, currentDocId } widxs = append(widxs, widxStorage) } - return findSame(pageSize, currentDocId, forward, storeLogData, widxs...) + return findSame(pageSize, currentDocId, forward, minDocumentId, maxDocumentId, storeLogData, widxs...) } // 无关键词时走全量检索 -func SearchLogData(storeName string, pageSize int, currentDocId uint32, forward bool) *SearchResult { +func SearchLogData(storeName string, pageSize int, currentDocId uint32, forward bool, minDatetime string, maxDatetime string) *SearchResult { var rs = new(SearchResult) // 检索结果 storeLogData := storage.NewLogDataStorageHandle(storeName) // 数据 totalCount := storeLogData.TotalCount() // 总件数 - rs.Total = cmn.Uint32ToString(totalCount) // 返回的总件数用10进制字符串形式以避免出现科学计数法 + rs.Total = cmn.Uint32ToString(totalCount) // 返回的日志总量件数,用10进制字符串形式以避免出现科学计数法 rs.Count = cmn.Uint32ToString(totalCount) // 当前条件最多匹配件数 if totalCount == 0 { return rs } + // 时间条件范围判断,默认全部,有检索条件时调整范围 + maxDocumentId := totalCount // 时间范围条件内的最大文档ID + minDocumentId := cmn.StringToUint32("1", 1) // 时间范围条件内的最小文档ID + hasMin := !cmn.IsBlank(minDatetime) + hasMax := !cmn.IsBlank(maxDatetime) + if hasMin { + minDocumentId = findMinDocumentIdByDatetime(storeLogData, minDocumentId, maxDocumentId, minDatetime) // 时间范围条件内的最小文档ID + if minDocumentId == 0 { + // 简单判断,无匹配时直接返回 + var rs = new(SearchResult) + rs.Total = "0" + rs.Count = "0" + return rs + } + } + if hasMax { + maxDocumentId = findMaxDocumentIdByDatetime(storeLogData, minDocumentId, maxDocumentId, maxDatetime) // 时间范围条件内的最大文档ID + if maxDocumentId == 0 { + // 简单判断,无匹配时直接返回 + var rs = new(SearchResult) + rs.Total = "0" + rs.Count = "0" + return rs + } + } + if hasMax || hasMin { + if minDocumentId > maxDocumentId { + // 简单判断,无匹配时直接返回 + rs.Count = "0" + return rs + } + rs.Count = cmn.Uint32ToString(maxDocumentId - minDocumentId + 1) // 估算的最大匹配件数 + } + + // 开始检索 if currentDocId == 0 { // 第一页 var min, max uint32 max = totalCount + + if max > maxDocumentId { + max = maxDocumentId // 最大不超出时间范围限制内的最大文档ID + } + if max > uint32(pageSize) { min = max - uint32(pageSize) + 1 } else { min = 1 } + if min < minDocumentId { + min = minDocumentId // 最小不超出时间范围限制内的最小文档ID + } + for i := max; i >= min; i-- { rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel()) // 件数等同日志文档ID } @@ -77,12 +155,21 @@ func SearchLogData(storeName string, pageSize int, currentDocId uint32, forward } else { max = currentDocId - 1 } + + if max > maxDocumentId { + max = maxDocumentId // 最大不超出时间范围限制内的最大文档ID + } + if max > uint32(pageSize) { min = max - uint32(pageSize) + 1 } else { min = 1 } + if min < minDocumentId { + min = minDocumentId // 最小不超出时间范围限制内的最小文档ID + } + for i := max; i >= min; i-- { rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel()) } @@ -92,11 +179,20 @@ func SearchLogData(storeName string, pageSize int, currentDocId uint32, forward if totalCount > currentDocId { var min, max uint32 min = currentDocId + 1 + + if min < minDocumentId { + min = minDocumentId // 最小不超出时间范围限制内的最小文档ID + } + max = min + uint32(pageSize) - 1 if max > totalCount { max = totalCount } + if max > maxDocumentId { + max = maxDocumentId // 最大不超出时间范围限制内的最大文档ID + } + for i := max; i >= min; i-- { rs.Data = append(rs.Data, storeLogData.GetLogDataDocument(i).ToLogDataModel()) } @@ -107,7 +203,7 @@ func SearchLogData(storeName string, pageSize int, currentDocId uint32, forward } // 参数widxs长度要求大于1,currentDocId不传就是查第一页 -func findSame(pageSize int, currentDocId uint32, forward bool, storeLogData *storage.LogDataStorageHandle, widxs ...*WidxStorage) *SearchResult { +func findSame(pageSize int, currentDocId uint32, forward bool, minDocumentId uint32, maxDocumentId uint32, storeLogData *storage.LogDataStorageHandle, widxs ...*WidxStorage) *SearchResult { var rs = new(SearchResult) rs.Total = cmn.Uint32ToString(storeLogData.TotalCount()) // 日志总量件数 @@ -123,6 +219,9 @@ func findSame(pageSize int, currentDocId uint32, forward bool, storeLogData *sto minIdx = widxs[i] } } + if minCount > maxDocumentId-minDocumentId+1 { + minCount = maxDocumentId - minDocumentId + 1 // 最多匹配件数估算,不会超出时间条件范围,两者取其小 + } rs.Count = cmn.Uint32ToString(minCount) // 当前条件最多匹配件数 // 简单检查排除没结果的情景 @@ -154,29 +253,31 @@ func findSame(pageSize int, currentDocId uint32, forward bool, storeLogData *sto for i := tmpMinPos; i > 0; { // 取值 docId := minIdx.idxwordStorage.GetDocId(minIdx.word, i) - // 比较 - flg = true - for n := 0; n < cnt; n++ { - if widxs[n] == minIdx { - continue // 跳过比较自己 - } + if docId >= minDocumentId && docId <= maxDocumentId { + // 在时间范围条件内时,继续查找比较 + flg = true + for n := 0; n < cnt; n++ { + if widxs[n] == minIdx { + continue // 跳过比较自己 + } - seq := widxs[n].idxdocStorage.GetWordDocSeq(widxs[n].word, docId) - if seq == 0 { - flg = false // 没找到 - break + seq := widxs[n].idxdocStorage.GetWordDocSeq(widxs[n].word, docId) + if seq == 0 { + flg = false // 没找到 + break + } + if seq < tmpMinPos { + tmpMinPos = seq + tmpMinIdx = widxs[n] // 当前最短索引,存起来下回比较用 + } } - if seq < tmpMinPos { - tmpMinPos = seq - tmpMinIdx = widxs[n] // 当前最短索引,存起来下回比较用 - } - } - // 找到则加入结果 - if flg { - rsCnt++ - rs.Data = append(rs.Data, storeLogData.GetLogDataModel(docId)) - if rsCnt >= pageSize { - break // 最多找一页 + // 找到则加入结果 + if flg { + rsCnt++ + rs.Data = append(rs.Data, storeLogData.GetLogDataModel(docId)) + if rsCnt >= pageSize { + break // 最多找一页 + } } } @@ -191,6 +292,11 @@ func findSame(pageSize int, currentDocId uint32, forward bool, storeLogData *sto for i := pos; i <= totalCount; i++ { // 取值 docId := minIdx.idxwordStorage.GetDocId(minIdx.word, i) + + if docId < minDocumentId || docId > maxDocumentId { + continue // 不在时间范围条件内,不匹配,跳过 + } + // 比较 flg = true for i := 0; i < cnt; i++ { @@ -218,6 +324,61 @@ func findSame(pageSize int, currentDocId uint32, forward bool, storeLogData *sto } } - rs.Total = cmn.Uint32ToString(storeLogData.TotalCount()) return rs } + +// 查找满足最小时间范围的最小文档id +func findMinDocumentIdByDatetime(storeLogData *storage.LogDataStorageHandle, uiMin uint32, uiMax uint32, minDatetime string) uint32 { + if strings.Compare(minDatetime+".000", storeLogData.GetLogDataDocument(uiMin).ToLogDataModel().Date) <= 0 { + return uiMin // 边界外输入条件常发生,特殊照顾确认边界,一定程度提高性能 + } + + rs := cmn.StringToUint32("0", 0) + min := uiMin + 1 // 参数的最小已检查,跳过 + max := uiMax + for min <= max { + left, rigth, flg, target := findGE(storeLogData, min, max, minDatetime) + min = left + max = rigth + if flg { + rs = target + } + } + return rs +} + +func findGE(storeLogData *storage.LogDataStorageHandle, min uint32, max uint32, minDatetime string) (uint32, uint32, bool, uint32) { + middle := (min + max) / 2 + if strings.Compare(minDatetime+".000", storeLogData.GetLogDataDocument(middle).ToLogDataModel().Date) <= 0 { + return min, middle - 1, true, middle // 能匹配(middle的日时>=minDatetime),但不一定是最小匹配,继续返回下次待查找的范围 + } + return middle + 1, max, false, 0 // 不匹配(middle的日时=middle的日时),但不一定是最小匹配,继续返回下次待查找的范围 + } + + rs := cmn.StringToUint32("0", 0) + min := uiMin + max := uiMax - 1 // 参数的最大已检查,跳过 + for min <= max { + left, rigth, flg, target := findLE(storeLogData, min, max, maxDatetime) + min = left + max = rigth + if flg { + rs = target + } + } + return rs +} + +func findLE(storeLogData *storage.LogDataStorageHandle, min uint32, max uint32, maxDatetime string) (uint32, uint32, bool, uint32) { + middle := (min + max) / 2 + if strings.Compare(storeLogData.GetLogDataDocument(middle).ToLogDataModel().Date, maxDatetime+".999") <= 0 { + return middle + 1, max, true, middle // 能匹配(maxDatetime>=middle的日时),但不一定是最小匹配,继续返回下次待查找的范围 + } + return min, middle - 1, false, 0 // 不匹配(maxDatetime

- +