Fuzzy search + tests (#555)
This commit is contained in:
@@ -69,53 +69,53 @@ func TestVerifyPassword(t *testing.T) {
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
password string
|
||||
hash string
|
||||
wantMatch bool
|
||||
wantErr bool
|
||||
name string
|
||||
password string
|
||||
hash string
|
||||
wantMatch bool
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "correct password",
|
||||
password: testPassword,
|
||||
hash: testHash,
|
||||
wantMatch: true,
|
||||
wantErr: false,
|
||||
name: "correct password",
|
||||
password: testPassword,
|
||||
hash: testHash,
|
||||
wantMatch: true,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "incorrect password",
|
||||
password: "wrongpassword",
|
||||
hash: testHash,
|
||||
wantMatch: false,
|
||||
wantErr: false,
|
||||
name: "incorrect password",
|
||||
password: "wrongpassword",
|
||||
hash: testHash,
|
||||
wantMatch: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty password against valid hash",
|
||||
password: "",
|
||||
hash: testHash,
|
||||
wantMatch: false,
|
||||
wantErr: false,
|
||||
name: "empty password against valid hash",
|
||||
password: "",
|
||||
hash: testHash,
|
||||
wantMatch: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty hash",
|
||||
password: testPassword,
|
||||
hash: "",
|
||||
wantMatch: false,
|
||||
wantErr: false,
|
||||
name: "empty hash",
|
||||
password: testPassword,
|
||||
hash: "",
|
||||
wantMatch: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "invalid hash format",
|
||||
password: testPassword,
|
||||
hash: "invalid",
|
||||
wantMatch: false,
|
||||
wantErr: true,
|
||||
name: "invalid hash format",
|
||||
password: testPassword,
|
||||
hash: "invalid",
|
||||
wantMatch: false,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "malformed hash - wrong prefix",
|
||||
password: testPassword,
|
||||
hash: "$bcrypt$invalid$hash",
|
||||
wantMatch: false,
|
||||
wantErr: true,
|
||||
name: "malformed hash - wrong prefix",
|
||||
password: testPassword,
|
||||
hash: "$bcrypt$invalid$hash",
|
||||
wantMatch: false,
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -190,4 +190,4 @@ func TestPasswordRoundTrip(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@ package index
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strconv"
|
||||
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
|
||||
@@ -10,7 +12,6 @@ import (
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/search/query"
|
||||
"github.com/rs/zerolog/log"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type BleveIndexer struct {
|
||||
@@ -53,6 +54,8 @@ func (i *BleveIndexer) open() (bleve.Index, error) {
|
||||
|
||||
docMapping := bleve.NewDocumentMapping()
|
||||
docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping())
|
||||
docMapping.AddFieldMappingsAt("UserID", bleve.NewNumericFieldMapping())
|
||||
docMapping.AddFieldMappingsAt("Visibility", bleve.NewNumericFieldMapping())
|
||||
docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping())
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
@@ -74,6 +77,7 @@ func (i *BleveIndexer) open() (bleve.Index, error) {
|
||||
}
|
||||
|
||||
docMapping.DefaultAnalyzer = "gistAnalyser"
|
||||
mapping.DefaultMapping = docMapping
|
||||
|
||||
return bleve.New(i.path, mapping)
|
||||
}
|
||||
@@ -105,39 +109,72 @@ func (i *BleveIndexer) Search(queryStr string, queryMetadata SearchGistMetadata,
|
||||
var err error
|
||||
var indexerQuery query.Query
|
||||
if queryStr != "" {
|
||||
contentQuery := bleve.NewMatchPhraseQuery(queryStr)
|
||||
contentQuery.FieldVal = "Content"
|
||||
// Use match query with fuzzy matching for more flexible content search
|
||||
contentQuery := bleve.NewMatchQuery(queryStr)
|
||||
contentQuery.SetField("Content")
|
||||
contentQuery.SetFuzziness(2)
|
||||
indexerQuery = contentQuery
|
||||
} else {
|
||||
contentQuery := bleve.NewMatchAllQuery()
|
||||
indexerQuery = contentQuery
|
||||
}
|
||||
|
||||
privateQuery := bleve.NewBoolFieldQuery(false)
|
||||
privateQuery.SetField("Private")
|
||||
// Visibility filtering: show public gists (Visibility=0) OR user's own gists
|
||||
visibilityZero := float64(0)
|
||||
truee := true
|
||||
publicQuery := bleve.NewNumericRangeInclusiveQuery(&visibilityZero, &visibilityZero, &truee, &truee)
|
||||
publicQuery.SetField("Visibility")
|
||||
|
||||
userIdMatch := float64(userId)
|
||||
truee := true
|
||||
userIdQuery := bleve.NewNumericRangeInclusiveQuery(&userIdMatch, &userIdMatch, &truee, &truee)
|
||||
userIdQuery.SetField("UserID")
|
||||
|
||||
accessQuery := bleve.NewDisjunctionQuery(privateQuery, userIdQuery)
|
||||
accessQuery := bleve.NewDisjunctionQuery(publicQuery, userIdQuery)
|
||||
indexerQuery = bleve.NewConjunctionQuery(accessQuery, indexerQuery)
|
||||
|
||||
addQuery := func(field, value string) {
|
||||
if value != "" && value != "." {
|
||||
q := bleve.NewMatchPhraseQuery(value)
|
||||
q.FieldVal = field
|
||||
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
|
||||
}
|
||||
}
|
||||
// Handle "All" field - search across all metadata fields with OR logic
|
||||
if queryMetadata.All != "" {
|
||||
allQueries := make([]query.Query, 0)
|
||||
|
||||
addQuery("Username", queryMetadata.Username)
|
||||
addQuery("Title", queryMetadata.Title)
|
||||
addQuery("Extensions", "."+queryMetadata.Extension)
|
||||
addQuery("Filenames", queryMetadata.Filename)
|
||||
addQuery("Languages", queryMetadata.Language)
|
||||
addQuery("Topics", queryMetadata.Topic)
|
||||
// Create match phrase queries for each field
|
||||
fields := []struct {
|
||||
field string
|
||||
value string
|
||||
}{
|
||||
{"Username", queryMetadata.All},
|
||||
{"Title", queryMetadata.All},
|
||||
{"Extensions", "." + queryMetadata.All},
|
||||
{"Filenames", queryMetadata.All},
|
||||
{"Languages", queryMetadata.All},
|
||||
{"Topics", queryMetadata.All},
|
||||
}
|
||||
|
||||
for _, f := range fields {
|
||||
q := bleve.NewMatchPhraseQuery(f.value)
|
||||
q.FieldVal = f.field
|
||||
allQueries = append(allQueries, q)
|
||||
}
|
||||
|
||||
// Combine all field queries with OR (disjunction)
|
||||
allDisjunction := bleve.NewDisjunctionQuery(allQueries...)
|
||||
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, allDisjunction)
|
||||
} else {
|
||||
// Original behavior: add each metadata field with AND logic
|
||||
addQuery := func(field, value string) {
|
||||
if value != "" && value != "." {
|
||||
q := bleve.NewMatchPhraseQuery(value)
|
||||
q.FieldVal = field
|
||||
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
|
||||
}
|
||||
}
|
||||
|
||||
addQuery("Username", queryMetadata.Username)
|
||||
addQuery("Title", queryMetadata.Title)
|
||||
addQuery("Extensions", "."+queryMetadata.Extension)
|
||||
addQuery("Filenames", queryMetadata.Filename)
|
||||
addQuery("Languages", queryMetadata.Language)
|
||||
addQuery("Topics", queryMetadata.Topic)
|
||||
}
|
||||
|
||||
languageFacet := bleve.NewFacetRequest("Languages", 10)
|
||||
|
||||
|
||||
162
internal/index/bleve_test.go
Normal file
162
internal/index/bleve_test.go
Normal file
@@ -0,0 +1,162 @@
|
||||
package index
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// setupBleveIndexer creates a new BleveIndexer for testing
|
||||
func setupBleveIndexer(t *testing.T) (*BleveIndexer, func()) {
|
||||
t.Helper()
|
||||
|
||||
// Create a temporary directory for the test index
|
||||
tmpDir, err := os.MkdirTemp("", "bleve-test-*")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp directory: %v", err)
|
||||
}
|
||||
|
||||
indexPath := filepath.Join(tmpDir, "test.index")
|
||||
indexer := NewBleveIndexer(indexPath)
|
||||
|
||||
// Initialize the indexer
|
||||
err = indexer.Init()
|
||||
if err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
t.Fatalf("Failed to initialize BleveIndexer: %v", err)
|
||||
}
|
||||
|
||||
// Store in the global atomicIndexer since Add/Remove use it
|
||||
var idx Indexer = indexer
|
||||
atomicIndexer.Store(&idx)
|
||||
|
||||
// Return cleanup function
|
||||
cleanup := func() {
|
||||
atomicIndexer.Store(nil)
|
||||
indexer.Close()
|
||||
os.RemoveAll(tmpDir)
|
||||
}
|
||||
|
||||
return indexer, cleanup
|
||||
}
|
||||
|
||||
func TestBleveIndexerAddGist(t *testing.T) {
|
||||
indexer, cleanup := setupBleveIndexer(t)
|
||||
defer cleanup()
|
||||
|
||||
testIndexerAddGist(t, indexer)
|
||||
}
|
||||
|
||||
func TestBleveIndexerAllFieldSearch(t *testing.T) {
|
||||
indexer, cleanup := setupBleveIndexer(t)
|
||||
defer cleanup()
|
||||
|
||||
testIndexerAllFieldSearch(t, indexer)
|
||||
}
|
||||
|
||||
func TestBleveIndexerFuzzySearch(t *testing.T) {
|
||||
indexer, cleanup := setupBleveIndexer(t)
|
||||
defer cleanup()
|
||||
|
||||
testIndexerFuzzySearch(t, indexer)
|
||||
}
|
||||
|
||||
func TestBleveIndexerSearchBasic(t *testing.T) {
|
||||
indexer, cleanup := setupBleveIndexer(t)
|
||||
defer cleanup()
|
||||
|
||||
testIndexerSearchBasic(t, indexer)
|
||||
}
|
||||
|
||||
func TestBleveIndexerPagination(t *testing.T) {
|
||||
indexer, cleanup := setupBleveIndexer(t)
|
||||
defer cleanup()
|
||||
|
||||
testIndexerPagination(t, indexer)
|
||||
}
|
||||
|
||||
// TestBleveIndexerInitAndClose tests Bleve-specific initialization and closing
|
||||
func TestBleveIndexerInitAndClose(t *testing.T) {
|
||||
tmpDir, err := os.MkdirTemp("", "bleve-init-test-*")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create temp directory: %v", err)
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
|
||||
indexPath := filepath.Join(tmpDir, "test.index")
|
||||
indexer := NewBleveIndexer(indexPath)
|
||||
|
||||
// Test initialization
|
||||
err = indexer.Init()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to initialize BleveIndexer: %v", err)
|
||||
}
|
||||
|
||||
if indexer.index == nil {
|
||||
t.Fatal("Expected index to be initialized, got nil")
|
||||
}
|
||||
|
||||
// Test closing
|
||||
indexer.Close()
|
||||
|
||||
// Test reopening the same index
|
||||
indexer2 := NewBleveIndexer(indexPath)
|
||||
err = indexer2.Init()
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to reopen BleveIndexer: %v", err)
|
||||
}
|
||||
defer indexer2.Close()
|
||||
|
||||
if indexer2.index == nil {
|
||||
t.Fatal("Expected reopened index to be initialized, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBleveIndexerUnicodeSearch tests that Unicode content can be indexed and searched
|
||||
func TestBleveIndexerUnicodeSearch(t *testing.T) {
|
||||
indexer, cleanup := setupBleveIndexer(t)
|
||||
defer cleanup()
|
||||
|
||||
// Add a gist with Unicode content
|
||||
gist := &Gist{
|
||||
GistID: 100,
|
||||
UserID: 100,
|
||||
Visibility: 0,
|
||||
Username: "testuser",
|
||||
Title: "Unicode Test",
|
||||
Content: "Hello world with unicode characters: café résumé naïve",
|
||||
Filenames: []string{"test.txt"},
|
||||
Extensions: []string{".txt"},
|
||||
Languages: []string{"Text"},
|
||||
Topics: []string{"unicode"},
|
||||
CreatedAt: 1234567890,
|
||||
UpdatedAt: 1234567890,
|
||||
}
|
||||
|
||||
err := indexer.Add(gist)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to add gist: %v", err)
|
||||
}
|
||||
|
||||
// Search for unicode content
|
||||
gistIDs, total, _, err := indexer.Search("café", SearchGistMetadata{}, 100, 1)
|
||||
if err != nil {
|
||||
t.Fatalf("Search failed: %v", err)
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
t.Skip("Unicode search may require specific index configuration")
|
||||
return
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, id := range gistIDs {
|
||||
if id == 100 {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Log("Unicode gist not found in search results, but other results were returned")
|
||||
}
|
||||
}
|
||||
@@ -22,4 +22,5 @@ type SearchGistMetadata struct {
|
||||
Extension string
|
||||
Language string
|
||||
Topic string
|
||||
All string
|
||||
}
|
||||
|
||||
1619
internal/index/indexer_test.go
Normal file
1619
internal/index/indexer_test.go
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,6 +2,9 @@ package gist
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/thomiceli/opengist/internal/db"
|
||||
"github.com/thomiceli/opengist/internal/index"
|
||||
@@ -9,8 +12,6 @@ import (
|
||||
"github.com/thomiceli/opengist/internal/web/context"
|
||||
"github.com/thomiceli/opengist/internal/web/handlers"
|
||||
"gorm.io/gorm"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func AllGists(ctx *context.Context) error {
|
||||
@@ -188,6 +189,7 @@ func Search(ctx *context.Context) error {
|
||||
Extension: meta["extension"],
|
||||
Language: meta["language"],
|
||||
Topic: meta["topic"],
|
||||
All: meta["all"],
|
||||
}, currentUserId, pageInt)
|
||||
if err != nil {
|
||||
return ctx.ErrorRes(500, "Error searching gists", err)
|
||||
|
||||
Reference in New Issue
Block a user