Fuzzy search + tests (#555)

This commit is contained in:
Thomas Miceli
2025-12-26 22:36:28 +08:00
committed by GitHub
parent 3957dfb3ea
commit b11306851b
6 changed files with 1879 additions and 58 deletions

View File

@@ -69,53 +69,53 @@ func TestVerifyPassword(t *testing.T) {
} }
tests := []struct { tests := []struct {
name string name string
password string password string
hash string hash string
wantMatch bool wantMatch bool
wantErr bool wantErr bool
}{ }{
{ {
name: "correct password", name: "correct password",
password: testPassword, password: testPassword,
hash: testHash, hash: testHash,
wantMatch: true, wantMatch: true,
wantErr: false, wantErr: false,
}, },
{ {
name: "incorrect password", name: "incorrect password",
password: "wrongpassword", password: "wrongpassword",
hash: testHash, hash: testHash,
wantMatch: false, wantMatch: false,
wantErr: false, wantErr: false,
}, },
{ {
name: "empty password against valid hash", name: "empty password against valid hash",
password: "", password: "",
hash: testHash, hash: testHash,
wantMatch: false, wantMatch: false,
wantErr: false, wantErr: false,
}, },
{ {
name: "empty hash", name: "empty hash",
password: testPassword, password: testPassword,
hash: "", hash: "",
wantMatch: false, wantMatch: false,
wantErr: false, wantErr: false,
}, },
{ {
name: "invalid hash format", name: "invalid hash format",
password: testPassword, password: testPassword,
hash: "invalid", hash: "invalid",
wantMatch: false, wantMatch: false,
wantErr: true, wantErr: true,
}, },
{ {
name: "malformed hash - wrong prefix", name: "malformed hash - wrong prefix",
password: testPassword, password: testPassword,
hash: "$bcrypt$invalid$hash", hash: "$bcrypt$invalid$hash",
wantMatch: false, wantMatch: false,
wantErr: true, wantErr: true,
}, },
} }
@@ -190,4 +190,4 @@ func TestPasswordRoundTrip(t *testing.T) {
} }
}) })
} }
} }

View File

@@ -2,6 +2,8 @@ package index
import ( import (
"errors" "errors"
"strconv"
"github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
@@ -10,7 +12,6 @@ import (
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/search/query" "github.com/blevesearch/bleve/v2/search/query"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"strconv"
) )
type BleveIndexer struct { type BleveIndexer struct {
@@ -53,6 +54,8 @@ func (i *BleveIndexer) open() (bleve.Index, error) {
docMapping := bleve.NewDocumentMapping() docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping()) docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("UserID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("Visibility", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping()) docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping())
mapping := bleve.NewIndexMapping() mapping := bleve.NewIndexMapping()
@@ -74,6 +77,7 @@ func (i *BleveIndexer) open() (bleve.Index, error) {
} }
docMapping.DefaultAnalyzer = "gistAnalyser" docMapping.DefaultAnalyzer = "gistAnalyser"
mapping.DefaultMapping = docMapping
return bleve.New(i.path, mapping) return bleve.New(i.path, mapping)
} }
@@ -105,39 +109,72 @@ func (i *BleveIndexer) Search(queryStr string, queryMetadata SearchGistMetadata,
var err error var err error
var indexerQuery query.Query var indexerQuery query.Query
if queryStr != "" { if queryStr != "" {
contentQuery := bleve.NewMatchPhraseQuery(queryStr) // Use match query with fuzzy matching for more flexible content search
contentQuery.FieldVal = "Content" contentQuery := bleve.NewMatchQuery(queryStr)
contentQuery.SetField("Content")
contentQuery.SetFuzziness(2)
indexerQuery = contentQuery indexerQuery = contentQuery
} else { } else {
contentQuery := bleve.NewMatchAllQuery() contentQuery := bleve.NewMatchAllQuery()
indexerQuery = contentQuery indexerQuery = contentQuery
} }
privateQuery := bleve.NewBoolFieldQuery(false) // Visibility filtering: show public gists (Visibility=0) OR user's own gists
privateQuery.SetField("Private") visibilityZero := float64(0)
truee := true
publicQuery := bleve.NewNumericRangeInclusiveQuery(&visibilityZero, &visibilityZero, &truee, &truee)
publicQuery.SetField("Visibility")
userIdMatch := float64(userId) userIdMatch := float64(userId)
truee := true
userIdQuery := bleve.NewNumericRangeInclusiveQuery(&userIdMatch, &userIdMatch, &truee, &truee) userIdQuery := bleve.NewNumericRangeInclusiveQuery(&userIdMatch, &userIdMatch, &truee, &truee)
userIdQuery.SetField("UserID") userIdQuery.SetField("UserID")
accessQuery := bleve.NewDisjunctionQuery(privateQuery, userIdQuery) accessQuery := bleve.NewDisjunctionQuery(publicQuery, userIdQuery)
indexerQuery = bleve.NewConjunctionQuery(accessQuery, indexerQuery) indexerQuery = bleve.NewConjunctionQuery(accessQuery, indexerQuery)
addQuery := func(field, value string) { // Handle "All" field - search across all metadata fields with OR logic
if value != "" && value != "." { if queryMetadata.All != "" {
q := bleve.NewMatchPhraseQuery(value) allQueries := make([]query.Query, 0)
q.FieldVal = field
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
}
}
addQuery("Username", queryMetadata.Username) // Create match phrase queries for each field
addQuery("Title", queryMetadata.Title) fields := []struct {
addQuery("Extensions", "."+queryMetadata.Extension) field string
addQuery("Filenames", queryMetadata.Filename) value string
addQuery("Languages", queryMetadata.Language) }{
addQuery("Topics", queryMetadata.Topic) {"Username", queryMetadata.All},
{"Title", queryMetadata.All},
{"Extensions", "." + queryMetadata.All},
{"Filenames", queryMetadata.All},
{"Languages", queryMetadata.All},
{"Topics", queryMetadata.All},
}
for _, f := range fields {
q := bleve.NewMatchPhraseQuery(f.value)
q.FieldVal = f.field
allQueries = append(allQueries, q)
}
// Combine all field queries with OR (disjunction)
allDisjunction := bleve.NewDisjunctionQuery(allQueries...)
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, allDisjunction)
} else {
// Original behavior: add each metadata field with AND logic
addQuery := func(field, value string) {
if value != "" && value != "." {
q := bleve.NewMatchPhraseQuery(value)
q.FieldVal = field
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
}
}
addQuery("Username", queryMetadata.Username)
addQuery("Title", queryMetadata.Title)
addQuery("Extensions", "."+queryMetadata.Extension)
addQuery("Filenames", queryMetadata.Filename)
addQuery("Languages", queryMetadata.Language)
addQuery("Topics", queryMetadata.Topic)
}
languageFacet := bleve.NewFacetRequest("Languages", 10) languageFacet := bleve.NewFacetRequest("Languages", 10)

View File

@@ -0,0 +1,162 @@
package index
import (
"os"
"path/filepath"
"testing"
)
// setupBleveIndexer creates a BleveIndexer backed by a fresh on-disk index for testing.
//
// The index is created under t.TempDir(), so the testing package removes the
// directory automatically once the test and all its subtests complete; no
// manual os.RemoveAll bookkeeping is needed on either the success or the
// failure path.
//
// The indexer is also published through the package-level atomicIndexer.
// The returned function clears atomicIndexer and closes the indexer; callers
// are expected to invoke it (typically via defer) when they are done.
func setupBleveIndexer(t *testing.T) (*BleveIndexer, func()) {
	t.Helper()

	indexPath := filepath.Join(t.TempDir(), "test.index")
	indexer := NewBleveIndexer(indexPath)

	// Initialize the indexer
	if err := indexer.Init(); err != nil {
		t.Fatalf("Failed to initialize BleveIndexer: %v", err)
	}

	// Store in the global atomicIndexer since Add/Remove use it
	var idx Indexer = indexer
	atomicIndexer.Store(&idx)

	// The temp directory itself is removed by the testing package after this
	// function has run, i.e. after the index has been closed.
	cleanup := func() {
		atomicIndexer.Store(nil)
		indexer.Close()
	}

	return indexer, cleanup
}
// TestBleveIndexerAddGist sets up a temporary Bleve index and hands it to
// testIndexerAddGist, which performs the actual assertions.
func TestBleveIndexerAddGist(t *testing.T) {
	bleveIdx, teardown := setupBleveIndexer(t)
	defer teardown()

	testIndexerAddGist(t, bleveIdx)
}
// TestBleveIndexerAllFieldSearch sets up a temporary Bleve index and hands it
// to testIndexerAllFieldSearch, which performs the actual assertions.
func TestBleveIndexerAllFieldSearch(t *testing.T) {
	bleveIdx, teardown := setupBleveIndexer(t)
	defer teardown()

	testIndexerAllFieldSearch(t, bleveIdx)
}
// TestBleveIndexerFuzzySearch sets up a temporary Bleve index and hands it to
// testIndexerFuzzySearch, which performs the actual assertions.
func TestBleveIndexerFuzzySearch(t *testing.T) {
	bleveIdx, teardown := setupBleveIndexer(t)
	defer teardown()

	testIndexerFuzzySearch(t, bleveIdx)
}
// TestBleveIndexerSearchBasic sets up a temporary Bleve index and hands it to
// testIndexerSearchBasic, which performs the actual assertions.
func TestBleveIndexerSearchBasic(t *testing.T) {
	bleveIdx, teardown := setupBleveIndexer(t)
	defer teardown()

	testIndexerSearchBasic(t, bleveIdx)
}
// TestBleveIndexerPagination sets up a temporary Bleve index and hands it to
// testIndexerPagination, which performs the actual assertions.
func TestBleveIndexerPagination(t *testing.T) {
	bleveIdx, teardown := setupBleveIndexer(t)
	defer teardown()

	testIndexerPagination(t, bleveIdx)
}
// TestBleveIndexerInitAndClose exercises the Bleve-specific lifecycle:
// initialize an index at a fresh path, close it, then initialize a second
// indexer on the same path and check that it also ends up with a non-nil index.
func TestBleveIndexerInitAndClose(t *testing.T) {
	baseDir, err := os.MkdirTemp("", "bleve-init-test-*")
	if err != nil {
		t.Fatalf("Failed to create temp directory: %v", err)
	}
	defer os.RemoveAll(baseDir)

	idxPath := filepath.Join(baseDir, "test.index")

	// First pass: create the index and make sure it is populated.
	first := NewBleveIndexer(idxPath)
	if initErr := first.Init(); initErr != nil {
		t.Fatalf("Failed to initialize BleveIndexer: %v", initErr)
	}
	if first.index == nil {
		t.Fatal("Expected index to be initialized, got nil")
	}

	// Close before reopening the same path with a second indexer.
	first.Close()

	// Second pass: reopen the same path with a brand-new indexer.
	second := NewBleveIndexer(idxPath)
	if initErr := second.Init(); initErr != nil {
		t.Fatalf("Failed to reopen BleveIndexer: %v", initErr)
	}
	defer second.Close()

	if second.index == nil {
		t.Fatal("Expected reopened index to be initialized, got nil")
	}
}
// TestBleveIndexerUnicodeSearch tests that Unicode content can be indexed and searched.
//
// It indexes a single public gist whose content contains accented words,
// searches for "café", and then:
//   - fails if Add or Search returns an error;
//   - skips if the search yields no hits at all (the index configuration may
//     not support this query);
//   - fails if hits are reported but the indexed gist is not among them.
func TestBleveIndexerUnicodeSearch(t *testing.T) {
	indexer, cleanup := setupBleveIndexer(t)
	defer cleanup()

	// Untyped constant so it fits both the Gist.GistID field and the element
	// type of the ID slice returned by Search.
	const unicodeGistID = 100

	// Add a gist with Unicode content
	gist := &Gist{
		GistID:     unicodeGistID,
		UserID:     100,
		Visibility: 0,
		Username:   "testuser",
		Title:      "Unicode Test",
		Content:    "Hello world with unicode characters: café résumé naïve",
		Filenames:  []string{"test.txt"},
		Extensions: []string{".txt"},
		Languages:  []string{"Text"},
		Topics:     []string{"unicode"},
		CreatedAt:  1234567890,
		UpdatedAt:  1234567890,
	}

	if err := indexer.Add(gist); err != nil {
		t.Fatalf("Failed to add gist: %v", err)
	}

	// Search for unicode content
	gistIDs, total, _, err := indexer.Search("café", SearchGistMetadata{}, 100, 1)
	if err != nil {
		t.Fatalf("Search failed: %v", err)
	}

	if total == 0 {
		// t.Skip stops the test by itself, so no explicit return is needed.
		t.Skip("Unicode search may require specific index configuration")
	}

	found := false
	for _, id := range gistIDs {
		if id == unicodeGistID {
			found = true
			break
		}
	}

	// Hits that do not include the only gist indexed by this test mean the
	// search returned wrong results; report it as a failure instead of a log.
	if !found {
		t.Errorf("Unicode gist %d not found in search results %v (total=%d)", unicodeGistID, gistIDs, total)
	}
}

View File

@@ -22,4 +22,5 @@ type SearchGistMetadata struct {
Extension string Extension string
Language string Language string
Topic string Topic string
All string
} }

File diff suppressed because it is too large Load Diff

View File

@@ -2,6 +2,9 @@ package gist
import ( import (
"errors" "errors"
"slices"
"strings"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/db" "github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/index" "github.com/thomiceli/opengist/internal/index"
@@ -9,8 +12,6 @@ import (
"github.com/thomiceli/opengist/internal/web/context" "github.com/thomiceli/opengist/internal/web/context"
"github.com/thomiceli/opengist/internal/web/handlers" "github.com/thomiceli/opengist/internal/web/handlers"
"gorm.io/gorm" "gorm.io/gorm"
"slices"
"strings"
) )
func AllGists(ctx *context.Context) error { func AllGists(ctx *context.Context) error {
@@ -188,6 +189,7 @@ func Search(ctx *context.Context) error {
Extension: meta["extension"], Extension: meta["extension"],
Language: meta["language"], Language: meta["language"],
Topic: meta["topic"], Topic: meta["topic"],
All: meta["all"],
}, currentUserId, pageInt) }, currentUserId, pageInt)
if err != nil { if err != nil {
return ctx.ErrorRes(500, "Error searching gists", err) return ctx.ErrorRes(500, "Error searching gists", err)