diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index bf0a660..e8743bd 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -83,6 +83,18 @@ jobs: --health-interval 10s --health-timeout 5s --health-retries 5 + meilisearch: + image: getmeili/meilisearch:latest + ports: + - 47700:7700 + env: + MEILI_NO_ANALYTICS: true + MEILI_ENV: development + options: >- + --health-cmd "curl -sf http://localhost:7700/health" + --health-interval 10s + --health-timeout 5s + --health-retries 5 steps: - name: Checkout uses: actions/checkout@v6 @@ -94,6 +106,8 @@ jobs: - name: Run tests run: make test TEST_DB_TYPE=${{ matrix.database }} + env: + OG_TEST_MEILI_HOST: http://localhost:47700 test: name: Test diff --git a/internal/index/bleve.go b/internal/index/bleve.go index adb9171..98fbab5 100644 --- a/internal/index/bleve.go +++ b/internal/index/bleve.go @@ -10,6 +10,7 @@ import ( "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/token/camelcase" + "github.com/blevesearch/bleve/v2/analysis/token/length" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" @@ -56,14 +57,9 @@ func (i *BleveIndexer) open() (bleve.Index, error) { return nil, err } - docMapping := bleve.NewDocumentMapping() - docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping()) - docMapping.AddFieldMappingsAt("UserID", bleve.NewNumericFieldMapping()) - docMapping.AddFieldMappingsAt("Visibility", bleve.NewNumericFieldMapping()) - docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping()) - mapping := bleve.NewIndexMapping() + // Token filters if err = mapping.AddCustomTokenFilter("unicodeNormalize", map[string]any{ "type": unicodenorm.Name, "form": unicodenorm.NFC, @@ -71,16 +67,74 @@ func (i *BleveIndexer) open() (bleve.Index, error) { return nil, err } - if err = mapping.AddCustomAnalyzer("gistAnalyser", map[string]interface{}{ - "type": custom.Name, - "char_filters": []string{}, - "tokenizer": unicode.Name, - "token_filters": []string{"unicodeNormalize", camelcase.Name, lowercase.Name}, + if err = mapping.AddCustomTokenFilter("lengthMin2", map[string]interface{}{ + "type": length.Name, + "min": 2.0, }); err != nil { return nil, err } - docMapping.DefaultAnalyzer = "gistAnalyser" + // Analyzer: split mode (camelCase splitting for partial search) + // "CPUCard" -> ["cpu", "card"] + if err = mapping.AddCustomAnalyzer("codeSplit", map[string]interface{}{ + "type": custom.Name, + "char_filters": []string{}, + "tokenizer": unicode.Name, + "token_filters": []string{"unicodeNormalize", camelcase.Name, lowercase.Name, "lengthMin2"}, + }); err != nil { + return nil, err + } + + // Analyzer: exact mode (no camelCase splitting for full-word search) + // "CPUCard" -> ["cpucard"] + if err = mapping.AddCustomAnalyzer("codeExact", map[string]interface{}{ + "type": custom.Name, + "char_filters": []string{}, + "tokenizer": unicode.Name, + "token_filters": []string{"unicodeNormalize", lowercase.Name}, + }); err != nil { + return nil, err + } + + // Analyzer: keyword with lowercase (for Languages - single token, no splitting) + if err = mapping.AddCustomAnalyzer("lowercaseKeyword", map[string]interface{}{ + "type": custom.Name, + "char_filters": []string{}, + "tokenizer": "single", + "token_filters": []string{lowercase.Name}, + }); err != nil { + return nil, err + } + + // Document mapping + docMapping := bleve.NewDocumentMapping() + docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping()) + docMapping.AddFieldMappingsAt("UserID", bleve.NewNumericFieldMapping()) + docMapping.AddFieldMappingsAt("Visibility", bleve.NewNumericFieldMapping()) + + // Content: dual indexing (exact + split) + // "Content" uses the property name so Bleve resolves its analyzer correctly + contentExact := bleve.NewTextFieldMapping() + contentExact.Name = "Content" + contentExact.Analyzer = "codeExact" + contentExact.Store = false + contentExact.IncludeTermVectors = true + + contentSplit := bleve.NewTextFieldMapping() + contentSplit.Name = "ContentSplit" + contentSplit.Analyzer = "codeSplit" + contentSplit.Store = false + contentSplit.IncludeTermVectors = true + + docMapping.AddFieldMappingsAt("Content", contentExact, contentSplit) + + // Languages: keyword analyzer (preserves as single token) + languageFieldMapping := bleve.NewTextFieldMapping() + languageFieldMapping.Analyzer = "lowercaseKeyword" + docMapping.AddFieldMappingsAt("Languages", languageFieldMapping) + + // All other text fields use codeSplit as default + docMapping.DefaultAnalyzer = "codeSplit" mapping.DefaultMapping = docMapping return bleve.New(i.path, mapping) @@ -154,23 +208,74 @@ func (i *BleveIndexer) Search(metadata SearchGistMetadata, userId uint, page int } } - // Exact+fuzzy query factory: exact match is boosted so it ranks above fuzzy-only matches - factoryFuzzyQuery := func(field, value string) query.Query { + // Query factory for text fields: exact match boosted + match query + prefix + factoryTextQuery := func(field, value string) query.Query { exact := bleve.NewMatchPhraseQuery(value) exact.SetField(field) exact.SetBoost(2.0) fuzzy := bleve.NewMatchQuery(value) fuzzy.SetField(field) - fuzzy.SetFuzziness(2) + fuzzy.SetFuzziness(1) + fuzzy.SetOperator(query.MatchQueryOperatorAnd) - return bleve.NewDisjunctionQuery(exact, fuzzy) + queries := []query.Query{exact, fuzzy} + + if len([]rune(value)) >= 2 { + prefix := bleve.NewPrefixQuery(strings.ToLower(value)) + prefix.SetField(field) + prefix.SetBoost(1.5) + queries = append(queries, prefix) + } + + if len([]rune(value)) >= 4 { + wildcard := bleve.NewWildcardQuery("*" + strings.ToLower(value) + "*") + wildcard.SetField(field) + wildcard.SetBoost(0.5) + queries = append(queries, wildcard) + } + + return bleve.NewDisjunctionQuery(queries...) } - // Exact+fuzzy search - addFuzzy := func(field, value string) { + // Query factory for Content: searches both exact (Content) and split (ContentSplit) fields + factoryContentQuery := func(value string) query.Query { + // Exact field (no camelCase split): matches "cpucard" + exactMatch := bleve.NewMatchQuery(value) + exactMatch.SetField("Content") + exactMatch.SetOperator(query.MatchQueryOperatorAnd) + exactMatch.SetBoost(2.0) + + // Split field (camelCase split): matches "cpu", "card" + splitMatch := bleve.NewMatchQuery(value) + splitMatch.SetField("ContentSplit") + splitMatch.SetFuzziness(1) + splitMatch.SetOperator(query.MatchQueryOperatorAnd) + splitMatch.SetBoost(1.0) + + queries := []query.Query{exactMatch, splitMatch} + + if len([]rune(value)) >= 2 { + prefix := bleve.NewPrefixQuery(strings.ToLower(value)) + prefix.SetField("Content") + prefix.SetBoost(1.5) + queries = append(queries, prefix) + } + + if len([]rune(value)) >= 4 { + wildcard := bleve.NewWildcardQuery("*" + strings.ToLower(value) + "*") + wildcard.SetField("Content") + wildcard.SetBoost(0.5) + queries = append(queries, wildcard) + } + + return bleve.NewDisjunctionQuery(queries...) + } + + // Text field search + addTextQuery := func(field, value string) { if value != "" && value != "." { - indexerQuery = bleve.NewConjunctionQuery(indexerQuery, factoryFuzzyQuery(field, value)) + indexerQuery = bleve.NewConjunctionQuery(indexerQuery, factoryTextQuery(field, value)) } } @@ -189,8 +294,10 @@ func (i *BleveIndexer) Search(metadata SearchGistMetadata, userId uint, page int buildFieldQuery := func(field, value string) query.Query { switch field { - case "Title", "Description", "Filenames", "Content": - return factoryFuzzyQuery(field, value) + case "Content": + return factoryContentQuery(value) + case "Title", "Description", "Filenames": + return factoryTextQuery(field, value) case "Extensions": return factoryQuery(field, "."+value) default: // Username, Languages, Topics @@ -208,13 +315,15 @@ func (i *BleveIndexer) Search(metadata SearchGistMetadata, userId uint, page int } else { // Original behavior: add each metadata field with AND logic addQuery("Username", metadata.Username) - addFuzzy("Title", metadata.Title) - addFuzzy("Description", metadata.Description) + addTextQuery("Title", metadata.Title) + addTextQuery("Description", metadata.Description) addQuery("Extensions", "."+metadata.Extension) - addFuzzy("Filenames", metadata.Filename) + addTextQuery("Filenames", metadata.Filename) addQuery("Languages", metadata.Language) addQuery("Topics", metadata.Topic) - addFuzzy("Content", metadata.Content) + if metadata.Content != "" { + indexerQuery = bleve.NewConjunctionQuery(indexerQuery, factoryContentQuery(metadata.Content)) + } // Handle default search fields from config with OR logic if metadata.Default != "" { diff --git a/internal/index/bleve_test.go b/internal/index/bleve_test.go index f07b5bb..266ed9d 100644 --- a/internal/index/bleve_test.go +++ b/internal/index/bleve_test.go @@ -4,33 +4,31 @@ import ( "os" "path/filepath" "testing" + + "github.com/rs/zerolog" + "github.com/stretchr/testify/require" ) // setupBleveIndexer creates a new BleveIndexer for testing -func setupBleveIndexer(t *testing.T) (*BleveIndexer, func()) { +func setupBleveIndexer(t *testing.T) (Indexer, func()) { + zerolog.SetGlobalLevel(zerolog.Disabled) t.Helper() - // Create a temporary directory for the test index tmpDir, err := os.MkdirTemp("", "bleve-test-*") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } + require.NoError(t, err) indexPath := filepath.Join(tmpDir, "test.index") indexer := NewBleveIndexer(indexPath) - // Initialize the indexer err = indexer.Init() if err != nil { os.RemoveAll(tmpDir) t.Fatalf("Failed to initialize BleveIndexer: %v", err) } - // Store in the global atomicIndexer since Add/Remove use it var idx Indexer = indexer atomicIndexer.Store(&idx) - // Return cleanup function cleanup := func() { atomicIndexer.Store(nil) indexer.Close() @@ -40,124 +38,50 @@ func setupBleveIndexer(t *testing.T) (*BleveIndexer, func()) { return indexer, cleanup } -func TestBleveIndexerAddGist(t *testing.T) { - indexer, cleanup := setupBleveIndexer(t) - defer cleanup() +func TestBleveAddAndSearch(t *testing.T) { testAddAndSearch(t, setupBleveIndexer) } +func TestBleveAccessControl(t *testing.T) { testAccessControl(t, setupBleveIndexer) } +func TestBleveMetadataFilters(t *testing.T) { testMetadataFilters(t, setupBleveIndexer) } +func TestBleveAllFieldSearch(t *testing.T) { testAllFieldSearch(t, setupBleveIndexer) } +func TestBleveFuzzySearch(t *testing.T) { testFuzzySearch(t, setupBleveIndexer) } +func TestBleveContentSearch(t *testing.T) { testContentSearch(t, setupBleveIndexer) } +func TestBlevePagination(t *testing.T) { testPagination(t, setupBleveIndexer) } +func TestBleveLanguageFacets(t *testing.T) { testLanguageFacets(t, setupBleveIndexer) } +func TestBleveWildcardSearch(t *testing.T) { testWildcardSearch(t, setupBleveIndexer) } +func TestBleveMetadataOnlySearch(t *testing.T) { testMetadataOnlySearch(t, setupBleveIndexer) } +func TestBleveTitleFuzzySearch(t *testing.T) { testTitleFuzzySearch(t, setupBleveIndexer) } +func TestBleveMultiLanguageFacets(t *testing.T) { testMultiLanguageFacets(t, setupBleveIndexer) } - testIndexerAddGist(t, indexer) -} - -func TestBleveIndexerAllFieldSearch(t *testing.T) { - indexer, cleanup := setupBleveIndexer(t) - defer cleanup() - - testIndexerAllFieldSearch(t, indexer) -} - -func TestBleveIndexerFuzzySearch(t *testing.T) { - indexer, cleanup := setupBleveIndexer(t) - defer cleanup() - - testIndexerFuzzySearch(t, indexer) -} - -func TestBleveIndexerSearchBasic(t *testing.T) { - indexer, cleanup := setupBleveIndexer(t) - defer cleanup() - - testIndexerSearchBasic(t, indexer) -} - -func TestBleveIndexerPagination(t *testing.T) { - indexer, cleanup := setupBleveIndexer(t) - defer cleanup() - - testIndexerPagination(t, indexer) -} - -// TestBleveIndexerInitAndClose tests Bleve-specific initialization and closing -func TestBleveIndexerInitAndClose(t *testing.T) { - tmpDir, err := os.MkdirTemp("", "bleve-init-test-*") - if err != nil { - t.Fatalf("Failed to create temp directory: %v", err) - } +func TestBlevePersistence(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "bleve-persist-test-*") + require.NoError(t, err) defer os.RemoveAll(tmpDir) indexPath := filepath.Join(tmpDir, "test.index") - indexer := NewBleveIndexer(indexPath) - // Test initialization - err = indexer.Init() - if err != nil { - t.Fatalf("Failed to initialize BleveIndexer: %v", err) - } + // Create and populate index + indexer1 := NewBleveIndexer(indexPath) + require.NoError(t, indexer1.Init()) - if indexer.index == nil { - t.Fatal("Expected index to be initialized, got nil") - } + var idx Indexer = indexer1 + atomicIndexer.Store(&idx) - // Test closing - indexer.Close() + g := newGist(1, 1, 0, "persistent data survives restart") + require.NoError(t, indexer1.Add(g)) - // Test reopening the same index + indexer1.Close() + atomicIndexer.Store(nil) + + // Reopen at same path indexer2 := NewBleveIndexer(indexPath) - err = indexer2.Init() - if err != nil { - t.Fatalf("Failed to reopen BleveIndexer: %v", err) - } + require.NoError(t, indexer2.Init()) defer indexer2.Close() - if indexer2.index == nil { - t.Fatal("Expected reopened index to be initialized, got nil") - } -} - -// TestBleveIndexerUnicodeSearch tests that Unicode content can be indexed and searched -func TestBleveIndexerUnicodeSearch(t *testing.T) { - indexer, cleanup := setupBleveIndexer(t) - defer cleanup() - - // Add a gist with Unicode content - gist := &Gist{ - GistID: 100, - UserID: 100, - Visibility: 0, - Username: "testuser", - Title: "Unicode Test", - Description: "Descrition with Unicode characters: Café résumé naive", - Content: "Hello world with unicode characters: café résumé naïve", - Filenames: []string{"test.txt"}, - Extensions: []string{".txt"}, - Languages: []string{"Text"}, - Topics: []string{"unicode"}, - CreatedAt: 1234567890, - UpdatedAt: 1234567890, - } - - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add gist: %v", err) - } - - // Search for unicode content - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "café"}, 100, 1) - if err != nil { - t.Fatalf("Search failed: %v", err) - } - - if total == 0 { - t.Skip("Unicode search may require specific index configuration") - return - } - - found := false - for _, id := range gistIDs { - if id == 100 { - found = true - break - } - } - if !found { - t.Log("Unicode gist not found in search results, but other results were returned") - } + idx = indexer2 + atomicIndexer.Store(&idx) + defer atomicIndexer.Store(nil) + + ids, total, _, err := indexer2.Search(SearchGistMetadata{Content: "persistent"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "data should survive close+reopen") + require.Equal(t, uint(1), ids[0]) } diff --git a/internal/index/indexer_test.go b/internal/index/indexer_test.go index fe2a515..e63e07d 100644 --- a/internal/index/indexer_test.go +++ b/internal/index/indexer_test.go @@ -3,1598 +3,779 @@ package index import ( "fmt" "testing" + + "github.com/stretchr/testify/require" ) -// initTestGists initializes the indexer with 1000 test gists -func initTestGists(t *testing.T, indexer Indexer) { - t.Helper() - - languages := []string{"Go", "Python", "JavaScript"} - extensions := []string{"go", "py", "js"} - usernames := []string{"alice", "bob", "charlie"} - topics := []string{"algorithms", "web", "database"} - - for i := 0; i < 1000; i++ { - langIdx := i % len(languages) // cycles 0,1,2,0,1,2,... - userIdx := i % len(usernames) // cycles 0,1,2,0,1,2,... - topicIdx := i % len(topics) // cycles 0,1,2,0,1,2,... - gistID := uint(i + 1) // GistIDs start at 1 - visibility := uint(i % 3) // cycles 0,1,2,0,1,2,... - - gist := &Gist{ - GistID: gistID, - UserID: uint(userIdx + 1), // alice=1, bob=2, charlie=3 - Visibility: visibility, - Username: usernames[userIdx], - Title: fmt.Sprintf("Test Gist %d", gistID), - Content: fmt.Sprintf("This is test gist number %d with some searchable content", gistID), - Filenames: []string{fmt.Sprintf("file%d.%s", gistID, extensions[langIdx])}, - Extensions: []string{extensions[langIdx]}, - Languages: []string{languages[langIdx]}, - Topics: []string{topics[topicIdx]}, - CreatedAt: 1234567890 + int64(gistID), - UpdatedAt: 1234567890 + int64(gistID), - } - - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to initialize test gist %d: %v", gistID, err) - } +// newGist creates a Gist with sensible defaults for testing. +func newGist(id uint, userID uint, visibility uint, content string) *Gist { + return &Gist{ + GistID: id, + UserID: userID, + Visibility: visibility, + Username: fmt.Sprintf("user%d", userID), + Title: fmt.Sprintf("Gist %d", id), + Content: content, + Filenames: []string{"file.txt"}, + Extensions: []string{"txt"}, + Languages: []string{"Text"}, + Topics: []string{}, + CreatedAt: 1234567890, + UpdatedAt: 1234567890, } } -// testIndexerAddGist tests adding a gist to the index with comprehensive edge cases -func testIndexerAddGist(t *testing.T, indexer Indexer) { - t.Helper() - initTestGists(t, indexer) +func testAddAndSearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + t.Run("add and search by content", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() - // Test 1: Add basic gist with multiple files - t.Run("AddBasicGist", func(t *testing.T) { - gist := &Gist{ - GistID: 1001, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Test Gist", - Content: "This is a test gist with some content", - Filenames: []string{"test.go", "readme.md"}, - Extensions: []string{"go", "md"}, - Languages: []string{"Go", "Markdown"}, - Topics: []string{"testing"}, - CreatedAt: 1234567890, - UpdatedAt: 1234567890, - } + g := newGist(1, 1, 0, "the quick brown fox jumps over the lazy dog") + require.NoError(t, indexer.Add(g)) - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add gist to index: %v", err) - } - - // Verify gist is searchable - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "test gist"}, 11, 1) - if err != nil { - t.Fatalf("Search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find the added gist") - } - found := false - for _, id := range gistIDs { - if id == 1001 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 1001 in search results") - } + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "fox"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) }) - // Test 2: Add gist and search by language - t.Run("AddAndSearchByLanguage", func(t *testing.T) { - gist := &Gist{ - GistID: 1002, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Rust Example", - Content: "fn main() { println!(\"Hello\"); }", - Filenames: []string{"main.rs"}, - Extensions: []string{"rs"}, - Languages: []string{"Rust"}, - Topics: []string{"systems"}, - CreatedAt: 1234567891, - UpdatedAt: 1234567891, - } + t.Run("add nil gist returns error", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add Rust gist: %v", err) - } - - // Search by Rust language - metadata := SearchGistMetadata{Language: "Rust"} - gistIDs, total, _, err := indexer.Search(metadata, 11, 1) - if err != nil { - t.Fatalf("Search by Rust language failed: %v", err) - } - if total == 0 { - t.Error("Expected to find Rust gist") - } - found := false - for _, id := range gistIDs { - if id == 1002 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 1002 in Rust search results") - } + require.Error(t, indexer.Add(nil)) }) - // Test 3: Add gist with special characters and unicode - t.Run("AddGistWithUnicode", func(t *testing.T) { - gist := &Gist{ - GistID: 1003, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Unicode Test: café résumé naïve", - Content: "Special chars: @#$%^&*() and unicode: 你好世界 مرحبا العالم", - Filenames: []string{"unicode.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"unicode", "i18n"}, - CreatedAt: 1234567892, - UpdatedAt: 1234567892, - } + t.Run("add then remove", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add unicode gist: %v", err) - } + g := newGist(1, 1, 0, "removable content here") + require.NoError(t, indexer.Add(g)) - // Search for unicode content - _, total, _, err := indexer.Search(SearchGistMetadata{All: "café"}, 11, 1) - if err != nil { - t.Fatalf("Search for unicode failed: %v", err) - } - // Note: Unicode search support may vary by indexer - if total > 0 { - t.Logf("Unicode search returned %d results", total) - } + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "removable"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + + require.NoError(t, indexer.Remove(1)) + + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "removable"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total) }) - // Test 4: Add gist with different visibility levels - t.Run("AddGistPrivate", func(t *testing.T) { - privateGist := &Gist{ - GistID: 1004, - UserID: 11, - Visibility: 1, - Username: "testuser", - Title: "Private Gist", - Content: "This is a private gist", - Filenames: []string{"private.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"private"}, - CreatedAt: 1234567893, - UpdatedAt: 1234567893, - } + t.Run("update gist replaces content", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() - err := indexer.Add(privateGist) - if err != nil { - t.Fatalf("Failed to add private gist: %v", err) - } + g := newGist(1, 1, 0, "original content alpha") + require.NoError(t, indexer.Add(g)) - // User 11 should see their own private gist - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "private gist"}, 11, 1) - if err != nil { - t.Fatalf("Search for private gist as owner failed: %v", err) - } - found := false - for _, id := range gistIDs { - if id == 1004 { - found = true - break - } - } - if !found && total > 0 { - t.Error("Expected owner to find their private gist") - } + g2 := newGist(1, 1, 0, "updated content beta") + require.NoError(t, indexer.Add(g2)) - // User 999 should NOT see user 11's private gist - gistIDs2, _, _, err := indexer.Search(SearchGistMetadata{All: "private gist"}, 999, 1) - if err != nil { - t.Fatalf("Search for private gist as other user failed: %v", err) - } - for _, id := range gistIDs2 { - if id == 1004 { - t.Error("Other user should not see private gist") - } - } + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "alpha"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total, "old content should not be found") + + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "beta"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) }) - // Test 5: Add gist with empty optional fields - t.Run("AddGistMinimalFields", func(t *testing.T) { - gist := &Gist{ - GistID: 1005, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "", - Content: "Minimal content", + t.Run("gist with empty optional fields still searchable by content", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() + + g := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "user1", Title: "", + Content: "searchable bare content", Filenames: []string{}, Extensions: []string{}, Languages: []string{}, Topics: []string{}, - CreatedAt: 1234567894, - UpdatedAt: 1234567894, + CreatedAt: 1234567890, UpdatedAt: 1234567890, } + require.NoError(t, indexer.Add(g)) - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add minimal gist: %v", err) - } - - // Should still be searchable by content - _, total, _, err := indexer.Search(SearchGistMetadata{All: "Minimal"}, 11, 1) - if err != nil { - t.Fatalf("Search for minimal gist failed: %v", err) - } - if total == 0 { - t.Error("Expected to find minimal gist by content") - } - }) - - // Test 6: Update existing gist (same GistID) - t.Run("UpdateExistingGist", func(t *testing.T) { - originalGist := &Gist{ - GistID: 1006, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Original Title", - Content: "Original content", - Filenames: []string{"original.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"original"}, - CreatedAt: 1234567895, - UpdatedAt: 1234567895, - } - - err := indexer.Add(originalGist) - if err != nil { - t.Fatalf("Failed to add original gist: %v", err) - } - - // Update with same GistID - updatedGist := &Gist{ - GistID: 1006, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Updated Title", - Content: "Updated content with new information", - Filenames: []string{"updated.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"updated"}, - CreatedAt: 1234567895, - UpdatedAt: 1234567900, - } - - err = indexer.Add(updatedGist) - if err != nil { - t.Fatalf("Failed to update gist: %v", err) - } - - // Search should find updated content, not original - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "new information"}, 11, 1) - if err != nil { - t.Fatalf("Search for updated content failed: %v", err) - } - found := false - for _, id := range gistIDs { - if id == 1006 { - found = true - break - } - } - if !found && total > 0 { - t.Error("Expected to find updated gist by new content") - } - - // Old content should not be found - gistIDsOld, _, _, _ := indexer.Search(SearchGistMetadata{All: "Original"}, 11, 1) - for _, id := range gistIDsOld { - if id == 1006 { - t.Error("Should not find gist by old content after update") - } - } - }) - - // Test 7: Add gist and verify by username filter - t.Run("AddAndSearchByUsername", func(t *testing.T) { - gist := &Gist{ - GistID: 1007, - UserID: 12, - Visibility: 0, - Username: "newuser", - Title: "New User Gist", - Content: "Content from new user", - Filenames: []string{"newuser.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"new"}, - CreatedAt: 1234567896, - UpdatedAt: 1234567896, - } - - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add new user gist: %v", err) - } - - // Search by username - metadata := SearchGistMetadata{Username: "newuser"} - gistIDs, total, _, err := indexer.Search(metadata, 12, 1) - if err != nil { - t.Fatalf("Search by username failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by username filter") - } - found := false - for _, id := range gistIDs { - if id == 1007 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 1007 by username") - } - }) - - // Test 8: Add gist with multiple languages and topics - t.Run("AddGistMultipleTags", func(t *testing.T) { - gist := &Gist{ - GistID: 1008, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Multi-language Project", - Content: "Mixed language project with Go, Python, and JavaScript", - Filenames: []string{"main.go", "script.py", "app.js"}, - Extensions: []string{"go", "py", "js"}, - Languages: []string{"Go", "Python", "JavaScript"}, - Topics: []string{"fullstack", "microservices", "api"}, - CreatedAt: 1234567897, - UpdatedAt: 1234567897, - } - - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add multi-language gist: %v", err) - } - - // Search by one of the topics - metadata := SearchGistMetadata{Topic: "microservices"} - gistIDs, total, _, err := indexer.Search(metadata, 11, 1) - if err != nil { - t.Fatalf("Search by topic failed: %v", err) - } - found := false - for _, id := range gistIDs { - if id == 1008 { - found = true - break - } - } - if !found && total > 0 { - t.Error("Expected to find multi-language gist by topic") - } - }) - - // Test 9: Add gist with long content - t.Run("AddGistLongContent", func(t *testing.T) { - longContent := "" - for i := 0; i < 1000; i++ { - longContent += fmt.Sprintf("Line %d: This is a long gist with lots of content. ", i) - } - - gist := &Gist{ - GistID: 1009, - UserID: 11, - Visibility: 0, - Username: "testuser", - Title: "Long Gist", - Content: longContent, - Filenames: []string{"long.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"large"}, - CreatedAt: 1234567898, - UpdatedAt: 1234567898, - } - - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add long gist: %v", err) - } - - // Search for content from the middle - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "Line 500"}, 11, 1) - if err != nil { - t.Fatalf("Search in long content failed: %v", err) - } - found := false - for _, id := range gistIDs { - if id == 1009 { - found = true - break - } - } - if !found && total > 0 { - t.Error("Expected to find long gist by content in the middle") - } + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "searchable"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) } -// testIndexerSearchBasic tests basic search functionality with edge cases -func testIndexerSearchBasic(t *testing.T, indexer Indexer) { - t.Helper() - initTestGists(t, indexer) +func testAccessControl(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + t.Run("public gist visible to any user", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() - // Test 1: Search by content - all init gists have "searchable content" - t.Run("SearchByContent", func(t *testing.T) { - gistIDs, total, languageCounts, err := indexer.Search(SearchGistMetadata{All: "searchable"}, 1, 1) - if err != nil { - t.Fatalf("Search failed: %v", err) - } - // Distribution: alice=334 public Go/algorithms, bob=333 private Python/web, charlie=333 private JS/database - // As user 1 (alice), we only see alice's public gists: 334 - if total != 334 { - t.Errorf("Expected alice to see 334 gists, got %d", total) - } - if len(gistIDs) == 0 { - t.Error("Expected non-empty gist IDs") - } - // Only Go should appear in language facets for alice - if len(languageCounts) == 0 { - t.Error("Expected language facets to be populated") - } - if languageCounts["go"] != 334 { - t.Errorf("Expected 334 Go gists in facets, got %d", languageCounts["go"]) - } + g := newGist(1, 1, 0, "public content") + require.NoError(t, indexer.Add(g)) + + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "public"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "owner should see public gist") + + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "public"}, 99, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "other user should see public gist") + + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "public"}, 0, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "anonymous should see public gist") }) - // Test 2: Search by specific language - Go - t.Run("SearchByLanguage", func(t *testing.T) { - metadata := SearchGistMetadata{Language: "Go"} - _, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Search by language failed: %v", err) - } - // All Go gists are alice's (i=0,3,6,...) = 334 gists - // All are public - if total != 334 { - t.Errorf("Expected 334 Go gists, got %d", total) - } + t.Run("private gist only visible to owner", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() + + g := newGist(1, 1, 1, "private secret") + require.NoError(t, indexer.Add(g)) + + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "secret"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "owner should see private gist") + + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "secret"}, 99, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total, "other user should not see private gist") + + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "secret"}, 0, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total, "anonymous should not see private gist") }) - // Test 3: Search by specific username - alice - t.Run("SearchByUsername", func(t *testing.T) { - metadata := SearchGistMetadata{Username: "alice"} - _, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Search by username failed: %v", err) - } - // alice has 334 gists at i=0,3,6,... - // All are public - if total != 334 { - t.Errorf("Expected 334 alice gists, got %d", total) - } + t.Run("unlisted gist only visible to owner", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() + + g := newGist(1, 1, 2, "unlisted hidden") + require.NoError(t, indexer.Add(g)) + + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "hidden"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "owner should see unlisted gist") + + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "hidden"}, 99, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total, "other user should not see unlisted gist") }) - // Test 4: Search by extension - Python (bob's private files) - t.Run("SearchByExtension", func(t *testing.T) { - metadata := SearchGistMetadata{Extension: "py"} - _, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Search by extension failed: %v", err) - } - // All .py files are bob's (i=1,4,7,...) = 333 files - // All are private (visibility=1) - // As user 1 (alice), we see 0 .py files - if total != 0 { - t.Errorf("Expected alice to see 0 .py files (bob's private), got %d", total) - } - }) + t.Run("mixed visibility correct counts per user", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() - // Test 5: Search by topic - algorithms - t.Run("SearchByTopic", func(t *testing.T) { - metadata := SearchGistMetadata{Topic: "algorithms"} - _, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Search by topic failed: %v", err) - } - // All algorithms gists are alice's (i=0,3,6,...) = 334 gists - // All are public - if total != 334 { - t.Errorf("Expected 334 algorithms gists, got %d", total) - } - }) + g1 := newGist(1, 1, 0, "alphaword content") + g2 := newGist(2, 1, 1, "alphaword content") + g3 := newGist(3, 2, 0, "alphaword content") + g4 := newGist(4, 2, 1, "alphaword content") - // Test 6: Combined filters - Go language + alice - t.Run("SearchCombinedFilters", func(t *testing.T) { - metadata := SearchGistMetadata{ - Language: "Go", - Username: "alice", - } - _, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Search with combined filters failed: %v", err) - } - // Go AND alice are the same set (i=0,3,6,...) = 334 gists - // All are public - if total != 334 { - t.Errorf("Expected 334 Go+alice gists, got %d", total) - } - }) - - // Test 7: Search with no results - t.Run("SearchNoResults", func(t *testing.T) { - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "nonexistentquery"}, 1, 1) - if err != nil { - t.Fatalf("Search with no results failed: %v", err) - } - if total != 0 { - t.Errorf("Expected 0 results for non-existent query, got %d", total) - } - if len(gistIDs) != 0 { - t.Error("Expected empty gist IDs for non-existent query") - } - }) - - // Test 8: Empty query returns all accessible gists - t.Run("SearchEmptyQuery", func(t *testing.T) { - gistIDs, total, languageCounts, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Empty search failed: %v", err) - } - // As user 1 (alice), only sees alice's 334 public gists - if total != 334 { - t.Errorf("Expected 334 gists with empty query, got %d", total) - } - if len(gistIDs) == 0 { - t.Error("Expected non-empty gist IDs with empty query") - } - // Should have only Go in facets (alice's language) - if len(languageCounts) == 0 { - t.Error("Expected language facets with empty query") - } - if languageCounts["go"] != 334 { - t.Errorf("Expected 334 Go in facets, got %d", languageCounts["go"]) - } - }) - - // Test 9: Pagination - t.Run("SearchPagination", func(t *testing.T) { - // As user 1, we have 334 gists total - // Page 1 - gistIDs1, total, _, err := indexer.Search(SearchGistMetadata{All: "searchable"}, 1, 1) - if err != nil { - t.Fatalf("Page 1 search failed: %v", err) - } - if total != 334 { - t.Errorf("Expected 334 total results, got %d", total) - } - if len(gistIDs1) == 0 { - t.Error("Expected results on page 1") + for _, g := range []*Gist{g1, g2, g3, g4} { + require.NoError(t, indexer.Add(g)) } - // Page 2 - gistIDs2, _, _, err := indexer.Search(SearchGistMetadata{All: "searchable"}, 1, 2) - if err != nil { - t.Fatalf("Page 2 search failed: %v", err) - } + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "alphaword"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(3), total, "user1 sees 2 public + 1 own private") - // With 334 results and typical page size of 10, we should have page 2 - if len(gistIDs2) == 0 { - t.Error("Expected results on page 2") - } + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "alphaword"}, 2, 1) + require.NoError(t, err) + require.Equal(t, uint64(3), total, "user2 sees 2 public + 1 own private") - // Ensure pages are different - if len(gistIDs1) > 0 && len(gistIDs2) > 0 && gistIDs1[0] == gistIDs2[0] { - t.Error("Page 1 and page 2 should have different first results") - } - }) - - // Test 10: Search as different user (visibility filtering) - t.Run("SearchVisibilityFiltering", func(t *testing.T) { - // Search as user 2 (bob) - // bob has 333 gists at i=1,4,7,... with visibility=1 (private) - // As user 2, bob sees: alice's 334 public gists + bob's own 333 gists = 667 total - _, total, _, err := indexer.Search(SearchGistMetadata{}, 2, 1) - if err != nil { - t.Fatalf("Search as user 2 failed: %v", err) - } - if total != 667 { - t.Errorf("Expected bob to see 667 gists (334 public + 333 own), got %d", total) - } - - // Search as non-existent user (should only see public gists) - _, totalPublic, _, err := indexer.Search(SearchGistMetadata{}, 999, 1) - if err != nil { - t.Fatalf("Search as user 999 failed: %v", err) - } - // Non-existent user only sees alice's 334 public gists - if totalPublic != 334 { - t.Errorf("Expected non-existent user to see 334 public gists, got %d", totalPublic) - } - - // Public gists (334) should be less than what user 2 sees (667) - if totalPublic >= total { - t.Errorf("Non-existent user sees %d gists, should be less than user 2's %d", totalPublic, total) - } - }) - - // Test 11: Language facets validation - t.Run("LanguageFacets", func(t *testing.T) { - _, _, languageCounts, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Search for facets failed: %v", err) - } - if len(languageCounts) != 1 { - t.Errorf("Expected 1 language in facets (Go), got %d", len(languageCounts)) - } - // As user 1 (alice), should only see Go with count 334 - if languageCounts["go"] != 334 { - t.Errorf("Expected 334 Go in facets, got %d", languageCounts["go"]) - } - // Python and JavaScript should not appear (bob's and charlie's private gists) - if languageCounts["Python"] != 0 { - t.Errorf("Expected 0 Python in facets, got %d", languageCounts["Python"]) - } - if languageCounts["JavaScript"] != 0 { - t.Errorf("Expected 0 JavaScript in facets, got %d", languageCounts["JavaScript"]) - } + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "alphaword"}, 0, 1) + require.NoError(t, err) + require.Equal(t, uint64(2), total, "anonymous sees only public") }) } -// testIndexerAllFieldSearch tests the "All" field OR search functionality -func testIndexerAllFieldSearch(t *testing.T, indexer Indexer) { - t.Helper() - initTestGists(t, indexer) +func testMetadataFilters(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() - // Add test gists with distinct values in different fields - testGists := []*Gist{ - { - GistID: 3001, - UserID: 100, - Visibility: 0, - Username: "testuser_unique", - Title: "Configuration Guide", - Content: "How to configure your application", - Filenames: []string{"config.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"configuration"}, - CreatedAt: 1234567890, - UpdatedAt: 1234567890, - }, - { - GistID: 3002, - UserID: 100, - Visibility: 0, - Username: "developer", - Title: "Testing unique features", - Content: "Testing best practices", - Filenames: []string{"test.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"testing"}, - CreatedAt: 1234567891, - UpdatedAt: 1234567891, - }, - { - GistID: 3003, - UserID: 100, - Visibility: 0, - Username: "coder", - Title: "API Documentation", - Content: "REST API documentation", - Filenames: []string{"api.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Markdown"}, - Topics: []string{"unique_topic"}, - CreatedAt: 1234567892, - UpdatedAt: 1234567892, - }, - { - GistID: 3004, - UserID: 100, - Visibility: 0, - Username: "programmer", - Title: "Code Examples", - Content: "Code examples for beginners", - Filenames: []string{"unique_file.rb"}, - Extensions: []string{"rb"}, - Languages: []string{"Ruby"}, - Topics: []string{"examples"}, - CreatedAt: 1234567893, - UpdatedAt: 1234567893, - }, - { - GistID: 3005, - UserID: 100, - Visibility: 0, - Username: "admin", - Title: "Setup Instructions", - Content: "How to setup the project", - Filenames: []string{"setup.sh"}, - Extensions: []string{"sh"}, - Languages: []string{"Shell"}, - Topics: []string{"setup"}, - CreatedAt: 1234567894, - UpdatedAt: 1234567894, - }, + g1 := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "alice", Title: "Go Tutorial", + Description: "A helper utility for parsing JSON", + Content: "learning golang basics", + Filenames: []string{"main.go"}, + Extensions: []string{"go"}, + Languages: []string{"Go"}, + Topics: []string{"tutorial"}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, + } + g2 := &Gist{ + GistID: 2, UserID: 2, Visibility: 0, + Username: "bob", Title: "Python Script", + Description: "Database migration scripts", + Content: "learning python basics", + Filenames: []string{"script.py"}, + Extensions: []string{"py"}, + Languages: []string{"Python"}, + Topics: []string{"scripting"}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, } - for _, gist := range testGists { - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add test gist %d: %v", gist.GistID, err) - } + for _, g := range []*Gist{g1, g2} { + require.NoError(t, indexer.Add(g)) } - // Test 1: All field matches username - t.Run("AllFieldMatchesUsername", func(t *testing.T) { - metadata := SearchGistMetadata{All: "testuser_unique"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by username via All field") - } - found := false - for _, id := range gistIDs { - if id == 3001 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3001 by username via All field") - } + t.Run("filter by Username", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Username: "alice"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 2: All field matches title - t.Run("AllFieldMatchesTitle", func(t *testing.T) { - metadata := SearchGistMetadata{All: "unique features"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by title via All field") - } - found := false - for _, id := range gistIDs { - if id == 3002 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3002 by title via All field") - } + t.Run("filter by Language", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Language: "Python"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 3: All field matches language - t.Run("AllFieldMatchesLanguage", func(t *testing.T) { - metadata := SearchGistMetadata{All: "Ruby"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by language via All field") - } - found := false - for _, id := range gistIDs { - if id == 3004 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3004 by language via All field") + t.Run("filter by Extension with dot prefix", func(t *testing.T) { + g3 := &Gist{ + GistID: 3, UserID: 1, Visibility: 0, + Username: "alice", Title: "Dot Extension Test", + Content: "extension test content", + Filenames: []string{"test.rs"}, + Extensions: []string{".rs"}, + Languages: []string{"Rust"}, + Topics: []string{}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, } + require.NoError(t, indexer.Add(g3)) + + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "extension", Extension: "rs"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 4: All field matches topic - t.Run("AllFieldMatchesTopic", func(t *testing.T) { - metadata := SearchGistMetadata{All: "unique_topic"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by topic via All field") - } - found := false - for _, id := range gistIDs { - if id == 3003 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3003 by topic via All field") - } + t.Run("filter by Topic", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Topic: "tutorial"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 5: All field matches extension - t.Run("AllFieldMatchesExtension", func(t *testing.T) { - metadata := SearchGistMetadata{All: "sh"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by extension via All field") - } - found := false - for _, id := range gistIDs { - if id == 3005 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3005 by extension via All field") - } + t.Run("filter by Filename", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Filename: "script.py"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 6: All field matches filename - t.Run("AllFieldMatchesFilename", func(t *testing.T) { - metadata := SearchGistMetadata{All: "unique_file.rb"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist by filename via All field") - } - found := false - for _, id := range gistIDs { - if id == 3004 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3004 by filename via All field") - } + t.Run("filter by Title", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Title: "Go Tutorial"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 7: All field OR behavior - matches across different fields - t.Run("AllFieldORBehavior", func(t *testing.T) { - // "unique" appears in: username (3001), title (3002), topic (3003), filename (3004) - metadata := SearchGistMetadata{All: "unique"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field OR search failed: %v", err) - } - if total < 4 { - t.Errorf("Expected at least 4 results from OR search, got %d", total) - } - - // Verify we found gists from different fields - foundIDs := make(map[uint]bool) - for _, id := range gistIDs { - if id >= 3001 && id <= 3004 { - foundIDs[id] = true - } - } - - expectedIDs := []uint{3001, 3002, 3003, 3004} - for _, expectedID := range expectedIDs { - if !foundIDs[expectedID] { - t.Errorf("Expected to find GistID %d in OR search results", expectedID) - } - } + t.Run("filter by Description", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Description: "parsing"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) }) - // Test 8: All field returns more results than specific field (OR vs AND) - t.Run("AllFieldVsSpecificField", func(t *testing.T) { - // Search with All field - metadataAll := SearchGistMetadata{All: "unique"} - _, totalAll, _, err := indexer.Search(metadataAll, 100, 1) - if err != nil { - t.Fatalf("All field search failed: %v", err) - } + t.Run("combined filters narrow results", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Language: "Go", Username: "alice"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) - // Search with specific username field only - metadataSpecific := SearchGistMetadata{Username: "testuser_unique"} - _, totalSpecific, _, err := indexer.Search(metadataSpecific, 100, 1) - if err != nil { - t.Fatalf("Specific field search failed: %v", err) - } - - // All field should return more results (OR) than specific field - if totalAll <= totalSpecific { - t.Errorf("All field (OR) should return more results (%d) than specific field (%d)", totalAll, totalSpecific) - } + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "learning", Language: "Python", Username: "alice"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total, "mismatched filters should return 0") }) - // Test 9: All field with no matches - t.Run("AllFieldNoMatches", func(t *testing.T) { - metadata := SearchGistMetadata{All: "nonexistentvalue"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field no match search failed: %v", err) - } - if total != 0 { - t.Errorf("Expected 0 results for non-existent value, got %d", total) - } - if len(gistIDs) != 0 { - t.Error("Expected empty gist IDs for non-existent value") - } - }) - - // Test 10: All field is mutually exclusive with specific fields - t.Run("AllFieldIgnoresOtherFields", func(t *testing.T) { - // When All is specified, other specific fields should be ignored - metadata := SearchGistMetadata{ - All: "unique", - Username: "nonexistent", // This should be ignored - Language: "NonExistent", // This should be ignored - } - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field with other fields search failed: %v", err) - } - // Should still find results because All is used (and other fields are ignored) - if total < 4 { - t.Errorf("Expected All field to be used (ignoring other fields), got %d results", total) - } - // Verify we found gists matching "unique" - foundAny := false - for _, id := range gistIDs { - if id >= 3001 && id <= 3004 { - foundAny = true - break - } - } - if !foundAny { - t.Error("Expected All field to override specific fields and find results") - } - }) - - // Test 11: All field with content query - t.Run("AllFieldWithContentQuery", func(t *testing.T) { - // All field searches metadata, content query searches content - // Both should work together - metadata := SearchGistMetadata{All: "Ruby"} - // Use All field for content search - metadata.All = "examples" - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field with content query failed: %v", err) - } - // Should find gist 3004 which has Ruby language AND "examples" in content - if total == 0 { - t.Error("Expected to find gist matching both All field and content query") - } - found := false - for _, id := range gistIDs { - if id == 3004 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 3004 matching both conditions") - } - }) - - // Test 12: All field case insensitivity - t.Run("AllFieldCaseInsensitive", func(t *testing.T) { - // Search with different case - metadata := SearchGistMetadata{All: "RUBY"} - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("All field case insensitive search failed: %v", err) - } - if total == 0 { - t.Error("Expected case insensitive match for All field") - } - found := false - for _, id := range gistIDs { - if id == 3004 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Case insensitive All field search returned results but not exact match") - } + t.Run("filter matches no gists", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "learning", Username: "nonexistent"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total) }) } -// testIndexerFuzzySearch tests fuzzy search functionality (typo tolerance) -func testIndexerFuzzySearch(t *testing.T, indexer Indexer) { - t.Helper() - initTestGists(t, indexer) +func testAllFieldSearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() - // Add test gists with specific content for fuzzy search testing - testGists := []*Gist{ - { - GistID: 2001, - UserID: 100, - Visibility: 0, - Username: "fuzzytest", - Title: "Algorithm Test", - Content: "This is a test about algorithms and data structures", - Filenames: []string{"algorithm.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"algorithms"}, - CreatedAt: 1234567890, - UpdatedAt: 1234567890, - }, - { - GistID: 2002, - UserID: 100, - Visibility: 0, - Username: "fuzzytest", - Title: "Python Guide", - Content: "A comprehensive guide to python programming language", - Filenames: []string{"python.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"python"}, - CreatedAt: 1234567891, - UpdatedAt: 1234567891, - }, - { - GistID: 2003, - UserID: 100, - Visibility: 0, - Username: "fuzzytest", - Title: "Database Fundamentals", - Content: "Understanding relational databases and SQL queries", - Filenames: []string{"database.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"database"}, - CreatedAt: 1234567892, - UpdatedAt: 1234567892, - }, - { - GistID: 2004, - UserID: 100, - Visibility: 0, - Username: "fuzzytest", - Title: "JavaScript Essentials", - Content: "Essential javascript concepts for web development", - Filenames: []string{"javascript.txt"}, - Extensions: []string{"txt"}, - Languages: []string{"Text"}, - Topics: []string{"javascript"}, - CreatedAt: 1234567893, - UpdatedAt: 1234567893, - }, + g := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "alice", Title: "MyTitle", + Description: "Database migration scripts", + Content: "somecontent", + Filenames: []string{"readme.md"}, + Extensions: []string{".md"}, + Languages: []string{"Markdown"}, + Topics: []string{"documentation"}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, } + require.NoError(t, indexer.Add(g)) - for _, gist := range testGists { - err := indexer.Add(gist) - if err != nil { - t.Fatalf("Failed to add fuzzy test gist %d: %v", gist.GistID, err) - } - } + g2 := newGist(2, 2, 0, "unrelated other stuff") + require.NoError(t, indexer.Add(g2)) - // Test 1: Exact match should work - t.Run("ExactMatch", func(t *testing.T) { - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "algorithms"}, 100, 1) - if err != nil { - t.Fatalf("Exact match search failed: %v", err) - } - if total == 0 { - t.Error("Expected to find gist with exact match 'algorithms'") - } - found := false - for _, id := range gistIDs { - if id == 2001 { - found = true - break - } - } - if !found { - t.Error("Expected to find GistID 2001 with exact match") - } + t.Run("All matches username", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{All: "alice"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) }) - // Test 2: 1 character typo - substitution - t.Run("OneCharSubstitution", func(t *testing.T) { - // "algoritm" instead of "algorithm" (missing 'h') - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "algoritm"}, 100, 1) - if err != nil { - t.Fatalf("1-char typo search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'algorithm' with typo 'algoritm'") - } - found := false - for _, id := range gistIDs { - if id == 2001 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Fuzzy search returned results but not the expected gist (may be acceptable)") - } + t.Run("All matches title", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{All: "MyTitle"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 3: 1 character typo - deletion - t.Run("OneCharDeletion", func(t *testing.T) { - // "pythn" instead of "python" (missing 'o') - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "pythn"}, 100, 1) - if err != nil { - t.Fatalf("1-char deletion search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'python' with typo 'pythn'") - } - found := false - for _, id := range gistIDs { - if id == 2002 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Fuzzy search returned results but not the expected gist") - } + t.Run("All matches description", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{All: "migration"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) }) - // Test 4: 1 character typo - insertion (extra character) - t.Run("OneCharInsertion", func(t *testing.T) { - // "pythonn" instead of "python" (extra 'n') - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "pythonn"}, 100, 1) - if err != nil { - t.Fatalf("1-char insertion search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'python' with typo 'pythonn'") - } - found := false - for _, id := range gistIDs { - if id == 2002 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Fuzzy search returned results but not the expected gist") - } + t.Run("All matches language", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{All: "Markdown"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 5: 2 character typos - should still match with fuzziness=2 - t.Run("TwoCharTypos", func(t *testing.T) { - // "databse" instead of "database" (missing 'a', transposed 's') - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "databse"}, 100, 1) - if err != nil { - t.Fatalf("2-char typo search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'database' with typo 'databse'") - } - found := false - for _, id := range gistIDs { - if id == 2003 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Fuzzy search returned results but not the expected gist with 2 typos") - } + t.Run("All matches topic", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{All: "documentation"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 6: 2 character typos - different word - t.Run("TwoCharTyposDifferentWord", func(t *testing.T) { - // "javasript" instead of "javascript" (missing 'c') - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "javasript"}, 100, 1) - if err != nil { - t.Fatalf("2-char typo search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'javascript' with typo 'javasript'") - } - found := false - for _, id := range gistIDs { - if id == 2004 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Fuzzy search returned results but not the expected gist") - } + t.Run("All matches filename", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{All: "readme.md"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 7: 3 character typos - should NOT match (beyond fuzziness=2) - t.Run("ThreeCharTyposShouldNotMatch", func(t *testing.T) { - // "algorthm" instead of "algorithm" (missing 'i', 't', 'h') - too different - gistIDs, _, _, err := indexer.Search(SearchGistMetadata{All: "algorthm"}, 100, 1) - if err != nil { - t.Fatalf("3-char typo search failed: %v", err) - } - // With fuzziness=2, this might or might not match depending on the algorithm - // We'll just log the result - found := false - for _, id := range gistIDs { - if id == 2001 { - found = true - break - } - } - if found { - t.Log("3-char typo matched (fuzzy search is very lenient)") - } else { - t.Log("3-char typo did not match as expected") - } + t.Run("All ignores specific filters", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{All: "alice", Username: "nonexistent"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total, "All should take precedence over specific filters") }) - // Test 8: Transposition (swapped characters) - t.Run("CharacterTransposition", func(t *testing.T) { - // "pyhton" instead of "python" (swapped 'ht') - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "pyhton"}, 100, 1) - if err != nil { - t.Fatalf("Transposition search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'python' with transposition 'pyhton'") - } - found := false - for _, id := range gistIDs { - if id == 2002 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Fuzzy search returned results but not the expected gist with transposition") - } - }) + t.Run("All combined with content query", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "somecontent", All: "alice"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) - // Test 9: Case insensitivity with fuzzy search - t.Run("CaseInsensitiveWithFuzzy", func(t *testing.T) { - // "PYTHN" (uppercase with typo) - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "PYTHN"}, 100, 1) - if err != nil { - t.Fatalf("Case insensitive fuzzy search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'python' with 'PYTHN'") - } - found := false - for _, id := range gistIDs { - if id == 2002 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Case insensitive fuzzy search returned results but not expected gist") - } - }) - - // Test 10: Multiple words with typos - t.Run("MultipleWordsWithTypos", func(t *testing.T) { - // "relatonal databse" instead of "relational database" - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "relatonal databse"}, 100, 1) - if err != nil { - t.Fatalf("Multi-word fuzzy search failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search to find 'relational database' with typos") - } - found := false - for _, id := range gistIDs { - if id == 2003 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Multi-word fuzzy search returned results but not expected gist") - } - }) - - // Test 11: Short words with typos (edge case) - t.Run("ShortWordsWithTypos", func(t *testing.T) { - // "SLQ" instead of "SQL" (1 char typo on short word) - gistIDs, total, _, err := indexer.Search(SearchGistMetadata{All: "SLQ"}, 100, 1) - if err != nil { - t.Fatalf("Short word fuzzy search failed: %v", err) - } - // Short words might be more sensitive to typos - found := false - for _, id := range gistIDs { - if id == 2003 { - found = true - break - } - } - if !found && total > 0 { - t.Log("Short word fuzzy search is challenging, returned other results") - } else if found { - t.Log("Short word fuzzy search successfully matched") - } - }) - - // Test 12: Fuzzy search combined with metadata filters - t.Run("FuzzySearchWithMetadataFilters", func(t *testing.T) { - // Search with typo AND username filter - metadata := SearchGistMetadata{Username: "fuzzytest"} - metadata.All = "algoritm" - gistIDs, total, _, err := indexer.Search(metadata, 100, 1) - if err != nil { - t.Fatalf("Fuzzy search with metadata failed: %v", err) - } - if total == 0 { - t.Error("Expected fuzzy search with filter to find results") - } - // All results should be from fuzzytest user - for _, id := range gistIDs { - if id >= 2001 && id <= 2004 { - // Expected - } else { - t.Errorf("Found unexpected GistID %d, should only match fuzzytest gists", id) - } - } + _, total, _, err = indexer.Search(SearchGistMetadata{Content: "somecontent", All: "nonexistent"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total, "All not matching should yield 0") }) } -// testIndexerPagination tests pagination in search results -func testIndexerPagination(t *testing.T, indexer Indexer) { - t.Helper() - initTestGists(t, indexer) +func testFuzzySearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() - // Test 1: Basic pagination - pages should be different - t.Run("BasicPagination", func(t *testing.T) { - // Search as user 1 (alice) - should see 334 public gists - gistIDs1, total, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Page 1 search failed: %v", err) - } - if total != 334 { - t.Errorf("Expected 334 total results, got %d", total) - } - if len(gistIDs1) == 0 { - t.Fatal("Expected results on page 1") - } + g := newGist(1, 1, 0, "the elephant danced gracefully") + require.NoError(t, indexer.Add(g)) - gistIDs2, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 2) - if err != nil { - t.Fatalf("Page 2 search failed: %v", err) - } - if len(gistIDs2) == 0 { - t.Error("Expected results on page 2") - } - - // Pages should have different first results - if gistIDs1[0] == gistIDs2[0] { - t.Error("Page 1 and page 2 returned the same first result") - } + t.Run("exact match", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "elephant"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 2: Page size - verify results per page (page size = 10) - t.Run("PageSizeVerification", func(t *testing.T) { - gistIDs1, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Page 1 search failed: %v", err) - } - // With page size 10, first page should have 10 results (or up to 11 with +1 for hasMore check) - if len(gistIDs1) == 0 || len(gistIDs1) > 11 { - t.Errorf("Expected 1-11 results on page 1 (page size 10), got %d", len(gistIDs1)) - } + t.Run("1-char substitution", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "elephent"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 3: Total count consistency across pages - t.Run("TotalCountConsistency", func(t *testing.T) { - _, total1, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Page 1 search failed: %v", err) - } - _, total2, _, err := indexer.Search(SearchGistMetadata{}, 1, 2) - if err != nil { - t.Fatalf("Page 2 search failed: %v", err) - } - if total1 != total2 { - t.Errorf("Total count inconsistent: page 1 reports %d, page 2 reports %d", total1, total2) - } - if total1 != 334 { - t.Errorf("Expected total count of 334, got %d", total1) - } + t.Run("1-char deletion", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "elepant"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 4: Out of bounds page - t.Run("OutOfBoundsPage", func(t *testing.T) { - // Page 100 is way beyond 334 results with page size 10 - gistIDs, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 100) - if err != nil { - t.Fatalf("Out of bounds page search failed: %v", err) - } - if len(gistIDs) != 0 { - t.Errorf("Expected 0 results for out of bounds page, got %d", len(gistIDs)) - } + t.Run("1-char insertion", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "elephantz"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 5: Empty results pagination - t.Run("EmptyResultsPagination", func(t *testing.T) { - metadata := SearchGistMetadata{All: "nonexistentquery"} - gistIDs, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Empty search failed: %v", err) - } - if total != 0 { - t.Errorf("Expected 0 total for empty search, got %d", total) - } - if len(gistIDs) != 0 { - t.Errorf("Expected 0 results for empty search, got %d", len(gistIDs)) - } + t.Run("character transposition", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "elehpant"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 6: No duplicate IDs across pages (accounting for +1 overlap for hasMore indicator) - t.Run("NoDuplicateIDs", func(t *testing.T) { - gistIDs1, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Page 1 search failed: %v", err) - } - gistIDs2, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 2) - if err != nil { - t.Fatalf("Page 2 search failed: %v", err) - } - - // The pagination returns 11 items but only displays 10 - // The 11th item is used as a "hasMore" indicator - // So we only check the first 10 items of each page for duplicates - page1Items := gistIDs1 - if len(gistIDs1) > 10 { - page1Items = gistIDs1[:10] - } - page2Items := gistIDs2 - if len(gistIDs2) > 10 { - page2Items = gistIDs2[:10] - } - - // Check for duplicates between displayed items only - for _, id1 := range page1Items { - for _, id2 := range page2Items { - if id1 == id2 { - t.Errorf("Found duplicate ID %d in displayed items of both pages", id1) - } - } - } + t.Run("case insensitive", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "ELEPHANT"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) - // Test 7: Pagination with metadata filters - t.Run("PaginationWithFilters", func(t *testing.T) { - // Filter by alice's username - should get 334 gists - metadata := SearchGistMetadata{Username: "alice"} - gistIDs1, total, _, err := indexer.Search(metadata, 1, 1) - if err != nil { - t.Fatalf("Filtered page 1 search failed: %v", err) - } - if total != 334 { - t.Errorf("Expected 334 total results with filter, got %d", total) - } - if len(gistIDs1) == 0 { - t.Error("Expected results on filtered page 1") - } - - gistIDs2, _, _, err := indexer.Search(metadata, 1, 2) - if err != nil { - t.Fatalf("Filtered page 2 search failed: %v", err) - } - if len(gistIDs2) == 0 { - t.Error("Expected results on filtered page 2") - } - - // Pages should be different - if gistIDs1[0] == gistIDs2[0] { - t.Error("Filtered pages should have different results") - } - }) - - // Test 8: Last page verification - t.Run("LastPageVerification", func(t *testing.T) { - // With 334 results and page size 10, page 34 should have 4 results - // Let's just verify the last page has some results - gistIDs34, total, _, err := indexer.Search(SearchGistMetadata{}, 1, 34) - if err != nil { - t.Fatalf("Last page search failed: %v", err) - } - if total != 334 { - t.Errorf("Expected 334 total on last page, got %d", total) - } - if len(gistIDs34) == 0 { - t.Error("Expected results on last page (34)") - } - - // Page 35 should be empty - gistIDs35, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 35) - if err != nil { - t.Fatalf("Beyond last page search failed: %v", err) - } - if len(gistIDs35) != 0 { - t.Errorf("Expected 0 results on page 35 (beyond last page), got %d", len(gistIDs35)) - } - }) - - // Test 9: Multiple pages have different results - t.Run("MultiplePagesDifferent", func(t *testing.T) { - gistIDs1, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Page 1 search failed: %v", err) - } - gistIDs10, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 10) - if err != nil { - t.Fatalf("Page 10 search failed: %v", err) - } - gistIDs20, _, _, err := indexer.Search(SearchGistMetadata{}, 1, 20) - if err != nil { - t.Fatalf("Page 20 search failed: %v", err) - } - - // All three pages should have results - if len(gistIDs1) == 0 || len(gistIDs10) == 0 || len(gistIDs20) == 0 { - t.Error("Expected results on pages 1, 10, and 20") - } - - // All should have different first results - if gistIDs1[0] == gistIDs10[0] || gistIDs1[0] == gistIDs20[0] || gistIDs10[0] == gistIDs20[0] { - t.Error("Pages 1, 10, and 20 should have different first results") - } - }) - - // Test 10: Pagination with different users (visibility filtering) - t.Run("PaginationWithVisibility", func(t *testing.T) { - // User 2 (bob) sees 667 gists (334 public alice + 333 own private) - gistIDs1Bob, totalBob, _, err := indexer.Search(SearchGistMetadata{}, 2, 1) - if err != nil { - t.Fatalf("Bob page 1 search failed: %v", err) - } - if totalBob != 667 { - t.Errorf("Expected bob to see 667 gists, got %d", totalBob) - } - if len(gistIDs1Bob) == 0 { - t.Error("Expected results on page 1 for bob") - } - - // User 1 (alice) sees 334 gists - _, totalAlice, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) - if err != nil { - t.Fatalf("Alice page 1 search failed: %v", err) - } - if totalAlice != 334 { - t.Errorf("Expected alice to see 334 gists, got %d", totalAlice) - } - - // Bob sees more results than alice - if totalBob <= totalAlice { - t.Errorf("Bob should see more results (%d) than alice (%d)", totalBob, totalAlice) - } + t.Run("empty content query with metadata returns MatchAll", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) }) } + +func testContentSearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() + + g := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "user1", Title: "Kubernetes Deployment Helper", + Content: `// café résumé naïve +func getHTTPResponse(url string) { + xmlParser := NewXMLParser() + myFunctionName := calculate(x, y) + fmt.Println("hello world") + self.cpuCard = initCard() + // the quick brown fox jumps over the lazy dog + elephant := fetchData() + if result == 0 { + return + } +}`, + Filenames: []string{"file.txt"}, + Extensions: []string{"txt"}, + Languages: []string{"Text"}, + Topics: []string{}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, + } + require.NoError(t, indexer.Add(g)) + + t.Run("code content/calculate", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "calculate"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("code content/Println", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "Println"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("code content/hello", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "hello"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("multi-word/both present", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "fox lazy"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("multi-word/one missing", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "fox unicorn"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total) + }) + + t.Run("prefix/content eleph", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "eleph"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("prefix/title Kube", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Title: "Kube"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("unicode/café", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "café"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + }) + + t.Run("unicode/résumé", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "résumé"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + }) + + t.Run("unicode/normalization cafe", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "cafe"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + }) + + t.Run("camelCase/function", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "function"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("camelCase/xml", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "xml"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("camelCase/parser", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "parser"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("camelCase/http", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "http"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("camelCase/response", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "response"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("camelCase/cpucard", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "cpucard"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) +} + +func testPagination(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() + + for i := uint(1); i <= 25; i++ { + g := newGist(i, 1, 0, "pagination keyword content") + require.NoError(t, indexer.Add(g)) + } + + t.Run("page sizes", func(t *testing.T) { + ids1, total1, _, err := indexer.Search(SearchGistMetadata{Content: "pagination"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(25), total1) + require.GreaterOrEqual(t, len(ids1), 10, "page 1 should have at least 10 results") + + ids2, total2, _, err := indexer.Search(SearchGistMetadata{Content: "pagination"}, 1, 2) + require.NoError(t, err) + require.Equal(t, uint64(25), total2) + require.GreaterOrEqual(t, len(ids2), 10, "page 2 should have at least 10 results") + + ids3, _, _, err := indexer.Search(SearchGistMetadata{Content: "pagination"}, 1, 3) + require.NoError(t, err) + require.GreaterOrEqual(t, len(ids3), 5, "page 3 should have at least 5 results") + }) + + t.Run("no duplicates between pages", func(t *testing.T) { + ids1, _, _, err := indexer.Search(SearchGistMetadata{Content: "pagination"}, 1, 1) + require.NoError(t, err) + ids2, _, _, err := indexer.Search(SearchGistMetadata{Content: "pagination"}, 1, 2) + require.NoError(t, err) + + page1 := ids1 + if len(page1) > 10 { + page1 = page1[:10] + } + page2 := ids2 + if len(page2) > 10 { + page2 = page2[:10] + } + + seen := make(map[uint]bool) + for _, id := range page1 { + seen[id] = true + } + for _, id := range page2 { + require.False(t, seen[id], "duplicate gist ID %d found across pages", id) + } + }) + + t.Run("out of bounds page returns 0 results", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "pagination"}, 1, 100) + require.NoError(t, err) + require.Empty(t, ids) + require.Equal(t, uint64(25), total, "total should still reflect actual count") + }) +} + +func testLanguageFacets(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + t.Run("facets reflect language counts", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() + + languages := []string{"Go", "Go", "Python", "JavaScript"} + for i, lang := range languages { + g := newGist(uint(i+1), 1, 0, "facet test content") + g.Languages = []string{lang} + require.NoError(t, indexer.Add(g)) + } + + _, _, facets, err := indexer.Search(SearchGistMetadata{Content: "facet"}, 1, 1) + require.NoError(t, err) + require.Equal(t, 2, facets["go"]) + require.Equal(t, 1, facets["python"]) + require.Equal(t, 1, facets["javascript"]) + }) + + t.Run("facets respect visibility", func(t *testing.T) { + indexer, cleanup := setup(t) + defer cleanup() + + g1 := newGist(1, 1, 0, "facet visibility test") + g1.Languages = []string{"Go"} + g2 := newGist(2, 1, 1, "facet visibility test") + g2.Languages = []string{"Rust"} + + for _, g := range []*Gist{g1, g2} { + require.NoError(t, indexer.Add(g)) + } + + _, _, facets, err := indexer.Search(SearchGistMetadata{Content: "facet"}, 99, 1) + require.NoError(t, err) + require.Equal(t, 1, facets["go"]) + require.Equal(t, 0, facets["rust"]) + + _, _, facets, err = indexer.Search(SearchGistMetadata{Content: "facet"}, 1, 1) + require.NoError(t, err) + require.Equal(t, 1, facets["go"]) + require.Equal(t, 1, facets["rust"]) + }) +} + +func testWildcardSearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() + + g := newGist(1, 1, 0, "the elephant danced gracefully") + require.NoError(t, indexer.Add(g)) + + t.Run("substring wildcard match on content", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Content: "leph"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("substring wildcard no match", func(t *testing.T) { + _, total, _, err := indexer.Search(SearchGistMetadata{Content: "zzzz"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(0), total) + }) +} + +func testMetadataOnlySearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() + + g1 := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "alice", Title: "Go Tutorial", + Content: "learning golang basics", + Filenames: []string{"main.go"}, + Extensions: []string{"go"}, + Languages: []string{"Go"}, + Topics: []string{}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, + } + g2 := &Gist{ + GistID: 2, UserID: 2, Visibility: 0, + Username: "bob", Title: "Python Script", + Content: "data processing pipeline", + Filenames: []string{"script.py"}, + Extensions: []string{"py"}, + Languages: []string{"Python"}, + Topics: []string{}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, + } + + for _, g := range []*Gist{g1, g2} { + require.NoError(t, indexer.Add(g)) + } + + t.Run("Username only no content", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Username: "alice"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("Language only no content", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Language: "Python"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(2), ids[0]) + }) + + t.Run("Title only no content", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Title: "Go Tutorial"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) +} + +func testTitleFuzzySearch(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() + + g := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "alice", Title: "Kubernetes Deployment", + Content: "some content", + Filenames: []string{"deploy.yaml"}, + Extensions: []string{"yaml"}, + Languages: []string{"YAML"}, + Topics: []string{}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, + } + require.NoError(t, indexer.Add(g)) + + t.Run("title with typo still matches", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Title: "Kuberntes"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) + + t.Run("title prefix matches", func(t *testing.T) { + ids, total, _, err := indexer.Search(SearchGistMetadata{Title: "Kube"}, 1, 1) + require.NoError(t, err) + require.Equal(t, uint64(1), total) + require.Equal(t, uint(1), ids[0]) + }) +} + +func testMultiLanguageFacets(t *testing.T, setup func(t *testing.T) (Indexer, func())) { + indexer, cleanup := setup(t) + defer cleanup() + + g := &Gist{ + GistID: 1, UserID: 1, Visibility: 0, + Username: "alice", Title: "Multi-lang gist", + Content: "polyglot content", + Filenames: []string{"main.go", "script.py"}, + Extensions: []string{"go", "py"}, + Languages: []string{"Go", "Python"}, + Topics: []string{}, + CreatedAt: 1234567890, UpdatedAt: 1234567890, + } + require.NoError(t, indexer.Add(g)) + + _, _, facets, err := indexer.Search(SearchGistMetadata{Content: "polyglot"}, 1, 1) + require.NoError(t, err) + require.Equal(t, 1, facets["go"], "Go should appear in facets") + require.Equal(t, 1, facets["python"], "Python should appear in facets") +} diff --git a/internal/index/meilisearch.go b/internal/index/meilisearch.go index e71459f..58deeb1 100644 --- a/internal/index/meilisearch.go +++ b/internal/index/meilisearch.go @@ -6,6 +6,7 @@ import ( "fmt" "strconv" "strings" + "unicode" "github.com/meilisearch/meilisearch-go" "github.com/rs/zerolog/log" @@ -51,23 +52,25 @@ func (i *MeiliIndexer) open() (meilisearch.IndexManager, error) { i.client = meilisearch.New(i.host, meilisearch.WithAPIKey(i.apikey)) indexResult, err := i.client.GetIndex(i.indexName) - if indexResult != nil && err == nil { - return indexResult.IndexManager, nil - } - - _, err = i.client.CreateIndex(&meilisearch.IndexConfig{ - Uid: i.indexName, - PrimaryKey: "GistID", - }) - if err != nil { - return nil, err + if indexResult == nil || err != nil { + _, err = i.client.CreateIndex(&meilisearch.IndexConfig{ + Uid: i.indexName, + PrimaryKey: "GistID", + }) + if err != nil { + return nil, err + } } _, _ = i.client.Index(i.indexName).UpdateSettings(&meilisearch.Settings{ - FilterableAttributes: []string{"GistID", "UserID", "Visibility", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"}, - DisplayedAttributes: []string{"GistID"}, - SearchableAttributes: []string{"Content", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"}, - RankingRules: []string{"words"}, + FilterableAttributes: []string{"GistID", "UserID", "Visibility", "Username", "Extensions", "Languages", "Topics"}, + SearchableAttributes: []string{"Content", "ContentSplit", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"}, + RankingRules: []string{"words", "typo", "proximity", "attribute", "sort", "exactness"}, + TypoTolerance: &meilisearch.TypoTolerance{ + Enabled: true, + DisableOnNumbers: true, + MinWordSizeForTypos: meilisearch.MinWordSizeForTypos{OneTypo: 4, TwoTypos: 10}, + }, }) return i.client.Index(i.indexName), nil @@ -96,12 +99,21 @@ func (i *MeiliIndexer) Close() { i.client = nil } +type meiliGist struct { + Gist + ContentSplit string +} + func (i *MeiliIndexer) Add(gist *Gist) error { if gist == nil { return errors.New("failed to add nil gist to index") } + doc := &meiliGist{ + Gist: *gist, + ContentSplit: splitCamelCase(gist.Content), + } primaryKey := "GistID" - _, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.AddDocuments(gist, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey}) + _, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.AddDocuments(doc, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey}) return err } @@ -116,7 +128,8 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag Limit: 11, AttributesToRetrieve: []string{"GistID", "Languages"}, Facets: []string{"Languages"}, - AttributesToSearchOn: []string{"Content"}, + AttributesToSearchOn: []string{"Content", "ContentSplit"}, + MatchingStrategy: meilisearch.All, } var filters []string @@ -127,46 +140,83 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag filters = append(filters, fmt.Sprintf("%s = \"%s\"", field, escapeFilterValue(value))) } } - addFilter("Username", queryMetadata.Username) - addFilter("Title", queryMetadata.Title) - addFilter("Description", queryMetadata.Description) - addFilter("Filenames", queryMetadata.Filename) - addFilter("Extensions", queryMetadata.Extension) - addFilter("Languages", queryMetadata.Language) - addFilter("Topics", queryMetadata.Topic) + var query string + if queryMetadata.All != "" { + query = queryMetadata.All + searchRequest.AttributesToSearchOn = append(AllSearchFields, "ContentSplit") + } else { + // Exact-match fields stay as filters + addFilter("Username", queryMetadata.Username) + if queryMetadata.Extension != "" { + ext := queryMetadata.Extension + if !strings.HasPrefix(ext, ".") { + ext = "." + ext + } + addFilter("Extensions", ext) + } + addFilter("Languages", queryMetadata.Language) + addFilter("Topics", queryMetadata.Topic) + + if queryMetadata.Default != "" { + query = queryMetadata.Default + var fields []string + for _, f := range strings.Split(config.C.SearchDefault, ",") { + f = strings.TrimSpace(f) + if f == "all" { + fields = AllSearchFields + break + } + if indexField, ok := SearchFieldMap[f]; ok { + fields = append(fields, indexField) + } + } + if len(fields) > 0 { + for _, f := range fields { + if f == "Content" { + fields = append(fields, "ContentSplit") + break + } + } + searchRequest.AttributesToSearchOn = fields + } + } else { + // Fuzzy-matchable fields become part of the query + var queryParts []string + var searchFields []string + + if queryMetadata.Content != "" { + queryParts = append(queryParts, queryMetadata.Content) + searchFields = append(searchFields, "Content", "ContentSplit") + } + if queryMetadata.Title != "" { + queryParts = append(queryParts, queryMetadata.Title) + searchFields = append(searchFields, "Title") + } + if queryMetadata.Description != "" { + queryParts = append(queryParts, queryMetadata.Description) + searchFields = append(searchFields, "Description") + } + if queryMetadata.Filename != "" { + queryParts = append(queryParts, queryMetadata.Filename) + searchFields = append(searchFields, "Filenames") + } + + query = strings.Join(queryParts, " ") + if len(searchFields) > 0 { + searchRequest.AttributesToSearchOn = searchFields + } + } + } if len(filters) > 0 { searchRequest.Filter = strings.Join(filters, " AND ") } - // build query string from provided metadata. Prefer `All`, then `Default`, fall back to `Content`. - query := queryMetadata.All - if query == "" && queryMetadata.Default != "" { - query = queryMetadata.Default - var fields []string - for _, f := range strings.Split(config.C.SearchDefault, ",") { - f = strings.TrimSpace(f) - if f == "all" { - fields = AllSearchFields - break - } - if indexField, ok := SearchFieldMap[f]; ok { - fields = append(fields, indexField) - } - } - if len(fields) > 0 { - searchRequest.AttributesToSearchOn = fields - } - } else if query == "" { - query = queryMetadata.Content - } - response, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.Search(query, searchRequest) if err != nil { log.Error().Err(err).Msg("Failed to search Meilisearch index") return nil, 0, nil, err } - gistIds := make([]uint, 0, len(response.Hits)) for _, hit := range response.Hits { if gistIDRaw, ok := hit["GistID"]; ok { @@ -182,7 +232,9 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag var facetDist map[string]map[string]int if err := json.Unmarshal(response.FacetDistribution, &facetDist); err == nil { if facets, ok := facetDist["Languages"]; ok { - languageCounts = facets + for lang, count := range facets { + languageCounts[strings.ToLower(lang)] += count + } } } } @@ -190,6 +242,30 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag return gistIds, uint64(response.EstimatedTotalHits), languageCounts, nil } +func splitCamelCase(text string) string { + var result strings.Builder + runes := []rune(text) + for i := 0; i < len(runes); i++ { + r := runes[i] + if i > 0 { + prev := runes[i-1] + if unicode.IsUpper(r) { + if unicode.IsLower(prev) || unicode.IsDigit(prev) { + result.WriteRune(' ') + } else if unicode.IsUpper(prev) && i+1 < len(runes) && unicode.IsLower(runes[i+1]) { + result.WriteRune(' ') + } + } else if unicode.IsDigit(r) && !unicode.IsDigit(prev) { + result.WriteRune(' ') + } else if !unicode.IsDigit(r) && unicode.IsDigit(prev) { + result.WriteRune(' ') + } + } + result.WriteRune(r) + } + return result.String() +} + func escapeFilterValue(value string) string { escaped := strings.ReplaceAll(value, "\\", "\\\\") escaped = strings.ReplaceAll(escaped, "\"", "\\\"") diff --git a/internal/index/meilisearch_test.go b/internal/index/meilisearch_test.go new file mode 100644 index 0000000..8106d0c --- /dev/null +++ b/internal/index/meilisearch_test.go @@ -0,0 +1,88 @@ +package index + +import ( + "fmt" + "os" + "strconv" + "testing" + + "github.com/meilisearch/meilisearch-go" + "github.com/rs/zerolog" +) + +// syncMeiliIndexer wraps MeiliIndexer to make Add/Remove synchronous for tests. +type syncMeiliIndexer struct { + *MeiliIndexer +} + +func (s *syncMeiliIndexer) Add(gist *Gist) error { + if gist == nil { + return fmt.Errorf("failed to add nil gist to index") + } + doc := &meiliGist{ + Gist: *gist, + ContentSplit: splitCamelCase(gist.Content), + } + primaryKey := "GistID" + taskInfo, err := s.index.AddDocuments(doc, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey}) + if err != nil { + return err + } + _, err = s.client.WaitForTask(taskInfo.TaskUID, 0) + return err +} + +func (s *syncMeiliIndexer) Remove(gistID uint) error { + taskInfo, err := s.index.DeleteDocument(strconv.Itoa(int(gistID)), nil) + if err != nil { + return err + } + _, err = s.client.WaitForTask(taskInfo.TaskUID, 0) + return err +} + +func setupMeiliIndexer(t *testing.T) (Indexer, func()) { + zerolog.SetGlobalLevel(zerolog.Disabled) + t.Helper() + + host := os.Getenv("OG_TEST_MEILI_HOST") + if host == "" { + host = "http://localhost:47700" + } + apiKey := os.Getenv("OG_TEST_MEILI_API_KEY") + + indexName := fmt.Sprintf("test_%d", os.Getpid()) + + inner := NewMeiliIndexer(host, apiKey, indexName) + err := inner.Init() + if err != nil { + t.Skipf("MeiliSearch not available at %s: %v", host, err) + } + + wrapped := &syncMeiliIndexer{MeiliIndexer: inner} + + // Store the inner MeiliIndexer in atomicIndexer, because MeiliIndexer.Search + // type-asserts the global to *MeiliIndexer. + var idx Indexer = inner + atomicIndexer.Store(&idx) + + cleanup := func() { + atomicIndexer.Store(nil) + inner.Reset() + inner.Close() + } + + return wrapped, cleanup +} + +func TestMeiliAddAndSearch(t *testing.T) { testAddAndSearch(t, setupMeiliIndexer) } +func TestMeiliAccessControl(t *testing.T) { testAccessControl(t, setupMeiliIndexer) } +func TestMeiliMetadataFilters(t *testing.T) { testMetadataFilters(t, setupMeiliIndexer) } +func TestMeiliAllFieldSearch(t *testing.T) { testAllFieldSearch(t, setupMeiliIndexer) } +func TestMeiliFuzzySearch(t *testing.T) { testFuzzySearch(t, setupMeiliIndexer) } +func TestMeiliContentSearch(t *testing.T) { testContentSearch(t, setupMeiliIndexer) } +func TestMeiliPagination(t *testing.T) { testPagination(t, setupMeiliIndexer) } +func TestMeiliLanguageFacets(t *testing.T) { testLanguageFacets(t, setupMeiliIndexer) } +func TestMeiliMetadataOnlySearch(t *testing.T) { testMetadataOnlySearch(t, setupMeiliIndexer) } +func TestMeiliTitleFuzzySearch(t *testing.T) { testTitleFuzzySearch(t, setupMeiliIndexer) } +func TestMeiliMultiLanguageFacets(t *testing.T) { testMultiLanguageFacets(t, setupMeiliIndexer) } diff --git a/test.md b/test.md deleted file mode 100644 index 8e864e2..0000000 --- a/test.md +++ /dev/null @@ -1,277 +0,0 @@ ---- -description: Testing handler and middleware -slug: /testing -sidebar_position: 13 ---- - -# Testing - -## Testing Handler - -`GET` `/users/:id` - -Handler below retrieves user by id from the database. If user is not found it returns -`404` error with a message. - -### CreateUser - -`POST` `/users` - -- Accepts JSON payload -- On success `201 - Created` -- On error `500 - Internal Server Error` - -### GetUser - -`GET` `/users/:email` - -- On success `200 - OK` -- On error `404 - Not Found` if user is not found otherwise `500 - Internal Server Error` - -`handler.go` - -```go -package handler - -import ( - "net/http" - - "github.com/labstack/echo/v5" -) - -type ( - User struct { - Name string `json:"name" form:"name"` - Email string `json:"email" form:"email"` - } - handler struct { - db map[string]*User - } -) - -func (h *handler) createUser(c *echo.Context) error { - u := new(User) - if err := c.Bind(u); err != nil { - return err - } - return c.JSON(http.StatusCreated, u) -} - -func (h *handler) getUser(c *echo.Context) error { - email := c.Param("email") - user := h.db[email] - if user == nil { - return echo.NewHTTPError(http.StatusNotFound, "user not found") - } - return c.JSON(http.StatusOK, user) -} -``` - -`handler_test.go` - -```go -package handler - -import ( - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/labstack/echo/v5" - "github.com/labstack/echo/v5/echotest" - "github.com/stretchr/testify/assert" -) - -var ( - mockDB = map[string]*User{ - "jon@labstack.com": &User{"Jon Snow", "jon@labstack.com"}, - } - userJSON = `{"name":"Jon Snow","email":"jon@labstack.com"}` -) - -func TestCreateUser(t *testing.T) { - // Setup - e := echo.New() - req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(userJSON)) - req.Header.Set(echo.HeaderContentType, echo.MIMEApplicationJSON) - - rec := httptest.NewRecorder() - c := e.NewContext(req, rec) - - h := &controller{mockDB} - - // Assertions - if assert.NoError(t, h.createUser(c)) { - assert.Equal(t, http.StatusCreated, rec.Code) - assert.Equal(t, userJSON, rec.Body.String()) - } -} - -// Same test as above but using `echotest` package helpers -func TestCreateUserWithEchoTest(t *testing.T) { - c, rec := echotest.ContextConfig{ - Headers: map[string][]string{ - echo.HeaderContentType: {echo.MIMEApplicationJSON}, - }, - JSONBody: []byte(`{"name":"Jon Snow","email":"jon@labstack.com"}`), - }.ToContextRecorder(t) - - h := &controller{mockDB} - - // Assertions - if assert.NoError(t, h.createUser(c)) { - assert.Equal(t, http.StatusCreated, rec.Code) - assert.Equal(t, userJSON+"\n", rec.Body.String()) - } -} - -// Same test as above but even shorter -func TestCreateUserWithEchoTest2(t *testing.T) { - h := &controller{mockDB} - - rec := echotest.ContextConfig{ - Headers: map[string][]string{ - echo.HeaderContentType: {echo.MIMEApplicationJSON}, - }, - JSONBody: []byte(`{"name":"Jon Snow","email":"jon@labstack.com"}`), - }.ServeWithHandler(t, h.createUser) - - assert.Equal(t, http.StatusCreated, rec.Code) - assert.Equal(t, userJSON+"\n", rec.Body.String()) -} - -func TestGetUser(t *testing.T) { - // Setup - e := echo.New() - req := httptest.NewRequest(http.MethodGet, "/", nil) - rec := httptest.NewRecorder() - c := e.NewContext(req, rec) - - c.SetPath("/users/:email") - c.SetPathValues(echo.PathValues{ - {Name: "email", Value: "jon@labstack.com"}, - }) - h := &controller{mockDB} - - // Assertions - if assert.NoError(t, h.getUser(c)) { - assert.Equal(t, http.StatusOK, rec.Code) - assert.Equal(t, userJSON, rec.Body.String()) - } -} - -func TestGetUserWithEchoTest(t *testing.T) { - c, rec := echotest.ContextConfig{ - PathValues: echo.PathValues{ - {Name: "email", Value: "jon@labstack.com"}, - }, - Headers: map[string][]string{ - echo.HeaderContentType: {echo.MIMEApplicationJSON}, - }, - JSONBody: []byte(userJSON), - }.ToContextRecorder(t) - - h := &controller{mockDB} - - // Assertions - if assert.NoError(t, h.getUser(c)) { - assert.Equal(t, http.StatusOK, rec.Code) - assert.Equal(t, userJSON+"\n", rec.Body.String()) - } -} -``` - -### Using Form Payload - -```go -// import "net/url" -f := make(url.Values) -f.Set("name", "Jon Snow") -f.Set("email", "jon@labstack.com") -req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(f.Encode())) -req.Header.Set(echo.HeaderContentType, echo.MIMEApplicationForm) -``` - -Multipart form payload: -```go -func TestContext_MultipartForm(t *testing.T) { - testConf := echotest.ContextConfig{ - MultipartForm: &echotest.MultipartForm{ - Fields: map[string]string{ - "key": "value", - }, - Files: []echotest.MultipartFormFile{ - { - Fieldname: "file", - Filename: "test.json", - Content: echotest.LoadBytes(t, "testdata/test.json"), - }, - }, - }, - } - c := testConf.ToContext(t) - - assert.Equal(t, "value", c.FormValue("key")) - assert.Equal(t, http.MethodPost, c.Request().Method) - assert.Equal(t, true, strings.HasPrefix(c.Request().Header.Get(echo.HeaderContentType), "multipart/form-data; boundary=")) - - fv, err := c.FormFile("file") - if err != nil { - t.Fatal(err) - } - assert.Equal(t, "test.json", fv.Filename) -} -``` - -### Setting Path Params - -```go -c.SetPathValues(echo.PathValues{ - {Name: "id", Value: "1"}, - {Name: "email", Value: "jon@labstack.com"}, -}) -``` - -### Setting Query Params - -```go -// import "net/url" -q := make(url.Values) -q.Set("email", "jon@labstack.com") -req := httptest.NewRequest(http.MethodGet, "/?"+q.Encode(), nil) -``` - -## Testing Middleware - -```go -func TestCreateUserWithEchoTest2(t *testing.T) { - handler := func(c *echo.Context) error { - return c.JSON(http.StatusTeapot, fmt.Sprintf("email: %s", c.Param("email"))) - } - middleware := func(next echo.HandlerFunc) echo.HandlerFunc { - return func(c *echo.Context) error { - c.Set("user_id", int64(1234)) - return next(c) - } - } - - c, rec := echotest.ContextConfig{ - PathValues: echo.PathValues{{Name: "email", Value: "jon@labstack.com"}}, - }.ToContextRecorder(t) - - err := middleware(handler)(c) - if err != nil { - t.Fatal(err) - } - // check that middleware set the value - userID, err := echo.ContextGet[int64](c, "user_id") - assert.NoError(t, err) - assert.Equal(t, int64(1234), userID) - - // check that handler returned the correct response - assert.Equal(t, http.StatusTeapot, rec.Code) -} -``` - -For now you can look into built-in middleware [test cases](https://github.com/labstack/echo/tree/master/middleware). diff --git a/test2.md b/test2.md deleted file mode 100644 index cc07c50..0000000 --- a/test2.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -description: Testing handler and middleware -slug: /testing -sidebar_position: 13 ---- - -# Testing - -## Testing Handler - -`GET` `/users/:id` - -Handler below retrieves user by id from the database. If user is not found it returns -`404` error with a message. - -### CreateUser - -`POST` `/users` - -- Accepts JSON payload -- On success `201 - Created` -- On error `500 - Internal Server Error` - -### GetUser - -`GET` `/users/:email` - -- On success `200 - OK` -- On error `404 - Not Found` if user is not found otherwise `500 - Internal Server Error` - -`handler.go` - -```go -package handler - -import ( - "net/http" - - "github.com/labstack/echo/v4" -) - -type ( - User struct { - Name string `json:"name" form:"name"` - Email string `json:"email" form:"email"` - } - handler struct { - db map[string]*User - } -) - -func (h *handler) createUser(c echo.Context) error { - u := new(User) - if err := c.Bind(u); err != nil { - return err - } - return c.JSON(http.StatusCreated, u) -} - -func (h *handler) getUser(c echo.Context) error { - email := c.Param("email") - user := h.db[email] - if user == nil { - return echo.NewHTTPError(http.StatusNotFound, "user not found") - } - return c.JSON(http.StatusOK, user) -} -``` - -`handler_test.go` - -```go -package handler - -import ( - "net/http" - "net/http/httptest" - "strings" - "testing" - - "github.com/labstack/echo/v4" - "github.com/stretchr/testify/assert" -) - -var ( - mockDB = map[string]*User{ - "jon@labstack.com": &User{"Jon Snow", "jon@labstack.com"}, - } - userJSON = `{"name":"Jon Snow","email":"jon@labstack.com"}` -) - -func TestCreateUser(t *testing.T) { - // Setup - e := echo.New() - req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(userJSON)) - req.Header.Set(echo.HeaderContentType, echo.MIMEApplicationJSON) - rec := httptest.NewRecorder() - c := e.NewContext(req, rec) - h := &handler{mockDB} - - // Assertions - if assert.NoError(t, h.createUser(c)) { - assert.Equal(t, http.StatusCreated, rec.Code) - assert.Equal(t, userJSON, rec.Body.String()) - } -} - -func TestGetUser(t *testing.T) { - // Setup - e := echo.New() - req := httptest.NewRequest(http.MethodGet, "/", nil) - rec := httptest.NewRecorder() - c := e.NewContext(req, rec) - c.SetPath("/users/:email") - c.SetParamNames("email") - c.SetParamValues("jon@labstack.com") - h := &handler{mockDB} - - // Assertions - if assert.NoError(t, h.getUser(c)) { - assert.Equal(t, http.StatusOK, rec.Code) - assert.Equal(t, userJSON, rec.Body.String()) - } -} -``` - -### Using Form Payload - -```go -// import "net/url" -f := make(url.Values) -f.Set("name", "Jon Snow") -f.Set("email", "jon@labstack.com") -req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(f.Encode())) -req.Header.Set(echo.HeaderContentType, echo.MIMEApplicationForm) -``` - -### Setting Path Params - -```go -c.SetParamNames("id", "email") -c.SetParamValues("1", "jon@labstack.com") -``` - -### Setting Query Params - -```go -// import "net/url" -q := make(url.Values) -q.Set("email", "jon@labstack.com") -req := httptest.NewRequest(http.MethodGet, "/?"+q.Encode(), nil) -``` - -## Testing Middleware - -*TBD* - -For now you can look into built-in middleware [test cases](https://github.com/labstack/echo/tree/master/middleware).