Improve code search + tests (#663)

Signed-off-by: Thomas Miceli <tho.miceli@gmail.com>
Co-authored-by: Qiang Zhou <zhouqiang.loaded@bytedance.com>
Co-authored-by: theodoruszq <theodoruszq@gmail.com>
This commit is contained in:
Thomas Miceli
2026-03-13 10:16:10 +07:00
committed by GitHub
parent 279da52899
commit e91139d3ec
8 changed files with 1066 additions and 2109 deletions

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"strconv"
"strings"
"unicode"
"github.com/meilisearch/meilisearch-go"
"github.com/rs/zerolog/log"
@@ -51,23 +52,25 @@ func (i *MeiliIndexer) open() (meilisearch.IndexManager, error) {
i.client = meilisearch.New(i.host, meilisearch.WithAPIKey(i.apikey))
indexResult, err := i.client.GetIndex(i.indexName)
if indexResult != nil && err == nil {
return indexResult.IndexManager, nil
}
_, err = i.client.CreateIndex(&meilisearch.IndexConfig{
Uid: i.indexName,
PrimaryKey: "GistID",
})
if err != nil {
return nil, err
if indexResult == nil || err != nil {
_, err = i.client.CreateIndex(&meilisearch.IndexConfig{
Uid: i.indexName,
PrimaryKey: "GistID",
})
if err != nil {
return nil, err
}
}
_, _ = i.client.Index(i.indexName).UpdateSettings(&meilisearch.Settings{
FilterableAttributes: []string{"GistID", "UserID", "Visibility", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"},
DisplayedAttributes: []string{"GistID"},
SearchableAttributes: []string{"Content", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"},
RankingRules: []string{"words"},
FilterableAttributes: []string{"GistID", "UserID", "Visibility", "Username", "Extensions", "Languages", "Topics"},
SearchableAttributes: []string{"Content", "ContentSplit", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"},
RankingRules: []string{"words", "typo", "proximity", "attribute", "sort", "exactness"},
TypoTolerance: &meilisearch.TypoTolerance{
Enabled: true,
DisableOnNumbers: true,
MinWordSizeForTypos: meilisearch.MinWordSizeForTypos{OneTypo: 4, TwoTypos: 10},
},
})
return i.client.Index(i.indexName), nil
@@ -96,12 +99,21 @@ func (i *MeiliIndexer) Close() {
i.client = nil
}
type meiliGist struct {
Gist
ContentSplit string
}
func (i *MeiliIndexer) Add(gist *Gist) error {
if gist == nil {
return errors.New("failed to add nil gist to index")
}
doc := &meiliGist{
Gist: *gist,
ContentSplit: splitCamelCase(gist.Content),
}
primaryKey := "GistID"
_, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.AddDocuments(gist, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey})
_, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.AddDocuments(doc, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey})
return err
}
@@ -116,7 +128,8 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
Limit: 11,
AttributesToRetrieve: []string{"GistID", "Languages"},
Facets: []string{"Languages"},
AttributesToSearchOn: []string{"Content"},
AttributesToSearchOn: []string{"Content", "ContentSplit"},
MatchingStrategy: meilisearch.All,
}
var filters []string
@@ -127,46 +140,83 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
filters = append(filters, fmt.Sprintf("%s = \"%s\"", field, escapeFilterValue(value)))
}
}
addFilter("Username", queryMetadata.Username)
addFilter("Title", queryMetadata.Title)
addFilter("Description", queryMetadata.Description)
addFilter("Filenames", queryMetadata.Filename)
addFilter("Extensions", queryMetadata.Extension)
addFilter("Languages", queryMetadata.Language)
addFilter("Topics", queryMetadata.Topic)
var query string
if queryMetadata.All != "" {
query = queryMetadata.All
searchRequest.AttributesToSearchOn = append(AllSearchFields, "ContentSplit")
} else {
// Exact-match fields stay as filters
addFilter("Username", queryMetadata.Username)
if queryMetadata.Extension != "" {
ext := queryMetadata.Extension
if !strings.HasPrefix(ext, ".") {
ext = "." + ext
}
addFilter("Extensions", ext)
}
addFilter("Languages", queryMetadata.Language)
addFilter("Topics", queryMetadata.Topic)
if queryMetadata.Default != "" {
query = queryMetadata.Default
var fields []string
for _, f := range strings.Split(config.C.SearchDefault, ",") {
f = strings.TrimSpace(f)
if f == "all" {
fields = AllSearchFields
break
}
if indexField, ok := SearchFieldMap[f]; ok {
fields = append(fields, indexField)
}
}
if len(fields) > 0 {
for _, f := range fields {
if f == "Content" {
fields = append(fields, "ContentSplit")
break
}
}
searchRequest.AttributesToSearchOn = fields
}
} else {
// Fuzzy-matchable fields become part of the query
var queryParts []string
var searchFields []string
if queryMetadata.Content != "" {
queryParts = append(queryParts, queryMetadata.Content)
searchFields = append(searchFields, "Content", "ContentSplit")
}
if queryMetadata.Title != "" {
queryParts = append(queryParts, queryMetadata.Title)
searchFields = append(searchFields, "Title")
}
if queryMetadata.Description != "" {
queryParts = append(queryParts, queryMetadata.Description)
searchFields = append(searchFields, "Description")
}
if queryMetadata.Filename != "" {
queryParts = append(queryParts, queryMetadata.Filename)
searchFields = append(searchFields, "Filenames")
}
query = strings.Join(queryParts, " ")
if len(searchFields) > 0 {
searchRequest.AttributesToSearchOn = searchFields
}
}
}
if len(filters) > 0 {
searchRequest.Filter = strings.Join(filters, " AND ")
}
// build query string from provided metadata. Prefer `All`, then `Default`, fall back to `Content`.
query := queryMetadata.All
if query == "" && queryMetadata.Default != "" {
query = queryMetadata.Default
var fields []string
for _, f := range strings.Split(config.C.SearchDefault, ",") {
f = strings.TrimSpace(f)
if f == "all" {
fields = AllSearchFields
break
}
if indexField, ok := SearchFieldMap[f]; ok {
fields = append(fields, indexField)
}
}
if len(fields) > 0 {
searchRequest.AttributesToSearchOn = fields
}
} else if query == "" {
query = queryMetadata.Content
}
response, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.Search(query, searchRequest)
if err != nil {
log.Error().Err(err).Msg("Failed to search Meilisearch index")
return nil, 0, nil, err
}
gistIds := make([]uint, 0, len(response.Hits))
for _, hit := range response.Hits {
if gistIDRaw, ok := hit["GistID"]; ok {
@@ -182,7 +232,9 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
var facetDist map[string]map[string]int
if err := json.Unmarshal(response.FacetDistribution, &facetDist); err == nil {
if facets, ok := facetDist["Languages"]; ok {
languageCounts = facets
for lang, count := range facets {
languageCounts[strings.ToLower(lang)] += count
}
}
}
}
@@ -190,6 +242,30 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
return gistIds, uint64(response.EstimatedTotalHits), languageCounts, nil
}
func splitCamelCase(text string) string {
var result strings.Builder
runes := []rune(text)
for i := 0; i < len(runes); i++ {
r := runes[i]
if i > 0 {
prev := runes[i-1]
if unicode.IsUpper(r) {
if unicode.IsLower(prev) || unicode.IsDigit(prev) {
result.WriteRune(' ')
} else if unicode.IsUpper(prev) && i+1 < len(runes) && unicode.IsLower(runes[i+1]) {
result.WriteRune(' ')
}
} else if unicode.IsDigit(r) && !unicode.IsDigit(prev) {
result.WriteRune(' ')
} else if !unicode.IsDigit(r) && unicode.IsDigit(prev) {
result.WriteRune(' ')
}
}
result.WriteRune(r)
}
return result.String()
}
func escapeFilterValue(value string) string {
escaped := strings.ReplaceAll(value, "\\", "\\\\")
escaped = strings.ReplaceAll(escaped, "\"", "\\\"")