Improve code search + tests (#663)
Signed-off-by: Thomas Miceli <tho.miceli@gmail.com>
Co-authored-by: Qiang Zhou <zhouqiang.loaded@bytedance.com>
Co-authored-by: theodoruszq <theodoruszq@gmail.com>
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/meilisearch/meilisearch-go"
|
||||
"github.com/rs/zerolog/log"
|
||||
@@ -51,23 +52,25 @@ func (i *MeiliIndexer) open() (meilisearch.IndexManager, error) {
|
||||
i.client = meilisearch.New(i.host, meilisearch.WithAPIKey(i.apikey))
|
||||
indexResult, err := i.client.GetIndex(i.indexName)
|
||||
|
||||
if indexResult != nil && err == nil {
|
||||
return indexResult.IndexManager, nil
|
||||
}
|
||||
|
||||
_, err = i.client.CreateIndex(&meilisearch.IndexConfig{
|
||||
Uid: i.indexName,
|
||||
PrimaryKey: "GistID",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if indexResult == nil || err != nil {
|
||||
_, err = i.client.CreateIndex(&meilisearch.IndexConfig{
|
||||
Uid: i.indexName,
|
||||
PrimaryKey: "GistID",
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
_, _ = i.client.Index(i.indexName).UpdateSettings(&meilisearch.Settings{
|
||||
FilterableAttributes: []string{"GistID", "UserID", "Visibility", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"},
|
||||
DisplayedAttributes: []string{"GistID"},
|
||||
SearchableAttributes: []string{"Content", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"},
|
||||
RankingRules: []string{"words"},
|
||||
FilterableAttributes: []string{"GistID", "UserID", "Visibility", "Username", "Extensions", "Languages", "Topics"},
|
||||
SearchableAttributes: []string{"Content", "ContentSplit", "Username", "Title", "Description", "Filenames", "Extensions", "Languages", "Topics"},
|
||||
RankingRules: []string{"words", "typo", "proximity", "attribute", "sort", "exactness"},
|
||||
TypoTolerance: &meilisearch.TypoTolerance{
|
||||
Enabled: true,
|
||||
DisableOnNumbers: true,
|
||||
MinWordSizeForTypos: meilisearch.MinWordSizeForTypos{OneTypo: 4, TwoTypos: 10},
|
||||
},
|
||||
})
|
||||
|
||||
return i.client.Index(i.indexName), nil
|
||||
@@ -96,12 +99,21 @@ func (i *MeiliIndexer) Close() {
|
||||
i.client = nil
|
||||
}
|
||||
|
||||
// meiliGist is the document shape sent to Meilisearch by Add: the original
// Gist plus one extra derived field.
type meiliGist struct {
|
||||
// Gist is embedded so all of its fields are part of the document.
Gist
|
||||
// ContentSplit is filled by Add with splitCamelCase(gist.Content); it is
// listed next to "Content" in the searchable attributes.
ContentSplit string
|
||||
}
|
||||
|
||||
// Add sends gist to the Meilisearch index as a document keyed by "GistID".
// It returns an error when gist is nil; otherwise it returns whatever error
// AddDocuments reports.
func (i *MeiliIndexer) Add(gist *Gist) error {
|
||||
if gist == nil {
|
||||
return errors.New("failed to add nil gist to index")
|
||||
}
|
||||
// Wrap the gist so the document also carries the camelCase-split content.
doc := &meiliGist{
|
||||
Gist: *gist,
|
||||
ContentSplit: splitCamelCase(gist.Content),
|
||||
}
|
||||
primaryKey := "GistID"
|
||||
// NOTE(review): this call (passing gist) and the one below (passing doc) both
// appear in this rendering; presumably this is the removed pre-change line of
// the diff and the next one is its replacement — confirm against the repository.
_, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.AddDocuments(gist, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey})
|
||||
// The call goes through the indexer currently stored in atomicIndexer rather
// than through the receiver i.
_, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.AddDocuments(doc, &meilisearch.DocumentOptions{PrimaryKey: &primaryKey})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -116,7 +128,8 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
|
||||
Limit: 11,
|
||||
AttributesToRetrieve: []string{"GistID", "Languages"},
|
||||
Facets: []string{"Languages"},
|
||||
AttributesToSearchOn: []string{"Content"},
|
||||
AttributesToSearchOn: []string{"Content", "ContentSplit"},
|
||||
MatchingStrategy: meilisearch.All,
|
||||
}
|
||||
|
||||
var filters []string
|
||||
@@ -127,46 +140,83 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
|
||||
filters = append(filters, fmt.Sprintf("%s = \"%s\"", field, escapeFilterValue(value)))
|
||||
}
|
||||
}
|
||||
addFilter("Username", queryMetadata.Username)
|
||||
addFilter("Title", queryMetadata.Title)
|
||||
addFilter("Description", queryMetadata.Description)
|
||||
addFilter("Filenames", queryMetadata.Filename)
|
||||
addFilter("Extensions", queryMetadata.Extension)
|
||||
addFilter("Languages", queryMetadata.Language)
|
||||
addFilter("Topics", queryMetadata.Topic)
|
||||
var query string
|
||||
if queryMetadata.All != "" {
|
||||
query = queryMetadata.All
|
||||
searchRequest.AttributesToSearchOn = append(AllSearchFields, "ContentSplit")
|
||||
} else {
|
||||
// Exact-match fields stay as filters
|
||||
addFilter("Username", queryMetadata.Username)
|
||||
if queryMetadata.Extension != "" {
|
||||
ext := queryMetadata.Extension
|
||||
if !strings.HasPrefix(ext, ".") {
|
||||
ext = "." + ext
|
||||
}
|
||||
addFilter("Extensions", ext)
|
||||
}
|
||||
addFilter("Languages", queryMetadata.Language)
|
||||
addFilter("Topics", queryMetadata.Topic)
|
||||
|
||||
if queryMetadata.Default != "" {
|
||||
query = queryMetadata.Default
|
||||
var fields []string
|
||||
for _, f := range strings.Split(config.C.SearchDefault, ",") {
|
||||
f = strings.TrimSpace(f)
|
||||
if f == "all" {
|
||||
fields = AllSearchFields
|
||||
break
|
||||
}
|
||||
if indexField, ok := SearchFieldMap[f]; ok {
|
||||
fields = append(fields, indexField)
|
||||
}
|
||||
}
|
||||
if len(fields) > 0 {
|
||||
for _, f := range fields {
|
||||
if f == "Content" {
|
||||
fields = append(fields, "ContentSplit")
|
||||
break
|
||||
}
|
||||
}
|
||||
searchRequest.AttributesToSearchOn = fields
|
||||
}
|
||||
} else {
|
||||
// Fuzzy-matchable fields become part of the query
|
||||
var queryParts []string
|
||||
var searchFields []string
|
||||
|
||||
if queryMetadata.Content != "" {
|
||||
queryParts = append(queryParts, queryMetadata.Content)
|
||||
searchFields = append(searchFields, "Content", "ContentSplit")
|
||||
}
|
||||
if queryMetadata.Title != "" {
|
||||
queryParts = append(queryParts, queryMetadata.Title)
|
||||
searchFields = append(searchFields, "Title")
|
||||
}
|
||||
if queryMetadata.Description != "" {
|
||||
queryParts = append(queryParts, queryMetadata.Description)
|
||||
searchFields = append(searchFields, "Description")
|
||||
}
|
||||
if queryMetadata.Filename != "" {
|
||||
queryParts = append(queryParts, queryMetadata.Filename)
|
||||
searchFields = append(searchFields, "Filenames")
|
||||
}
|
||||
|
||||
query = strings.Join(queryParts, " ")
|
||||
if len(searchFields) > 0 {
|
||||
searchRequest.AttributesToSearchOn = searchFields
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(filters) > 0 {
|
||||
searchRequest.Filter = strings.Join(filters, " AND ")
|
||||
}
|
||||
|
||||
// build query string from provided metadata. Prefer `All`, then `Default`, fall back to `Content`.
|
||||
query := queryMetadata.All
|
||||
if query == "" && queryMetadata.Default != "" {
|
||||
query = queryMetadata.Default
|
||||
var fields []string
|
||||
for _, f := range strings.Split(config.C.SearchDefault, ",") {
|
||||
f = strings.TrimSpace(f)
|
||||
if f == "all" {
|
||||
fields = AllSearchFields
|
||||
break
|
||||
}
|
||||
if indexField, ok := SearchFieldMap[f]; ok {
|
||||
fields = append(fields, indexField)
|
||||
}
|
||||
}
|
||||
if len(fields) > 0 {
|
||||
searchRequest.AttributesToSearchOn = fields
|
||||
}
|
||||
} else if query == "" {
|
||||
query = queryMetadata.Content
|
||||
}
|
||||
|
||||
response, err := (*atomicIndexer.Load()).(*MeiliIndexer).index.Search(query, searchRequest)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("Failed to search Meilisearch index")
|
||||
return nil, 0, nil, err
|
||||
}
|
||||
|
||||
gistIds := make([]uint, 0, len(response.Hits))
|
||||
for _, hit := range response.Hits {
|
||||
if gistIDRaw, ok := hit["GistID"]; ok {
|
||||
@@ -182,7 +232,9 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
|
||||
var facetDist map[string]map[string]int
|
||||
if err := json.Unmarshal(response.FacetDistribution, &facetDist); err == nil {
|
||||
if facets, ok := facetDist["Languages"]; ok {
|
||||
languageCounts = facets
|
||||
for lang, count := range facets {
|
||||
languageCounts[strings.ToLower(lang)] += count
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -190,6 +242,30 @@ func (i *MeiliIndexer) Search(queryMetadata SearchGistMetadata, userId uint, pag
|
||||
return gistIds, uint64(response.EstimatedTotalHits), languageCounts, nil
|
||||
}
|
||||
|
||||
// splitCamelCase returns text with a single space inserted at identifier
// word boundaries, so that "getHTTPResponse2" becomes "get HTTP Response 2".
//
// A space is inserted before a rune when:
//   - it is uppercase and the previous rune is lowercase or a digit;
//   - it is uppercase, the previous rune is uppercase and the next rune is
//     lowercase (the last capital of an acronym starts the next word);
//   - exactly one of it and the previous rune is a digit. This applies to any
//     neighbour, not only letters, so "1.5" becomes "1 . 5".
//
// No other rune is added or removed. Bytes that are not valid UTF-8 are
// replaced by U+FFFD as a result of the []rune conversion.
func splitCamelCase(text string) string {
	if text == "" {
		return ""
	}

	// Random access to the previous and next rune is needed, so decode once.
	runes := []rune(text)

	var b strings.Builder
	// The result is never shorter than the input; reserving that much up
	// front avoids repeated buffer growth on large file contents.
	b.Grow(len(text))

	for i, r := range runes {
		if i > 0 {
			prev := runes[i-1]

			var boundary bool
			if unicode.IsUpper(r) {
				boundary = unicode.IsLower(prev) || unicode.IsDigit(prev) ||
					(unicode.IsUpper(prev) && i+1 < len(runes) && unicode.IsLower(runes[i+1]))
			} else {
				// Digit on exactly one side of the boundary.
				boundary = unicode.IsDigit(r) != unicode.IsDigit(prev)
			}

			if boundary {
				b.WriteByte(' ')
			}
		}
		b.WriteRune(r)
	}
	return b.String()
}
|
||||
|
||||
func escapeFilterValue(value string) string {
|
||||
escaped := strings.ReplaceAll(value, "\\", "\\\\")
|
||||
escaped = strings.ReplaceAll(escaped, "\"", "\\\"")
|
||||
|
||||
Reference in New Issue
Block a user