Make repository indexer tokenize by camel case selectable

This commit is contained in:
Guillermo Prandi 2019-08-03 19:15:48 -03:00
parent 6c0c5c5310
commit a8d4e40af5
4 changed files with 28 additions and 12 deletions

View file

@ -296,6 +296,9 @@ REPO_INDEXER_ENABLED = false
REPO_INDEXER_PATH = indexers/repos.bleve
UPDATE_BUFFER_LEN = 20
MAX_FILE_SIZE = 1048576
; Break camel case names into separate words for indexing.
; It's imperative to delete any previous indexes from REPO_INDEXER_PATH after changing this setting.
REPO_INDEXER_CAMEL_CASE = true
[admin]
; Disallow regular (non-admin) users from creating organizations.

View file

@ -179,6 +179,8 @@ Values containing `#` or `;` must be quoted using `` ` `` or `"""`.
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.
- `REPO_INDEXER_CAMEL_CASE`: **true**: When `REPO_INDEXER_CAMEL_CASE` is true, repository indexer will break camel case into words, so thisCameCaseName will be indexed as this, camel, case, name. It's imperative to delete any previous indexes from REPO_INDEXER_PATH after changing this setting.
## Security (`security`)

View file

@ -107,11 +107,20 @@ func createRepoIndexer(path string, latestVersion int) error {
mapping := bleve.NewIndexMapping()
if err = addUnicodeNormalizeTokenFilter(mapping); err != nil {
return err
} else if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
}
var tokenFilters []string
if setting.Indexer.RepoUseCamelCaseTokenizer {
tokenFilters = []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name}
} else {
tokenFilters = []string{unicodeNormalizeName, lowercase.Name, unique.Name}
}
if err = mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
"type": custom.Name,
"char_filters": []string{},
"tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name, unique.Name},
"token_filters": tokenFilters,
}); err != nil {
return err
}

View file

@ -29,6 +29,7 @@ var (
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
RepoUseCamelCaseTokenizer bool
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
@ -53,6 +54,7 @@ func newIndexerService() {
}
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.RepoUseCamelCaseTokenizer = sec.Key("REPO_INDEXER_CAMEL_CASE").MustBool(true)
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
Indexer.IssueQueueDir = sec.Key("ISSUE_INDEXER_QUEUE_DIR").MustString(path.Join(AppDataPath, "indexers/issues.queue"))
Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))