mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-24 17:14:30 +02:00 
			
		
		
		
	Make indexer code more reusable (#2590)
This commit is contained in:
		
							parent
							
								
									0b0d85c90d
								
							
						
					
					
						commit
						fa28de820e
					
				| @ -25,6 +25,7 @@ func InitIssueIndexer() { | |||||||
| 
 | 
 | ||||||
| // populateIssueIndexer populate the issue indexer with issue data | // populateIssueIndexer populate the issue indexer with issue data | ||||||
| func populateIssueIndexer() error { | func populateIssueIndexer() error { | ||||||
|  | 	batch := indexer.IssueIndexerBatch() | ||||||
| 	for page := 1; ; page++ { | 	for page := 1; ; page++ { | ||||||
| 		repos, _, err := Repositories(&SearchRepoOptions{ | 		repos, _, err := Repositories(&SearchRepoOptions{ | ||||||
| 			Page:     page, | 			Page:     page, | ||||||
| @ -34,7 +35,7 @@ func populateIssueIndexer() error { | |||||||
| 			return fmt.Errorf("Repositories: %v", err) | 			return fmt.Errorf("Repositories: %v", err) | ||||||
| 		} | 		} | ||||||
| 		if len(repos) == 0 { | 		if len(repos) == 0 { | ||||||
| 			return nil | 			return batch.Flush() | ||||||
| 		} | 		} | ||||||
| 		for _, repo := range repos { | 		for _, repo := range repos { | ||||||
| 			issues, err := Issues(&IssuesOptions{ | 			issues, err := Issues(&IssuesOptions{ | ||||||
| @ -42,29 +43,37 @@ func populateIssueIndexer() error { | |||||||
| 				IsClosed: util.OptionalBoolNone, | 				IsClosed: util.OptionalBoolNone, | ||||||
| 				IsPull:   util.OptionalBoolNone, | 				IsPull:   util.OptionalBoolNone, | ||||||
| 			}) | 			}) | ||||||
| 			updates := make([]indexer.IssueIndexerUpdate, len(issues)) | 			if err != nil { | ||||||
| 			for i, issue := range issues { | 				return err | ||||||
| 				updates[i] = issue.update() |  | ||||||
| 			} | 			} | ||||||
| 			if err = indexer.BatchUpdateIssues(updates...); err != nil { | 			for _, issue := range issues { | ||||||
| 				return fmt.Errorf("BatchUpdate: %v", err) | 				if err := batch.Add(issue.update()); err != nil { | ||||||
|  | 					return err | ||||||
|  | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| func processIssueIndexerUpdateQueue() { | func processIssueIndexerUpdateQueue() { | ||||||
|  | 	batch := indexer.IssueIndexerBatch() | ||||||
| 	for { | 	for { | ||||||
|  | 		var issueID int64 | ||||||
| 		select { | 		select { | ||||||
| 		case issueID := <-issueIndexerUpdateQueue: | 		case issueID = <-issueIndexerUpdateQueue: | ||||||
| 			issue, err := GetIssueByID(issueID) | 		default: | ||||||
| 			if err != nil { | 			// flush whatever updates we currently have, since we | ||||||
| 				log.Error(4, "issuesIndexer.Index: %v", err) | 			// might have to wait a while | ||||||
| 				continue | 			if err := batch.Flush(); err != nil { | ||||||
| 			} | 				log.Error(4, "IssueIndexer: %v", err) | ||||||
| 			if err = indexer.UpdateIssue(issue.update()); err != nil { |  | ||||||
| 				log.Error(4, "issuesIndexer.Index: %v", err) |  | ||||||
| 			} | 			} | ||||||
|  | 			issueID = <-issueIndexerUpdateQueue | ||||||
|  | 		} | ||||||
|  | 		issue, err := GetIssueByID(issueID) | ||||||
|  | 		if err != nil { | ||||||
|  | 			log.Error(4, "GetIssueByID: %v", err) | ||||||
|  | 		} else if err = batch.Add(issue.update()); err != nil { | ||||||
|  | 			log.Error(4, "IssueIndexer: %v", err) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  | |||||||
| @ -9,6 +9,8 @@ import ( | |||||||
| 	"strconv" | 	"strconv" | ||||||
| 
 | 
 | ||||||
| 	"github.com/blevesearch/bleve" | 	"github.com/blevesearch/bleve" | ||||||
|  | 	"github.com/blevesearch/bleve/analysis/token/unicodenorm" | ||||||
|  | 	"github.com/blevesearch/bleve/mapping" | ||||||
| 	"github.com/blevesearch/bleve/search/query" | 	"github.com/blevesearch/bleve/search/query" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| @ -41,3 +43,50 @@ func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhrase | |||||||
| 	q.Analyzer = analyzer | 	q.Analyzer = analyzer | ||||||
| 	return q | 	return q | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | const unicodeNormalizeName = "unicodeNormalize" | ||||||
|  | 
 | ||||||
|  | func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { | ||||||
|  | 	return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]interface{}{ | ||||||
|  | 		"type": unicodenorm.Name, | ||||||
|  | 		"form": unicodenorm.NFC, | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Update represents an update to an indexer | ||||||
|  | type Update interface { | ||||||
|  | 	addToBatch(batch *bleve.Batch) error | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | const maxBatchSize = 16 | ||||||
|  | 
 | ||||||
|  | // Batch batch of indexer updates that automatically flushes once it | ||||||
|  | // reaches a certain size | ||||||
|  | type Batch struct { | ||||||
|  | 	batch *bleve.Batch | ||||||
|  | 	index bleve.Index | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Add add update to batch, possibly flushing | ||||||
|  | func (batch *Batch) Add(update Update) error { | ||||||
|  | 	if err := update.addToBatch(batch.batch); err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	return batch.flushIfFull() | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | func (batch *Batch) flushIfFull() error { | ||||||
|  | 	if batch.batch.Size() >= maxBatchSize { | ||||||
|  | 		return batch.Flush() | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // Flush manually flush the batch, regardless of its size | ||||||
|  | func (batch *Batch) Flush() error { | ||||||
|  | 	if err := batch.index.Batch(batch.batch); err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	batch.batch.Reset() | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  | |||||||
| @ -13,7 +13,6 @@ import ( | |||||||
| 	"github.com/blevesearch/bleve" | 	"github.com/blevesearch/bleve" | ||||||
| 	"github.com/blevesearch/bleve/analysis/analyzer/custom" | 	"github.com/blevesearch/bleve/analysis/analyzer/custom" | ||||||
| 	"github.com/blevesearch/bleve/analysis/token/lowercase" | 	"github.com/blevesearch/bleve/analysis/token/lowercase" | ||||||
| 	"github.com/blevesearch/bleve/analysis/token/unicodenorm" |  | ||||||
| 	"github.com/blevesearch/bleve/analysis/tokenizer/unicode" | 	"github.com/blevesearch/bleve/analysis/tokenizer/unicode" | ||||||
| 	"github.com/blevesearch/bleve/index/upsidedown" | 	"github.com/blevesearch/bleve/index/upsidedown" | ||||||
| ) | ) | ||||||
| @ -35,6 +34,10 @@ type IssueIndexerUpdate struct { | |||||||
| 	Data    *IssueIndexerData | 	Data    *IssueIndexerData | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | func (update IssueIndexerUpdate) addToBatch(batch *bleve.Batch) error { | ||||||
|  | 	return batch.Index(indexerID(update.IssueID), update.Data) | ||||||
|  | } | ||||||
|  | 
 | ||||||
| const issueIndexerAnalyzer = "issueIndexer" | const issueIndexerAnalyzer = "issueIndexer" | ||||||
| 
 | 
 | ||||||
| // InitIssueIndexer initialize issue indexer | // InitIssueIndexer initialize issue indexer | ||||||
| @ -74,17 +77,13 @@ func createIssueIndexer() error { | |||||||
| 	docMapping.AddFieldMappingsAt("Content", textFieldMapping) | 	docMapping.AddFieldMappingsAt("Content", textFieldMapping) | ||||||
| 	docMapping.AddFieldMappingsAt("Comments", textFieldMapping) | 	docMapping.AddFieldMappingsAt("Comments", textFieldMapping) | ||||||
| 
 | 
 | ||||||
| 	const unicodeNormNFC = "unicodeNormNFC" | 	if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { | ||||||
| 	if err := mapping.AddCustomTokenFilter(unicodeNormNFC, map[string]interface{}{ |  | ||||||
| 		"type": unicodenorm.Name, |  | ||||||
| 		"form": unicodenorm.NFC, |  | ||||||
| 	}); err != nil { |  | ||||||
| 		return err | 		return err | ||||||
| 	} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ | 	} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ | ||||||
| 		"type":          custom.Name, | 		"type":          custom.Name, | ||||||
| 		"char_filters":  []string{}, | 		"char_filters":  []string{}, | ||||||
| 		"tokenizer":     unicode.Name, | 		"tokenizer":     unicode.Name, | ||||||
| 		"token_filters": []string{unicodeNormNFC, lowercase.Name}, | 		"token_filters": []string{unicodeNormalizeName, lowercase.Name}, | ||||||
| 	}); err != nil { | 	}); err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| @ -97,21 +96,12 @@ func createIssueIndexer() error { | |||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // UpdateIssue update the issue indexer | // IssueIndexerBatch batch to add updates to | ||||||
| func UpdateIssue(update IssueIndexerUpdate) error { | func IssueIndexerBatch() *Batch { | ||||||
| 	return issueIndexer.Index(indexerID(update.IssueID), update.Data) | 	return &Batch{ | ||||||
| } | 		batch: issueIndexer.NewBatch(), | ||||||
| 
 | 		index: issueIndexer, | ||||||
| // BatchUpdateIssues perform a batch update of the issue indexer |  | ||||||
| func BatchUpdateIssues(updates ...IssueIndexerUpdate) error { |  | ||||||
| 	batch := issueIndexer.NewBatch() |  | ||||||
| 	for _, update := range updates { |  | ||||||
| 		err := batch.Index(indexerID(update.IssueID), update.Data) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return err |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| 	return issueIndexer.Batch(batch) |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| // SearchIssuesByKeyword searches for issues by given conditions. | // SearchIssuesByKeyword searches for issues by given conditions. | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user