mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-25 01:24:13 +02:00 
			
		
		
		
	* Update blevesearch v0.8.1 -> v1.0.7 * make vendor Co-authored-by: zeripath <art27@cantab.net>
		
			
				
	
	
		
			911 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			911 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
| //  Copyright (c) 2017 Couchbase, Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| // 		http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package zap
 | |
| 
 | |
| import (
 | |
| 	"encoding/binary"
 | |
| 	"fmt"
 | |
| 	"math"
 | |
| 	"reflect"
 | |
| 
 | |
| 	"github.com/RoaringBitmap/roaring"
 | |
| 	"github.com/blevesearch/bleve/index/scorch/segment"
 | |
| 	"github.com/blevesearch/bleve/size"
 | |
| )
 | |
| 
 | |
| var reflectStaticSizePostingsList int
 | |
| var reflectStaticSizePostingsIterator int
 | |
| var reflectStaticSizePosting int
 | |
| var reflectStaticSizeLocation int
 | |
| 
 | |
| func init() {
 | |
| 	var pl PostingsList
 | |
| 	reflectStaticSizePostingsList = int(reflect.TypeOf(pl).Size())
 | |
| 	var pi PostingsIterator
 | |
| 	reflectStaticSizePostingsIterator = int(reflect.TypeOf(pi).Size())
 | |
| 	var p Posting
 | |
| 	reflectStaticSizePosting = int(reflect.TypeOf(p).Size())
 | |
| 	var l Location
 | |
| 	reflectStaticSizeLocation = int(reflect.TypeOf(l).Size())
 | |
| }
 | |
| 
 | |
| // FST or vellum value (uint64) encoding is determined by the top two
 | |
| // highest-order or most significant bits...
 | |
| //
 | |
| //  encoding  : MSB
 | |
| //  name      : 63  62  61...to...bit #0 (LSB)
 | |
| //  ----------+---+---+---------------------------------------------------
 | |
| //   general  : 0 | 0 | 62-bits of postingsOffset.
 | |
| //   ~        : 0 | 1 | reserved for future.
 | |
| //   1-hit    : 1 | 0 | 31-bits of positive float31 norm | 31-bits docNum.
 | |
| //   ~        : 1 | 1 | reserved for future.
 | |
| //
 | |
| // Encoding "general" is able to handle all cases, where the
 | |
| // postingsOffset points to more information about the postings for
 | |
| // the term.
 | |
| //
 | |
| // Encoding "1-hit" is used to optimize a commonly seen case when a
 | |
| // term has only a single hit.  For example, a term in the _id field
 | |
| // will have only 1 hit.  The "1-hit" encoding is used for a term
 | |
| // in a field when...
 | |
| //
 | |
| // - term vector info is disabled for that field;
 | |
| // - and, the term appears in only a single doc for that field;
 | |
| // - and, the term's freq is exactly 1 in that single doc for that field;
 | |
| // - and, the docNum must fit into 31-bits;
 | |
| //
 | |
| // Otherwise, the "general" encoding is used instead.
 | |
| //
 | |
| // In the "1-hit" encoding, the field in that single doc may have
 | |
| // other terms, which is supported in the "1-hit" encoding by the
 | |
| // positive float31 norm.
 | |
| 
 | |
| const FSTValEncodingMask = uint64(0xc000000000000000)
 | |
| const FSTValEncodingGeneral = uint64(0x0000000000000000)
 | |
| const FSTValEncoding1Hit = uint64(0x8000000000000000)
 | |
| 
 | |
| func FSTValEncode1Hit(docNum uint64, normBits uint64) uint64 {
 | |
| 	return FSTValEncoding1Hit | ((mask31Bits & normBits) << 31) | (mask31Bits & docNum)
 | |
| }
 | |
| 
 | |
| func FSTValDecode1Hit(v uint64) (docNum uint64, normBits uint64) {
 | |
| 	return (mask31Bits & v), (mask31Bits & (v >> 31))
 | |
| }
 | |
| 
 | |
| const mask31Bits = uint64(0x000000007fffffff)
 | |
| 
 | |
| func under32Bits(x uint64) bool {
 | |
| 	return x <= mask31Bits
 | |
| }
 | |
| 
 | |
| const DocNum1HitFinished = math.MaxUint64
 | |
| 
 | |
| var NormBits1Hit = uint64(math.Float32bits(float32(1)))
 | |
| 
 | |
| // PostingsList is an in-memory representation of a postings list
 | |
| type PostingsList struct {
 | |
| 	sb             *SegmentBase
 | |
| 	postingsOffset uint64
 | |
| 	freqOffset     uint64
 | |
| 	locOffset      uint64
 | |
| 	postings       *roaring.Bitmap
 | |
| 	except         *roaring.Bitmap
 | |
| 
 | |
| 	// when normBits1Hit != 0, then this postings list came from a
 | |
| 	// 1-hit encoding, and only the docNum1Hit & normBits1Hit apply
 | |
| 	docNum1Hit   uint64
 | |
| 	normBits1Hit uint64
 | |
| }
 | |
| 
 | |
| // represents an immutable, empty postings list
 | |
| var emptyPostingsList = &PostingsList{}
 | |
| 
 | |
| func (p *PostingsList) Size() int {
 | |
| 	sizeInBytes := reflectStaticSizePostingsList + size.SizeOfPtr
 | |
| 
 | |
| 	if p.except != nil {
 | |
| 		sizeInBytes += int(p.except.GetSizeInBytes())
 | |
| 	}
 | |
| 
 | |
| 	return sizeInBytes
 | |
| }
 | |
| 
 | |
| func (p *PostingsList) OrInto(receiver *roaring.Bitmap) {
 | |
| 	if p.normBits1Hit != 0 {
 | |
| 		receiver.Add(uint32(p.docNum1Hit))
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	if p.postings != nil {
 | |
| 		receiver.Or(p.postings)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // Iterator returns an iterator for this postings list
 | |
| func (p *PostingsList) Iterator(includeFreq, includeNorm, includeLocs bool,
 | |
| 	prealloc segment.PostingsIterator) segment.PostingsIterator {
 | |
| 	if p.normBits1Hit == 0 && p.postings == nil {
 | |
| 		return emptyPostingsIterator
 | |
| 	}
 | |
| 
 | |
| 	var preallocPI *PostingsIterator
 | |
| 	pi, ok := prealloc.(*PostingsIterator)
 | |
| 	if ok && pi != nil {
 | |
| 		preallocPI = pi
 | |
| 	}
 | |
| 	if preallocPI == emptyPostingsIterator {
 | |
| 		preallocPI = nil
 | |
| 	}
 | |
| 
 | |
| 	return p.iterator(includeFreq, includeNorm, includeLocs, preallocPI)
 | |
| }
 | |
| 
 | |
| func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
 | |
| 	rv *PostingsIterator) *PostingsIterator {
 | |
| 	if rv == nil {
 | |
| 		rv = &PostingsIterator{}
 | |
| 	} else {
 | |
| 		freqNormReader := rv.freqNormReader
 | |
| 		if freqNormReader != nil {
 | |
| 			freqNormReader.Reset([]byte(nil))
 | |
| 		}
 | |
| 
 | |
| 		locReader := rv.locReader
 | |
| 		if locReader != nil {
 | |
| 			locReader.Reset([]byte(nil))
 | |
| 		}
 | |
| 
 | |
| 		freqChunkOffsets := rv.freqChunkOffsets[:0]
 | |
| 		locChunkOffsets := rv.locChunkOffsets[:0]
 | |
| 
 | |
| 		nextLocs := rv.nextLocs[:0]
 | |
| 		nextSegmentLocs := rv.nextSegmentLocs[:0]
 | |
| 
 | |
| 		buf := rv.buf
 | |
| 
 | |
| 		*rv = PostingsIterator{} // clear the struct
 | |
| 
 | |
| 		rv.freqNormReader = freqNormReader
 | |
| 		rv.locReader = locReader
 | |
| 
 | |
| 		rv.freqChunkOffsets = freqChunkOffsets
 | |
| 		rv.locChunkOffsets = locChunkOffsets
 | |
| 
 | |
| 		rv.nextLocs = nextLocs
 | |
| 		rv.nextSegmentLocs = nextSegmentLocs
 | |
| 
 | |
| 		rv.buf = buf
 | |
| 	}
 | |
| 
 | |
| 	rv.postings = p
 | |
| 	rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
 | |
| 	rv.includeLocs = includeLocs
 | |
| 
 | |
| 	if p.normBits1Hit != 0 {
 | |
| 		// "1-hit" encoding
 | |
| 		rv.docNum1Hit = p.docNum1Hit
 | |
| 		rv.normBits1Hit = p.normBits1Hit
 | |
| 
 | |
| 		if p.except != nil && p.except.Contains(uint32(rv.docNum1Hit)) {
 | |
| 			rv.docNum1Hit = DocNum1HitFinished
 | |
| 		}
 | |
| 
 | |
| 		return rv
 | |
| 	}
 | |
| 
 | |
| 	// "general" encoding, check if empty
 | |
| 	if p.postings == nil {
 | |
| 		return rv
 | |
| 	}
 | |
| 
 | |
| 	var n uint64
 | |
| 	var read int
 | |
| 
 | |
| 	// prepare the freq chunk details
 | |
| 	if rv.includeFreqNorm {
 | |
| 		var numFreqChunks uint64
 | |
| 		numFreqChunks, read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
 | |
| 		n += uint64(read)
 | |
| 		if cap(rv.freqChunkOffsets) >= int(numFreqChunks) {
 | |
| 			rv.freqChunkOffsets = rv.freqChunkOffsets[:int(numFreqChunks)]
 | |
| 		} else {
 | |
| 			rv.freqChunkOffsets = make([]uint64, int(numFreqChunks))
 | |
| 		}
 | |
| 		for i := 0; i < int(numFreqChunks); i++ {
 | |
| 			rv.freqChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.freqOffset+n : p.freqOffset+n+binary.MaxVarintLen64])
 | |
| 			n += uint64(read)
 | |
| 		}
 | |
| 		rv.freqChunkStart = p.freqOffset + n
 | |
| 	}
 | |
| 
 | |
| 	// prepare the loc chunk details
 | |
| 	if rv.includeLocs {
 | |
| 		n = 0
 | |
| 		var numLocChunks uint64
 | |
| 		numLocChunks, read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
 | |
| 		n += uint64(read)
 | |
| 		if cap(rv.locChunkOffsets) >= int(numLocChunks) {
 | |
| 			rv.locChunkOffsets = rv.locChunkOffsets[:int(numLocChunks)]
 | |
| 		} else {
 | |
| 			rv.locChunkOffsets = make([]uint64, int(numLocChunks))
 | |
| 		}
 | |
| 		for i := 0; i < int(numLocChunks); i++ {
 | |
| 			rv.locChunkOffsets[i], read = binary.Uvarint(p.sb.mem[p.locOffset+n : p.locOffset+n+binary.MaxVarintLen64])
 | |
| 			n += uint64(read)
 | |
| 		}
 | |
| 		rv.locChunkStart = p.locOffset + n
 | |
| 	}
 | |
| 
 | |
| 	rv.all = p.postings.Iterator()
 | |
| 	if p.except != nil {
 | |
| 		rv.ActualBM = roaring.AndNot(p.postings, p.except)
 | |
| 		rv.Actual = rv.ActualBM.Iterator()
 | |
| 	} else {
 | |
| 		rv.ActualBM = p.postings
 | |
| 		rv.Actual = rv.all // Optimize to use same iterator for all & Actual.
 | |
| 	}
 | |
| 
 | |
| 	return rv
 | |
| }
 | |
| 
 | |
| // Count returns the number of items on this postings list
 | |
| func (p *PostingsList) Count() uint64 {
 | |
| 	var n, e uint64
 | |
| 	if p.normBits1Hit != 0 {
 | |
| 		n = 1
 | |
| 		if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
 | |
| 			e = 1
 | |
| 		}
 | |
| 	} else if p.postings != nil {
 | |
| 		n = p.postings.GetCardinality()
 | |
| 		if p.except != nil {
 | |
| 			e = p.postings.AndCardinality(p.except)
 | |
| 		}
 | |
| 	}
 | |
| 	return n - e
 | |
| }
 | |
| 
 | |
| func (rv *PostingsList) read(postingsOffset uint64, d *Dictionary) error {
 | |
| 	rv.postingsOffset = postingsOffset
 | |
| 
 | |
| 	// handle "1-hit" encoding special case
 | |
| 	if rv.postingsOffset&FSTValEncodingMask == FSTValEncoding1Hit {
 | |
| 		return rv.init1Hit(postingsOffset)
 | |
| 	}
 | |
| 
 | |
| 	// read the location of the freq/norm details
 | |
| 	var n uint64
 | |
| 	var read int
 | |
| 
 | |
| 	rv.freqOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+binary.MaxVarintLen64])
 | |
| 	n += uint64(read)
 | |
| 
 | |
| 	rv.locOffset, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
 | |
| 	n += uint64(read)
 | |
| 
 | |
| 	var postingsLen uint64
 | |
| 	postingsLen, read = binary.Uvarint(d.sb.mem[postingsOffset+n : postingsOffset+n+binary.MaxVarintLen64])
 | |
| 	n += uint64(read)
 | |
| 
 | |
| 	roaringBytes := d.sb.mem[postingsOffset+n : postingsOffset+n+postingsLen]
 | |
| 
 | |
| 	if rv.postings == nil {
 | |
| 		rv.postings = roaring.NewBitmap()
 | |
| 	}
 | |
| 	_, err := rv.postings.FromBuffer(roaringBytes)
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error loading roaring bitmap: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (rv *PostingsList) init1Hit(fstVal uint64) error {
 | |
| 	docNum, normBits := FSTValDecode1Hit(fstVal)
 | |
| 
 | |
| 	rv.docNum1Hit = docNum
 | |
| 	rv.normBits1Hit = normBits
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // PostingsIterator provides a way to iterate through the postings list
 | |
| type PostingsIterator struct {
 | |
| 	postings *PostingsList
 | |
| 	all      roaring.IntPeekable
 | |
| 	Actual   roaring.IntPeekable
 | |
| 	ActualBM *roaring.Bitmap
 | |
| 
 | |
| 	currChunk         uint32
 | |
| 	currChunkFreqNorm []byte
 | |
| 	currChunkLoc      []byte
 | |
| 
 | |
| 	freqNormReader *segment.MemUvarintReader
 | |
| 	locReader      *segment.MemUvarintReader
 | |
| 
 | |
| 	freqChunkOffsets []uint64
 | |
| 	freqChunkStart   uint64
 | |
| 
 | |
| 	locChunkOffsets []uint64
 | |
| 	locChunkStart   uint64
 | |
| 
 | |
| 	next            Posting            // reused across Next() calls
 | |
| 	nextLocs        []Location         // reused across Next() calls
 | |
| 	nextSegmentLocs []segment.Location // reused across Next() calls
 | |
| 
 | |
| 	docNum1Hit   uint64
 | |
| 	normBits1Hit uint64
 | |
| 
 | |
| 	buf []byte
 | |
| 
 | |
| 	includeFreqNorm bool
 | |
| 	includeLocs     bool
 | |
| }
 | |
| 
 | |
| var emptyPostingsIterator = &PostingsIterator{}
 | |
| 
 | |
| func (i *PostingsIterator) Size() int {
 | |
| 	sizeInBytes := reflectStaticSizePostingsIterator + size.SizeOfPtr +
 | |
| 		len(i.currChunkFreqNorm) +
 | |
| 		len(i.currChunkLoc) +
 | |
| 		len(i.freqChunkOffsets)*size.SizeOfUint64 +
 | |
| 		len(i.locChunkOffsets)*size.SizeOfUint64 +
 | |
| 		i.next.Size()
 | |
| 
 | |
| 	for _, entry := range i.nextLocs {
 | |
| 		sizeInBytes += entry.Size()
 | |
| 	}
 | |
| 
 | |
| 	return sizeInBytes
 | |
| }
 | |
| 
 | |
| func (i *PostingsIterator) loadChunk(chunk int) error {
 | |
| 	if i.includeFreqNorm {
 | |
| 		if chunk >= len(i.freqChunkOffsets) {
 | |
| 			return fmt.Errorf("tried to load freq chunk that doesn't exist %d/(%d)",
 | |
| 				chunk, len(i.freqChunkOffsets))
 | |
| 		}
 | |
| 
 | |
| 		end, start := i.freqChunkStart, i.freqChunkStart
 | |
| 		s, e := readChunkBoundary(chunk, i.freqChunkOffsets)
 | |
| 		start += s
 | |
| 		end += e
 | |
| 		i.currChunkFreqNorm = i.postings.sb.mem[start:end]
 | |
| 		if i.freqNormReader == nil {
 | |
| 			i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
 | |
| 		} else {
 | |
| 			i.freqNormReader.Reset(i.currChunkFreqNorm)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if i.includeLocs {
 | |
| 		if chunk >= len(i.locChunkOffsets) {
 | |
| 			return fmt.Errorf("tried to load loc chunk that doesn't exist %d/(%d)",
 | |
| 				chunk, len(i.locChunkOffsets))
 | |
| 		}
 | |
| 
 | |
| 		end, start := i.locChunkStart, i.locChunkStart
 | |
| 		s, e := readChunkBoundary(chunk, i.locChunkOffsets)
 | |
| 		start += s
 | |
| 		end += e
 | |
| 		i.currChunkLoc = i.postings.sb.mem[start:end]
 | |
| 		if i.locReader == nil {
 | |
| 			i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
 | |
| 		} else {
 | |
| 			i.locReader.Reset(i.currChunkLoc)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	i.currChunk = uint32(chunk)
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
 | |
| 	if i.normBits1Hit != 0 {
 | |
| 		return 1, i.normBits1Hit, false, nil
 | |
| 	}
 | |
| 
 | |
| 	freqHasLocs, err := i.freqNormReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
 | |
| 
 | |
| 	normBits, err := i.freqNormReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	return freq, normBits, hasLocs, nil
 | |
| }
 | |
| 
 | |
| func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
 | |
| 	if i.normBits1Hit != 0 {
 | |
| 		return false, nil
 | |
| 	}
 | |
| 
 | |
| 	freqHasLocs, err := i.freqNormReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return false, fmt.Errorf("error reading freqHasLocs: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	i.freqNormReader.SkipUvarint() // Skip normBits.
 | |
| 
 | |
| 	return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
 | |
| }
 | |
| 
 | |
| func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
 | |
| 	rv := freq << 1
 | |
| 	if hasLocs {
 | |
| 		rv = rv | 0x01 // 0'th LSB encodes whether there are locations
 | |
| 	}
 | |
| 	return rv
 | |
| }
 | |
| 
 | |
| func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
 | |
| 	freq := freqHasLocs >> 1
 | |
| 	hasLocs := freqHasLocs&0x01 != 0
 | |
| 	return freq, hasLocs
 | |
| }
 | |
| 
 | |
| // readLocation processes all the integers on the stream representing a single
 | |
| // location.
 | |
| func (i *PostingsIterator) readLocation(l *Location) error {
 | |
| 	// read off field
 | |
| 	fieldID, err := i.locReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error reading location field: %v", err)
 | |
| 	}
 | |
| 	// read off pos
 | |
| 	pos, err := i.locReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error reading location pos: %v", err)
 | |
| 	}
 | |
| 	// read off start
 | |
| 	start, err := i.locReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error reading location start: %v", err)
 | |
| 	}
 | |
| 	// read off end
 | |
| 	end, err := i.locReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error reading location end: %v", err)
 | |
| 	}
 | |
| 	// read off num array pos
 | |
| 	numArrayPos, err := i.locReader.ReadUvarint()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("error reading location num array pos: %v", err)
 | |
| 	}
 | |
| 
 | |
| 	l.field = i.postings.sb.fieldsInv[fieldID]
 | |
| 	l.pos = pos
 | |
| 	l.start = start
 | |
| 	l.end = end
 | |
| 
 | |
| 	if cap(l.ap) < int(numArrayPos) {
 | |
| 		l.ap = make([]uint64, int(numArrayPos))
 | |
| 	} else {
 | |
| 		l.ap = l.ap[:int(numArrayPos)]
 | |
| 	}
 | |
| 
 | |
| 	// read off array positions
 | |
| 	for k := 0; k < int(numArrayPos); k++ {
 | |
| 		ap, err := i.locReader.ReadUvarint()
 | |
| 		if err != nil {
 | |
| 			return fmt.Errorf("error reading array position: %v", err)
 | |
| 		}
 | |
| 
 | |
| 		l.ap[k] = ap
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Next returns the next posting on the postings list, or nil at the end
 | |
| func (i *PostingsIterator) Next() (segment.Posting, error) {
 | |
| 	return i.nextAtOrAfter(0)
 | |
| }
 | |
| 
 | |
| // Advance returns the posting at the specified docNum or it is not present
 | |
| // the next posting, or if the end is reached, nil
 | |
| func (i *PostingsIterator) Advance(docNum uint64) (segment.Posting, error) {
 | |
| 	return i.nextAtOrAfter(docNum)
 | |
| }
 | |
| 
 | |
| // Next returns the next posting on the postings list, or nil at the end
 | |
| func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
 | |
| 	docNum, exists, err := i.nextDocNumAtOrAfter(atOrAfter)
 | |
| 	if err != nil || !exists {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	i.next = Posting{} // clear the struct
 | |
| 	rv := &i.next
 | |
| 	rv.docNum = docNum
 | |
| 
 | |
| 	if !i.includeFreqNorm {
 | |
| 		return rv, nil
 | |
| 	}
 | |
| 
 | |
| 	var normBits uint64
 | |
| 	var hasLocs bool
 | |
| 
 | |
| 	rv.freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	rv.norm = math.Float32frombits(uint32(normBits))
 | |
| 
 | |
| 	if i.includeLocs && hasLocs {
 | |
| 		// prepare locations into reused slices, where we assume
 | |
| 		// rv.freq >= "number of locs", since in a composite field,
 | |
| 		// some component fields might have their IncludeTermVector
 | |
| 		// flags disabled while other component fields are enabled
 | |
| 		if cap(i.nextLocs) >= int(rv.freq) {
 | |
| 			i.nextLocs = i.nextLocs[0:rv.freq]
 | |
| 		} else {
 | |
| 			i.nextLocs = make([]Location, rv.freq, rv.freq*2)
 | |
| 		}
 | |
| 		if cap(i.nextSegmentLocs) < int(rv.freq) {
 | |
| 			i.nextSegmentLocs = make([]segment.Location, rv.freq, rv.freq*2)
 | |
| 		}
 | |
| 		rv.locs = i.nextSegmentLocs[:0]
 | |
| 
 | |
| 		numLocsBytes, err := i.locReader.ReadUvarint()
 | |
| 		if err != nil {
 | |
| 			return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
 | |
| 		}
 | |
| 
 | |
| 		j := 0
 | |
| 		startBytesRemaining := i.locReader.Len() // # bytes remaining in the locReader
 | |
| 		for startBytesRemaining-i.locReader.Len() < int(numLocsBytes) {
 | |
| 			err := i.readLocation(&i.nextLocs[j])
 | |
| 			if err != nil {
 | |
| 				return nil, err
 | |
| 			}
 | |
| 			rv.locs = append(rv.locs, &i.nextLocs[j])
 | |
| 			j++
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return rv, nil
 | |
| }
 | |
| 
 | |
| var freqHasLocs1Hit = encodeFreqHasLocs(1, false)
 | |
| 
 | |
| // nextBytes returns the docNum and the encoded freq & loc bytes for
 | |
| // the next posting
 | |
| func (i *PostingsIterator) nextBytes() (
 | |
| 	docNumOut uint64, freq uint64, normBits uint64,
 | |
| 	bytesFreqNorm []byte, bytesLoc []byte, err error) {
 | |
| 	docNum, exists, err := i.nextDocNumAtOrAfter(0)
 | |
| 	if err != nil || !exists {
 | |
| 		return 0, 0, 0, nil, nil, err
 | |
| 	}
 | |
| 
 | |
| 	if i.normBits1Hit != 0 {
 | |
| 		if i.buf == nil {
 | |
| 			i.buf = make([]byte, binary.MaxVarintLen64*2)
 | |
| 		}
 | |
| 		n := binary.PutUvarint(i.buf, freqHasLocs1Hit)
 | |
| 		n += binary.PutUvarint(i.buf[n:], i.normBits1Hit)
 | |
| 		return docNum, uint64(1), i.normBits1Hit, i.buf[:n], nil, nil
 | |
| 	}
 | |
| 
 | |
| 	startFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
 | |
| 
 | |
| 	var hasLocs bool
 | |
| 
 | |
| 	freq, normBits, hasLocs, err = i.readFreqNormHasLocs()
 | |
| 	if err != nil {
 | |
| 		return 0, 0, 0, nil, nil, err
 | |
| 	}
 | |
| 
 | |
| 	endFreqNorm := len(i.currChunkFreqNorm) - i.freqNormReader.Len()
 | |
| 	bytesFreqNorm = i.currChunkFreqNorm[startFreqNorm:endFreqNorm]
 | |
| 
 | |
| 	if hasLocs {
 | |
| 		startLoc := len(i.currChunkLoc) - i.locReader.Len()
 | |
| 
 | |
| 		numLocsBytes, err := i.locReader.ReadUvarint()
 | |
| 		if err != nil {
 | |
| 			return 0, 0, 0, nil, nil,
 | |
| 				fmt.Errorf("error reading location nextBytes numLocs: %v", err)
 | |
| 		}
 | |
| 
 | |
| 		// skip over all the location bytes
 | |
| 		i.locReader.SkipBytes(int(numLocsBytes))
 | |
| 
 | |
| 		endLoc := len(i.currChunkLoc) - i.locReader.Len()
 | |
| 		bytesLoc = i.currChunkLoc[startLoc:endLoc]
 | |
| 	}
 | |
| 
 | |
| 	return docNum, freq, normBits, bytesFreqNorm, bytesLoc, nil
 | |
| }
 | |
| 
 | |
| // nextDocNum returns the next docNum on the postings list, and also
 | |
| // sets up the currChunk / loc related fields of the iterator.
 | |
| func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool, error) {
 | |
| 	if i.normBits1Hit != 0 {
 | |
| 		if i.docNum1Hit == DocNum1HitFinished {
 | |
| 			return 0, false, nil
 | |
| 		}
 | |
| 		if i.docNum1Hit < atOrAfter {
 | |
| 			// advanced past our 1-hit
 | |
| 			i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
 | |
| 			return 0, false, nil
 | |
| 		}
 | |
| 		docNum := i.docNum1Hit
 | |
| 		i.docNum1Hit = DocNum1HitFinished // consume our 1-hit docNum
 | |
| 		return docNum, true, nil
 | |
| 	}
 | |
| 
 | |
| 	if i.Actual == nil || !i.Actual.HasNext() {
 | |
| 		return 0, false, nil
 | |
| 	}
 | |
| 
 | |
| 	if i.postings == nil || i.postings.postings == i.ActualBM {
 | |
| 		return i.nextDocNumAtOrAfterClean(atOrAfter)
 | |
| 	}
 | |
| 
 | |
| 	i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
 | |
| 
 | |
| 	if !i.Actual.HasNext() {
 | |
| 		// couldn't find anything
 | |
| 		return 0, false, nil
 | |
| 	}
 | |
| 
 | |
| 	n := i.Actual.Next()
 | |
| 	allN := i.all.Next()
 | |
| 
 | |
| 	nChunk := n / i.postings.sb.chunkFactor
 | |
| 
 | |
| 	// when allN becomes >= to here, then allN is in the same chunk as nChunk.
 | |
| 	allNReachesNChunk := nChunk * i.postings.sb.chunkFactor
 | |
| 
 | |
| 	// n is the next actual hit (excluding some postings), and
 | |
| 	// allN is the next hit in the full postings, and
 | |
| 	// if they don't match, move 'all' forwards until they do
 | |
| 	for allN != n {
 | |
| 		// we've reached same chunk, so move the freq/norm/loc decoders forward
 | |
| 		if i.includeFreqNorm && allN >= allNReachesNChunk {
 | |
| 			err := i.currChunkNext(nChunk)
 | |
| 			if err != nil {
 | |
| 				return 0, false, err
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		allN = i.all.Next()
 | |
| 	}
 | |
| 
 | |
| 	if i.includeFreqNorm && (i.currChunk != nChunk || i.currChunkFreqNorm == nil) {
 | |
| 		err := i.loadChunk(int(nChunk))
 | |
| 		if err != nil {
 | |
| 			return 0, false, fmt.Errorf("error loading chunk: %v", err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return uint64(n), true, nil
 | |
| }
 | |
| 
 | |
| // optimization when the postings list is "clean" (e.g., no updates &
 | |
| // no deletions) where the all bitmap is the same as the actual bitmap
 | |
| func (i *PostingsIterator) nextDocNumAtOrAfterClean(
 | |
| 	atOrAfter uint64) (uint64, bool, error) {
 | |
| 
 | |
| 	if !i.includeFreqNorm {
 | |
| 		i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
 | |
| 
 | |
| 		if !i.Actual.HasNext() {
 | |
| 			return 0, false, nil // couldn't find anything
 | |
| 		}
 | |
| 
 | |
| 		return uint64(i.Actual.Next()), true, nil
 | |
| 	}
 | |
| 
 | |
| 	// freq-norm's needed, so maintain freq-norm chunk reader
 | |
| 	sameChunkNexts := 0 // # of times we called Next() in the same chunk
 | |
| 	n := i.Actual.Next()
 | |
| 	nChunk := n / i.postings.sb.chunkFactor
 | |
| 
 | |
| 	for uint64(n) < atOrAfter && i.Actual.HasNext() {
 | |
| 		n = i.Actual.Next()
 | |
| 
 | |
| 		nChunkPrev := nChunk
 | |
| 		nChunk = n / i.postings.sb.chunkFactor
 | |
| 
 | |
| 		if nChunk != nChunkPrev {
 | |
| 			sameChunkNexts = 0
 | |
| 		} else {
 | |
| 			sameChunkNexts += 1
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if uint64(n) < atOrAfter {
 | |
| 		// couldn't find anything
 | |
| 		return 0, false, nil
 | |
| 	}
 | |
| 
 | |
| 	for j := 0; j < sameChunkNexts; j++ {
 | |
| 		err := i.currChunkNext(nChunk)
 | |
| 		if err != nil {
 | |
| 			return 0, false, fmt.Errorf("error optimized currChunkNext: %v", err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
 | |
| 		err := i.loadChunk(int(nChunk))
 | |
| 		if err != nil {
 | |
| 			return 0, false, fmt.Errorf("error loading chunk: %v", err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return uint64(n), true, nil
 | |
| }
 | |
| 
 | |
| func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
 | |
| 	if i.currChunk != nChunk || i.currChunkFreqNorm == nil {
 | |
| 		err := i.loadChunk(int(nChunk))
 | |
| 		if err != nil {
 | |
| 			return fmt.Errorf("error loading chunk: %v", err)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// read off freq/offsets even though we don't care about them
 | |
| 	hasLocs, err := i.skipFreqNormReadHasLocs()
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	if i.includeLocs && hasLocs {
 | |
| 		numLocsBytes, err := i.locReader.ReadUvarint()
 | |
| 		if err != nil {
 | |
| 			return fmt.Errorf("error reading location numLocsBytes: %v", err)
 | |
| 		}
 | |
| 
 | |
| 		// skip over all the location bytes
 | |
| 		i.locReader.SkipBytes(int(numLocsBytes))
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // DocNum1Hit returns the docNum and true if this is "1-hit" optimized
 | |
| // and the docNum is available.
 | |
| func (p *PostingsIterator) DocNum1Hit() (uint64, bool) {
 | |
| 	if p.normBits1Hit != 0 && p.docNum1Hit != DocNum1HitFinished {
 | |
| 		return p.docNum1Hit, true
 | |
| 	}
 | |
| 	return 0, false
 | |
| }
 | |
| 
 | |
| // ActualBitmap returns the underlying actual bitmap
 | |
| // which can be used up the stack for optimizations
 | |
| func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap {
 | |
| 	return p.ActualBM
 | |
| }
 | |
| 
 | |
| // ReplaceActual replaces the ActualBM with the provided
 | |
| // bitmap
 | |
| func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap) {
 | |
| 	p.ActualBM = abm
 | |
| 	p.Actual = abm.Iterator()
 | |
| }
 | |
| 
 | |
| // PostingsIteratorFromBitmap constructs a PostingsIterator given an
 | |
| // "actual" bitmap.
 | |
| func PostingsIteratorFromBitmap(bm *roaring.Bitmap,
 | |
| 	includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
 | |
| 	return &PostingsIterator{
 | |
| 		ActualBM:        bm,
 | |
| 		Actual:          bm.Iterator(),
 | |
| 		includeFreqNorm: includeFreqNorm,
 | |
| 		includeLocs:     includeLocs,
 | |
| 	}, nil
 | |
| }
 | |
| 
 | |
| // PostingsIteratorFrom1Hit constructs a PostingsIterator given a
 | |
| // 1-hit docNum.
 | |
| func PostingsIteratorFrom1Hit(docNum1Hit uint64,
 | |
| 	includeFreqNorm, includeLocs bool) (segment.PostingsIterator, error) {
 | |
| 	return &PostingsIterator{
 | |
| 		docNum1Hit:      docNum1Hit,
 | |
| 		normBits1Hit:    NormBits1Hit,
 | |
| 		includeFreqNorm: includeFreqNorm,
 | |
| 		includeLocs:     includeLocs,
 | |
| 	}, nil
 | |
| }
 | |
| 
 | |
| // Posting is a single entry in a postings list
 | |
| type Posting struct {
 | |
| 	docNum uint64
 | |
| 	freq   uint64
 | |
| 	norm   float32
 | |
| 	locs   []segment.Location
 | |
| }
 | |
| 
 | |
| func (p *Posting) Size() int {
 | |
| 	sizeInBytes := reflectStaticSizePosting
 | |
| 
 | |
| 	for _, entry := range p.locs {
 | |
| 		sizeInBytes += entry.Size()
 | |
| 	}
 | |
| 
 | |
| 	return sizeInBytes
 | |
| }
 | |
| 
 | |
| // Number returns the document number of this posting in this segment
 | |
| func (p *Posting) Number() uint64 {
 | |
| 	return p.docNum
 | |
| }
 | |
| 
 | |
| // Frequency returns the frequencies of occurrence of this term in this doc/field
 | |
| func (p *Posting) Frequency() uint64 {
 | |
| 	return p.freq
 | |
| }
 | |
| 
 | |
| // Norm returns the normalization factor for this posting
 | |
| func (p *Posting) Norm() float64 {
 | |
| 	return float64(p.norm)
 | |
| }
 | |
| 
 | |
| // Locations returns the location information for each occurrence
 | |
| func (p *Posting) Locations() []segment.Location {
 | |
| 	return p.locs
 | |
| }
 | |
| 
 | |
| // Location represents the location of a single occurrence
 | |
| type Location struct {
 | |
| 	field string
 | |
| 	pos   uint64
 | |
| 	start uint64
 | |
| 	end   uint64
 | |
| 	ap    []uint64
 | |
| }
 | |
| 
 | |
| func (l *Location) Size() int {
 | |
| 	return reflectStaticSizeLocation +
 | |
| 		len(l.field) +
 | |
| 		len(l.ap)*size.SizeOfUint64
 | |
| }
 | |
| 
 | |
| // Field returns the name of the field (useful in composite fields to know
 | |
| // which original field the value came from)
 | |
| func (l *Location) Field() string {
 | |
| 	return l.field
 | |
| }
 | |
| 
 | |
| // Start returns the start byte offset of this occurrence
 | |
| func (l *Location) Start() uint64 {
 | |
| 	return l.start
 | |
| }
 | |
| 
 | |
| // End returns the end byte offset of this occurrence
 | |
| func (l *Location) End() uint64 {
 | |
| 	return l.end
 | |
| }
 | |
| 
 | |
| // Pos returns the 1-based phrase position of this occurrence
 | |
| func (l *Location) Pos() uint64 {
 | |
| 	return l.pos
 | |
| }
 | |
| 
 | |
| // ArrayPositions returns the array position vector associated with this occurrence
 | |
| func (l *Location) ArrayPositions() []uint64 {
 | |
| 	return l.ap
 | |
| }
 |