public class STUniformSplitTermsWriter extends UniformSplitTermsWriter
Extends UniformSplitTermsWriter by sharing all the fields' terms in the same
dictionary and by writing all the fields of a term in the same block line.
The block file contains all the term blocks for all fields. Each block line, for a single term,
may hold the TermState of multiple fields. The block file also contains the fields metadata
at the end of the file.
The dictionary file contains a single trie (FST bytes) for all fields.
This structure is well suited to cases where there are many fields: the shared-terms dictionary trie is then much smaller.
This FieldsConsumer requires a custom merge(MergeState, NormsProducer) method for efficiency.
The regular merge would scan all the fields sequentially, which internally would
scan the whole shared-terms dictionary as many times as there are fields.
In contrast, the custom merge directly scans the internal shared-terms dictionary
of all the segments to merge, thus scanning it only once regardless of the number of fields.
Modifier and Type | Class and Description |
---|---|
private static class |
STUniformSplitTermsWriter.FieldsIterator |
private class |
STUniformSplitTermsWriter.FieldTerms |
private class |
STUniformSplitTermsWriter.MergingFieldTerms |
protected class |
STUniformSplitTermsWriter.SegmentPostings |
private class |
STUniformSplitTermsWriter.SegmentTerms |
private static interface |
STUniformSplitTermsWriter.SharedTermsWriter |
private class |
STUniformSplitTermsWriter.TermIterator<T> |
private class |
STUniformSplitTermsWriter.TermIteratorQueue<T> |
blockEncoder, blockOutput, DEFAULT_DELTA_NUM_LINES, DEFAULT_TARGET_NUM_BLOCK_LINES, deltaNumLines, dictionaryOutput, fieldInfos, MAX_NUM_BLOCK_LINES, maxDoc, postingsWriter, targetNumBlockLines
Modifier | Constructor and Description |
---|---|
|
STUniformSplitTermsWriter(PostingsWriterBase postingsWriter,
SegmentWriteState state,
BlockEncoder blockEncoder) |
|
STUniformSplitTermsWriter(PostingsWriterBase postingsWriter,
SegmentWriteState state,
int targetNumBlockLines,
int deltaNumLines,
BlockEncoder blockEncoder) |
protected |
STUniformSplitTermsWriter(PostingsWriterBase postingsWriter,
SegmentWriteState state,
int targetNumBlockLines,
int deltaNumLines,
BlockEncoder blockEncoder,
java.lang.String codecName,
int versionCurrent,
java.lang.String termsBlocksExtension,
java.lang.String dictionaryExtension) |
close, validateSettings, writeDictionary, writeFieldsMetadata, writeFieldTerms, writePostingLine
public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state, BlockEncoder blockEncoder) throws java.io.IOException
java.io.IOException
public STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder) throws java.io.IOException
java.io.IOException
protected STUniformSplitTermsWriter(PostingsWriterBase postingsWriter, SegmentWriteState state, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, java.lang.String codecName, int versionCurrent, java.lang.String termsBlocksExtension, java.lang.String dictionaryExtension) throws java.io.IOException
java.io.IOException
public void write(Fields fields, NormsProducer normsProducer) throws java.io.IOException
FieldsConsumer
Notes:
write
in class UniformSplitTermsWriter
java.io.IOException
private void writeSegment(STUniformSplitTermsWriter.SharedTermsWriter termsWriter) throws java.io.IOException
Writes the segment using the provided STUniformSplitTermsWriter.SharedTermsWriter, which can be either a single segment writer, or a multiple segment merging writer.
java.io.IOException
private java.util.Collection<FieldMetadata> writeSingleSegment(Fields fields, NormsProducer normsProducer, STBlockWriter blockWriter, IndexDictionary.Builder dictionaryBuilder) throws java.io.IOException
java.io.IOException
private java.util.List<FieldMetadata> createFieldMetadataList(java.util.Iterator<FieldInfo> fieldInfos, int maxDoc)
private STUniformSplitTermsWriter.TermIteratorQueue<STUniformSplitTermsWriter.FieldTerms> createFieldTermsQueue(Fields fields, java.util.List<FieldMetadata> fieldMetadataList) throws java.io.IOException
java.io.IOException
private <T> void groupByTerm(STUniformSplitTermsWriter.TermIteratorQueue<T> termIteratorQueue, STUniformSplitTermsWriter.TermIterator<T> topTermIterator, java.util.List<STUniformSplitTermsWriter.TermIterator<T>> groupedTermIterators)
private void writePostingLines(BytesRef term, java.util.List<? extends STUniformSplitTermsWriter.TermIterator<STUniformSplitTermsWriter.FieldTerms>> groupedFieldTerms, NormsProducer normsProducer, java.util.List<FieldMetadataTermState> termStates) throws java.io.IOException
java.io.IOException
private <T> void nextTermForIterators(java.util.List<? extends STUniformSplitTermsWriter.TermIterator<T>> termIterators, STUniformSplitTermsWriter.TermIteratorQueue<T> termIteratorQueue) throws java.io.IOException
java.io.IOException
private int writeFieldMetadataList(java.util.Collection<FieldMetadata> fieldMetadataList) throws java.io.IOException
java.io.IOException
protected void writeDictionary(int fieldsNumber, IndexDictionary.Builder dictionaryBuilder) throws java.io.IOException
java.io.IOException
public void merge(MergeState mergeState, NormsProducer normsProducer) throws java.io.IOException
FieldsConsumer
Merges in the fields from the readers in mergeState. The default implementation skips
and maps around deleted documents, and calls FieldsConsumer.write(Fields,NormsProducer).
Implementations can override this method for more sophisticated
merging (bulk-byte copying, etc).
merge in class FieldsConsumer
java.io.IOException
private java.util.Collection<FieldMetadata> mergeSegments(MergeState mergeState, NormsProducer normsProducer, java.util.List<STUniformSplitTermsWriter.TermIterator<STUniformSplitTermsWriter.SegmentTerms>> segmentTermsList, STBlockWriter blockWriter, IndexDictionary.Builder dictionaryBuilder) throws java.io.IOException
java.io.IOException
private java.util.Map<java.lang.String,STUniformSplitTermsWriter.MergingFieldTerms> createMergingFieldTermsMap(java.util.List<FieldMetadata> fieldMetadataList, int numSegments)
private STUniformSplitTermsWriter.TermIteratorQueue<STUniformSplitTermsWriter.SegmentTerms> createSegmentTermsQueue(java.util.List<STUniformSplitTermsWriter.TermIterator<STUniformSplitTermsWriter.SegmentTerms>> segmentTermsList) throws java.io.IOException
java.io.IOException
private void combineSegmentsFields(java.util.List<STUniformSplitTermsWriter.TermIterator<STUniformSplitTermsWriter.SegmentTerms>> groupedSegmentTerms, java.util.Map<java.lang.String,java.util.List<STUniformSplitTermsWriter.SegmentPostings>> fieldPostingsMap)
private void combinePostingsPerField(BytesRef term, java.util.Map<java.lang.String,STUniformSplitTermsWriter.MergingFieldTerms> fieldTermsMap, java.util.Map<java.lang.String,java.util.List<STUniformSplitTermsWriter.SegmentPostings>> fieldPostingsMap, java.util.List<STUniformSplitTermsWriter.MergingFieldTerms> groupedFieldTerms)