public class StructureMerger extends Object
Properties:
Modifier and Type | Field and Description |
---|---|
protected String |
basicDirectIndexPostingIteratorClass |
protected String |
basicInvertedIndexPostingIteratorClass |
protected CompressionFactory.CompressionConfiguration |
compressionDirectConfig |
protected CompressionFactory.CompressionConfiguration |
compressionInvertedConfig |
protected IndexOnDisk |
destIndex
destination index
|
protected Class<? extends DirectInvertedOutputStream> |
directFileOutputStreamClass
class to use to write direct file
|
protected int |
fieldCount |
protected Class<? extends DirectInvertedOutputStream> |
fieldDirectFileOutputStreamClass |
protected String |
fieldDirectIndexPostingIteratorClass |
protected String |
fieldInvertedIndexPostingIteratorClass |
protected boolean |
keepTermCodeMap |
protected static org.apache.log4j.Logger |
logger
the logger used
|
protected boolean |
MetaReverse |
protected int |
numberOfDocuments
The number of documents in the merged structures.
|
protected long |
numberOfPointers
The number of pointers in the merged structures.
|
protected int |
numberOfTerms
The number of terms in the collection.
|
protected IndexOnDisk |
srcIndex1
source index 1
|
protected IndexOnDisk |
srcIndex2
source index 2
|
protected gnu.trove.TIntIntHashMap |
termcodeHashmap
A hashmap for converting the codes of terms appearing only in the
vocabulary of the second set of data structures into a new set of
term codes for the merged set of data structures.
|
Constructor and Description |
---|
StructureMerger(IndexOnDisk _srcIndex1,
IndexOnDisk _srcIndex2,
IndexOnDisk _destIndex)
constructor
|
Modifier and Type | Method and Description |
---|---|
protected void |
createLexidFile()
Creates the final term code to offset file, and the lexicon hash if enabled.
|
protected static Class<?>[] |
getInterfaces(Object o) |
static void |
main(String[] args)
Usage: java org.terrier.structures.merging.StructureMerger [binary bits] [inverted file 1] [inverted file 2] [output inverted file]
|
protected void |
mergeDirectFiles()
Merges the two direct files and the corresponding document id files.
|
protected void |
mergeDocumentIndexFiles()
Merges the two document index files, and the meta files.
|
protected void |
mergeInvertedFiles()
Merges the two lexicons into one.
|
void |
mergeStructures()
Merges the structures created by terrier.
|
void |
setOutputIndex(IndexOnDisk _outputIndex)
Sets the output index.
|
protected static final org.apache.log4j.Logger logger
protected gnu.trove.TIntIntHashMap termcodeHashmap
protected boolean keepTermCodeMap
protected int numberOfDocuments
protected long numberOfPointers
protected int numberOfTerms
protected CompressionFactory.CompressionConfiguration compressionDirectConfig
protected CompressionFactory.CompressionConfiguration compressionInvertedConfig
protected boolean MetaReverse
protected IndexOnDisk srcIndex1
protected IndexOnDisk srcIndex2
protected IndexOnDisk destIndex
protected Class<? extends DirectInvertedOutputStream> directFileOutputStreamClass
protected Class<? extends DirectInvertedOutputStream> fieldDirectFileOutputStreamClass
protected final int fieldCount
protected String basicInvertedIndexPostingIteratorClass
protected String fieldInvertedIndexPostingIteratorClass
protected String basicDirectIndexPostingIteratorClass
protected String fieldDirectIndexPostingIteratorClass
public StructureMerger(IndexOnDisk _srcIndex1, IndexOnDisk _srcIndex2, IndexOnDisk _destIndex)
_srcIndex1 - source index 1
_srcIndex2 - source index 2
_destIndex - destination index
public void setOutputIndex(IndexOnDisk _outputIndex)
_outputIndex - the index to be merged to
protected void mergeInvertedFiles()
protected void mergeDirectFiles()
protected void mergeDocumentIndexFiles()
protected void createLexidFile()
public void mergeStructures()
Terrier 4.0. Copyright © 2004-2014 University of Glasgow