|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.terrier.structures.indexing.MetaIndexBuilder
      org.terrier.structures.indexing.CompressingMetaIndexBuilder
public class CompressingMetaIndexBuilder
Creates a metaindex structure that compresses all values using Deflater. Properties:
Nested Class Summary | |
---|---|
static class |
CompressingMetaIndexBuilder.KeyedPartitioner
class KeyedPartitioner |
Field Summary | |
---|---|
protected ByteArrayOutputStream |
baos
|
protected byte[] |
compressedBuffer
|
protected static boolean |
CROP_LONG
|
protected long |
currentIndexOffset
|
protected long |
currentOffset
|
protected DataOutputStream |
dataOutput
|
protected static int |
DOCS_PER_CHECK
|
protected int |
entryCount
|
protected int |
entryLengthBytes
|
protected String[] |
forwardKeyNames
|
protected int[] |
forwardKeys
|
protected boolean[] |
forwardKeyValuesSorted
|
protected FSOrderedMapFile.MapFileWriter[] |
forwardWriters
|
protected Index |
index
|
protected DataOutputStream |
indexOutput
|
protected gnu.trove.TObjectIntHashMap<String> |
key2Index
|
protected int |
keyCount
|
protected FixedSizeWriteableFactory<org.apache.hadoop.io.Text>[] |
keyFactories
|
protected String[] |
keyNames
|
protected String[] |
lastValues
|
protected static org.apache.log4j.Logger |
logger
|
protected static int |
MAX_INDEX_MB_IN_MEM_RETRIEVAL
|
protected static int |
MAX_MB_IN_MEM_RETRIEVAL
|
protected MemoryChecker |
memCheck
|
protected static boolean |
REVERSE_ALLOW_DUPS
|
protected static int |
REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE
|
protected byte[] |
spaces
|
protected String |
structureName
|
protected int[] |
valueLensBytes
|
protected int[] |
valueLensChars
|
protected Deflater |
zip
|
protected static int |
ZIP_COMPRESSION_LEVEL
|
Constructor Summary | |
---|---|
CompressingMetaIndexBuilder(Index _index,
String[] _keyNames,
int[] _valueLens,
String[] _forwardKeys)
constructor |
|
CompressingMetaIndexBuilder(Index _index,
String _structureName,
String[] _keyNames,
int[] _valueLens,
String[] _forwardKeys)
constructor |
Method Summary | |
---|---|
void |
close()
|
void |
flush()
|
static void |
main(String[] args)
|
static void |
reverseAsMapReduceJob(Index index,
String structureName,
String[] keys)
reverseAsMapReduceJob |
static void |
reverseAsMapReduceJob(Index index,
String structureName,
String[] keys,
HadoopPlugin.JobFactory jf)
reverseAsMapReduceJob |
void |
writeDocumentEntry(Map<String,String> data)
Writes out metadata for the current document, extracted from the specified map. Typically, the MetaIndexBuilder will know which keys from data it is interested in. |
void |
writeDocumentEntry(String[] data)
Write out metadata for current document. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
protected static final org.apache.log4j.Logger logger
protected static final int MAX_MB_IN_MEM_RETRIEVAL
protected static final int MAX_INDEX_MB_IN_MEM_RETRIEVAL
protected static final boolean REVERSE_ALLOW_DUPS
protected static final boolean CROP_LONG
protected static final int REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE
protected static final int DOCS_PER_CHECK
protected static final int ZIP_COMPRESSION_LEVEL
protected final gnu.trove.TObjectIntHashMap<String> key2Index
protected DataOutputStream dataOutput
protected final String[] keyNames
protected final int keyCount
protected Deflater zip
protected ByteArrayOutputStream baos
protected DataOutputStream indexOutput
protected byte[] compressedBuffer
protected Index index
protected int[] valueLensChars
protected int[] valueLensBytes
protected byte[] spaces
protected int entryLengthBytes
protected long currentOffset
protected long currentIndexOffset
protected int entryCount
protected int[] forwardKeys
protected String[] forwardKeyNames
protected FSOrderedMapFile.MapFileWriter[] forwardWriters
protected boolean[] forwardKeyValuesSorted
protected String[] lastValues
protected MemoryChecker memCheck
protected FixedSizeWriteableFactory<org.apache.hadoop.io.Text>[] keyFactories
protected String structureName
Constructor Detail |
---|
public CompressingMetaIndexBuilder(Index _index, String[] _keyNames, int[] _valueLens, String[] _forwardKeys)
Parameters:
_index -
_keyNames -
_valueLens -
_forwardKeys -
public CompressingMetaIndexBuilder(Index _index, String _structureName, String[] _keyNames, int[] _valueLens, String[] _forwardKeys)
Parameters:
_index -
_structureName -
_keyNames -
_valueLens -
_forwardKeys -
Method Detail |
---|
public void writeDocumentEntry(Map<String,String> data) throws IOException
writeDocumentEntry
in class MetaIndexBuilder
IOException
public void writeDocumentEntry(String[] data) throws IOException
writeDocumentEntry
in class MetaIndexBuilder
IOException
public void flush() throws IOException
flush
in interface Flushable
IOException
public void close() throws IOException
close
in interface Closeable
IOException
public static void main(String[] args) throws Exception
Exception
public static void reverseAsMapReduceJob(Index index, String structureName, String[] keys) throws Exception
Parameters:
index -
structureName -
keys -
Exception
public static void reverseAsMapReduceJob(Index index, String structureName, String[] keys, HadoopPlugin.JobFactory jf) throws Exception
Parameters:
index -
structureName -
keys -
jf -
Exception
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |