|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
org.terrier.structures.indexing.MetaIndexBuilder
org.terrier.structures.indexing.CompressingMetaIndexBuilder
public class CompressingMetaIndexBuilder
Creates a metaindex structure that compresses all values using Deflater. Properties:
| Nested Class Summary | |
|---|---|
static class |
CompressingMetaIndexBuilder.KeyedPartitioner
class KeyedPartitioner |
| Field Summary | |
|---|---|
protected ByteArrayOutputStream |
baos
|
protected byte[] |
compressedBuffer
|
protected static boolean |
CROP_LONG
|
protected long |
currentIndexOffset
|
protected long |
currentOffset
|
protected DataOutputStream |
dataOutput
|
protected static int |
DOCS_PER_CHECK
|
protected int |
entryCount
|
protected int |
entryLengthBytes
|
protected String[] |
forwardKeyNames
|
protected int[] |
forwardKeys
|
protected boolean[] |
forwardKeyValuesSorted
|
protected FSOrderedMapFile.MapFileWriter[] |
forwardWriters
|
protected Index |
index
|
protected DataOutputStream |
indexOutput
|
protected gnu.trove.TObjectIntHashMap<String> |
key2Index
|
protected int |
keyCount
|
protected FixedSizeWriteableFactory<org.apache.hadoop.io.Text>[] |
keyFactories
|
protected String[] |
keyNames
|
protected String[] |
lastValues
|
protected static org.apache.log4j.Logger |
logger
|
protected static int |
MAX_INDEX_MB_IN_MEM_RETRIEVAL
|
protected static int |
MAX_MB_IN_MEM_RETRIEVAL
|
protected MemoryChecker |
memCheck
|
protected static boolean |
REVERSE_ALLOW_DUPS
|
protected static int |
REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE
|
protected byte[] |
spaces
|
protected String |
structureName
|
protected int[] |
valueLensBytes
|
protected int[] |
valueLensChars
|
protected Deflater |
zip
|
protected static int |
ZIP_COMPRESSION_LEVEL
|
| Constructor Summary | |
|---|---|
CompressingMetaIndexBuilder(Index _index,
String[] _keyNames,
int[] _valueLens,
String[] _forwardKeys)
constructor |
|
CompressingMetaIndexBuilder(Index _index,
String _structureName,
String[] _keyNames,
int[] _valueLens,
String[] _forwardKeys)
constructor |
|
| Method Summary | |
|---|---|
void |
close()
|
void |
flush()
|
static void |
main(String[] args)
|
static void |
reverseAsMapReduceJob(Index index,
String structureName,
String[] keys)
reverseAsMapReduceJob |
static void |
reverseAsMapReduceJob(Index index,
String structureName,
String[] keys,
HadoopPlugin.JobFactory jf)
reverseAsMapReduceJob |
void |
writeDocumentEntry(Map<String,String> data)
Write out metadata for the current document, extracted from the specified map. Typically, the MetaIndexBuilder will know which keys from data it is interested in. |
void |
writeDocumentEntry(String[] data)
Write out metadata for current document. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected static final org.apache.log4j.Logger logger
protected static final int MAX_MB_IN_MEM_RETRIEVAL
protected static final int MAX_INDEX_MB_IN_MEM_RETRIEVAL
protected static final boolean REVERSE_ALLOW_DUPS
protected static final boolean CROP_LONG
protected static final int REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE
protected static final int DOCS_PER_CHECK
protected static final int ZIP_COMPRESSION_LEVEL
protected final gnu.trove.TObjectIntHashMap<String> key2Index
protected DataOutputStream dataOutput
protected final String[] keyNames
protected final int keyCount
protected Deflater zip
protected ByteArrayOutputStream baos
protected DataOutputStream indexOutput
protected byte[] compressedBuffer
protected Index index
protected int[] valueLensChars
protected int[] valueLensBytes
protected byte[] spaces
protected int entryLengthBytes
protected long currentOffset
protected long currentIndexOffset
protected int entryCount
protected int[] forwardKeys
protected String[] forwardKeyNames
protected FSOrderedMapFile.MapFileWriter[] forwardWriters
protected boolean[] forwardKeyValuesSorted
protected String[] lastValues
protected MemoryChecker memCheck
protected FixedSizeWriteableFactory<org.apache.hadoop.io.Text>[] keyFactories
protected String structureName
| Constructor Detail |
|---|
public CompressingMetaIndexBuilder(Index _index,
String[] _keyNames,
int[] _valueLens,
String[] _forwardKeys)
_index - _keyNames - _valueLens - _forwardKeys -
public CompressingMetaIndexBuilder(Index _index,
String _structureName,
String[] _keyNames,
int[] _valueLens,
String[] _forwardKeys)
_index - _structureName - _keyNames - _valueLens - _forwardKeys -
| Method Detail |
|---|
public void writeDocumentEntry(Map<String,String> data)
throws IOException
writeDocumentEntry in class MetaIndexBuilder
Throws: IOException
public void writeDocumentEntry(String[] data)
throws IOException
writeDocumentEntry in class MetaIndexBuilder
Throws: IOException
public void flush()
throws IOException
flush in interface Flushable
Throws: IOException
public void close()
throws IOException
close in interface Closeable
Throws: IOException
public static void main(String[] args)
throws Exception
Exception
public static void reverseAsMapReduceJob(Index index,
String structureName,
String[] keys)
throws Exception
index - structureName - keys -
Exception
public static void reverseAsMapReduceJob(Index index,
String structureName,
String[] keys,
HadoopPlugin.JobFactory jf)
throws Exception
index - structureName - keys - jf -
Exception
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||