org.terrier.structures.indexing
Class CompressingMetaIndexBuilder

java.lang.Object
  extended by org.terrier.structures.indexing.MetaIndexBuilder
      extended by org.terrier.structures.indexing.CompressingMetaIndexBuilder
All Implemented Interfaces:
java.io.Closeable, java.io.Flushable

public class CompressingMetaIndexBuilder
extends MetaIndexBuilder
implements java.io.Flushable

Creates a metaindex structure that compresses all values using Deflater.

Since:
3.0
Author:
Craig Macdonald & Vassilis Plachouras

Nested Class Summary
static class CompressingMetaIndexBuilder.KeyedPartitioner
          class KeyedPartitioner
 
Field Summary
protected  java.io.ByteArrayOutputStream baos
           
protected  byte[] compressedBuffer
           
protected  long currentIndexOffset
           
protected  long currentOffset
           
protected  java.io.DataOutputStream dataOutput
           
protected static int DOCS_PER_CHECK
           
protected  int entryCount
           
protected  int entryLengthBytes
           
protected  java.lang.String[] forwardKeyNames
           
protected  int[] forwardKeys
           
protected  boolean[] forwardKeyValuesSorted
           
protected  FSOrderedMapFile.MapFileWriter[] forwardWriters
           
protected  Index index
           
protected  java.io.DataOutputStream indexOutput
           
protected  gnu.trove.TObjectIntHashMap<java.lang.String> key2Index
           
protected  int keyCount
           
protected  FixedSizeWriteableFactory<org.apache.hadoop.io.Text>[] keyFactories
           
protected  java.lang.String[] keyNames
           
protected  java.lang.String[] lastValues
           
protected static org.apache.log4j.Logger logger
           
protected static int MAX_INDEX_MB_IN_MEM_RETRIEVAL
           
protected static int MAX_MB_IN_MEM_RETRIEVAL
           
protected  MemoryChecker memCheck
           
protected static int REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE
           
protected  byte[] spaces
           
protected  java.lang.String structureName
           
protected  int[] valueLensBytes
           
protected  int[] valueLensChars
           
protected  java.util.zip.Deflater zip
           
protected static int ZIP_COMPRESSION_LEVEL
           
 
Constructor Summary
CompressingMetaIndexBuilder(Index _index, java.lang.String[] _keyNames, int[] _valueLens, java.lang.String[] _forwardKeys)
          constructor
CompressingMetaIndexBuilder(Index _index, java.lang.String _structureName, java.lang.String[] _keyNames, int[] _valueLens, java.lang.String[] _forwardKeys)
          constructor
 
Method Summary
 void close()
          
 void flush()
          
static void reverseAsMapReduceJob(Index index, java.lang.String structureName, java.lang.String[] keys)
          reverseAsMapReduceJob
static void reverseAsMapReduceJob(Index index, java.lang.String structureName, java.lang.String[] keys, HadoopPlugin.JobFactory jf)
          reverseAsMapReduceJob
 void writeDocumentEntry(java.util.Map<java.lang.String,java.lang.String> data)
          Write out metadata for current document, extracted from specified map. Typically, the MetaIndexBuilder will know which keys from data it is interested in.
 void writeDocumentEntry(java.lang.String[] data)
          Write out metadata for current document.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

logger

protected static final org.apache.log4j.Logger logger

MAX_MB_IN_MEM_RETRIEVAL

protected static final int MAX_MB_IN_MEM_RETRIEVAL

MAX_INDEX_MB_IN_MEM_RETRIEVAL

protected static final int MAX_INDEX_MB_IN_MEM_RETRIEVAL

REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE

protected static final int REVERSE_KEY_LOOKUP_WRITING_BUFFER_SIZE
See Also:
Constant Field Values

DOCS_PER_CHECK

protected static final int DOCS_PER_CHECK

ZIP_COMPRESSION_LEVEL

protected static final int ZIP_COMPRESSION_LEVEL
See Also:
Constant Field Values

key2Index

protected final gnu.trove.TObjectIntHashMap<java.lang.String> key2Index

dataOutput

protected java.io.DataOutputStream dataOutput

keyNames

protected final java.lang.String[] keyNames

keyCount

protected final int keyCount

zip

protected java.util.zip.Deflater zip

baos

protected java.io.ByteArrayOutputStream baos

indexOutput

protected java.io.DataOutputStream indexOutput

compressedBuffer

protected byte[] compressedBuffer

index

protected Index index

valueLensChars

protected int[] valueLensChars

valueLensBytes

protected int[] valueLensBytes

spaces

protected byte[] spaces

entryLengthBytes

protected int entryLengthBytes

currentOffset

protected long currentOffset

currentIndexOffset

protected long currentIndexOffset

entryCount

protected int entryCount

forwardKeys

protected int[] forwardKeys

forwardKeyNames

protected java.lang.String[] forwardKeyNames

forwardWriters

protected FSOrderedMapFile.MapFileWriter[] forwardWriters

forwardKeyValuesSorted

protected boolean[] forwardKeyValuesSorted

lastValues

protected java.lang.String[] lastValues

memCheck

protected MemoryChecker memCheck

keyFactories

protected FixedSizeWriteableFactory<org.apache.hadoop.io.Text>[] keyFactories

structureName

protected java.lang.String structureName
Constructor Detail

CompressingMetaIndexBuilder

public CompressingMetaIndexBuilder(Index _index,
                                   java.lang.String[] _keyNames,
                                   int[] _valueLens,
                                   java.lang.String[] _forwardKeys)
constructor

Parameters:
_index -
_keyNames -
_valueLens -
_forwardKeys -

CompressingMetaIndexBuilder

public CompressingMetaIndexBuilder(Index _index,
                                   java.lang.String _structureName,
                                   java.lang.String[] _keyNames,
                                   int[] _valueLens,
                                   java.lang.String[] _forwardKeys)
constructor

Parameters:
_index -
_structureName -
_keyNames -
_valueLens -
_forwardKeys -
Method Detail

writeDocumentEntry

public void writeDocumentEntry(java.util.Map<java.lang.String,java.lang.String> data)
                        throws java.io.IOException
Write out metadata for current document, extracted from specified map. Typically, the MetaIndexBuilder will know which keys from data it is interested in.

Specified by:
writeDocumentEntry in class MetaIndexBuilder
Throws:
java.io.IOException

writeDocumentEntry

public void writeDocumentEntry(java.lang.String[] data)
                        throws java.io.IOException
Write out metadata for current document. Values for all keys are specified.

Specified by:
writeDocumentEntry in class MetaIndexBuilder
Throws:
java.io.IOException

flush

public void flush()
           throws java.io.IOException

Specified by:
flush in interface java.io.Flushable
Throws:
java.io.IOException

close

public void close()
           throws java.io.IOException

Specified by:
close in interface java.io.Closeable
Throws:
java.io.IOException

reverseAsMapReduceJob

public static void reverseAsMapReduceJob(Index index,
                                         java.lang.String structureName,
                                         java.lang.String[] keys)
                                  throws java.lang.Exception
reverseAsMapReduceJob

Parameters:
index -
structureName -
keys -
Throws:
java.lang.Exception

reverseAsMapReduceJob

public static void reverseAsMapReduceJob(Index index,
                                         java.lang.String structureName,
                                         java.lang.String[] keys,
                                         HadoopPlugin.JobFactory jf)
                                  throws java.lang.Exception
reverseAsMapReduceJob

Parameters:
index -
structureName -
keys -
jf -
Throws:
java.lang.Exception


Terrier 3.5. Copyright © 2004-2011 University of Glasgow