diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/dsms/MRFDependenceScoreModifier.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/dsms/MRFDependenceScoreModifier.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/dsms/MRFDependenceScoreModifier.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/dsms/MRFDependenceScoreModifier.java 2012-12-17 16:21:24.000000000 +0100 @@ -27,10 +27,11 @@ import org.terrier.matching.MatchingQueryTerms; import org.terrier.matching.ResultSet; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; import org.terrier.structures.CollectionStatistics; import org.terrier.structures.Index; import org.terrier.utility.ApplicationSetup; + /** Implements Markov Random Fields. See Metzler & Croft, SIGIR 2005. 
* Note that this implementation does not utilise the frequency of a * tuple in the collection - instead, this is assumed to be a constant, @@ -52,22 +53,30 @@ double defaultDf; double defaultCf; - @Override - public boolean modifyScores(Index index, MatchingQueryTerms terms, + /** + * {@inheritDoc} + */ + @Override public boolean modifyScores(Index index, MatchingQueryTerms terms, ResultSet set) { MU = Double.parseDouble(ApplicationSetup.getProperty("mrf.mu", ApplicationSetup.getProperty("proximity.norm2.c", "4000d"))); return super.modifyScores(index, terms, set); } - @Override - protected double scoreFDSD(int matchingNGrams, int _docLength) { + /** + * {@inheritDoc} + */ + @Override protected double scoreFDSD(int matchingNGrams, int _docLength) { final double mu = MU; double docLength = (double)_docLength; double tf = (double)matchingNGrams; - return w_o * (Idf.log(1 + (tf/(mu * (defaultCf / super.numTokens))) ) + Idf.log(mu/(docLength+mu))); + return w_o * (log(1 + (tf/(mu * (defaultCf / super.numTokens)))) + log(mu/(docLength+mu))); } - /** {@inheritDoc}*/ + + /** + * {@inheritDoc} + */ + @Override public void setCollectionStatistics(CollectionStatistics cs, Index _index) { super.setCollectionStatistics(cs, _index); w_o = Double.parseDouble(ApplicationSetup.getProperty("proximity."+super.ngramLength+".w_o", diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/BB2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/BB2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/BB2.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/BB2.java 2012-12-18 16:00:08.000000000 +0100 @@ -26,11 +26,16 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; +import 
static org.terrier.matching.models.WeightingModelLibrary.LOG_2_OF_E;; + /** * This class implements the BB2 weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class BB2 extends WeightingModel { + private static final long serialVersionUID = 1L; /** @@ -52,65 +57,28 @@ this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "BB2c" + c; } - /** - * This method provides the contract for implementing weighting models. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given tf and - * docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - //double f = termFrequency / numberOfDocuments; - return NORM - * keyFrequency - * ( - - Idf.log(numberOfDocuments - 1) - - Idf.REC_LOG_2_OF_E - + stirlingPower( - numberOfDocuments - + termFrequency - - 1d, - numberOfDocuments - + termFrequency - - TF - - 2d) - - stirlingPower(termFrequency, termFrequency - TF)); - } - - /** - *This method provides the contract for implementing weighting models. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - //double f = termFrequency / numberOfDocuments; + + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + final double TF = + tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); + // final double f = termFrequency / numberOfDocuments; return NORM * keyFrequency * ( - - Idf.log(numberOfDocuments - 1) - - Idf.REC_LOG_2_OF_E + - log(numberOfDocuments - 1) + - LOG_2_OF_E + stirlingPower( numberOfDocuments + termFrequency diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/BM25.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/BM25.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/BM25.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/BM25.java 2012-12-21 10:06:46.000000000 +0100 @@ -26,6 +26,11 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; +import static org.terrier.matching.models.WeightingModelLibrary.tf_robertson; +import static org.terrier.matching.models.WeightingModelLibrary.tf_concave_k;; + /** * This class implements the Okapi BM25 weighting model. The * default parameters used are:
@@ -35,76 +40,52 @@ * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class BM25 extends WeightingModel { + private static final long serialVersionUID = 1L; /** The constant k_1.*/ - private double k_1 = 1.2d; + protected final double k_1 = 1.2d; /** The constant k_3.*/ - private double k_3 = 8d; + protected final double k_3 = 1000d; /** The parameter b.*/ - private double b; - - /** A default constructor.*/ - public BM25() { - super(); - b=0.75d; - } - /** - * Returns the name of the model. - * @return the name of the model + protected double b = 0.75d; + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "BM25b"+b; } - /** - * Uses BM25 to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters + + /** + * {@inheritDoc} */ + @Override public double score(double tf, double docLength) { - double K = k_1 * ((1 - b) + b * docLength / averageDocumentLength) + tf; - return (tf * (k_3 + 1d) * keyFrequency / ((k_3 + keyFrequency) * K)) - * Idf.log((numberOfDocuments - documentFrequency + 0.5d) / (documentFrequency + 0.5d)); - } - /** - * Uses BM25 to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model BM25. 
- */ - public double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - double K = k_1 * ((1 - b) + b * docLength / averageDocumentLength) + tf; - return Idf.log((numberOfDocuments - n_t + 0.5d) / (n_t+ 0.5d)) * - ((k_1 + 1d) * tf / (K + tf)) * - ((k_3+1)*keyFrequency/(k_3+keyFrequency)); + final double keyFrequency = tf_concave_k(this.keyFrequency, k_3); + tf = tf_robertson(tf, b, docLength, averageDocumentLength, k_1); + final double idf = + log((numberOfDocuments - documentFrequency + 0.5d) / (documentFrequency + 0.5d)); + return keyFrequency * tf * idf; } /** * Sets the b parameter to BM25 ranking formula - * @param _b the b parameter value to use. + * @param b the b parameter value to use. */ - public void setParameter(double _b) { - this.b = _b; + @Override + public void setParameter(double b) { + this.b = b; } - /** * Returns the b parameter to the BM25 ranking formula as set by setParameter() */ + @Override public double getParameter() { return this.b; } - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/BM25F.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/BM25F.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/BM25F.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/BM25F.java 2012-12-06 12:24:19.000000000 +0100 @@ -41,15 +41,15 @@ * @author Craig Macdonald * @since 3.0 */ -public class BM25F extends PerFieldNormWeightingModel -{ +public class BM25F extends PerFieldNormWeightingModel { private static final long serialVersionUID = 1L; + /** * Constructs an instance of the BM25F * @throws Exception */ public BM25F() throws Exception { - super(new String[]{ + super(new String[] { BM.class.getName(), NormalisationB.class.getName() }); @@ -59,6 +59,4 @@ public String 
getInfo() { return "BM25F"; } - - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFI0.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFI0.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFI0.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFI0.java 2012-12-18 16:01:14.000000000 +0100 @@ -27,6 +27,8 @@ */ package org.terrier.matching.models; +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** Implementation of the basic Divergence from Independence model. * For details on this model, please see * IRRA at TREC 2009: Index Term Weighting Based on Divergence From Independence Model @@ -38,24 +40,20 @@ private static final long serialVersionUID = 1L; + /** + * {@inheritDoc} + */ @Override public String getInfo() { return "DFI0"; } + /** + * {@inheritDoc} + */ @Override - public double score(double tf, double docLength) - { - final double eij = super.termFrequency * (docLength / super.numberOfTokens); - return keyFrequency * Idf.log(1+ (tf - eij)/Math.sqrt(eij) ); + public double score(double tf, double docLength) { + final double eij = termFrequency * (docLength / super.numberOfTokens); + return keyFrequency * log(1 + (tf - eij)/Math.sqrt(eij)); } - - @Override - public double score(double tf, double docLength, double n_t, double F_t, - double keyFrequency) - { - final double eij = F_t * (docLength / super.numberOfTokens); - return keyFrequency * Idf.log(1+ (tf - eij)/Math.sqrt(eij) ); - } - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFRWeightingModel.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFRWeightingModel.java --- 
Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFRWeightingModel.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFRWeightingModel.java 2012-12-18 16:01:44.000000000 +0100 @@ -33,7 +33,6 @@ import org.terrier.structures.CollectionStatistics; import org.terrier.structures.EntryStatistics; - /** * This class implements a modular Divergence from Randomness weighting model. * Components in the model are specified individually, with many implementations @@ -50,8 +49,10 @@ * * @author Ben He */ -public class DFRWeightingModel extends WeightingModel{ +public class DFRWeightingModel extends WeightingModel { + private static final long serialVersionUID = 1L; + protected static final Logger logger = Logger.getLogger(DFRWeightingModel.class); /** The applied basic model for randomness. */ protected BasicModel basicModel; @@ -80,7 +81,7 @@ * @param components Corresponds to the names of the 3 DFR weighting models component * names, as passed to initialise(). */ - public DFRWeightingModel (String[] components){ + public DFRWeightingModel (String[] components) { this.initialise( components[0].trim(), components.length > 1 ? components[1].trim() : "", @@ -186,85 +187,63 @@ // set parameter this.setParameter(_parameter); } - + /** * Set the frequency normalisation parameter. * @param value The given parameter value. */ + @Override public void setParameter(double value){ this.parameter = value; this.normalisation.setParameter(parameter); - if (this.ENABLE_AFTEREFFECT) + if (this.ENABLE_AFTEREFFECT) { this.afterEffect.setParameter(parameter); + } } /** Return the parameter set by setParameter() * @return parameter double value */ + @Override public double getParameter(){ return this.parameter; } - - - /** - * Returns the name of the model. - * @return The name of the model. 
+ + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { String modelName = this.basicModel.getInfo(); - if (ENABLE_AFTEREFFECT) + if (ENABLE_AFTEREFFECT) { modelName += this.afterEffect.getInfo(); - if (this.ENABLE_NORMALISATION) + } + if (this.ENABLE_NORMALISATION) { modelName += this.normalisation.getInfo(); + } return modelName; } - /** - * Compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters + /** + * {@inheritDoc} */ - public final double score(double tf, double docLength) { + public double score(double tf, double docLength) { double tfn = tf; // if the frequency normalisation is enabled, do the normalisation. - if (this.ENABLE_NORMALISATION) + if (this.ENABLE_NORMALISATION) { tfn = normalisation.normalise(tf, docLength, termFrequency); + } double gain = 1; // if the first normalisation by after effect is enabled, compute the gain. - if (this.ENABLE_AFTEREFFECT) + if (this.ENABLE_AFTEREFFECT) { gain = afterEffect.gain(tfn, documentFrequency, termFrequency); + } // produce the final score. - return gain * - basicModel.score(tfn, - documentFrequency, - termFrequency, - keyFrequency, - docLength); + return gain * basicModel.score(tfn, documentFrequency, termFrequency, keyFrequency, docLength); } - /** - * Compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param queryTermWeight the term frequency in the query - * @return the score assigned by the weighting model PL2. 
+ + /** + * {@inheritDoc} */ - @SuppressWarnings("deprecation") - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double queryTermWeight) { - this.setDocumentFrequency(documentFrequency); - this.setTermFrequency(termFrequency); - this.setKeyFrequency(queryTermWeight); - return this.score(tf, docLength); - } - - @Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); @@ -276,13 +255,14 @@ this.normalisation.setAverageDocumentLength(_cs.getAverageDocumentLength()); this.i.setNumberOfDocuments(_cs.getNumberOfDocuments()); } - - + + /** + * {@inheritDoc} + */ @Override public void setEntryStatistics(EntryStatistics _es) { super.setEntryStatistics(_es); this.normalisation.setDocumentFrequency(_es.getDocumentFrequency()); } - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFR_BM25.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFR_BM25.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFR_BM25.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFR_BM25.java 2012-12-21 10:06:46.000000000 +0100 @@ -24,78 +24,58 @@ * Gianni Amati (original author) */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.*; + /** * This class implements the DFR_BM25 weighting model. * This DFR model, if expanded in Taylor's series, provides the BM25 formula, when the parameter c - * is set to 1. - * + * is set to 1. * @author Gianni Amati, Ben He */ public class DFR_BM25 extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. This must be followed * by specifying the c value. 
*/ public DFR_BM25() { super(); - this.c=1.0d; + this.c = 1.0d; } + /** * Constructs an instance of this class with the specified * value for the parameter c. * @param c the term frequency normalisation parameter value. */ public DFR_BM25(double c) { - super(); + this(); this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "DFR_BM25c" + c; } - /** - * Computes the score according to the model DFR_BM25. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the - * given tf and docLength, and other preset parameters - */ - public final double score( - double tf, - double docLength) { - double k_1 = 1.2d; - double k_3 = 1000d; - double TF = tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = 1d / (TF + k_1); - return ( (k_3 + 1d) * keyFrequency / (k_3 + keyFrequency)) * NORM - *TF * Idf.log((numberOfDocuments - documentFrequency + 0.5d) / - (documentFrequency + 0.5d)); - } - /** - * Computes the score according to the model DFR_BM25. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - double k_1 = 1.2d; - double k_3 = 1000d; - double TF = tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = 1d / (TF + k_1); - return ( (k_3 + 1d) * keyFrequency / (k_3 + keyFrequency)) * NORM - * TF * Idf.log((numberOfDocuments - documentFrequency + 0.5d) / - (documentFrequency + 0.5d)); + + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + final double k_1 = 1.2d; + final double k_3 = 1000d; + final double keyFrequency = tf_concave_k(this.keyFrequency, k_3); + final double TF = tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = 1d / (TF + k_1); + final double idf = + log((numberOfDocuments - documentFrequency + 0.5d) / (documentFrequency + 0.5d)); + return keyFrequency * NORM * TF * idf; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFRee.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFRee.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DFRee.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DFRee.java 2012-12-18 16:01:32.000000000 +0100 @@ -24,6 +24,9 @@ * Gianni Amati (Original author) */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the DFRee weighting model. DFRee stands for DFR free from parameters. 
* In particular, the DFRee model computes an average number of extra bits (as information @@ -35,93 +38,44 @@ * @author Gianni Amati */ public class DFRee extends WeightingModel { + private static final long serialVersionUID = 1L; + /** model name */ private static final String name = "DFRee"; - /** - * A default constructor to make this model. + * {@inheritDoc} */ - public DFRee() { - super(); - } - /** - * Returns the name of the model, in this case "DFRee" - * @return the name of the model - */ - public final String getInfo() { + @Override + public String getInfo() { return name; } - /** - * Uses DFRee to compute a weight for a term in a document. - * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - /** - * DFRee model with the log normalisation function. - */ - double prior = tf/docLength; - double posterior = (tf +1d)/(docLength +1); - double InvPriorCollection = numberOfTokens/termFrequency; - //double alpha = 1d/docLength; //0 <= alpha <= posterior - - - double norm = tf*Idf.log(posterior/prior) ; - - return keyFrequency * norm *( - tf *( - - Idf.log (prior *InvPriorCollection) - ) - + - (tf+1d) * ( - + Idf.log ( posterior*InvPriorCollection) - ) - + 0.5*Idf.log(posterior/prior) - ); - } - - - /** - * Uses DFRee to compute a weight for a term in a document. - * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term (ignored) - * @param termFrequency the term frequency in the collection (ignored) - * @param keyFrequency the term frequency in the query (ignored). - * @return the score assigned by the weighting model DFRee. 
+ /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) - { + @Override + public double score(double tf, double docLength) { /** * DFRee model with the log normalisation function. */ - double prior = tf/docLength; - double posterior = (tf +1d)/(docLength +1); - double InvPriorCollection = numberOfTokens/termFrequency; - //double alpha = 1d/docLength; //0 <= alpha <= posterior - - - double norm = tf*Idf.log(posterior/prior) ; - - return keyFrequency * norm *( - tf *( - - Idf.log (prior *InvPriorCollection) - ) - + - (tf+1d) * ( - + Idf.log ( posterior*InvPriorCollection) - ) - + 0.5*Idf.log(posterior/prior) - ); + final double prior = tf/docLength; + final double posterior = (tf +1d)/(docLength +1); + final double InvPriorCollection = numberOfTokens/termFrequency; + // final double alpha = 1d/docLength; //0 <= alpha <= posterior + + final double norm = tf*log(posterior/prior); + + return keyFrequency * norm *( + tf *( + - log(prior *InvPriorCollection) + ) + + + (tf+1d) * ( + + log( posterior*InvPriorCollection) + ) + + 0.5*log(posterior/prior) + ); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DLH.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DLH.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DLH.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DLH.java 2012-12-18 16:02:00.000000000 +0100 @@ -26,6 +26,9 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the DLH weighting model. This is a parameter-free * weighting model. 
Even if the user specifies a parameter value, it will NOT @@ -39,60 +42,29 @@ * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class DLH extends WeightingModel { + private static final long serialVersionUID = 1L; - private double k = 0.5d; + + protected double k = 0.5d; + /** - * A default constructor. + * {@inheritDoc} */ - public DLH() { - super(); - } - - /** - * Returns the name of the model. - * @return the name of the model - */ - public final String getInfo() { + @Override + public String getInfo() { return "DLH"; } - /** - * Uses DLH to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double f = tf/docLength ; - return - keyFrequency - * (tf*Idf.log ((tf* averageDocumentLength/docLength) * - ( numberOfDocuments/termFrequency) ) - + (docLength -tf) * Idf.log (1d -f) - + 0.5d* Idf.log(2d*Math.PI*tf*(1d-f))) - /(tf + k); - } - /** - * Uses DLH to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model DLH. 
+ + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - double f = tf/docLength ; - return - keyFrequency - * (tf*Idf.log ((tf* averageDocumentLength/docLength) *( numberOfDocuments/F_t) ) - + (docLength -tf) * Idf.log (1d -f) - + 0.5d* Idf.log(2d*Math.PI*tf*(1d-f))) + @Override + public double score(double tf, double docLength) { + final double f = tf/docLength ; + return keyFrequency + * (tf*log((tf* averageDocumentLength/docLength) * (numberOfDocuments/termFrequency)) + + (docLength - tf) * log(1d - f) + + 0.5d* log(2d*Math.PI*tf*(1d - f))) /(tf + k); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DLH13.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DLH13.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DLH13.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DLH13.java 2012-12-18 16:02:06.000000000 +0100 @@ -26,6 +26,9 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the DLH13 weighting model. This is a parameter-free * weighting model. Even if the user specifies a parameter value, it will NOT @@ -42,58 +45,28 @@ * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class DLH13 extends WeightingModel { + private static final long serialVersionUID = 1L; - private double k = 0.5d; - /** - * A default constructor. - */ - public DLH13() { - super(); - } + + protected double k = 0.5d; - /** - * Returns the name of the model. 
- * @return the name of the model + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "DLH13"; } - /** - * Uses DLH13 to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double f = tf/docLength ; - return - keyFrequency - * (tf*Idf.log ((tf* averageDocumentLength/docLength) * - ( numberOfDocuments/termFrequency) ) - + 0.5d* Idf.log(2d*Math.PI*tf*(1d-f))) - /(tf + k); - } - /** - * Uses DLH13 to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model DLH13. 
+ + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - double f = tf/docLength ; - return - keyFrequency - * (tf*Idf.log ((tf* averageDocumentLength/docLength) *( numberOfDocuments/F_t) ) - + 0.5d* Idf.log(2d*Math.PI*tf*(1d-f))) + @Override + public double score(double tf, double docLength) { + final double f = tf/docLength ; + return keyFrequency + * (tf*log((tf * averageDocumentLength/docLength) * (numberOfDocuments/termFrequency)) + + 0.5d* log(2d*Math.PI*tf*(1d - f))) /(tf + k); - } + } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DPH.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DPH.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DPH.java 2011-06-16 18:26:37.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DPH.java 2012-12-18 16:02:13.000000000 +0100 @@ -26,6 +26,9 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the DPH hypergeometric weighting model. P * stands for Popper's normalization. This is a parameter-free @@ -43,64 +46,28 @@ * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class DPH extends WeightingModel { + private static final long serialVersionUID = 1L; - /** - * A default constructor. - */ - public DPH() { - super(); - } - /** - * Returns the name of the model. - * @return the name of the model + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "DPH"; } - /** - * Uses DPH to compute a weight for a term in a document. 
- * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double f = tf/docLength; - double norm = (1d-f) * (1d -f)/(tf+1d); - - return keyFrequency *norm - * (tf*Idf.log ((tf* - averageDocumentLength/docLength) * - ( numberOfDocuments/termFrequency) ) - + 0.5d* Idf.log(2d*Math.PI*tf*(1d-f)) - ); - } - /** - * Uses DPH to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model DPH. + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - double f = tf/docLength; - double norm = (1d-f) * (1d -f)/(tf+1d); - - return keyFrequency *norm - * (tf*Idf.log ((tf* - averageDocumentLength/docLength) * - ( numberOfDocuments/F_t) ) - + 0.5d* Idf.log(2d*Math.PI*tf*(1d-f)) + @Override + public double score(double tf, double docLength) { + final double f = tf/docLength; + final double norm = (1d - f) * (1d -f)/(tf+1d); + return keyFrequency + * norm + * (tf*log((tf * averageDocumentLength/docLength) * (numberOfDocuments/termFrequency)) + + 0.5d* log(2d*Math.PI*tf*(1d - f)) ); } - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DirichletLM.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DirichletLM.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/DirichletLM.java 2011-06-16 18:26:37.000000000 +0200 +++ 
Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/DirichletLM.java 2012-12-18 16:01:52.000000000 +0100 @@ -24,6 +24,8 @@ */ package org.terrier.matching.models; +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** Bayesian smoothing with Dirichlet Prior. This has one parameter, mu > 0. "The optimal value of * mu also tends to be larger for long queries than for title queries. The optimal ... seems to vary from * collection to collection, though in most cases, it is around 2,000. The tail of the curves is generally @@ -39,12 +41,9 @@ * @since 3.0 */ public class DirichletLM extends WeightingModel { + private static final long serialVersionUID = 1L; - @Override - public String getInfo() { - return "DirichletLM_mu" + c; - } /** * Constructs an instance of DirichletLM */ @@ -53,16 +52,19 @@ c = 2500; } + /** + * {@inheritDoc} + */ @Override - public double score(double tf, double docLength) { - return Idf.log(1 + (tf/(c * (super.termFrequency / numberOfTokens))) ) + Idf.log(c/(docLength+c)); + public String getInfo() { + return "DirichletLM_mu" + c; } + /** + * {@inheritDoc} + */ @Override - public double score(double tf, double docLength, double n_t, double F_t, - double keyFrequency) - { - return Idf.log(1 + (tf/(c * (F_t / numberOfTokens))) ) + Idf.log(c/(docLength+c)); + public double score(double tf, double docLength) { + return log(1 + (tf/(c * (termFrequency / numberOfTokens)))) + log(c/(docLength + c)); } - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/Hiemstra_LM.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/Hiemstra_LM.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/Hiemstra_LM.java 2011-06-16 18:26:37.000000000 +0200 +++ 
Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/Hiemstra_LM.java 2012-12-18 16:02:21.000000000 +0100 @@ -24,6 +24,8 @@ */ package org.terrier.matching.models; +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the Hiemstra LM weighting model. * A default lambda value of 0.15 is used, according to section @@ -34,17 +36,17 @@ * @author Jie Peng */ public class Hiemstra_LM extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. Uses the default value of lambda=0.15. */ - public Hiemstra_LM() { super(); this.c = 0.15; - } - + /** * Constructs an instance of this class with the * specified value for the parameter lambda. @@ -54,62 +56,23 @@ this(); this.c = lambda; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - - public final String getInfo(){ + @Override + public String getInfo(){ return "Hiemstra_LM" + c; } - /** - * Uses Hiemestra_LM to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - return - - Idf.log(1 + (c * tf * numberOfTokens) - / ((1-c) * termFrequency * docLength )) - ; - - /* Idf.log(((1 - c) * (tf / docLength)) - / (c * (termFrequency / numberOfTokens)) - + 1) - + Idf.log(c * (termFrequency / numberOfTokens)) - ; - */ - } - /** - * Uses Hiemstra_LM to compute a weight for a term in a document. 
- * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model Hiemstra_LM. + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - - return - - Idf.log(1 + (c * tf * numberOfTokens) - / ((1-c) * F_t * docLength)) ; - -/** Idf.log(((1 - c) * (tf / docLength)) - / (c * (F_t / numberOfTokens)) - + 1 ) - + Idf.log(c* (F_t / numberOfTokens)); -*/ - + @Override + public double score(double tf, double docLength) { + return log(1 + (c * tf * numberOfTokens) / ((1-c) * termFrequency * docLength)); +/* log(((1 - c) * (tf / docLength)) / (c * (termFrequency / numberOfTokens)) + 1 ) + + log(c* (F_t / numberOfTokens)); +*/ } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/IFB2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/IFB2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/IFB2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/IFB2.java 2012-12-18 16:02:31.000000000 +0100 @@ -26,19 +26,24 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the IFB2 weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class IFB2 extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. This must be followed by * specifying the c value. 
*/ public IFB2() { super(); - this.c=1.0d; + this.c = 1.0d; } /** @@ -50,46 +55,23 @@ this(); this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "IFB2c" + c; } - /** - * Uses IFB2 to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - //double f = termFrequency / numberOfDocuments; - return TF * keyFrequency * i.idfDFR(termFrequency) * NORM; - } - /** - * Uses IFB2 to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model IFB2. 
+ + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - //double f = termFrequency / numberOfDocuments; + @Override + public double score(double tf, double docLength) { + final double TF = tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); + // final double f = termFrequency / numberOfDocuments; return TF * keyFrequency * i.idfDFR(termFrequency) * NORM; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/Idf.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/Idf.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/Idf.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/Idf.java 2012-12-18 11:11:26.000000000 +0100 @@ -26,21 +26,22 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + import java.io.Serializable; -//import org.terrier.structures.CollectionStatistics; +import static org.terrier.matching.models.WeightingModelLibrary.log; +import static org.terrier.matching.models.WeightingModelLibrary.LOG_2_OF_E; + /** * This class computes the idf values for specific terms in the collection. 
* @author Gianni Amati, Ben He, Vassilis Plachouras */ public final class Idf implements Serializable, Cloneable{ private static final long serialVersionUID = 1L; - /** The natural logarithm of 2, used to change the base of logarithms.*/ - public static final double LOG_2_OF_E = Math.log(2.0D); - /** The reciprocal of CONSTANT, computed for efficiency.*/ - public static final double REC_LOG_2_OF_E = 1.0D / LOG_2_OF_E; + /** The number of documents in the collection.*/ private double numberOfDocuments; + /** A default constructor. NOTE: You must set the number of documents * if you intend to use the idf* functions in this class */ public Idf() {} @@ -68,7 +69,7 @@ * @return the base 2 log of numberOfDocuments/d */ public double idf(double d) { - return (Math.log(numberOfDocuments/d) * REC_LOG_2_OF_E); + return (Math.log(numberOfDocuments/d) * LOG_2_OF_E); } /** @@ -77,7 +78,7 @@ * @return the idf of the given number d. */ public double idf(int d) { - return (Math.log(numberOfDocuments/((double)d)) * REC_LOG_2_OF_E); + return (Math.log(numberOfDocuments/((double)d)) * LOG_2_OF_E); } /** @@ -86,7 +87,7 @@ * @return the base 2 log of numberOfDocuments/d */ public double idfDFR(double d) { - return (Math.log((numberOfDocuments+1)/(d+0.5)) * REC_LOG_2_OF_E); + return (Math.log((numberOfDocuments+1)/(d+0.5)) * LOG_2_OF_E); } /** @@ -95,7 +96,7 @@ * @return the idf of the given number d. 
*/ public double idfDFR(int d) { - return (Math.log((numberOfDocuments+1)/((double)d+0.5d)) * REC_LOG_2_OF_E); + return (Math.log((numberOfDocuments+1)/((double)d+0.5d)) * LOG_2_OF_E); } /** @@ -106,7 +107,7 @@ * @return the INQUERY idf of the number d */ public double idfENQUIRY(double d) { - return (Math.log(numberOfDocuments - d / d) * REC_LOG_2_OF_E); + return (Math.log(numberOfDocuments - d / d) * LOG_2_OF_E); } /** @@ -142,25 +143,7 @@ public double idfNENQUIRY(double d) { return (log(numberOfDocuments + 1.0D, d + 0.5D) / log(numberOfDocuments+1.0D)); } - - /** - * Returns the base 2 log of the given double precision number. - * @param d The number of which the log we will compute - * @return the base 2 log of the given number - */ - public static double log(double d) { - return (Math.log(d) * REC_LOG_2_OF_E); - } - - /** - * Returns the base 2 log of d1 over d2 - * @param d1 the nominator - * @param d2 the denominator - * @return the base 2 log of d1/d2 - */ - public static double log(double d1, double d2) { - return (Math.log(d1/d2) * REC_LOG_2_OF_E); - } + /** * main * @param args diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/InB2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/InB2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/InB2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/InB2.java 2012-12-18 16:02:53.000000000 +0100 @@ -26,20 +26,26 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the PL2 weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class InB2 extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. 
This must be followed by specifying * the c value. */ public InB2() { super(); - this.c=1.0d; + this.c = 1.0d; } + /** * Constructs an instance of this class with the specified * value for the parameter c @@ -49,45 +55,23 @@ this(); this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "InB2c" + c; } - /** - * This method provides the contract for implementing weighting models. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - //double f = this.termFrequency / numberOfDocuments; - return TF * keyFrequency * i.idfDFR(documentFrequency) * NORM; - } - /** - *This method provides the contract for implementing weighting models. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - //double f = termFrequency / numberOfDocuments; + + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + final double TF = tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); + // final double f = termFrequency / numberOfDocuments; return TF * keyFrequency * i.idfDFR(documentFrequency) * NORM; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/InL2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/InL2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/InL2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/InL2.java 2012-12-18 16:03:00.000000000 +0100 @@ -26,20 +26,26 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the InL2 weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class InL2 extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. This must be followed * by specifying the c value. */ public InL2() { super(); - this.c=1.0d; + this.c = 1.0d; } + /** * Constructs an instance of this class with the specified * value for the parameter c. @@ -49,44 +55,22 @@ this(); this.c = c; } - /** - * Returns the name of the model. 
- * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "InL2c" + c; } - /** - * Computes the score according to the model InL2. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the - * given tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = 1d / (TF + 1d); - return TF * i.idfDFR(documentFrequency) * keyFrequency * NORM; - } - /** - * Computes the score according to the model InL2. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = 1d / (TF + 1d); + + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + final double TF = tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = 1d / (TF + 1d); return TF * i.idfDFR(documentFrequency) * keyFrequency * NORM; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/In_expB2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/In_expB2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/In_expB2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/In_expB2.java 2012-12-18 16:02:38.000000000 +0100 @@ -26,20 +26,26 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the PL2 weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class In_expB2 extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. This must be followed * by specifying the c value. */ public In_expB2() { super(); - this.c=1.0d; + this.c = 1.0d; } + /** * Constructs an instance of this class with the specified * value for the parameter beta. @@ -49,50 +55,24 @@ this(); this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "In_expB2c" + c; } - /** - * This method provides the contract for implementing weighting models. 
- * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given tf and - * docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - double f = this.termFrequency / numberOfDocuments; - double n_exp = numberOfDocuments * (1 - Math.exp(-f)); - return TF * i.idfDFR(n_exp) * keyFrequency * NORM; - } - /** - *This method provides the contract for implementing weighting models. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - //double TF = tf * beta * averageDocumentLength / docLength; - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - double f = termFrequency / numberOfDocuments; - double n_exp = - numberOfDocuments * (1 - Math.exp(-f)); + + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + final double TF = tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); + final double f = termFrequency / numberOfDocuments; + final double n_exp = numberOfDocuments * (1 - Math.exp(-f)); return TF * i.idfDFR(n_exp) * keyFrequency * NORM; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/In_expC2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/In_expC2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/In_expC2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/In_expC2.java 2012-12-18 16:02:45.000000000 +0100 @@ -26,20 +26,24 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + /** * This class implements the In_expC2 weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras */ public class In_expC2 extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. This must be followed * by specifying the c value. */ public In_expC2() { super(); - this.c=1.0d; + this.c = 1.0d; } + /** * Constructs an instance of this class * with the specified value for the parameter beta. 
@@ -49,48 +53,24 @@ this(); this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "In_expC2c" + c; } - /** - * Computes the score according to the weighting models In_expC2. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the - * given tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Math.log(1d + c * averageDocumentLength / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - double f = termFrequency / numberOfDocuments; - double n_exp = numberOfDocuments * (1 - Math.exp(-f)); - return TF * i.idfDFR(n_exp) * keyFrequency * NORM; - } - /** - * Computes the score according to the weighting models In_expC2. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - double TF = - tf * Math.log(1.0d + (c * averageDocumentLength) / docLength); - double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); - double f = termFrequency / numberOfDocuments; - double n_exp = numberOfDocuments * (1 - Math.exp(-f)); + + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + final double TF = tf * Math.log(1.0d + (c * averageDocumentLength) / docLength); + final double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); + final double f = termFrequency / numberOfDocuments; + final double n_exp = numberOfDocuments * (1 - Math.exp(-f)); return TF * i.idfDFR(n_exp) * keyFrequency * NORM; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/Js_KLs.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/Js_KLs.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/Js_KLs.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/Js_KLs.java 2012-12-18 16:03:09.000000000 +0100 @@ -24,6 +24,9 @@ * Gianni Amati (Original author) */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the Js_KLs weighting model, which is the product * of two measures: the Jefrreys' divergence with the Kullback Leibler's divergence. @@ -54,62 +57,35 @@ * @author Gianni Amati */ public class Js_KLs extends WeightingModel { + private static final long serialVersionUID = 1L; + /** - * A default constructor to make this model. 
- */ - public Js_KLs() { - super(); - } - /** - * Returns the name of the model, in this case "Js_KLs" - * @return the name of the model + * {@inheritDoc} */ - - public final String getInfo() { + @Override + public String getInfo() { return "Js_KLs" ; } - /** - * Uses Js_KLs to compute a weight for a term in a document. - * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - return score (tf, docLength,documentFrequency , termFrequency , keyFrequency) ; - } - - /** - * Uses Js_KLs to compute a weight for a term in a document. - * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term (ignored) - * @param termFrequency the term frequency in the collection (ignored) - * @param keyFrequency the term frequency in the query (ignored). - * @return the score assigned by the weighting model Js_KLs. + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) - { - - //Two neighbouring distributions in the document: the true and the smoothed one. - double maximumLikelihoodEstimate = tf/docLength; //the true probability - double smoothedProbability = (tf +1d)/(docLength +1d); //the smoothed probability + @Override + public double score(double tf, double docLength) { + // Two neighboring distributions in the document: the true and the smoothed one. 
+ final double maximumLikelihoodEstimate = tf/docLength; // the true probability + final double smoothedProbability = (tf + 1d)/(docLength + 1d); // the smoothed probability // The true distribution in the collection: - double collectionPrior = termFrequency/numberOfTokens; + final double collectionPrior = termFrequency/numberOfTokens; - /** The divergence measure in the document between neighbouring distributions. */ - double Js = (docLength /(docLength+1))*(1 - maximumLikelihoodEstimate) * Idf.log ((tf+1d)/tf); - // The information of the sample wrt collection priors - double KLs = - Idf.log ( smoothedProbability/collectionPrior) + tf*Idf.log (1+1d/tf); - return keyFrequency * tf * Js *KLs ; + // The divergence measure in the document between neighbouring distributions. + final double Js = + (docLength / (docLength + 1))*(1 - maximumLikelihoodEstimate) * log((tf + 1d)/tf); + // The information of the sample wrt collection priors + final double KLs = + log(smoothedProbability/collectionPrior) + tf*log(1 + 1d/tf); + return keyFrequency * tf * Js * KLs; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/LGD.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/LGD.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/LGD.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/LGD.java 2012-12-18 16:03:23.000000000 +0100 @@ -24,6 +24,9 @@ * Gianni Amati (original author) */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the LGD weighting model. 
For more information about * this model, see: @@ -38,15 +41,18 @@ * @author Gianni Amati */ public class LGD extends WeightingModel { + private static final long serialVersionUID = 1L; + /** * A default constructor. This must be followed * by specifying the c value. */ public LGD() { super(); - c =1.0d; + this.c = 1.0d; } + /** * Constructs an instance of this class with the * specified value for the parameter c. @@ -56,48 +62,22 @@ this(); this.c = c; } - /** - * Returns the name of the model. - * @return the name of the model + + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "LGDc" + c; } - /** - * Uses LGD to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double freq = (1.0D * documentFrequency) / (1.0D * numberOfDocuments); - return - keyFrequency - * Idf.log( ( freq + TF)/freq); - } - /** - * Uses LGD to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model LGD. 
+ + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double n_t, - double F_t, - double keyFrequency) { - double TF = - tf * Idf.log(1.0d + (c * averageDocumentLength) / docLength); - double freq = (1.0D * n_t) / (1.0D * numberOfDocuments); - return - keyFrequency - * Idf.log(( freq + TF)/freq); - } + @Override + public double score(double tf, double docLength) { + final double TF = tf * log(1.0d + (c * averageDocumentLength) / docLength); + final double freq = (1.0D * documentFrequency) / (1.0D * numberOfDocuments); + return keyFrequency * log((freq + TF)/freq); + } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/LemurTF_IDF.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/LemurTF_IDF.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/LemurTF_IDF.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/LemurTF_IDF.java 2012-12-18 16:03:16.000000000 +0100 @@ -26,63 +26,40 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; +import static org.terrier.matching.models.WeightingModelLibrary.tf_robertson; + /** * This class implements the TF_IDF weighting model as it is implemented in Lemur. * See Notes on the Lemur TFIDF model. Chenxiang Zhai, 2001. * @author Ben He, Gianni Amati, Vassilis Plachouras */ public class LemurTF_IDF extends WeightingModel { + private static final long serialVersionUID = 1L; /** The constant k_1.*/ - private double k_1 = 1.2d; + protected double k_1 = 1.2d; /** The constant b.*/ - private double b = 0.75d; + protected double b = 0.75d; + /** - * A default constructor. This must be followed by - * specifying the c value. 
- */ - public LemurTF_IDF() { - super(); - } - /** - * Returns the name of the model. - * @return the name of the model + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return "LemurTF_IDF"; } - /** - * Uses LemurTF_IDF to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double Robertson_tf = k_1*tf/(tf+k_1*(1-b+b*docLength/averageDocumentLength)); - return keyFrequency*Robertson_tf * - Math.pow(Idf.log(numberOfDocuments/documentFrequency), 2); - } - /** - * Uses LemurTF_IDF to compute a weight for a term in a document. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term - * @param termFrequency the term frequency in the collection - * @param keyFrequency the term frequency in the query - * @return the score assigned by the weighting model LemurTF_IDF. 
- */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - double Robertson_tf = k_1*tf/(tf+k_1*(1-b+b*docLength/averageDocumentLength)); - return keyFrequency*Robertson_tf * - Math.pow(Idf.log(numberOfDocuments/documentFrequency), 2); + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + tf = tf_robertson(tf, b, docLength, averageDocumentLength, k_1); + final double idf = Math.pow(log(numberOfDocuments/documentFrequency), 2); + return keyFrequency * tf * idf; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/MDL2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/MDL2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/MDL2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/MDL2.java 2012-12-18 16:03:34.000000000 +0100 @@ -48,8 +48,7 @@ * @author Vassilis Plachouras and Craig Macdonald * @since 3.0 */ -public class MDL2 extends WeightingModel -{ +public class MDL2 extends WeightingModel { static final double LOG2 = Math.log(2.0d); Class normClass; @@ -68,16 +67,21 @@ * @param parameters * @throws Exception */ - public MDL2(String[] parameters) throws Exception - { + public MDL2(String[] parameters) throws Exception { this.normClass = Class.forName(parameters[0]).asSubclass(Normalisation.class); } + /** + * {@inheritDoc} + */ @Override public String getInfo() { return this.getClass().getSimpleName(); } + /** + * {@inheritDoc} + */ @Override public void prepare() { super.prepare(); @@ -85,24 +89,28 @@ } + /** + * {@inheritDoc} + */ @Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); fieldCount = _cs.getNumberOfFields(); p = new 
double[fieldCount]; this.fieldNormalisations = new Normalisation[fieldCount]; - try{ - for(int fi=0;fi 0) fieldsWithTerm++; } double score = (fieldsWithTerm/2.0d)*Math.log(2.0d*Math.PI*super.termFrequency)/LOG2; - for(int fi = 0; fi < fieldCount; fi++) - { + for(int fi = 0; fi < fieldCount; fi++) { if (tff[fi] > 0) continue; - final double __p = 1.0d / super.numberOfDocuments * fieldsWithTerm * fieldWeights[fi]; - final double tfn_i = this.fieldNormalisations[fi].normalise(tff[fi], fieldLengths[fi], fieldTermFrequencies[fi]); + final double __p = + 1.0d / super.numberOfDocuments * fieldsWithTerm * fieldWeights[fi]; + final double tfn_i = + fieldNormalisations[fi].normalise(tff[fi], fieldLengths[fi], fieldTermFrequencies[fi]); tf_q -= tfn_i; q -= __p; - double tmp = tfn_i * Math.log(tfn_i / (super.termFrequency*__p))/LOG2 + Math.log(tfn_i/super.termFrequency)/(2.0d*LOG2); + double tmp = + tfn_i * Math.log(tfn_i / (super.termFrequency*__p))/LOG2 + Math.log(tfn_i/super.termFrequency)/(2.0d*LOG2); if (tmp > 0) score += tmp; } @@ -149,15 +164,12 @@ return keyFrequency * score; } + /** + * {@inheritDoc} + */ @Override public double score(double tf, double docLength) { return 0; } - @Override - public double score(double tf, double docLength, double n_t, double F_t, - double keyFrequency) { - return 0; - } - } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/ML2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/ML2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/ML2.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/ML2.java 2012-12-18 16:03:45.000000000 +0100 @@ -48,8 +48,7 @@ * @author Vassilis Plachouras and Craig Macdonald * @since 3.0 */ -public class ML2 extends WeightingModel -{ +public class ML2 extends WeightingModel { 
static final double LOG2 = Math.log(2.0d); Class normClass; @@ -69,36 +68,44 @@ * @param parameters * @throws Exception */ - public ML2(String[] parameters) throws Exception - { + public ML2(String[] parameters) throws Exception { this.normClass = Class.forName(parameters[0]).asSubclass(Normalisation.class); } + /** + * {@inheritDoc} + */ @Override public String getInfo() { return this.getClass().getSimpleName(); } + /** + * {@inheritDoc} + */ @Override public void prepare() { super.prepare(); initialScore = -gF.compute_log(super.termFrequency + 1.0d)/LOG2; - } + /** + * {@inheritDoc} + */ @Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); fieldCount = _cs.getNumberOfFields(); p = new double[fieldCount]; this.fieldNormalisations = new Normalisation[fieldCount]; - try{ - for(int fi=0;fi normClass; /** @@ -69,50 +71,59 @@ */ public PerFieldNormWeightingModel( Class _basicModel, - Class _normalisationModel) throws Exception - { + Class _normalisationModel) throws Exception { this.params = new String[]{_basicModel.getSimpleName(), _normalisationModel.getSimpleName()}; this.basicModel = _basicModel.newInstance(); this.normClass = _normalisationModel; } + /** * Constructs an instance of PerFieldNormWeightingModel * @param parameters * @throws Exception */ - public PerFieldNormWeightingModel(String[] parameters) throws Exception - { + public PerFieldNormWeightingModel(String[] parameters) throws Exception { this.params = parameters; this.basicModel = Class.forName(parameters[0]).asSubclass(BasicModel.class).newInstance(); this.normClass = Class.forName(parameters[1]).asSubclass(Normalisation.class); } - + + /** + * {@inheritDoc} + */ @Override public void prepare() { super.prepare(); } + /** + * {@inheritDoc} + */ @Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = 
new double[fieldCount]; - for(int i=0;i */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; +import static org.terrier.matching.models.WeightingModelLibrary.tf_robertson; + /** * This class implements the TF_IDF weighting model. * tf is given by Robertson's tf and idf is given by the @@ -40,79 +44,59 @@ private static final String name = "TF_IDF"; /** The constant k_1.*/ - private double k_1 = 1.2d; + protected final double k_1 = 1.2d; /** The constant b.*/ - private double b = 0.75d; + protected double b = 0.75d; /** - * A default constructor to make this model. + * A default constructor. The parameter b already has a default value. */ public TF_IDF() { super(); } + /** * Constructs an instance of TF_IDF - * @param _b + * @param b */ - public TF_IDF(double _b) { + public TF_IDF(double b) { this(); - this.b = _b; + this.b = b; } - /** - * Returns the name of the model, in this case "TF_IDF" - * @return the name of the model + /** + * {@inheritDoc} */ - public final String getInfo() { + @Override + public String getInfo() { return name; } - /** - * Uses TF_IDF to compute a weight for a term in a document. - * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - double Robertson_tf = k_1*tf/(tf+k_1*(1-b+b*docLength/averageDocumentLength)); - double idf = Idf.log(numberOfDocuments/documentFrequency+1); - return keyFrequency * Robertson_tf * idf; - } - /** - * Uses TF_IDF to compute a weight for a term in a document. 
- * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term (ignored) - * @param termFrequency the term frequency in the collection (ignored) - * @param keyFrequency the term frequency in the query (ignored). - * @return the score assigned by the weighting model TF_IDF. - */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) - { - double Robertson_tf = k_1*tf/(tf+k_1*(1-b+b*docLength/averageDocumentLength)); - double idf = Idf.log(numberOfDocuments/documentFrequency+1); - return keyFrequency*Robertson_tf * idf; + /** + * {@inheritDoc} + */ + @Override + public double score(double tf, double docLength) { + tf = tf_robertson(tf, b, docLength, averageDocumentLength, k_1); + final double idf = log(numberOfDocuments/documentFrequency + 1); + return keyFrequency * tf * idf; } /** - * Sets the b parameter to ranking formula - * @param _b the b parameter value to use. - */ - public void setParameter(double _b) { - this.b = _b; + * Sets the b parameter to ranking formula. + * It corresponds to the slope of the pivot length normalization. + * @param b the b parameter value to use. 
+ */ + @Override + public void setParameter(double b) { + this.b = b; } - /** * Returns the b parameter to the ranking formula as set by setParameter() */ + @Override public double getParameter() { return this.b; } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/WeightingModel.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/WeightingModel.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/WeightingModel.java 2011-06-16 18:26:38.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/WeightingModel.java 2012-12-18 15:59:01.000000000 +0100 @@ -26,20 +26,25 @@ * Vassilis Plachouras */ package org.terrier.matching.models; + import java.io.Serializable; import org.terrier.matching.Model; +import static org.terrier.matching.models.WeightingModelLibrary.*; import org.terrier.querying.Request; import org.terrier.structures.CollectionStatistics; import org.terrier.structures.EntryStatistics; import org.terrier.structures.postings.Posting; + /** * This class should be extended by the classes used * for weighting terms and documents. * @author Gianni Amati, Ben He, Vassilis Plachouras */ -public abstract class WeightingModel implements Model, Serializable,Cloneable { +public abstract class WeightingModel implements Model, Serializable, Cloneable { + private static final long serialVersionUID = 1L; + /** The class used for computing the idf values.*/ protected Idf i; /** The average length of documents in the collection.*/ @@ -59,10 +64,10 @@ protected double c = 1.0d; /** Number of unique terms in the collection */ protected double numberOfUniqueTerms; - /** The number of distinct entries in the inverted file. 
This figure can be calculated * as the sum of all Nt over all terms */ protected double numberOfPointers; + /** * A default constructor that initialises the idf i attribute */ @@ -71,8 +76,9 @@ } /** Clone this weighting model */ + @Override public Object clone() { - try{ + try { WeightingModel newModel = (WeightingModel)super.clone(); newModel.i = (Idf)this.i.clone(); return newModel; @@ -86,11 +92,11 @@ * @return java.lang.String */ public abstract String getInfo(); + /** * prepare */ - public void prepare() - { + public void prepare() { averageDocumentLength = cs.getAverageDocumentLength(); numberOfDocuments = (double)cs.getNumberOfDocuments(); i.setNumberOfDocuments(numberOfDocuments); @@ -100,55 +106,52 @@ documentFrequency = (double)getOverflowed(es.getDocumentFrequency()); termFrequency = (double)getOverflowed(es.getFrequency()); } + /** * Returns overflow * @param o * @return overflow */ - public static long getOverflowed(int o) - { + public static long getOverflowed(int o) { return o < 0 ? (o - Integer.MIN_VALUE) + (long)Integer.MAX_VALUE + 1l : (long)o; } + /** * Returns score * @param p * @return score */ - public double score(Posting p) - { + public double score(Posting p) { return this.score(p.getFrequency(), p.getDocumentLength()); } - CollectionStatistics cs; + protected CollectionStatistics cs; /** * Sets collection statistics * @param _cs */ - public void setCollectionStatistics(CollectionStatistics _cs) - { + public void setCollectionStatistics(CollectionStatistics _cs) { cs = _cs; } - EntryStatistics es; + + protected EntryStatistics es; /** * Sets entry statistics. 
* @param _es */ - public void setEntryStatistics(EntryStatistics _es) - { + public void setEntryStatistics(EntryStatistics _es) { es = _es; } - Request rq; + protected Request rq; /** * Sets request * @param _rq */ - public void setRequest(Request _rq) - { + public void setRequest(Request _rq) { rq = _rq; } - - + /** * This method provides the contract for implementing weighting models. * @param tf The term frequency in the document @@ -157,29 +160,17 @@ * and docLength, and other preset parameters */ public abstract double score(double tf, double docLength); - /** - * This method provides the contract for implementing weighting models. - * @param tf The term frequency in the document - * @param docLength the document's length - * @param n_t The document frequency of the term - * @param F_t the term frequency in the collection - * @param _keyFrequency the term frequency in the query - * @return the score returned by the implemented weighting model. - */ - public abstract double score( - double tf, - double docLength, - double n_t, - double F_t, - double _keyFrequency); + /** * Sets the average length of documents in the collection. * @param avgDocLength The documents' average length. * @deprecated Use setCollectionStatistics(CollectionStatistics) */ + @Deprecated public void setAverageDocumentLength(double avgDocLength) { averageDocumentLength = avgDocLength; } + /** * Sets the c value * @param _c the term frequency normalisation parameter value. @@ -188,7 +179,6 @@ this.c = _c; } - /** * Returns the parameter as set by setParameter() */ @@ -201,9 +191,11 @@ * @param docFreq the document frequency of the term in the collection. * @deprecated Use setEntryStatistics(EntryStatistics) */ + @Deprecated public void setDocumentFrequency(double docFreq) { documentFrequency = docFreq; } + /** * Sets the term's frequency in the query. * @param keyFreq the term's frequency in the query. 
@@ -211,48 +203,56 @@ public void setKeyFrequency(double keyFreq) { keyFrequency = keyFreq; } - - - + /** * Set the number of tokens in the collection. * @param value The number of tokens in the collection. * @deprecated Use setCollectionStatistics(CollectionStatistics) */ + @Deprecated public void setNumberOfTokens(double value){ this.numberOfTokens = value; } + /** * Sets the number of documents in the collection. * @param numOfDocs the number of documents in the collection. * @deprecated Use setCollectionStatistics(CollectionStatistics) */ + @Deprecated public void setNumberOfDocuments(double numOfDocs) { numberOfDocuments = numOfDocs; i.setNumberOfDocuments(numOfDocs); } + /** * Sets the term's frequency in the collection. * @param termFreq the term's frequency in the collection. * @deprecated Use setEntryStatistics(EntryStatistics) */ + @Deprecated public void setTermFrequency(double termFreq) { termFrequency = termFreq; } + /** * Set the number of unique terms in the collection. * @deprecated Use setCollectionStatistics(CollectionStatistics) */ + @Deprecated public void setNumberOfUniqueTerms(double number) { numberOfUniqueTerms = number; } + /** * Set the number of pointers in the collection. * @deprecated Use setCollectionStatistics(CollectionStatistics) */ + @Deprecated public void setNumberOfPointers(double number) { numberOfPointers = number; } + /** * This method provides the contract for implementing the * Stirling formula for the power series. 
@@ -262,6 +262,6 @@ */ public double stirlingPower(double n, double m) { double dif = n - m; - return (m + 0.5d) * Idf.log(n / m) + dif * Idf.log(n); + return (m + 0.5d) * log(n / m) + dif * log(n); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/WeightingModelFactory.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/WeightingModelFactory.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/WeightingModelFactory.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/WeightingModelFactory.java 2012-12-06 14:11:12.000000000 +0100 @@ -44,7 +44,8 @@ /** The default namespace for weighting models. */ public static final String NAMESPACE = "org.terrier.matching.models."; /** A cache for instantiated weighting models. */ - private static Map> cache = new HashMap>(); + private static Map> cache = + new HashMap>(); /** * Returns the requested weighting model. @@ -60,7 +61,7 @@ * @param index The index where the weighting model should be applied. 
*/ public static WeightingModel newInstance(String name, Index index) { - Logger logger = Logger.getLogger(WeightingModelFactory.class); + final Logger logger = Logger.getLogger(WeightingModelFactory.class); WeightingModel model = null; name = name.replaceFirst("^([^\\.]+(\\(|$))", NAMESPACE + "$1"); diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/WeightingModelLibrary.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/WeightingModelLibrary.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/WeightingModelLibrary.java 1970-01-01 01:00:00.000000000 +0100 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/WeightingModelLibrary.java 2012-12-21 14:09:48.000000000 +0100 @@ -0,0 +1,126 @@ +/* + * Terrier - Terabyte Retriever + * Webpage: http://terrier.org + * Contact: terrier{a.}dcs.gla.ac.uk + * University of Glasgow - School of Computing Science + * http://www.gla.ac.uk/ + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is WeightingModelLibrary.java. + * + * The Original Code is Copyright (C) 2004-2011 the University of Glasgow. + * All Rights Reserved. + * + * Contributor(s): + * Francois Rousseau (original author) + * + */ +package org.terrier.matching.models; + +/** + * A library of tf normalizations for weighting models such as the pivoted length normalization + * described in Singhal et al., 1996. 
+ * + * @since 4.0 + * @author Francois Rousseau + */ +public class WeightingModelLibrary { + + /** The natural logarithm of 2, used to change the base of logarithms.*/ + public static final double LOG_E_OF_2 = Math.log(2.0D); + /** The logarithm in base 2 of e, used to change the base of logarithms.*/ + public static final double LOG_2_OF_E = 1.0D / LOG_E_OF_2; + + /** + * Returns the base 2 log of the given double precision number. + * @param d The number of which the log we will compute + * @return the base 2 log of the given number + */ + public static double log(double d) { + return (Math.log(d) * LOG_2_OF_E); + } + + /** + * Returns the base 2 log of d1 over d2 + * @param d1 the numerator + * @param d2 the denominator + * @return the base 2 log of d1/d2 + */ + public static double log(double d1, double d2) { + return (Math.log(d1/d2) * LOG_2_OF_E); + } + + /** + * Returns a modified tf with pivot length normalization as described in Singhal et al., 1996. + * Pivoted document length normalization (SIGIR '96), pages 21-29. + * @param tf the term frequency to modify + * @param slope the slope + * @param dl the document length + * @param avdl the average document length in the collection + * @return a pivot length normalized tf + */ + public static double tf_pivoted(double tf, double slope, double dl, double avdl) { + return tf / (1 - slope + slope * dl/avdl); + } + + /** + * Returns a concave tf as described in Robertson and Walker, 1994. + * Some simple effective approximations to the 2-poisson model for probabilistic weighted + * retrieval (SIGIR '94), page 232-241. + * @param tf the term frequency to modify + * @param k the concavity coefficient + * @return a concave tf + */ + public static double tf_concave_k(double tf, double k) { + return (k + 1) * tf / (k + tf); + } + + /** + * Returns a concave tf as described in Singhal et al., 1999. AT&T at TREC-7. + * In Proceedings of the Seventh Text REtrieval Conference (TREC-7), pages 239-252. 
+ * @param tf the term frequency to modify + * (no concavity coefficient: the shape is fixed by the nested logarithms) + * @return a concave tf + */ + public static double tf_concave_log(double tf) { + return 1 + log(1 + log(tf)); + } + + /** + * Returns a concave pivot length normalized tf as described in Robertson et al., 1999. + * Okapi at TREC-7: automatic ad hoc, filtering, VLC and interactive track. + * In Proceedings of the Seventh Text REtrieval Conference (TREC-7), pages 253-264 + * @param tf the term frequency to modify + * @param b the slope + * @param dl the document length + * @param avdl the average document length in the collection + * @param k1 the concavity coefficient + * @return a concave pivot length normalized tf + */ + public static double tf_robertson(double tf, double b, double dl, double avdl, double k1) { + return tf_concave_k(tf_pivoted(tf, b, dl, avdl), k1); + } + + /** + * Returns a concave pivot length normalized tf as described in Singhal et al., 1999. + * AT&T at TREC-7. + * In Proceedings of the Seventh Text REtrieval Conference (TREC-7), pages 239-252. 
+ * @param tf the term frequency to modify + * @param s the slope + * @param dl the document length + * @param avdl the average document length in the collection + * @return a concave pivot length normalized tf + */ + public static double tf_cornell(double tf, double s, double dl, double avdl) { + return tf_pivoted(tf_concave_log(tf), s, dl, avdl); + } +} diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/XSqrA_M.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/XSqrA_M.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/XSqrA_M.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/XSqrA_M.java 2012-12-18 16:04:20.000000000 +0100 @@ -24,6 +24,9 @@ * Gianni Amati (Original author) */ package org.terrier.matching.models; + +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements the XSqrA_M weighting model, which computed the * inner product of Pearson's X^2 with the information growth computed @@ -49,61 +52,38 @@ * @author Gianni Amati */ public class XSqrA_M extends WeightingModel { + private static final long serialVersionUID = 1L; + /** - * A default constructor to make this model. + * {@inheritDoc} */ - public XSqrA_M() { - super(); - } - /** - * Returns the name of the model, in this case "XSqrA_M" - * @return the name of the model - */ - - public final String getInfo() { + @Override + public String getInfo() { return "XSqrA_M" ; } - /** - * Uses XSqrA_M to compute a weight for a term in a document. 
- * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @return the score assigned to a document with the given - * tf and docLength, and other preset parameters - */ - public final double score(double tf, double docLength) { - return score (tf, docLength,documentFrequency , termFrequency , keyFrequency) ; - } - - /** - * Uses XSqrA_M to compute a weight for a term in a document. - * @param tf The term frequency of the term in the document - * @param docLength the document's length - * @param documentFrequency The document frequency of the term (ignored) - * @param termFrequency the term frequency in the collection (ignored) - * @param keyFrequency the term frequency in the query (ignored). - * @return the score assigned by the weighting model XSqrA_M. + /** + * {@inheritDoc} */ - public final double score( - double tf, - double docLength, - double documentFrequency, - double termFrequency, - double keyFrequency) { - - //Two neighbouring distributions in the document: the true and the smoothed one. 
- double maximumLikelihoodEstimate = tf/docLength; //the true probability - double smoothedProbability = (tf +1d)/(docLength +1d); //the smoothed probability - - // The true distribution in the collection: - double collectionPrior = termFrequency/numberOfTokens; - - /** The divergence measure (Pearson) of the two neighbouring distributions*/ - double XSqrA = Math.pow(1d-maximumLikelihoodEstimate,2)/(tf+1d) ; - // The information growth in the document from the the true probability to the smoothed one and wrt the collection priors - double InformationDelta = ((tf+1d) * Idf.log (smoothedProbability/collectionPrior) -tf*Idf.log (maximumLikelihoodEstimate /collectionPrior) +0.5*Idf.log(smoothedProbability/maximumLikelihoodEstimate)); - //the inner product - return keyFrequency * tf*XSqrA *InformationDelta; - } + @Override + public double score(double tf, double docLength) { + //Two neighboring distributions in the document: the true and the smoothed one. + final double maximumLikelihoodEstimate = tf/docLength; //the true probability + final double smoothedProbability = (tf +1d)/(docLength +1d); //the smoothed probability + + // The true distribution in the collection: + final double collectionPrior = termFrequency/numberOfTokens; + + // The divergence measure (Pearson) of the two neighboring distributions + final double XSqrA = Math.pow(1d-maximumLikelihoodEstimate,2)/(tf+1d) ; + // The information growth in the document from the true probability to the smoothed one + // and wrt the collection priors + final double InformationDelta = ( + (tf+1d) * log(smoothedProbability/collectionPrior) + - tf * log(maximumLikelihoodEstimate /collectionPrior) + + 0.5 * log(smoothedProbability/maximumLikelihoodEstimate)); + //the inner product + return keyFrequency * tf*XSqrA *InformationDelta; + } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/aftereffect/B.java 
Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/aftereffect/B.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/aftereffect/B.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/aftereffect/B.java 2012-12-07 10:35:31.000000000 +0100 @@ -30,13 +30,12 @@ * after effect. B stands for the binomial model. * @author Ben He */ -public class B extends AfterEffect{ - /** - * - */ +public class B extends AfterEffect { + private static final long serialVersionUID = 1L; /** The name of the model. */ protected final String methodName = "B"; + /** * This method returns the name of the model. * @return String Return the name of the model. @@ -44,6 +43,7 @@ public String getInfo(){ return this.methodName; } + /** * This method computes the gain of encountering an extra token of the query term. * @param tf The term frequency in the document diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/aftereffect/LL.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/aftereffect/LL.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/aftereffect/LL.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/aftereffect/LL.java 2012-12-06 18:02:23.000000000 +0100 @@ -25,7 +25,7 @@ */ package org.terrier.matching.models.aftereffect; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the LL model for the first normalisation by @@ -61,6 +61,6 @@ * @return the gain returned by the implemented formula. 
*/ public double gain(double tf, double documentFrequency, double termFrequency){ - return Idf.log((1+tf)/tf); + return log((1+tf)/tf); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/B.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/B.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/B.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/B.java 2012-12-06 18:02:23.000000000 +0100 @@ -24,7 +24,8 @@ * Ben He */ package org.terrier.matching.models.basicmodel; -import org.terrier.matching.models.Idf; + +import static org.terrier.matching.models.WeightingModelLibrary.*; /** * This class implements the B basic model for randomness. B stands @@ -32,18 +33,10 @@ * @author Ben He */ public class B extends BasicModel{ - /** - * - */ private static final long serialVersionUID = 1L; /** The name of the model. */ protected String modelName = "B"; - /** - * A default constructor. - */ - public B(){ - super(); - } + /** * Returns the name of the model. * @return the name of the model @@ -51,6 +44,7 @@ public String getInfo(){ return this.modelName; } + /** * This method computes the score for the implemented weighting model. 
* @param tf The term frequency in the document @@ -67,8 +61,8 @@ double keyFrequency, double documentLength){ return keyFrequency * ( - - Idf.log(numberOfDocuments - 1) - - Idf.REC_LOG_2_OF_E + - log(numberOfDocuments - 1) + - LOG_2_OF_E + stirlingPower( numberOfDocuments + termFrequency diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/BM.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/BM.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/BM.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/BM.java 2012-12-21 10:06:46.000000000 +0100 @@ -25,8 +25,7 @@ */ package org.terrier.matching.models.basicmodel; -import org.terrier.matching.models.Idf; - +import static org.terrier.matching.models.WeightingModelLibrary.*; /** * This class implements the BM weighting model, which generates the original @@ -42,10 +41,9 @@ * @author Ben He */ public class BM extends BasicModel{ - /** - * - */ + private static final long serialVersionUID = 1L; + /** The constant k_1.*/ private double k_1 = 1.2d; /** The constant k_3.*/ @@ -65,6 +63,7 @@ public String getInfo(){ return this.modelName; } + /** * This method computes the score for the implemented weighting model. 
* @param tf The term frequency in the document @@ -79,10 +78,11 @@ double documentFrequency, double termFrequency, double keyFrequency, - double documentLength){ - return ((k_1 + 1) * tf)/(k_1 + tf) * - (((k_3+1)*keyFrequency)/(k_3+keyFrequency)) * - Idf.log((numberOfDocuments - documentFrequency + 0.5d) / - (documentFrequency + 0.5d)); + double documentLength) { + keyFrequency = tf_concave_k(keyFrequency, k_3); + tf = tf_concave_k(tf, k_1); + final double idf = + log((numberOfDocuments - documentFrequency + 0.5d) / (documentFrequency + 0.5d)); + return keyFrequency * tf * idf; } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/BasicModel.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/BasicModel.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/BasicModel.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/BasicModel.java 2012-12-06 18:02:23.000000000 +0100 @@ -30,6 +30,7 @@ import java.io.Serializable; import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class provides a contract for implementing the basic models for randomness in the DFR framework, @@ -39,27 +40,29 @@ * @see org.terrier.matching.models.DFRWeightingModel */ public abstract class BasicModel implements Serializable{ - /** - * - */ + private static final long serialVersionUID = 1L; + /** The class used for computing the idf values.*/ protected Idf i; /** The number of documents in the whole collection. 
*/ protected double numberOfDocuments; /** The number of tokens in the whole collection */ protected double numberOfTokens; + /** * A default constructor that initialises the idf i attribute */ public BasicModel() { i = new Idf(); } + /** * Returns the name of the model. * @return java.lang.String */ public abstract String getInfo(); + /** * Sets the number of documents in the collection. * @param numOfDocs the number of documents in the collection. @@ -68,6 +71,7 @@ this.numberOfDocuments = numOfDocs; this.i.setNumberOfDocuments(numOfDocs); } + /** * Set the number of tokens in the collection. * @param numTokens double The number of tokens in the collection. @@ -91,6 +95,7 @@ double termFrequency, double keyFrequency, double documentLength); + /** * This method provides the contract for implementing the * Stirling formula for the power series. @@ -100,6 +105,6 @@ */ public double stirlingPower(double n, double m) { double dif = n - m; - return (m + 0.5d) * Idf.log(n / m) + dif * Idf.log(n); + return (m + 0.5d) * log(n / m) + dif * log(n); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/Br.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/Br.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/Br.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/Br.java 2012-12-06 18:02:23.000000000 +0100 @@ -25,26 +25,18 @@ */ package org.terrier.matching.models.basicmodel; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.*; /** * This class implements the Bernoulli model of randomness * @author Gianni Amati, Ben He, Vassilis Plachouras, modified by Craig Macdonald for Bernoulli */ public class Br extends BasicModel{ - /** - * - */ + 
private static final long serialVersionUID = 1L; /** The name of the model. */ protected String modelName = "Br"; - /** - * A default constructor. - */ - public Br(){ - super(); - } /** * Returns the name of the model. * @return the name of the model @@ -52,6 +44,7 @@ public String getInfo(){ return this.modelName; } + /** * This method computes the score for the implemented weighting model. * @param tf The term frequency in the document @@ -68,10 +61,10 @@ double keyFrequency, double documentLength){ - double f = (1.0D * documentLength) / (1.0D * numberOfTokens); - return keyFrequency * (tf * Idf.log(1.0D / f) - + f * Idf.REC_LOG_2_OF_E - + 0.5d * Idf.log(2 * Math.PI * tf) - + tf * (Idf.log(tf) - Idf.REC_LOG_2_OF_E)); + final double f = (1.0D * documentLength) / (1.0D * numberOfTokens); + return keyFrequency * (tf * log(1.0D / f) + + f * LOG_2_OF_E + + 0.5d * log(2 * Math.PI * tf) + + tf * (log(tf) - LOG_2_OF_E)); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/DFR_BM.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/DFR_BM.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/DFR_BM.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/DFR_BM.java 2012-12-06 18:02:23.000000000 +0100 @@ -25,8 +25,7 @@ */ package org.terrier.matching.models.basicmodel; -import org.terrier.matching.models.Idf; - +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the DFR BM weighting model, which is an approximation of @@ -80,7 +79,7 @@ double documentLength){ double NORM = 1d / (tf + k_1); return ( (k_3 + 1d) * keyFrequency / (k_3 + keyFrequency)) * NORM - * tf * Idf.log((numberOfDocuments - documentFrequency + 0.5d) / + * tf * log((numberOfDocuments - 
documentFrequency + 0.5d) / (documentFrequency + 0.5d)); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/P.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/P.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/P.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/P.java 2012-12-06 18:02:23.000000000 +0100 @@ -25,7 +25,7 @@ */ package org.terrier.matching.models.basicmodel; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.*; /** * This class implements the P basic model for randomness. @@ -65,13 +65,11 @@ double documentFrequency, double termFrequency, double keyFrequency, - double documentLength - ){ - - double f = (1.0D * termFrequency) / (1.0D * numberOfDocuments); - return keyFrequency * (tf * Idf.log(1.0D / f) - + f * Idf.REC_LOG_2_OF_E - + 0.5d * Idf.log(2 * Math.PI * tf) - + tf * (Idf.log(tf) - Idf.REC_LOG_2_OF_E)); + double documentLength) { + final double f = (1.0D * termFrequency) / (1.0D * numberOfDocuments); + return keyFrequency * (tf * log(1.0D / f) + + f * LOG_2_OF_E + + 0.5d * log(2 * Math.PI * tf) + + tf * (log(tf) - LOG_2_OF_E)); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/PL.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/PL.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/basicmodel/PL.java 2011-06-16 18:26:39.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/basicmodel/PL.java 2012-12-06 18:02:23.000000000 +0100 @@ -27,29 +27,32 @@ */ package 
org.terrier.matching.models.basicmodel; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.*; /** * This class implements the PL weighting model. * @author Gianni Amati, Ben He, Vassilis Plachouras * @since 3.0 */ -public class PL extends P -{ +public class PL extends P { private static final long serialVersionUID = 1L; @Override - public double score(double tf, double documentFrequency, - double F_t, double keyFrequency, double documentLength) { + public double score( + double tf, + double documentFrequency, + double F_t, + double keyFrequency, + double documentLength) { double NORM = 1.0D / (tf + 1d); double f = F_t / numberOfDocuments; return NORM * keyFrequency - * (tf * Idf.log(1d / f) - + f * Idf.REC_LOG_2_OF_E - + 0.5d * Idf.log(2 * Math.PI * tf) - + tf * (Idf.log(tf) - Idf.REC_LOG_2_OF_E)); + * (tf * log(1d / f) + + f * LOG_2_OF_E + + 0.5d * log(2 * Math.PI * tf) + + tf * (log(tf) - LOG_2_OF_E)); } @Override diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/normalisation/Normalisation2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/normalisation/Normalisation2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/normalisation/Normalisation2.java 2011-06-16 18:26:40.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/normalisation/Normalisation2.java 2012-12-06 18:02:23.000000000 +0100 @@ -25,7 +25,7 @@ */ package org.terrier.matching.models.normalisation; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the DFR normalisation 2. 
@@ -63,6 +63,6 @@ public double normalise(double tf, double docLength, double termFrequency){ if (docLength == 0) return tf; - return tf * Idf.log(1.0d + (parameter * averageDocumentLength) / docLength); + return tf * log(1.0d + (parameter * averageDocumentLength) / docLength); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/Bo1.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/Bo1.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/Bo1.java 2011-06-16 18:26:40.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/Bo1.java 2012-12-06 18:02:23.000000000 +0100 @@ -26,7 +26,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the Bo1 model for query expansion. @@ -85,8 +85,7 @@ //double numberOfDocuments = //collectionLength / averageDocumentLength; double f = termFrequency / numberOfDocuments; - return withinDocumentFrequency * Idf.log((1d + f) / f) - + Idf.log(1d + f); + return withinDocumentFrequency * log((1d + f) / f) + log(1d + f); } /** * This method implements the query expansion model. 
@@ -109,7 +108,6 @@ //double numberOfDocuments = //collectionLength / averageDocumentLength; double f = termFrequency / numberOfDocuments; - return withinDocumentFrequency * Idf.log((1d + f) / f) - + Idf.log(1d + f); + return withinDocumentFrequency * log((1d + f) / f) + log(1d + f); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/Bo2.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/Bo2.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/Bo2.java 2011-06-16 18:26:40.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/Bo2.java 2012-12-06 18:02:23.000000000 +0100 @@ -26,7 +26,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the Bo2 model for query expansion. @@ -54,7 +54,7 @@ */ public final double parameterFreeNormaliser(){ double f = (maxTermFrequency) * totalDocumentLength/collectionLength; - return ((maxTermFrequency)* Idf.log((1d +f)/ f) + Idf.log(1d +f)); + return ((maxTermFrequency)* log((1d +f)/ f) + log(1d +f)); } /** * This method computes the normaliser of parameter-free query expansion. @@ -65,7 +65,7 @@ */ public final double parameterFreeNormaliser(double maxTermFrequency, double collectionLength, double totalDocumentLength){ double f = (maxTermFrequency) * totalDocumentLength/collectionLength; - return ((maxTermFrequency)* Idf.log((1d +f)/ f) + Idf.log(1d +f)); + return ((maxTermFrequency)* log((1d +f)/ f) + log(1d +f)); } /** This method implements the query expansion model. 
* @param withinDocumentFrequency double The term frequency @@ -81,8 +81,7 @@ withinDocumentFrequency * totalDocumentLength / collectionLength; - return withinDocumentFrequency * Idf.log((1d + f) / f) - + Idf.log(1d + f); + return withinDocumentFrequency * log((1d + f) / f) + log(1d + f); } /** * This method implements the query expansion model. @@ -106,7 +105,6 @@ withinDocumentFrequency * totalDocumentLength / collectionLength; - return withinDocumentFrequency * Idf.log((1d + f) / f) - + Idf.log(1d + f); + return withinDocumentFrequency * log((1d + f) / f) + log(1d + f); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/CS.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/CS.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/CS.java 2011-06-16 18:26:40.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/CS.java 2012-12-06 18:02:23.000000000 +0100 @@ -27,7 +27,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the chi-square divergence for query expansion. 
@@ -73,11 +73,11 @@ return 0; double f = withinDocumentFrequency / this.totalDocumentLength; double p = termFrequency / this.collectionLength; - double D = f * Idf.log(f, p) + f * Idf.log(1 - f, 1 - p); + double D = f * log(f, p) + f * log(1 - f, 1 - p); return this.totalDocumentLength * D //D(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength) +0.5d - * Idf.log( + * log( 2 * Math.PI * this.totalDocumentLength @@ -104,11 +104,11 @@ return 0; double f = withinDocumentFrequency / totalDocumentLength; double p = termFrequency / collectionLength; - double D = f * Idf.log(f, p) + f * Idf.log(1 - f, 1 - p); + double D = f * log(f, p) + f * log(1 - f, 1 - p); return totalDocumentLength * D //D(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength) +0.5d - * Idf.log( + * log( 2 * Math.PI * totalDocumentLength diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/CSCorrect.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/CSCorrect.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/CSCorrect.java 2011-06-16 18:26:40.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/CSCorrect.java 2012-12-06 18:02:23.000000000 +0100 @@ -27,7 +27,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the unsimplified chi-square divergence for query @@ -71,8 +71,8 @@ return this.totalDocumentLength * //Poisson(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength) (withinDocumentFrequency / this.totalDocumentLength) * - Idf.log(withinDocumentFrequency / 
this.totalDocumentLength, termFrequency / this.collectionLength) - + 0.5d * Idf.log(2 * Math.PI * this.totalDocumentLength * + log(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength) + + 0.5d * log(2 * Math.PI * this.totalDocumentLength * (1d - withinDocumentFrequency / this.totalDocumentLength)); } @@ -96,8 +96,8 @@ return totalDocumentLength * //Poisson(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength) (withinDocumentFrequency / totalDocumentLength) * - Idf.log(withinDocumentFrequency / totalDocumentLength, termFrequency / collectionLength) - + 0.5d * Idf.log(2 * Math.PI * totalDocumentLength * + log(withinDocumentFrequency / totalDocumentLength, termFrequency / collectionLength) + + 0.5d * log(2 * Math.PI * totalDocumentLength * (1d - withinDocumentFrequency / totalDocumentLength)); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/Information.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/Information.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/Information.java 2011-06-16 18:26:41.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/Information.java 2012-12-06 18:02:23.000000000 +0100 @@ -26,7 +26,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the Kullback-Leibler divergence for @@ -53,7 +53,7 @@ public final double parameterFreeNormaliser(){ return (maxTermFrequency) * Math.log(collectionLength/totalDocumentLength)/ (Math.log(2d)*totalDocumentLength); - //return maxTermFrequency * idf.log(collectionLength/totalDocumentLength)/ idf.log 
(totalDocumentLength); + //return maxTermFrequency * log(collectionLength/totalDocumentLength)/ log(totalDocumentLength); } /** @@ -75,7 +75,7 @@ * Kullback-Leibler divergence. */ public final double score(double withinDocumentFrequency, double termFrequency) { - return - Idf.log(withinDocumentFrequency / this.totalDocumentLength ); + return - log(withinDocumentFrequency / this.totalDocumentLength ); } /** @@ -94,6 +94,6 @@ double collectionLength, double averageDocumentLength ){ - return - Idf.log(withinDocumentFrequency / this.totalDocumentLength); + return - log(withinDocumentFrequency / this.totalDocumentLength); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/KL.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/KL.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/KL.java 2011-06-16 18:26:41.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/KL.java 2012-12-06 18:02:23.000000000 +0100 @@ -26,7 +26,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the Kullback-Leibler divergence for @@ -55,7 +55,7 @@ public final double parameterFreeNormaliser(){ return (maxTermFrequency) * Math.log(collectionLength/totalDocumentLength)/ (Math.log(2d)*totalDocumentLength); - //return maxTermFrequency * idf.log(collectionLength/totalDocumentLength)/ idf.log (totalDocumentLength); + //return maxTermFrequency * log(collectionLength/totalDocumentLength)/ log(totalDocumentLength); } /** @@ -81,7 +81,7 @@ return 0; else return withinDocumentFrequency / this.totalDocumentLength * - Idf.log(withinDocumentFrequency / this.totalDocumentLength, + 
log(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength); } @@ -105,7 +105,7 @@ return 0; else return withinDocumentFrequency / totalDocumentLength * - Idf.log(withinDocumentFrequency / totalDocumentLength, + log(withinDocumentFrequency / totalDocumentLength, termFrequency / collectionLength); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/KLComplete.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/KLComplete.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/KLComplete.java 2011-06-16 18:26:41.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/KLComplete.java 2012-12-06 18:02:23.000000000 +0100 @@ -27,7 +27,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the complete Kullback-Leibler divergence for @@ -75,18 +75,17 @@ return 0; double f = withinDocumentFrequency / this.totalDocumentLength; double p = termFrequency / this.collectionLength; - double D = f * Idf.log(f, p) + f * Idf.log(1 - f, 1 - p); + double D = f * log(f, p) + f * log(1 - f, 1 - p); return this.totalDocumentLength * D //D(withinDocumentFrequency / this.totalDocumentLength, termFrequency / this.collectionLength) +1 / (2d) - * (Idf - .log( + * (log( withinDocumentFrequency * (1d - withinDocumentFrequency / this.totalDocumentLength)) - + Idf.log(2 * Math.PI)); + + log(2 * Math.PI)); } /** * This method provides the contract for implementing query expansion models. 
@@ -109,17 +108,16 @@ return 0; double f = withinDocumentFrequency / totalDocumentLength; double p = termFrequency / collectionLength; - double D = f * Idf.log(f, p) + f * Idf.log(1 - f, 1 - p); + double D = f * log(f, p) + f * log(1 - f, 1 - p); return totalDocumentLength * D //D(withinDocumentFrequency / totalDocumentLength, termFrequency / collectionLength) +1 / (2d) - * (Idf - .log( + * (log( withinDocumentFrequency * (1d - withinDocumentFrequency / totalDocumentLength)) - + Idf.log(2 * Math.PI)); + + log(2 * Math.PI)); } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/KLCorrect.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/KLCorrect.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/models/queryexpansion/KLCorrect.java 2011-06-16 18:26:41.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/models/queryexpansion/KLCorrect.java 2012-12-06 18:02:23.000000000 +0100 @@ -27,7 +27,7 @@ */ package org.terrier.matching.models.queryexpansion; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; /** * This class implements the correct Kullback-Leibler divergence for @@ -72,7 +72,7 @@ return 0; double f = withinDocumentFrequency / this.totalDocumentLength; double p = termFrequency / this.collectionLength; - double D = f * Idf.log(f, p) + f * Idf.log(1 - f, 1 - p); + double D = f * log(f, p) + f * log(1 - f, 1 - p); return D; } @@ -96,7 +96,7 @@ return 0; double f = withinDocumentFrequency / totalDocumentLength; double p = termFrequency / collectionLength; - double D = f * Idf.log(f, p) + f * Idf.log(1 - f, 1 - p); + double D = f * log(f, p) + f * log(1 - f, 1 - p); return D; } } diff -urN 
Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/tsms/RequiredTermModifier.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/tsms/RequiredTermModifier.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/tsms/RequiredTermModifier.java 2011-06-16 18:26:41.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/tsms/RequiredTermModifier.java 2012-12-18 16:04:44.000000000 +0100 @@ -119,11 +119,4 @@ // TODO Auto-generated method stub return 0; } - - @Override - public double score(double tf, double docLength, double n_t, double F_t, - double keyFrequency) { - // TODO Auto-generated method stub - return 0; - } } diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/tsms/TermInFieldModifier.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/tsms/TermInFieldModifier.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/matching/tsms/TermInFieldModifier.java 2011-06-16 18:26:41.000000000 +0200 +++ Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/matching/tsms/TermInFieldModifier.java 2012-12-18 16:04:54.000000000 +0100 @@ -193,12 +193,6 @@ return 0; } - @Override - public double score(double tf, double docLength, double n_t, double F_t, - double keyFrequency) { - // TODO Auto-generated method stub - return 0; - } /** * {@inheritDoc} */ diff -urN Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/utility/StaTools.java Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/utility/StaTools.java --- Documents/Langages/Java/terrier-dev/terrier-3.5-golden/src/core/org/terrier/utility/StaTools.java 2011-06-16 18:26:59.000000000 +0200 +++ 
Documents/Langages/Java/terrier-dev/terrier-3.5-weighting/src/core/org/terrier/utility/StaTools.java 2012-12-17 16:22:33.000000000 +0100 @@ -24,9 +24,11 @@ * Ben He */ package org.terrier.utility; + import java.util.Arrays; -import org.terrier.matching.models.Idf; +import static org.terrier.matching.models.WeightingModelLibrary.log; + /** * This class implements a series of basic statistical functions. */ @@ -39,7 +41,7 @@ */ public static double stirlingPower(double n, double m) { double dif = n - m; - return (m + 0.5d) * Idf.log(n / m) + dif * Idf.log(n); + return (m + 0.5d) * log(n / m) + dif * log(n); } /**