package com.wcohen.ss;

import com.wcohen.ss.api.SourcedStringWrapper;
import com.wcohen.ss.api.SourcedToken;
import com.wcohen.ss.api.SourcedTokenizer;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.Token;
import java.util.Iterator;

/* loaded from: input_file:lib/com.wcohen.secondstring-0.1.jar:com/wcohen/ss/SourcedTFIDF.class */
/**
 * TFIDF-style similarity over bags of {@link SourcedToken}s.
 *
 * <p>Each string is converted into a {@link UnitVector} whose token weights are
 * TF-IDF values scaled to unit Euclidean length; {@link #score} then sums the
 * products of the weights of tokens the two vectors have in common.
 *
 * <p>The statistics used for weighting ({@code collectionSize} and
 * {@code documentFrequency}) and the {@code tokenizer} are inherited from
 * {@link AbstractSourcedStatisticalTokenDistance}.
 */
public class SourcedTFIDF extends AbstractSourcedStatisticalTokenDistance {
    /** Vector built by the most recent {@link #prepare(String)} call; {@code null} until then. */
    private UnitVector lastVector;

    /**
     * A bag of sourced tokens whose weights have been converted to TF-IDF values
     * and normalised by the Euclidean norm of the weight vector.
     */
    public class UnitVector extends BagOfSourcedTokens {
        /**
         * Builds the bag for {@code str} from the given tokens and converts its
         * weights to normalised TF-IDF weights.
         *
         * @param str the string the tokens were taken from
         * @param sourcedTokenArr the tokens of {@code str}
         */
        public UnitVector(String str, SourcedToken[] sourcedTokenArr) {
            super(str, sourcedTokenArr);
            termFreq2TFIDF();
        }

        /**
         * Builds a unit vector from the string and tokens of an existing bag.
         *
         * @param sourcedTFIDF not used; kept so existing callers keep compiling
         * @param bagOfSourcedTokens the bag whose string and tokens are reused
         */
        public UnitVector(SourcedTFIDF sourcedTFIDF, BagOfSourcedTokens bagOfSourcedTokens) {
            // The delegated constructor already performs the TF-IDF conversion.
            // Running it a second time would re-weight the normalised values and
            // make this vector differ from one built directly from the string.
            this(bagOfSourcedTokens.unwrap(), bagOfSourcedTokens.getSourcedTokens());
        }

        /**
         * Replaces every token weight w with log(w + 1) * log(collectionSize / df)
         * and then divides all weights by their Euclidean norm.
         *
         * <p>When no collection statistics are available ({@code collectionSize <= 0})
         * every token gets weight 1 before normalisation.
         */
        private void termFreq2TFIDF() {
            double sumOfSquares = 0.0d;
            Iterator it = tokenIterator();
            while (it.hasNext()) {
                Token token = (Token) it.next();
                if (SourcedTFIDF.this.collectionSize > 0) {
                    Integer dfBoxed = (Integer) SourcedTFIDF.this.documentFrequency.get(token);
                    // Tokens with no recorded document frequency are treated as df == 1,
                    // which gives them the largest possible IDF factor.
                    double df = dfBoxed == null ? 1.0d : dfBoxed.intValue();
                    double weight = Math.log(getWeight(token) + 1.0d)
                            * Math.log(SourcedTFIDF.this.collectionSize / df);
                    setWeight(token, weight);
                    sumOfSquares += weight * weight;
                } else {
                    setWeight(token, 1.0d);
                    sumOfSquares += 1.0d;
                }
            }
            double norm = Math.sqrt(sumOfSquares);
            if (norm == 0.0d) {
                // All weights are zero (e.g. every token has df == collectionSize).
                // Dividing would turn them into NaN, so leave them at zero.
                return;
            }
            Iterator it2 = tokenIterator();
            while (it2.hasNext()) {
                Token token2 = (Token) it2.next();
                setWeight(token2, getWeight(token2) / norm);
            }
        }
    }

    /**
     * Creates a distance that uses the given tokenizer.
     *
     * @param sourcedTokenizer tokenizer passed to the superclass
     */
    public SourcedTFIDF(SourcedTokenizer sourcedTokenizer) {
        super(sourcedTokenizer);
    }

    /** Creates a distance using the superclass's no-argument construction. */
    public SourcedTFIDF() {
    }

    /**
     * Scores two strings as the sum, over the tokens of the first vector that have an
     * equivalent token in the second, of the product of the two weights.
     *
     * @param stringWrapper first string; must be a {@link SourcedStringWrapper}
     * @param stringWrapper2 second string; must be a {@link SourcedStringWrapper}
     * @return the accumulated product of the matching tokens' weights
     * @throws ClassCastException if either argument is not a {@link SourcedStringWrapper}
     */
    @Override // com.wcohen.ss.AbstractStringDistance, com.wcohen.ss.api.StringDistance
    public double score(StringWrapper stringWrapper, StringWrapper stringWrapper2) {
        SourcedStringWrapper first = (SourcedStringWrapper) stringWrapper;
        SourcedStringWrapper second = (SourcedStringWrapper) stringWrapper2;
        checkTrainingHasHappened(first, second);
        UnitVector firstVector = asUnitVector(first);
        UnitVector secondVector = asUnitVector(second);
        double similarity = 0.0d;
        Iterator it = firstVector.tokenIterator();
        while (it.hasNext()) {
            Token token = (Token) it.next();
            SourcedToken equivalentToken = secondVector.getEquivalentToken(token);
            if (equivalentToken != null) {
                similarity += firstVector.getWeight(token) * secondVector.getWeight(equivalentToken);
            }
        }
        return similarity;
    }

    /**
     * Converts a wrapper into a {@link UnitVector}.
     *
     * <p>An existing {@code UnitVector} is returned unchanged, a
     * {@link BagOfSourcedTokens} is re-wrapped, and anything else is tokenized
     * with this distance's tokenizer using the wrapper's own source.
     *
     * @param sourcedStringWrapper the wrapper to convert
     * @return a unit vector for the wrapped string
     */
    public UnitVector asUnitVector(SourcedStringWrapper sourcedStringWrapper) {
        if (sourcedStringWrapper instanceof UnitVector) {
            return (UnitVector) sourcedStringWrapper;
        }
        if (sourcedStringWrapper instanceof BagOfSourcedTokens) {
            return new UnitVector(this, (BagOfSourcedTokens) sourcedStringWrapper);
        }
        String text = sourcedStringWrapper.unwrap();
        return new UnitVector(text, this.tokenizer.sourcedTokenize(text, sourcedStringWrapper.getSource()));
    }

    /**
     * Prepares a plain string that carries no source information. The string is
     * tokenized with the placeholder source {@code "*UNKNOWN SOURCE*"}, a notice is
     * printed to standard output, and the resulting vector is remembered for
     * {@link #getTokens()} and {@link #getWeight(Token)}.
     *
     * @param str the string to prepare
     * @return the unit vector built for {@code str}
     */
    @Override // com.wcohen.ss.AbstractStringDistance, com.wcohen.ss.api.StringDistance
    public StringWrapper prepare(String str) {
        System.out.println("unknown source for " + str);
        this.lastVector = new UnitVector(str, this.tokenizer.sourcedTokenize(str, "*UNKNOWN SOURCE*"));
        return this.lastVector;
    }

    /**
     * Returns the tokens of the vector built by the last {@link #prepare(String)} call.
     *
     * @return the tokens of the last prepared vector
     * @throws NullPointerException if {@link #prepare(String)} has not been called yet
     */
    public Token[] getTokens() {
        return this.lastVector.getTokens();
    }

    /**
     * Returns the weight of {@code token} in the vector built by the last
     * {@link #prepare(String)} call.
     *
     * @param token the token to look up
     * @return the weight stored for {@code token} in the last prepared vector
     * @throws NullPointerException if {@link #prepare(String)} has not been called yet
     */
    public double getWeight(Token token) {
        return this.lastVector.getWeight(token);
    }

    /**
     * Returns the recorded document frequency of a token.
     *
     * @param token the token to look up
     * @return the stored frequency, or 0 when none is recorded
     */
    @Override // com.wcohen.ss.AbstractSourcedStatisticalTokenDistance
    public int getDocumentFrequency(Token token) {
        Integer frequency = (Integer) this.documentFrequency.get(token);
        return frequency == null ? 0 : frequency.intValue();
    }

    /**
     * Records the document frequency of a token, replacing any previous value.
     *
     * @param token the token to update
     * @param i the document frequency to store
     */
    public void setDocumentFrequency(Token token, int i) {
        this.documentFrequency.put(token, Integer.valueOf(i));
    }

    /**
     * Returns the collection size used in the IDF computation.
     *
     * @return the current collection size
     */
    public int getCollectionSize() {
        return this.collectionSize;
    }

    /**
     * Sets the collection size used in the IDF computation.
     *
     * @param i the new collection size
     */
    public void setCollectionSize(int i) {
        this.collectionSize = i;
    }

    /**
     * Describes how the score of two bags is made up: lists each token of the first
     * bag that has an equivalent in the second together with both weights, followed
     * by the value of {@link #score}.
     *
     * <p>The weights printed are those stored in the bags as passed in.
     *
     * @param stringWrapper first bag; must be a {@link BagOfSourcedTokens}
     * @param stringWrapper2 second bag; must be a {@link BagOfSourcedTokens}
     * @return a human-readable explanation ending with the score
     * @throws ClassCastException if either argument is not a {@link BagOfSourcedTokens}
     */
    @Override // com.wcohen.ss.AbstractStringDistance, com.wcohen.ss.api.StringDistance
    public String explainScore(StringWrapper stringWrapper, StringWrapper stringWrapper2) {
        BagOfSourcedTokens firstBag = (BagOfSourcedTokens) stringWrapper;
        BagOfSourcedTokens secondBag = (BagOfSourcedTokens) stringWrapper2;
        StringBuilder explanation = new StringBuilder();
        PrintfFormat weightFormat = new PrintfFormat("%.3f");
        explanation.append("Common tokens: ");
        Iterator it = firstBag.tokenIterator();
        while (it.hasNext()) {
            SourcedToken sourcedToken = (SourcedToken) it.next();
            SourcedToken equivalentToken = secondBag.getEquivalentToken(sourcedToken);
            if (equivalentToken != null) {
                explanation.append(" ").append(sourcedToken.getValue()).append(": ");
                explanation.append(weightFormat.sprintf(firstBag.getWeight(sourcedToken)));
                explanation.append("*");
                explanation.append(weightFormat.sprintf(secondBag.getWeight(equivalentToken)));
            }
        }
        explanation.append("\nscore = ").append(score(stringWrapper, stringWrapper2));
        return explanation.toString();
    }

    /** Returns the fixed label {@code "[SourcedTFIDF]"}. */
    @Override
    public String toString() {
        return "[SourcedTFIDF]";
    }

    /**
     * Command-line entry point; hands a new {@code SourcedTFIDF} and the arguments
     * to the inherited {@code doMain}.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        doMain(new SourcedTFIDF(), strArr);
    }
}
