/*
 * Decompiled with CFR 0.152.
 */
package org.splevo.vpm.analyzer.semantic.lucene.finder;

import com.google.common.collect.HashBasedTable;
import com.google.common.collect.Table;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.BytesRef;
import org.splevo.vpm.analyzer.semantic.lucene.finder.RelationshipFinder;

/**
 * {@link RelationshipFinder} that relates index documents by the terms they share.
 *
 * <p>For every document in the index a query is built from the terms of the document's
 * {@code CONTENT} field (and, if enabled, its {@code COMMENT} field). Every hit that
 * belongs to a different variation point (stored field {@code VP}) and shares at least
 * one term with the reference document is recorded in the result table.</p>
 */
public class SharedTermFinder implements RelationshipFinder {
    private static final Logger LOGGER = Logger.getLogger(SharedTermFinder.class);

    /** Name of the indexed field holding the code content terms. */
    private static final String FIELD_CONTENT = "CONTENT";
    /** Name of the indexed field holding the comment terms. */
    private static final String FIELD_COMMENT = "COMMENT";
    /** Name of the stored field holding the variation point id. */
    private static final String FIELD_VP_ID = "VP";

    private final DirectoryReader reader;
    private final boolean matchComments;
    private final int minSharedTerms;

    /**
     * @param reader         reader of the Lucene index to search in
     * @param matchComments  whether the {@code COMMENT} field is searched in addition to {@code CONTENT}
     * @param minSharedTerms minimum number of query terms a document must match to count as a hit
     */
    public SharedTermFinder(DirectoryReader reader, boolean matchComments, int minSharedTerms) {
        this.reader = reader;
        this.matchComments = matchComments;
        this.minSharedTerms = minSharedTerms;
    }

    /**
     * Searches the index for pairs of variation points that share terms.
     *
     * @return a table mapping (smaller VP id, larger VP id) to the set of shared terms. If an
     *         {@link IOException} occurs, the error is logged and the results collected so far
     *         are returned.
     */
    @Override
    public Table<String, String, Set<String>> findSimilarEntries() {
        Table<String, String, Set<String>> sharedTermTable = HashBasedTable.create();
        try {
            IndexSearcher indexSearcher = new IndexSearcher(this.reader);
            int maxDoc = this.reader.maxDoc();
            // NOTE(review): ids below maxDoc may include deleted documents; presumably the
            // index is freshly built and has no deletions - confirm against the indexer.
            for (int docId = 0; docId < maxDoc; docId++) {
                Document referenceDoc = indexSearcher.doc(docId);
                if (referenceDoc.getField(FIELD_CONTENT) != null) {
                    mergeAll(sharedTermTable,
                            this.buildQueryAndExecuteSearch(indexSearcher, FIELD_CONTENT, docId, referenceDoc));
                }
                if (this.matchComments && referenceDoc.getField(FIELD_COMMENT) != null) {
                    mergeAll(sharedTermTable,
                            this.buildQueryAndExecuteSearch(indexSearcher, FIELD_COMMENT, docId, referenceDoc));
                }
            }
        } catch (IOException e) {
            LOGGER.error("Failure while searching Lucene index.", e);
        }
        return sharedTermTable;
    }

    /**
     * Builds the term query for one field of the reference document, runs it and
     * collects the shared terms of all hits.
     */
    private Table<String, String, Set<String>> buildQueryAndExecuteSearch(IndexSearcher indexSearcher, String field,
            int docID, Document referenceDoc) throws IOException {
        Map<String, Integer> frequencies = this.getTermFrequencies(docID, field);
        if (frequencies.isEmpty()) {
            // A query without clauses cannot match anything; skip the search.
            Table<String, String, Set<String>> empty = HashBasedTable.create();
            return empty;
        }
        Query query = this.buildQuery(field, frequencies);
        ScoreDoc[] hits = this.executeQuery(indexSearcher, this.reader.maxDoc(), query);
        // The query consists of exactly one term per key, so the key set is the reference term set.
        return this.buildSharedTermTable(indexSearcher, hits, referenceDoc, frequencies.keySet(), field);
    }

    /**
     * Creates the table of shared terms between the reference document and each hit
     * that belongs to a different variation point.
     */
    private Table<String, String, Set<String>> buildSharedTermTable(IndexSearcher indexSearcher, ScoreDoc[] hits,
            Document referenceDoc, Set<String> referenceTerms, String field) throws IOException {
        Table<String, String, Set<String>> sharedTermTable = HashBasedTable.create();
        String referenceVpId = referenceDoc.get(FIELD_VP_ID);
        if (referenceVpId == null) {
            // Without a VP id there is no table key to store the relationship under.
            return sharedTermTable;
        }
        for (ScoreDoc hit : hits) {
            String foundVpId = indexSearcher.doc(hit.doc).get(FIELD_VP_ID);
            if (foundVpId == null || referenceVpId.equals(foundVpId)) {
                continue;
            }
            Set<String> sharedTerms = this.determineSharedTerms(referenceTerms, hit.doc, field);
            if (sharedTerms.isEmpty()) {
                continue;
            }
            // Order the pair so that (a, b) and (b, a) end up in the same cell.
            if (referenceVpId.compareTo(foundVpId) <= 0) {
                mergeSharedTerms(sharedTermTable, referenceVpId, foundVpId, sharedTerms);
            } else {
                mergeSharedTerms(sharedTermTable, foundVpId, referenceVpId, sharedTerms);
            }
        }
        return sharedTermTable;
    }

    /**
     * Determines which of the reference terms occur in the given field of the found document.
     *
     * @return the shared terms in sorted order; empty if the document has no term vector for the field
     */
    private Set<String> determineSharedTerms(Set<String> referenceTerms, int foundDocId, String field)
            throws IOException {
        Set<String> sharedTerms = new TreeSet<String>();
        Terms termVector = this.reader.getTermVector(foundDocId, field);
        if (termVector == null) {
            return sharedTerms;
        }
        TermsEnum iterator = termVector.iterator(null);
        BytesRef termBytes;
        while ((termBytes = iterator.next()) != null) {
            String term = termBytes.utf8ToString();
            if (referenceTerms.contains(term)) {
                sharedTerms.add(term);
            }
        }
        return sharedTerms;
    }

    /** Runs the query and returns all hits (at most {@code maxDoc} of them). */
    private ScoreDoc[] executeQuery(IndexSearcher indexSearcher, int maxDoc, Query query) throws IOException {
        TopScoreDocCollector collector = TopScoreDocCollector.create(maxDoc, true);
        indexSearcher.search(query, (Collector) collector);
        return collector.topDocs().scoreDocs;
    }

    /**
     * Reads the term vector of a document field.
     *
     * @return map from term text to the frequency reported by the term vector; empty if the
     *         field has no term vector. On an {@link IOException} the error is logged and the
     *         entries read so far are returned.
     */
    private Map<String, Integer> getTermFrequencies(int docId, String fieldName) {
        Map<String, Integer> frequencies = new HashMap<String, Integer>();
        try {
            Terms vector = this.reader.getTermVector(docId, fieldName);
            if (vector == null) {
                return frequencies;
            }
            TermsEnum termsEnum = vector.iterator(null);
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                frequencies.put(text.utf8ToString(), (int) termsEnum.totalTermFreq());
            }
        } catch (IOException e) {
            LOGGER.error("Failure while extracting Term Frequencies.", e);
        }
        return frequencies;
    }

    /**
     * Builds a boolean query with one optional clause per term, of which at least
     * {@code minSharedTerms} must match.
     */
    private Query buildQuery(String fieldName, Map<String, Integer> termFrequencies) {
        // Documents may contain more terms than the default clause limit allows.
        // Note that this is a JVM-wide static setting.
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        BooleanQuery finalQuery = new BooleanQuery();
        for (String termText : termFrequencies.keySet()) {
            finalQuery.add(new TermQuery(new Term(fieldName, termText)), BooleanClause.Occur.SHOULD);
        }
        finalQuery.setMinimumNumberShouldMatch(this.minSharedTerms);
        return finalQuery;
    }

    /** Merges every cell of {@code source} into {@code target} without dropping existing terms. */
    private static void mergeAll(Table<String, String, Set<String>> target,
            Table<String, String, Set<String>> source) {
        for (Table.Cell<String, String, Set<String>> cell : source.cellSet()) {
            mergeSharedTerms(target, cell.getRowKey(), cell.getColumnKey(), cell.getValue());
        }
    }

    /**
     * Adds the terms to the cell (rowVpId, columnVpId), creating the cell if necessary.
     * A fresh sorted set is stored so that the table never aliases the caller's set.
     */
    private static void mergeSharedTerms(Table<String, String, Set<String>> target, String rowVpId,
            String columnVpId, Set<String> terms) {
        Set<String> existing = target.get(rowVpId, columnVpId);
        if (existing == null) {
            target.put(rowVpId, columnVpId, new TreeSet<String>(terms));
        } else {
            existing.addAll(terms);
        }
    }
}

