package org.fnlp.nlp.duplicate;

import gnu.trove.iterator.TIntIterator;
import gnu.trove.set.hash.TIntHashSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.fnlp.ml.types.alphabet.StringFeatureAlphabet;
import org.fnlp.nlp.duplicate.FingerPrint;

/* loaded from: input_file:org/fnlp/nlp/duplicate/SimilaritySlow.class */
/**
 * Brute-force near-duplicate detector.
 *
 * <p>Each document is turned into a set of integer feature ids, documents are bucketed by
 * the size of that set, and every document is compared (Jaccard similarity) against the
 * documents whose feature count lies within a small window around its own. Pairs whose
 * similarity exceeds {@link #thres} are merged into one group.
 *
 * <p>Comparisons run on a fixed-size thread pool. An instance keeps per-run state in its
 * fields, so a single instance must not run {@link #duplicate(ArrayList)} concurrently.
 */
public class SimilaritySlow implements ISimilarity {
    /** Groups produced by the most recent {@link #duplicate(ArrayList)} call. */
    public TreeSet<DocSim> dsMap;
    /** Width of the feature-count window: candidates lie within +/- lenDiffThresh / 2. */
    private static final int lenDiffThresh = 4;
    /** Feature-id set of each document, indexed like {@link #docs}. */
    public TIntHashSet[] features;
    /** Fingerprint type handed to {@code FingerPrint.featureset}. */
    public FingerPrint.Type type;
    /** Documents of the current run. */
    public ArrayList<Documents> docs;
    /** Number of worker threads used for the pairwise comparisons. */
    private int numThreads;
    /** dup[i] is true once document i has been merged into another document. */
    private boolean[] dup;
    /** mergeto[i] is the document that i was merged into; meaningful only when dup[i]. */
    private int[] mergeto;
    /** lenGroup.get(n) holds the indices of all documents that have exactly n features. */
    private ArrayList<TIntHashSet> lenGroup;
    /** Number of comparison tasks that have finished in the current run. */
    AtomicInteger jobs;
    /** Not read anywhere in this class. */
    int maxDocsNum = 5000;
    /** A pair is treated as duplicate when its Jaccard similarity is strictly above this. */
    public double thres = 0.5d;

    /** Task comparing one pair of documents and recording a merge when they are similar. */
    public class CalcSimilarity implements Runnable {
        private final int idx;
        private final int idy;

        /**
         * @param i  index of the document that is kept when the pair is similar
         * @param i2 index of the document that is merged into {@code i}
         */
        public CalcSimilarity(int i, int i2) {
            this.idx = i;
            this.idy = i2;
        }

        @Override
        public void run() {
            try {
                // Unsynchronized fast path: a stale read only costs a redundant comparison.
                if (dup[idx] || dup[idy]) {
                    return;
                }
                double sim = simJaccard(features[idx], features[idy]);
                if (sim > thres) {
                    synchronized (dup) {
                        // Either document may have been merged while we were comparing.
                        // Join the current roots so the merge graph stays a forest; writing
                        // mergeto[idy] = idx blindly could create a cycle (a -> b -> a).
                        int rootX = findroot(idx);
                        int rootY = findroot(idy);
                        if (rootX != rootY) {
                            dup[rootY] = true;
                            mergeto[rootY] = rootX;
                        }
                    }
                }
            } catch (Exception e) {
                // Best effort: one failed comparison must not abort the whole run.
                e.printStackTrace();
            } finally {
                // Count the task only once it is really done.
                jobs.incrementAndGet();
            }
        }
    }

    /**
     * @param i    number of worker threads
     * @param type fingerprint type used for feature extraction
     */
    public SimilaritySlow(int i, FingerPrint.Type type) {
        this.type = type;
        this.numThreads = i;
    }

    /**
     * Builds {@link #features} for every document in {@link #docs} and then buckets the
     * documents by feature count. {@link #docs} must be set before calling this.
     */
    public void feature() {
        this.features = new TIntHashSet[this.docs.size()];
        StringFeatureAlphabet alphabet = new StringFeatureAlphabet();
        for (int i = 0; i < this.docs.size(); i++) {
            Set<String> featureset = FingerPrint.featureset(this.docs.get(i).content, this.type);
            TIntHashSet ids = new TIntHashSet(featureset.size());
            for (String feature : featureset) {
                ids.add(alphabet.lookupIndex(feature));
            }
            this.features[i] = ids;
        }
        group();
    }

    /** Buckets document indices by the size of their feature set into {@link #lenGroup}. */
    private void group() {
        this.lenGroup = new ArrayList<>();
        for (int i = 0; i < this.features.length; i++) {
            int size = this.features[i].size();
            // Grow the bucket list until index `size` exists.
            while (this.lenGroup.size() <= size) {
                this.lenGroup.add(new TIntHashSet());
            }
            this.lenGroup.get(size).add(i);
        }
    }

    /**
     * Groups the given documents into clusters of near-duplicates.
     *
     * @param arrayList documents to compare
     * @return one {@code DocSim} per cluster (singletons included); also stored in
     *         {@link #dsMap}
     * @throws Exception if the calling thread is interrupted while waiting for the
     *                   comparisons, or feature extraction fails
     */
    @Override
    public TreeSet<DocSim> duplicate(ArrayList<Documents> arrayList) throws Exception {
        this.docs = arrayList;
        this.dsMap = new TreeSet<>();
        feature();
        this.dup = new boolean[arrayList.size()];
        this.mergeto = new int[arrayList.size()];
        this.jobs = new AtomicInteger();
        ThreadPoolExecutor pool = new ThreadPoolExecutor(this.numThreads, this.numThreads,
                1000L, TimeUnit.SECONDS, new LinkedBlockingQueue<>());
        try {
            for (int i = 0; i < arrayList.size() - 1; i++) {
                if (this.dup[i]) {
                    continue;
                }
                int lowest = this.features[i].size() - lenDiffThresh / 2;
                for (int offset = 0; offset <= lenDiffThresh; offset++) {
                    int len = lowest + offset;
                    if (len < 0 || len >= this.lenGroup.size()) {
                        continue;
                    }
                    TIntIterator it = this.lenGroup.get(len).iterator();
                    while (it.hasNext()) {
                        int other = it.next();
                        if (!this.dup[other] && other != i) {
                            pool.execute(new CalcSimilarity(i, other));
                        }
                    }
                }
            }
            // Wait for every submitted task to finish; termination also makes the workers'
            // writes to dup/mergeto visible to this thread.
            pool.shutdown();
            while (!pool.awaitTermination(1L, TimeUnit.SECONDS)) {
                // keep waiting
            }
        } finally {
            // On an exception or interruption make sure no worker thread is left behind.
            if (!pool.isTerminated()) {
                pool.shutdownNow();
            }
        }

        // Every surviving (non-merged) document is the root of its own group ...
        HashMap<Integer, ArrayList<Integer>> groups = new HashMap<>();
        for (int i = 0; i < arrayList.size(); i++) {
            if (!this.dup[i]) {
                ArrayList<Integer> members = new ArrayList<>();
                members.add(i);
                groups.put(i, members);
            }
        }
        // ... and every merged document joins the group of its root.
        for (int i = 0; i < arrayList.size(); i++) {
            if (this.dup[i]) {
                groups.get(findroot(i)).add(i);
            }
        }
        TreeSet<DocSim> result = new TreeSet<>();
        for (Map.Entry<Integer, ArrayList<Integer>> entry : groups.entrySet()) {
            result.add(new DocSim(entry.getValue()));
        }
        // Keep the result so printDocSim() has something to print.
        this.dsMap = result;
        return result;
    }

    /** Follows the {@link #mergeto} chain from {@code i} to the document that was never merged. */
    private int findroot(int i) {
        int current = i;
        while (this.dup[current]) {
            current = this.mergeto[current];
        }
        return current;
    }

    /** Prints every group in {@link #dsMap} to standard output; no-op before the first run. */
    public void printDocSim() {
        if (this.dsMap == null) {
            return;
        }
        for (DocSim docSim : this.dsMap) {
            System.out.println(docSim.toString());
        }
    }

    /**
     * Jaccard similarity |A ∩ B| / |A ∪ B| of two feature sets.
     *
     * @return the similarity in [0, 1]; 0 when either argument is {@code null} or when both
     *         sets are empty (instead of the NaN that 0/0 would produce)
     */
    public double simJaccard(TIntHashSet tIntHashSet, TIntHashSet tIntHashSet2) {
        if (tIntHashSet == null || tIntHashSet2 == null) {
            return 0.0d;
        }
        // Iterate the smaller set and probe the larger one; the intersection is the same.
        boolean firstIsSmaller = tIntHashSet.size() <= tIntHashSet2.size();
        TIntHashSet smaller = firstIsSmaller ? tIntHashSet : tIntHashSet2;
        TIntHashSet larger = firstIsSmaller ? tIntHashSet2 : tIntHashSet;
        int common = 0;
        TIntIterator it = smaller.iterator();
        while (it.hasNext()) {
            if (larger.contains(it.next())) {
                common++;
            }
        }
        int union = tIntHashSet.size() + tIntHashSet2.size() - common;
        if (union == 0) {
            return 0.0d;
        }
        return (double) common / union;
    }
}
