package org.scify.jedai.similarityjoins.tokenbased;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.scify.jedai.datamodel.Attribute;
import org.scify.jedai.datamodel.Comparison;
import org.scify.jedai.datamodel.EntityProfile;
import org.scify.jedai.datamodel.SimilarityPairs;
import org.scify.jedai.similarityjoins.fuzzysets.FuzzySetSimJoin;

/* loaded from: input_file:org/scify/jedai/similarityjoins/tokenbased/SilkMoth.class */
/**
 * Token-based similarity join that turns every attribute value of every entity profile into a
 * set of tokens and delegates the actual set-similarity join to {@link FuzzySetSimJoin}.
 *
 * <p>Each profile is keyed by its positional index (as a decimal string) inside its dataset and
 * is represented by one token set per attribute.
 */
public class SilkMoth extends AbstractTokenBasedJoin {
    /** Delimiter for word tokenization: any non-word character, or an underscore. */
    private static final Pattern TOKEN_DELIMITER = Pattern.compile("[\\W_]");

    /** Length of the character q-grams; a negative value selects word tokenization instead. */
    private final int qSize;
    /** Tokenized profiles of the first dataset, keyed by profile index. */
    private final Map<String, List<Set<String>>> collection1;
    /** Tokenized profiles of the second dataset, keyed by profile index. */
    private final Map<String, List<Set<String>>> collection2;

    /**
     * Creates a new join.
     *
     * @param qSize     q-gram length; a negative value makes {@code getTokens} split values into
     *                  word tokens instead of q-grams
     * @param threshold similarity threshold, forwarded to the superclass
     */
    public SilkMoth(int qSize, float threshold) {
        super(threshold);
        this.qSize = qSize;
        this.collection1 = new LinkedHashMap<>();
        this.collection2 = new LinkedHashMap<>();
    }

    /**
     * Tokenizes both datasets, runs the join and wraps the resulting comparisons.
     *
     * @return the similarity pairs built from the comparisons that reached the threshold
     */
    @Override
    public SimilarityPairs applyJoin() {
        init();
        return getSimilarityPairs(performJoin());
    }

    /** {@inheritDoc} */
    @Override
    public String getMethodInfo() {
        return getMethodName() + ": it implements the Silkmoth similarity join algorithm";
    }

    /**
     * {@inheritDoc}
     *
     * <p>NOTE(review): the reported name is "FuzzySetJoin" although the class is called
     * SilkMoth; kept as is because callers may rely on the exact string - confirm intent.
     */
    @Override
    public String getMethodName() {
        return "FuzzySetJoin";
    }

    /**
     * Converts one (already trimmed) attribute value into its token set.
     *
     * <ul>
     *   <li>negative {@code qSize}: the value is split on {@link #TOKEN_DELIMITER}; consecutive
     *       delimiters can therefore contribute an empty-string token;</li>
     *   <li>value shorter than {@code qSize}: the whole value is the single token;</li>
     *   <li>otherwise: every substring of length {@code qSize}.</li>
     * </ul>
     *
     * @param value the attribute value to tokenize
     * @return the token set, or {@code null} when {@code value} is empty
     */
    private Set<String> getTokens(String value) {
        if (value.isEmpty()) {
            // NOTE(review): the null ends up inside the list handed to FuzzySetSimJoin;
            // confirm the join tolerates null token sets before changing this.
            return null;
        }
        if (qSize < 0) {
            return new HashSet<>(Arrays.asList(TOKEN_DELIMITER.split(value)));
        }

        final Set<String> qGrams = new HashSet<>();
        if (value.length() < qSize) {
            qGrams.add(value);
        } else {
            final int lastStart = value.length() - qSize;
            for (int start = 0; start <= lastStart; start++) {
                qGrams.add(value.substring(start, start + qSize));
            }
        }
        return qGrams;
    }

    /** Builds the list of token sets (one per attribute, in iteration order) for a profile. */
    private List<Set<String>> tokenizeAttributes(EntityProfile profile) {
        final List<Set<String>> tokenSets = new ArrayList<>();
        for (Attribute attribute : profile.getAttributes()) {
            tokenSets.add(getTokens(attribute.getValue().trim()));
        }
        return tokenSets;
    }

    /**
     * (Re)builds both tokenized collections from {@code profilesD1} and {@code profilesD2}.
     * The maps are emptied first so that a repeated {@link #applyJoin()} never joins entries
     * left over from a previous, larger input.
     */
    private void init() {
        collection1.clear();
        collection2.clear();

        int index1 = 0;
        for (EntityProfile profile : profilesD1) {
            collection1.put(Integer.toString(index1), tokenizeAttributes(profile));
            index1++;
        }

        int index2 = 0;
        for (EntityProfile profile : profilesD2) {
            collection2.put(Integer.toString(index2), tokenizeAttributes(profile));
            index2++;
        }
    }

    /**
     * Runs {@link FuzzySetSimJoin} over the two tokenized collections and converts every
     * returned pair whose similarity reaches the threshold into a {@link Comparison}.
     *
     * <p>The join reports each pair under a key of the form {@code "<id1>_<id2>"}.
     *
     * @return the comparisons, each carrying its similarity as utility measure
     */
    private List<Comparison> performJoin() {
        final Map<String, Float> matchingPairs =
                new FuzzySetSimJoin().join(collection1, collection2, threshold);

        final List<Comparison> comparisons = new ArrayList<>();
        for (Map.Entry<String, Float> pair : matchingPairs.entrySet()) {
            final String[] ids = pair.getKey().split("_");
            final int id1 = Integer.parseInt(ids[0]);
            int id2 = Integer.parseInt(ids[1]);
            final float similarity = pair.getValue();

            if (isCleanCleanER) {
                // Ids of the second dataset are shifted by the dataset delimiter.
                id2 += datasetDelimiter;
            }
            // Outside Clean-Clean ER, pairs with id1 == id2 are NOT filtered out here
            // (the original code had an empty branch for that case).

            if (similarity >= threshold) {
                final Comparison comparison = getComparison(id1, id2);
                comparison.setUtilityMeasure(similarity);
                comparisons.add(comparison);
            }
        }
        return comparisons;
    }
}
