package org.molgenis.omx.biobankconnect.utils;

import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import org.tartarus.snowball.ext.PorterStemmer;

/* loaded from: input_file:org/molgenis/omx/biobankconnect/utils/NGramMatchingModel.class */
/**
 * Scores the similarity of two strings by comparing the sets of character
 * n-grams (bigrams) built from their stemmed, stop-word-filtered words.
 *
 * <p>Note: the shared {@link PorterStemmer} instance is stateful
 * ({@code setCurrent}/{@code stem}/{@code getCurrent}) and access to it is not
 * synchronized in this class.
 */
public class NGramMatchingModel {
    /** Length of the character n-grams generated for every word. */
    private static final int N_GRAMS = 2;

    private static final PorterStemmer stemmer = new PorterStemmer();

    /**
     * Words that are dropped from the input before n-grams are generated.
     * Entries must not contain whitespace: tokens are produced by splitting on
     * a single space, so a padded entry could never match.
     */
    public static final Set<String> STOPWORDSLIST = new HashSet<String>(Arrays.asList(
            "a", "you", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are",
            "aren't", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but",
            "by", "can't", "cannot", "could", "couldn't", "did", "didn't", "do", "does", "doesn't", "doing",
            "don't", "down", "during", "each", "few", "for", "from", "further", "had", "hadn't", "has", "hasn't",
            "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her", "here", "here's", "hers",
            "herself", "him", "himself", "his", "how", "how's", "i", "i'd", "i'll", "i'm", "i've", "if", "in",
            "into", "is", "isn't", "it", "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my",
            "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other", "ought", "our",
            "ours", "ourselves", "out", "over", "own", "same", "shan't", "she", "she'd", "she'll", "she's",
            "should", "shouldn't", "so", "some", "such", "than", "that", "that's", "the", "their", "theirs",
            "them", "themselves", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're",
            "they've", "this", "those", "through", "to", "too", "under", "until", "up", "very", "was", "wasn't",
            "we", "we'd", "we'll", "we're", "we've", "were", "weren't", "what", "what's", "when", "when's",
            "where", "where's", "which", "while", "who", "who's", "whom", "why", "why's", "with", "won't",
            "would", "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself",
            "yourselves", "many", ")", "("));

    /**
     * Computes the n-gram similarity of two strings.
     *
     * @param str  first string; lower-cased and trimmed before comparison
     * @param str2 second string; lower-cased and trimmed before comparison
     * @param z    passed through to the n-gram generation, where it is
     *             currently not read (stop words are always removed)
     * @return a percentage between 0 and 100, rounded to three decimals
     */
    public static double stringMatching(String str, String str2, boolean z) {
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale
        // (e.g. the Turkish dotless i), so stop-word lookups behave the same everywhere.
        Set<String> first = createNGrams(str.toLowerCase(Locale.ROOT).trim(), z);
        Set<String> second = createNGrams(str2.toLowerCase(Locale.ROOT).trim(), z);
        return calculateScore(first, second);
    }

    /**
     * Splits the input on single spaces, removes stop words, stems every
     * remaining word and collects the n-grams of each stem wrapped in the
     * boundary markers {@code ^} and {@code $}.
     *
     * @param str input text
     * @param z   unused by this implementation
     * @return the set of distinct n-grams
     */
    private static Set<String> createNGrams(String str, boolean z) {
        Set<String> words = new HashSet<String>(Arrays.asList(str.trim().split(" ")));
        words.removeAll(STOPWORDSLIST);

        Set<String> grams = new HashSet<String>();
        for (String word : words) {
            String stem = stemmerString(word);
            StringBuilder marked = new StringBuilder(stem.length() + 2);
            marked.append('^').append(stem.toLowerCase(Locale.ROOT)).append('$');

            // Slide a window of N_GRAMS characters over the marked stem; the last
            // window starts at length - N_GRAMS.
            int lastStart = marked.length() - N_GRAMS;
            for (int i = 0; i <= lastStart; i++) {
                grams.add(marked.substring(i, i + N_GRAMS));
            }
        }
        return grams;
    }

    /**
     * Returns the share of common n-grams relative to the larger of the two
     * sets, as a percentage rounded to three decimals.
     *
     * <p>{@code set} is reduced to the intersection as a side effect.
     *
     * @param set  n-grams of the first string (modified)
     * @param set2 n-grams of the second string
     * @return percentage between 0 and 100; 0 when both sets are empty
     */
    private static double calculateScore(Set<String> set, Set<String> set2) {
        int larger = Math.max(set.size(), set2.size());
        if (larger == 0) {
            // Nothing to compare (e.g. both inputs held only stop words); avoid 0/0 = NaN.
            return 0.0d;
        }
        set.retainAll(set2);
        double percentage = (set.size() / (double) larger) * 100.0d;

        // Fixed symbols guarantee a '.' decimal separator, so the formatted text can
        // always be parsed back regardless of the JVM default locale.
        DecimalFormat format = new DecimalFormat("#0.000", new DecimalFormatSymbols(Locale.ROOT));
        return Double.parseDouble(format.format(percentage));
    }

    /**
     * Stems a single word with the shared Porter stemmer.
     *
     * @param str word to stem; trimmed before stemming
     * @return the stemmer's result for the trimmed word
     */
    private static String stemmerString(String str) {
        stemmer.setCurrent(str.trim());
        stemmer.stem();
        return stemmer.getCurrent();
    }
}
