package org.molgenis.data.semanticsearch.service.impl;

import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.MolgenisDataAccessException;
import org.molgenis.data.QueryRule;
import org.molgenis.data.meta.AttributeType;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.semanticsearch.string.NGramDistanceAlgorithm;
import org.molgenis.data.semanticsearch.string.Stemmer;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.service.OntologyService;
import org.molgenis.ontology.ic.TermFrequencyService;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:org/molgenis/data/semanticsearch/service/impl/SemanticSearchServiceHelper.class */
/**
 * Helper that converts free-text query terms and {@link OntologyTerm}s into {@link QueryRule}s
 * (DIS_MAX / SHOULD / FUZZY_MATCH on the {@code label} and {@code description} fields) and
 * provides the small string utilities (stop-word removal, boosting, escaping) those rules need.
 *
 * <p>An ontology term whose IRI contains a comma is treated as a composite of several terms; the
 * IRI is split on the comma and every part is looked up through the {@link OntologyService}.
 */
public class SemanticSearchServiceHelper {
    private final TermFrequencyService termFrequencyService;
    private final DataService dataService;
    private final OntologyService ontologyService;
    // NOTE(review): never read in this class as it stands; presumably it was used by the
    // (currently empty) lambdas in collectExpandedQueryMap / collectOntologyTermQueryMap.
    private final Stemmer stemmer = new Stemmer();
    /** Maximum number of ontology terms requested by {@link #findTags(String, List)}. */
    public static final int MAX_NUM_TAGS = 3;
    private static final char SPACE_CHAR = ' ';
    private static final String COMMA_CHAR = ",";
    private static final String CARET_CHARACTER = "^";
    private static final String ESCAPED_CARET_CHARACTER = "\\^";
    private static final String ILLEGAL_CHARS_REGEX = "[^\\p{L}'a-zA-Z0-9\\.~]+";
    /** Compiled once; {@link #removeStopWords(String)} is called for every query string. */
    private static final Pattern ILLEGAL_CHARS_PATTERN = Pattern.compile(ILLEGAL_CHARS_REGEX);

    /**
     * Creates the helper.
     *
     * @param dataService used to look up entity type metadata, must not be null
     * @param ontologyService used to resolve ontology terms, their children and distances, must not be null
     * @param termFrequencyService used to look up term frequencies for boosting, must not be null
     * @throws NullPointerException if any argument is null
     */
    @Autowired
    public SemanticSearchServiceHelper(DataService dataService, OntologyService ontologyService, TermFrequencyService termFrequencyService) {
        this.dataService = Objects.requireNonNull(dataService, "dataService");
        this.ontologyService = Objects.requireNonNull(ontologyService, "ontologyService");
        this.termFrequencyService = Objects.requireNonNull(termFrequencyService, "termFrequencyService");
    }

    /**
     * Builds a DIS_MAX rule from user query terms and ontology terms.
     *
     * <p>Non-blank query terms and the queries of all single-IRI ontology terms become the fuzzy
     * match clauses of the rule. Each composite ontology term (IRI containing a comma) is added
     * afterwards as a nested SHOULD rule.
     *
     * @param set user supplied query terms, may be null
     * @param collection ontology terms, must not be null
     * @return the combined DIS_MAX rule
     */
    public QueryRule createDisMaxQueryRuleForAttribute(Set<String> set, Collection<OntologyTerm> collection) {
        List<String> queryTerms = new ArrayList<>();
        if (set != null) {
            queryTerms.addAll(set.stream()
                    .filter(StringUtils::isNotBlank)
                    .map(this::processQueryString)
                    .collect(Collectors.toList()));
        }

        // Single pass: simple terms contribute query strings now, composite terms are kept
        // aside because they can only be attached once the DIS_MAX rule exists.
        List<OntologyTerm> compositeTerms = new ArrayList<>();
        for (OntologyTerm ontologyTerm : collection) {
            if (ontologyTerm.getIRI().contains(COMMA_CHAR)) {
                compositeTerms.add(ontologyTerm);
            } else {
                queryTerms.addAll(parseOntologyTermQueries(ontologyTerm));
            }
        }

        QueryRule disMaxQueryRule = createDisMaxQueryRuleForTerms(queryTerms);
        for (OntologyTerm compositeTerm : compositeTerms) {
            disMaxQueryRule.getNestedRules().add(createShouldQueryRule(compositeTerm.getIRI()));
        }
        return disMaxQueryRule;
    }

    /**
     * Builds a DIS_MAX rule that fuzzy-matches every non-empty term against both the
     * {@code label} and the {@code description} field.
     *
     * @param list query strings; empty and null entries are skipped
     * @return a DIS_MAX rule with two FUZZY_MATCH rules per accepted term
     */
    public QueryRule createDisMaxQueryRuleForTerms(List<String> list) {
        List<QueryRule> fieldRules = new ArrayList<>();
        for (String term : list) {
            if (StringUtils.isNotEmpty(term)) {
                String escapedTerm = escapeCharsExcludingCaretChar(term);
                fieldRules.add(new QueryRule("label", QueryRule.Operator.FUZZY_MATCH, escapedTerm));
                fieldRules.add(new QueryRule("description", QueryRule.Operator.FUZZY_MATCH, escapedTerm));
            }
        }
        QueryRule disMaxQueryRule = new QueryRule(fieldRules);
        disMaxQueryRule.setOperator(QueryRule.Operator.DIS_MAX);
        return disMaxQueryRule;
    }

    /**
     * Same as {@link #createDisMaxQueryRuleForTerms(List)}, additionally storing {@code d} as the
     * value of the returned rule when it is a usable boost.
     *
     * <p>The boost is compared as a double: fractional boosts such as {@code 0.5} are applied
     * rather than being truncated to zero. {@code null}, zero and NaN are ignored.
     *
     * @param list query strings
     * @param d boost value, may be null
     * @return the DIS_MAX rule, carrying {@code d} as its value when applicable
     */
    public QueryRule createBoostedDisMaxQueryRuleForTerms(List<String> list, Double d) {
        QueryRule disMaxQueryRule = createDisMaxQueryRuleForTerms(list);
        if (d != null && !d.isNaN() && d.doubleValue() != 0.0d) {
            disMaxQueryRule.setValue(d);
        }
        return disMaxQueryRule;
    }

    /**
     * Builds a SHOULD rule for a composite IRI: one boosted DIS_MAX rule per comma-separated IRI.
     *
     * <p>IRIs for which the ontology service returns no term are skipped, in line with the null
     * handling in {@link #collectOntologyTermQueryMap(Map, OntologyTerm)}.
     *
     * @param str comma-separated ontology term IRIs
     * @return the SHOULD rule
     */
    public QueryRule createShouldQueryRule(String str) {
        QueryRule shouldQueryRule = new QueryRule(new ArrayList<QueryRule>());
        shouldQueryRule.setOperator(QueryRule.Operator.SHOULD);
        for (String iri : str.split(COMMA_CHAR)) {
            OntologyTerm ontologyTerm = ontologyService.getOntologyTerm(iri);
            if (ontologyTerm == null) {
                continue;
            }
            List<String> termQueries = parseOntologyTermQueries(ontologyTerm);
            Double boost = getBestInverseDocumentFrequency(termQueries);
            shouldQueryRule.getNestedRules().add(createBoostedDisMaxQueryRuleForTerms(termQueries, boost));
        }
        return shouldQueryRule;
    }

    /**
     * Produces the query strings for an ontology term: its own label and synonyms (stop words
     * removed), followed by the labels and synonyms of its children, each word boosted with
     * {@code 0.5 ^ distance(term, child)}.
     *
     * @param ontologyTerm the term to expand, must not be null
     * @return a mutable list of query strings
     */
    public List<String> parseOntologyTermQueries(OntologyTerm ontologyTerm) {
        // Collect into an explicit ArrayList: the list is appended to below and
        // Collectors.toList() gives no mutability guarantee.
        List<String> queries = getOtLabelAndSynonyms(ontologyTerm).stream()
                .map(this::processQueryString)
                .collect(Collectors.toCollection(ArrayList::new));
        for (OntologyTerm child : ontologyService.getChildren(ontologyTerm)) {
            double boost = Math.pow(0.5d, ontologyService.getOntologyTermDistance(ontologyTerm, child).intValue());
            for (String childName : getOtLabelAndSynonyms(child)) {
                queries.add(parseBoostQueryString(childName, boost));
            }
        }
        return queries;
    }

    /**
     * Returns the synonyms of the term followed by its label, without duplicates and in
     * insertion order.
     *
     * @param ontologyTerm the term, must not be null
     * @return a new set holding the synonyms and the label
     */
    public Set<String> getOtLabelAndSynonyms(OntologyTerm ontologyTerm) {
        Set<String> names = Sets.newLinkedHashSet(ontologyTerm.getSynonyms());
        names.add(ontologyTerm.getLabel());
        return names;
    }

    /**
     * Walks the query terms and the (possibly composite) ontology terms and returns the map
     * handed to {@link #collectOntologyTermQueryMap(Map, OntologyTerm)}.
     *
     * <p>NOTE(review): in this source the per-term lambdas here and in
     * {@code collectOntologyTermQueryMap} have empty bodies, so nothing is ever put into the map
     * and the result is always empty. The bodies look like they were lost (the {@code stemmer}
     * field is otherwise unused) — restore them from the original sources before relying on this.
     *
     * @param set user supplied query terms, must not be null
     * @param collection ontology terms, must not be null
     * @return an insertion-ordered map (currently always empty, see note)
     */
    public Map<String, String> collectExpandedQueryMap(Set<String> set, Collection<OntologyTerm> collection) {
        Map<String, String> expandedQueryMap = new LinkedHashMap<>();
        set.stream().filter(StringUtils::isNotBlank).forEach(queryTerm -> {
            // NOTE(review): intentionally left as found — body is empty in this source.
        });
        for (OntologyTerm ontologyTerm : collection) {
            String iri = ontologyTerm.getIRI();
            if (iri.contains(COMMA_CHAR)) {
                for (String partIri : iri.split(COMMA_CHAR)) {
                    collectOntologyTermQueryMap(expandedQueryMap, ontologyService.getOntologyTerm(partIri));
                }
            } else {
                collectOntologyTermQueryMap(expandedQueryMap, ontologyTerm);
            }
        }
        return expandedQueryMap;
    }

    /**
     * Visits the label and synonyms of the given term and of each of its children.
     *
     * <p>NOTE(review): the visiting lambdas are empty in this source, so {@code map} is never
     * modified; presumably each visited name was meant to be recorded in it. A null term is
     * ignored.
     *
     * @param map the map intended to receive the entries
     * @param ontologyTerm the term to visit, may be null
     */
    public void collectOntologyTermQueryMap(Map<String, String> map, OntologyTerm ontologyTerm) {
        if (ontologyTerm == null) {
            return;
        }
        getOtLabelAndSynonyms(ontologyTerm).forEach(name -> {
            // NOTE(review): intentionally left as found — body is empty in this source.
        });
        for (OntologyTerm child : ontologyService.getChildren(ontologyTerm)) {
            getOtLabelAndSynonyms(child).forEach(name -> {
                // NOTE(review): intentionally left as found — body is empty in this source.
            });
        }
    }

    /**
     * Collects the identifiers of all non-compound attributes of an entity type, descending
     * into attribute children.
     *
     * @param entityType the entity type whose metadata row is looked up by id
     * @return attribute identifiers in traversal order
     * @throws MolgenisDataAccessException if no {@code sys_md_EntityType} row exists for the id
     */
    public List<String> getAttributeIdentifiers(EntityType entityType) {
        Entity entityTypeEntity = this.dataService.findOne("sys_md_EntityType", new QueryImpl().eq("id", entityType.getId()));
        if (entityTypeEntity == null) {
            throw new MolgenisDataAccessException("Could not find EntityTypeEntity by the name of " + entityType.getFullyQualifiedName());
        }
        List<String> attributeIdentifiers = new ArrayList<>();
        recursivelyCollectAttributeIdentifiers(entityTypeEntity.getEntities("attributes"), attributeIdentifiers);
        return attributeIdentifiers;
    }

    /**
     * Depth-first walk over attribute entities: adds the id of every attribute whose type is not
     * COMPOUND, then recurses into its {@code children} when present.
     */
    private void recursivelyCollectAttributeIdentifiers(Iterable<Entity> iterable, List<String> list) {
        for (Entity attributeEntity : iterable) {
            boolean isCompound = attributeEntity.getString("type").equals(AttributeType.COMPOUND.toString());
            if (!isCompound) {
                list.add(attributeEntity.getString("id"));
            }
            Iterable<Entity> children = attributeEntity.getEntities("children");
            if (children != null) {
                recursivelyCollectAttributeIdentifiers(children, list);
            }
        }
    }

    /**
     * Asks the ontology service for at most {@link #MAX_NUM_TAGS} ontology terms matching the
     * non-stop-words of the description.
     *
     * @param str free-text description
     * @param list ontology identifiers passed through to the ontology service
     * @return the ontology terms returned by the service
     */
    public List<OntologyTerm> findTags(String str, List<String> list) {
        return this.ontologyService.findOntologyTerms(list, removeStopWords(str), MAX_NUM_TAGS);
    }

    /**
     * Removes stop words from the text and joins the remaining distinct words with a space.
     *
     * @param str the raw query text, must not be null
     * @return the space-joined words
     */
    public String processQueryString(String str) {
        return StringUtils.join(removeStopWords(str), SPACE_CHAR);
    }

    /**
     * Removes stop words from the text, appends {@code ^<boost>} to every remaining word and
     * joins the distinct results with a space.
     *
     * @param str the raw query text, must not be null
     * @param d the boost appended to each word
     * @return the boosted, space-joined words
     */
    public String parseBoostQueryString(String str, double d) {
        Set<String> boostedWords = removeStopWords(str).stream()
                .map(word -> word + CARET_CHARACTER + d)
                .collect(Collectors.toSet());
        return StringUtils.join(boostedWords, SPACE_CHAR);
    }

    /**
     * Escapes the string with {@link QueryParser#escape(String)} and then turns every escaped
     * caret back into a plain caret, presumably so the {@code ^boost} suffixes produced by
     * {@link #parseBoostQueryString(String, double)} keep working.
     *
     * @param str the query string
     * @return the escaped string with carets left unescaped
     */
    public String escapeCharsExcludingCaretChar(String str) {
        return QueryParser.escape(str).replace(ESCAPED_CARET_CHARACTER, CARET_CHARACTER);
    }

    /**
     * Splits the text on runs of characters that are not letters, digits, apostrophes, dots or
     * tildes, lower-cases the pieces and drops empty pieces and stop words.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the stop-word comparison does not depend on
     * the JVM default locale.
     *
     * @param str the raw text, must not be null
     * @return the distinct remaining words
     */
    public Set<String> removeStopWords(String str) {
        return Arrays.stream(ILLEGAL_CHARS_PATTERN.split(str))
                .map(word -> word.toLowerCase(Locale.ROOT))
                .filter(word -> StringUtils.isNotEmpty(word) && !NGramDistanceAlgorithm.STOPWORDSLIST.contains(word))
                .collect(Collectors.toSet());
    }

    /**
     * Looks up the term frequency of the shortest string in the list (the first one on ties).
     *
     * @return the value reported by the term frequency service, or null when the list is empty
     */
    private Double getBestInverseDocumentFrequency(List<String> list) {
        return list.stream()
                .min(Comparator.comparingInt(String::length))
                .map(shortest -> this.termFrequencyService.getTermFrequency(shortest))
                .orElse(null);
    }
}
