package org.molgenis.semanticsearch.service.impl;

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.Query;
import org.molgenis.data.QueryRule;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.ontology.core.model.Ontology;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.service.OntologyService;
import org.molgenis.semanticsearch.explain.bean.AttributeSearchResults;
import org.molgenis.semanticsearch.explain.bean.EntityTypeSearchResults;
import org.molgenis.semanticsearch.explain.bean.ExplainedAttribute;
import org.molgenis.semanticsearch.explain.bean.ExplainedQueryString;
import org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService;
import org.molgenis.semanticsearch.semantic.Hit;
import org.molgenis.semanticsearch.semantic.Hits;
import org.molgenis.semanticsearch.service.OntologyTagService;
import org.molgenis.semanticsearch.service.SemanticSearchService;
import org.molgenis.semanticsearch.string.NGramDistanceAlgorithm;
import org.molgenis.semanticsearch.string.Stemmer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link SemanticSearchService} implementation that looks up attributes and ontology terms through
 * the {@link DataService}, the {@link OntologyService}, the {@link OntologyTagService} and the
 * {@link ElasticSearchExplainService}.
 */
public class SemanticSearchServiceImpl implements SemanticSearchService {
    private static final Logger LOG = LoggerFactory.getLogger(SemanticSearchServiceImpl.class);

    /** Maximum number of tags; handed to the ontology service when looking up ontology terms. */
    private static final int MAX_NUM_TAGS = 100;

    /**
     * Score (truncated to an int) an explained query string must have to count as a good match.
     * Deliberately separate from {@link #MAX_NUM_TAGS}, which only happens to share the value.
     */
    private static final int PERFECT_MATCH_SCORE = 100;

    /** Tag hits scoring below this value are discarded by {@link #findTags}. */
    private static final float CUTOFF = 0.4f;

    /** IRI of the ontology that is excluded when tags are searched automatically. */
    private static final String UNIT_ONTOLOGY_IRI = "http://purl.obolibrary.org/obo/uo.owl";

    /** Only the first this-many attributes found get an explanation attached. */
    private static final int MAX_NUMBER_EXPLAINED_ATTRIBUTES = 10;

    /** Identifier of the entity type that is queried for attributes. */
    private static final String ATTRIBUTE_ENTITY_TYPE_ID = "sys_md_Attribute";

    /** Splits text on every run of non-alphabetic characters. */
    private static final Splitter TERM_SPLITTER = Splitter.onPattern("[^\\p{IsAlphabetic}]+");

    private static final Joiner TERM_JOINER = Joiner.on(' ');

    private final DataService dataService;
    private final OntologyService ontologyService;
    private final SemanticSearchServiceHelper semanticSearchServiceHelper;
    private final ElasticSearchExplainService elasticSearchExplainService;
    private final OntologyTagService ontologyTagService;

    /**
     * Creates the service.
     *
     * @throws NullPointerException if any collaborator is {@code null}
     */
    public SemanticSearchServiceImpl(DataService dataService, OntologyService ontologyService, SemanticSearchServiceHelper semanticSearchServiceHelper, ElasticSearchExplainService elasticSearchExplainService, OntologyTagService ontologyTagService) {
        this.dataService = Objects.requireNonNull(dataService);
        this.ontologyService = Objects.requireNonNull(ontologyService);
        this.semanticSearchServiceHelper = Objects.requireNonNull(semanticSearchServiceHelper);
        this.elasticSearchExplainService = Objects.requireNonNull(elasticSearchExplainService);
        this.ontologyTagService = Objects.requireNonNull(ontologyTagService);
    }

    /**
     * Queries the attributes of {@code entityType} using the given search terms and ontology terms.
     *
     * <p>The query restricts on the attribute identifiers of {@code entityType}; the rule created by
     * the helper is AND-ed to it when it has nested rules. Only the first
     * {@value #MAX_NUMBER_EXPLAINED_ATTRIBUTES} results are explained; the remaining ones get an
     * empty explanation set and are flagged as not high quality. Every hit gets score {@code 1.0}.
     *
     * @param entityType entity type whose attributes are searched
     * @param set search terms
     * @param collection ontology terms used to build and expand the query
     * @return one hit per attribute entity returned by the data service, in the order returned
     */
    public Hits<ExplainedAttribute> findAttributes(EntityType entityType, Set<String> set, Collection<OntologyTerm> collection) {
        List<String> attributeIdentifiers = this.semanticSearchServiceHelper.getAttributeIdentifiers(entityType);
        QueryRule disMaxQueryRule = this.semanticSearchServiceHelper.createDisMaxQueryRuleForAttribute(set, collection);

        List<QueryRule> queryRules = new ArrayList<>();
        queryRules.add(new QueryRule("id", QueryRule.Operator.IN, attributeIdentifiers));
        if (!disMaxQueryRule.getNestedRules().isEmpty()) {
            queryRules.add(new QueryRule(QueryRule.Operator.AND));
            queryRules.add(disMaxQueryRule);
        }

        Stream<Entity> attributeEntities = this.dataService.findAll(ATTRIBUTE_ENTITY_TYPE_ID, new QueryImpl<>(queryRules));
        Map<String, String> expandedQueries = this.semanticSearchServiceHelper.collectExpandedQueryMap(set, collection);

        // Iterate sequentially so the running count and the result order follow the query order.
        List<Hit<ExplainedAttribute>> hits = new ArrayList<>();
        Iterator<Entity> entities = attributeEntities.iterator();
        for (int rank = 0; entities.hasNext(); rank++) {
            Entity attributeEntity = entities.next();
            Attribute attribute = entityType.getAttribute(attributeEntity.getString("name"));

            Set<ExplainedQueryString> explanations;
            boolean highQuality;
            if (rank < MAX_NUMBER_EXPLAINED_ATTRIBUTES) {
                explanations = convertAttributeToExplainedAttribute(attribute, expandedQueries, new QueryImpl<>(queryRules));
                highQuality = isSingleMatchHighQuality(set, expandedQueries.values(), explanations);
            } else {
                explanations = Collections.emptySet();
                highQuality = false;
            }
            hits.add(Hit.create(ExplainedAttribute.create(attribute, explanations, highQuality), 1.0f));
        }
        return Hits.create(hits);
    }

    /**
     * Decides whether the explanations describe a high quality match.
     *
     * <p>The match is high quality when at least one search term is a good match, or, failing that,
     * when there is at least one ontology term query that is not also a search term and all such
     * queries are good matches. Neither argument collection is modified.
     *
     * @param collection search terms
     * @param collection2 ontology term queries
     * @param iterable explanations providing a score per (lower-cased) tag name
     * @return {@code true} if the match is considered high quality
     */
    boolean isSingleMatchHighQuality(Collection<String> collection, Collection<String> collection2, Iterable<ExplainedQueryString> iterable) {
        Map<String, Double> scoreByTag = new HashMap<>();
        for (ExplainedQueryString explanation : iterable) {
            scoreByTag.put(explanation.getTagName().toLowerCase(), Double.valueOf(explanation.getScore()));
        }

        if (collection.stream().anyMatch(term -> isGoodMatch(scoreByTag, term))) {
            return true;
        }

        // Work on a copy: the caller's collection may be shared or unmodifiable.
        List<String> ontologyOnlyQueries = new ArrayList<>(collection2);
        ontologyOnlyQueries.removeAll(collection);
        return !ontologyOnlyQueries.isEmpty()
                && ontologyOnlyQueries.stream().allMatch(query -> isGoodMatch(scoreByTag, query));
    }

    /**
     * Checks whether {@code str} is matched with a perfect score.
     *
     * <p>That is the case when the lower-cased string itself has a score of
     * {@value #PERFECT_MATCH_SCORE} in {@code map}, or when every space-separated word of it has.
     *
     * @param map scores keyed by lower-cased tag name
     * @param str label to check; compared case-insensitively
     * @return {@code true} if the label, or each of its words, has a perfect score
     */
    boolean isGoodMatch(Map<String, Double> map, String str) {
        String label = str.toLowerCase();
        return hasPerfectScore(map, label)
                || Arrays.stream(label.split(" ")).allMatch(word -> hasPerfectScore(map, word));
    }

    /** Returns whether {@code key} is present in {@code scores} with a perfect (int-truncated) score. */
    private static boolean hasPerfectScore(Map<String, Double> scores, String key) {
        Double score = scores.get(key);
        return score != null && score.intValue() == PERFECT_MATCH_SCORE;
    }

    /**
     * Searches {@code entityType} for matches of every atomic attribute of {@code entityType2}
     * that has no expression.
     *
     * @param entityType entity type that is searched
     * @param entityType2 entity type whose attributes are searched for
     * @param set search terms, may be {@code null} or empty
     * @return the search results per attribute
     */
    @Override // org.molgenis.semanticsearch.service.SemanticSearchService
    public EntityTypeSearchResults findAttributes(EntityType entityType, EntityType entityType2, Set<String> set) {
        List<AttributeSearchResults> attributeResults =
                StreamSupport.stream(entityType2.getAtomicAttributes().spliterator(), false)
                        .filter(candidate -> candidate.getExpression() == null)
                        .map(candidate -> findAttributes(entityType, entityType2, candidate, set))
                        .collect(Collectors.toList());
        return EntityTypeSearchResults.create(entityType2, attributeResults);
    }

    /**
     * Searches {@code entityType} for attributes matching {@code attribute}.
     *
     * <p>Ontology terms are chosen as follows: when search terms are given, the exact ontology terms
     * for the non-blank, escaped search terms; otherwise the tags of the attribute; and when the
     * attribute has no tags, the tag found by {@link #findTags} over all ontologies except the unit
     * ontology (or none when no tag is found).
     *
     * @param entityType entity type that is searched
     * @param entityType2 entity type that owns {@code attribute}
     * @param attribute attribute to find matches for
     * @param set search terms, may be {@code null} or empty
     * @return the search results for {@code attribute}
     */
    @Override // org.molgenis.semanticsearch.service.SemanticSearchService
    public AttributeSearchResults findAttributes(EntityType entityType, EntityType entityType2, Attribute attribute, Set<String> set) {
        Collection<OntologyTerm> ontologyTerms = this.ontologyTagService.getTagsForAttribute(entityType2, attribute).values();
        Set<String> queryTerms = createLexicalSearchQueryTerms(attribute, set);

        if (set != null && !set.isEmpty()) {
            List<String> ontologyIds = this.ontologyService.getAllOntologiesIds();
            Set<String> escapedTerms = set.stream()
                    .filter(StringUtils::isNotBlank)
                    .map(QueryParserBase::escape)
                    .collect(Collectors.toSet());
            ontologyTerms = this.ontologyService.findExactOntologyTerms(ontologyIds, escapedTerms, MAX_NUM_TAGS);
        } else if (ontologyTerms == null || ontologyTerms.isEmpty()) {
            // Copy before filtering so the list handed out by the ontology service stays untouched.
            List<String> ontologyIds = new ArrayList<>(this.ontologyService.getAllOntologiesIds());
            Ontology unitOntology = this.ontologyService.getOntology(UNIT_ONTOLOGY_IRI);
            if (unitOntology != null) {
                ontologyIds.remove(unitOntology.getId());
            }
            Hit<OntologyTerm> tag = findTags(attribute, ontologyIds);
            ontologyTerms = tag != null ? Collections.singletonList(tag.getResult()) : Collections.emptyList();
        }
        return AttributeSearchResults.create(attribute, findAttributes(entityType, queryTerms, ontologyTerms));
    }

    /**
     * Builds the lexical query terms for an attribute search.
     *
     * @param attribute attribute whose label and description serve as fallback terms
     * @param set explicit search terms, may be {@code null} or empty
     * @return a new mutable set holding the search terms or, when there are none, the non-blank
     *     label and description of {@code attribute}
     */
    public Set<String> createLexicalSearchQueryTerms(Attribute attribute, Set<String> set) {
        Set<String> queryTerms = new HashSet<>();
        if (set != null) {
            queryTerms.addAll(set);
        }
        if (queryTerms.isEmpty()) {
            if (StringUtils.isNotBlank(attribute.getLabel())) {
                queryTerms.add(attribute.getLabel());
            }
            if (StringUtils.isNotBlank(attribute.getDescription())) {
                queryTerms.add(attribute.getDescription());
            }
        }
        return queryTerms;
    }

    /**
     * Asks the explain service why {@code attribute} matched {@code query} and converts the
     * explanation into explained query strings.
     *
     * @param attribute attribute whose identifier is explained
     * @param map expanded queries handed to the explain service together with the explanation
     * @param query query that produced the match
     * @return the explained query strings reported by the explain service
     */
    public Set<ExplainedQueryString> convertAttributeToExplainedAttribute(Attribute attribute, Map<String, String> map, Query<Entity> query) {
        EntityType attributeEntityType = this.dataService.getEntityType(ATTRIBUTE_ENTITY_TYPE_ID);
        return this.elasticSearchExplainService.findQueriesFromExplanation(
                map, this.elasticSearchExplainService.explain(query, attributeEntityType, attribute.getIdentifier()));
    }

    /**
     * Finds the ontology term tag for {@code attribute} within the given ontologies.
     *
     * @param attribute attribute to tag
     * @param collection ontologies to search
     * @return hits containing the single tag found by {@link #findTags}, or no hits at all
     */
    @Override // org.molgenis.semanticsearch.service.SemanticSearchService
    public Hits<OntologyTerm> findOntologyTerms(Attribute attribute, Collection<Ontology> collection) {
        List<String> ontologyIds = collection.stream().map(Ontology::getId).collect(Collectors.toList());
        Hit<OntologyTerm> tag = findTags(attribute, ontologyIds);
        if (tag == null) {
            return Hits.create();
        }
        return Hits.create(tag);
    }

    /**
     * Finds the best scoring ontology term, or combination of ontology terms, for an attribute.
     *
     * <p>The description of the attribute (or its label when there is no description) is split
     * into terms. Candidates from the ontology service are kept when one of their labels/synonyms
     * consists solely of stemmed search terms, and are ordered by the score of their best matching
     * synonym, best first. Going through that order, a candidate is combined with the current best
     * hit whenever the combination scores higher than the current best hit.
     *
     * @param attribute attribute to tag
     * @param list identifiers of the ontologies to search
     * @return the best hit, or {@code null} when there is none or its score is below the cutoff
     */
    Hit<OntologyTerm> findTags(Attribute attribute, List<String> list) {
        // NOTE(review): if description and label are both null, splitIntoTerms receives null - confirm
        // that getLabel() never returns null.
        String description = attribute.getDescription();
        Set<String> searchTerms = splitIntoTerms(description == null ? attribute.getLabel() : description);
        LOG.debug("findAttributeOntologyTerms({},{},{})", list, searchTerms, MAX_NUM_TAGS);

        List<OntologyTerm> candidates = this.ontologyService.findOntologyTerms(list, searchTerms, MAX_NUM_TAGS);
        LOG.debug("Candidates: {}", candidates);

        // Identical for every candidate, so computed once instead of inside the filter.
        Set<String> stemmedSearchTerms = splitIntoTerms(Stemmer.stemAndJoin(searchTerms));
        List<Hit<OntologyTerm>> hits = candidates.stream()
                .filter(candidate -> filterOntologyTerm(stemmedSearchTerms, candidate))
                .map(candidate -> Hit.create(candidate, bestMatchingSynonym(candidate, searchTerms).getScore()))
                .sorted(Ordering.natural().reverse())
                .collect(Collectors.toList());
        LOG.debug("Hits: {}", hits);

        Hit<OntologyTerm> best = null;
        String bestSynonyms = null;
        for (Hit<OntologyTerm> hit : hits) {
            String synonym = bestMatchingSynonym(hit.getResult(), searchTerms).getResult();
            if (best == null) {
                best = hit;
                bestSynonyms = synonym;
            } else {
                Set<String> combinedTerms = Sets.union(splitIntoTerms(bestSynonyms), splitIntoTerms(synonym));
                Hit<OntologyTerm> combined = Hit.create(
                        OntologyTerm.and(best.getResult(), hit.getResult()),
                        distanceFrom(TERM_JOINER.join(combinedTerms), searchTerms));
                if (combined.compareTo(best) > 0) {
                    best = combined;
                    bestSynonyms = bestSynonyms + " " + synonym;
                }
            }
            LOG.debug("result: {}", best);
        }

        if (best == null || best.getScore() < CUTOFF) {
            return null;
        }
        LOG.debug("Tag {} with {}", attribute, best);
        return best;
    }

    /**
     * Returns whether any label or synonym of {@code ontologyTerm}, once split and stemmed, is
     * non-empty and made up entirely of terms contained in {@code set}.
     */
    private boolean filterOntologyTerm(Set<String> set, OntologyTerm ontologyTerm) {
        for (String labelOrSynonym : this.semanticSearchServiceHelper.getOtLabelAndSynonyms(ontologyTerm)) {
            Set<String> stemmedTerms = splitIntoTerms(Stemmer.stemAndJoin(splitIntoTerms(labelOrSynonym)));
            if (!stemmedTerms.isEmpty() && set.containsAll(stemmedTerms)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the synonym of {@code ontologyTerm} that scores best against the search terms.
     *
     * @param ontologyTerm ontology term whose synonyms are scored
     * @param set search terms
     * @return the best scoring synonym together with its score
     * @throws IllegalStateException if the ontology term has no synonyms
     */
    Hit<String> bestMatchingSynonym(OntologyTerm ontologyTerm, Set<String> set) {
        return ontologyTerm.getSynonyms().stream()
                .map(synonym -> Hit.create(synonym, distanceFrom(synonym, set)))
                .max(Comparator.naturalOrder())
                .orElseThrow(() -> new IllegalStateException("ontologyTerm.getSynonyms() shouldn't return an empty list"));
    }

    /**
     * Scores a string against a set of search terms.
     *
     * @param str text to score; split into terms and stemmed before comparison
     * @param set search terms; stemmed before comparison
     * @return the n-gram string matching result of the two stemmed strings, divided by 100
     */
    float distanceFrom(String str, Set<String> set) {
        String stemmedText = Stemmer.stemAndJoin(splitIntoTerms(str));
        String stemmedSearchTerms = Stemmer.stemAndJoin(set);
        float similarity = ((float) NGramDistanceAlgorithm.stringMatching(stemmedText, stemmedSearchTerms)) / 100.0f;
        LOG.debug("Similarity between: {} and {} is {}", stemmedText, stemmedSearchTerms, similarity);
        return similarity;
    }

    /**
     * Splits text on non-alphabetic characters into lower-cased terms, dropping stop words and
     * empty strings.
     */
    private Set<String> splitIntoTerms(String str) {
        return FluentIterable.from(TERM_SPLITTER.split(str))
                .transform(term -> term.toLowerCase())
                .filter(term -> !NGramDistanceAlgorithm.STOPWORDSLIST.contains(term))
                .filter(term -> StringUtils.isNotEmpty(term))
                .toSet();
    }
}
