package ai.idylnlp.pipeline;

import ai.idylnlp.model.entity.Entity;
import ai.idylnlp.model.exceptions.EntityFinderException;
import ai.idylnlp.model.exceptions.ModelLoaderException;
import ai.idylnlp.model.manifest.ModelManifestUtils;
import ai.idylnlp.model.manifest.StandardModelManifest;
import ai.idylnlp.model.nlp.DuplicateEntityStrategy;
import ai.idylnlp.model.nlp.EntityComparator;
import ai.idylnlp.model.nlp.EntityOrder;
import ai.idylnlp.model.nlp.EntitySanitizer;
import ai.idylnlp.model.nlp.SentenceDetector;
import ai.idylnlp.model.nlp.Tokenizer;
import ai.idylnlp.model.nlp.ner.EntityExtractionRequest;
import ai.idylnlp.model.nlp.ner.EntityExtractionResponse;
import ai.idylnlp.model.nlp.ner.EntityRecognizer;
import ai.idylnlp.model.nlp.pipeline.Pipeline;
import ai.idylnlp.model.stats.StatsReporter;
import ai.idylnlp.nlp.recognizer.OpenNLPEntityRecognizer;
import ai.idylnlp.nlp.recognizer.configuration.OpenNLPEntityRecognizerConfiguration;
import ai.idylnlp.nlp.sentence.BreakIteratorSentenceDetector;
import ai.idylnlp.nlp.tokenizers.BreakIteratorTokenizer;
import ai.idylnlp.opennlp.custom.modelloader.LocalModelLoader;
import ai.idylnlp.opennlp.custom.validators.TrueModelValidator;
import ai.idylnlp.zoo.IdylNLPModelZoo;
import com.neovisionaries.i18n.LanguageCode;
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:ai/idylnlp/pipeline/NerPipeline.class */
/**
 * Named-entity recognition pipeline.
 *
 * <p>For every sentence reported by the {@link SentenceDetector} the text is tokenized, each
 * configured {@link EntityRecognizer} is run over the tokens, and the collected entities are
 * passed through the configured {@link EntitySanitizer}s, optionally de-duplicated and finally
 * ordered according to the configured {@link EntityOrder}.
 *
 * <p>Instances are created through {@link NerPipelineBuilder}.
 */
public class NerPipeline implements Pipeline<EntityExtractionResponse> {
    private static final Logger LOGGER = LogManager.getLogger(NerPipeline.class);

    private final SentenceDetector sentenceDetector;
    private final Tokenizer tokenizer;
    private final List<EntityRecognizer> entityRecognizers;
    private final List<EntitySanitizer> entitySanitizers;
    private final StatsReporter statsReporter;
    private final DuplicateEntityStrategy duplicateEntityStrategy;
    private final LanguageCode languageCode;
    private final EntityOrder entityOrder;
    private final IdylNLPModelZoo zoo;
    private final Set<String> entityTypes;

    /**
     * Fluent builder for {@link NerPipeline}.
     *
     * <p>Every component is optional. Components that were not supplied are replaced by defaults
     * in {@link #build(LanguageCode)}; the stats reporter and the model zoo are passed through
     * as-is and may therefore be {@code null}.
     */
    public static class NerPipelineBuilder {
        private SentenceDetector sentenceDetector;
        private Tokenizer tokenizer;
        private List<EntityRecognizer> entityRecognizers;
        private List<EntitySanitizer> entitySanitizers;
        private StatsReporter statsReporter;
        private DuplicateEntityStrategy duplicateEntityStrategy = DuplicateEntityStrategy.USE_HIGHEST_CONFIDENCE;
        private EntityOrder entityOrder = EntityOrder.CONFIDENCE;
        private IdylNLPModelZoo zoo;
        private Set<String> entityTypes;

        /**
         * Sets the sentence detector used to split the input text.
         *
         * @param sentenceDetector the detector; {@code null} selects the default at build time
         * @return this builder
         */
        public NerPipelineBuilder withSentenceDetector(SentenceDetector sentenceDetector) {
            this.sentenceDetector = sentenceDetector;
            return this;
        }

        /**
         * Sets the tokenizer applied to each sentence.
         *
         * @param tokenizer the tokenizer; {@code null} selects the default at build time
         * @return this builder
         */
        public NerPipelineBuilder withTokenizer(Tokenizer tokenizer) {
            this.tokenizer = tokenizer;
            return this;
        }

        /**
         * Sets the entity recognizers that are run over every tokenized sentence.
         *
         * @param list the recognizers; {@code null} selects the bundled-model recognizer at build time
         * @return this builder
         */
        public NerPipelineBuilder withEntityRecognizers(List<EntityRecognizer> list) {
            this.entityRecognizers = list;
            return this;
        }

        /**
         * Sets the sanitizers applied to the extracted entities.
         *
         * @param list the sanitizers; {@code null} selects an empty list at build time
         * @return this builder
         */
        public NerPipelineBuilder withEntitySanitizers(List<EntitySanitizer> list) {
            this.entitySanitizers = list;
            return this;
        }

        /**
         * Sets the reporter that receives extraction statistics.
         *
         * @param statsReporter the reporter; may be {@code null}, in which case nothing is reported
         * @return this builder
         */
        public NerPipelineBuilder withStatsReporter(StatsReporter statsReporter) {
            this.statsReporter = statsReporter;
            return this;
        }

        /**
         * Sets how duplicate entities are handled. Defaults to
         * {@link DuplicateEntityStrategy#USE_HIGHEST_CONFIDENCE}.
         *
         * @param duplicateEntityStrategy the strategy
         * @return this builder
         */
        public NerPipelineBuilder withDuplicateEntityStrategy(DuplicateEntityStrategy duplicateEntityStrategy) {
            this.duplicateEntityStrategy = duplicateEntityStrategy;
            return this;
        }

        /**
         * Sets the ordering of the returned entities. Defaults to {@link EntityOrder#CONFIDENCE}.
         *
         * @param entityOrder the ordering
         * @return this builder
         */
        public NerPipelineBuilder withEntityOrder(EntityOrder entityOrder) {
            this.entityOrder = entityOrder;
            return this;
        }

        /**
         * Sets the model zoo handed to the pipeline.
         *
         * @param idylNLPModelZoo the zoo; may be {@code null}
         * @return this builder
         */
        public NerPipelineBuilder withIdylNLPModelZoo(IdylNLPModelZoo idylNLPModelZoo) {
            this.zoo = idylNLPModelZoo;
            return this;
        }

        /**
         * Sets the entity types handed to the pipeline.
         *
         * @param set the entity types; {@code null} selects an empty set at build time
         * @return this builder
         */
        public NerPipelineBuilder withEntityTypes(Set<String> set) {
            this.entityTypes = set;
            return this;
        }

        /**
         * Creates the pipeline, filling in a default for every component that was not supplied.
         *
         * <p>The chosen defaults are stored back into this builder, so later {@code build} calls
         * reuse them.
         *
         * @param languageCode language used for the default sentence detector, tokenizer and models
         * @return the configured pipeline
         * @throws IllegalStateException if no recognizers were supplied and the classpath holds no
         *         {@code /models/<alpha3>/} directory for {@code languageCode}
         */
        public NerPipeline build(LanguageCode languageCode) {
            if (this.sentenceDetector == null) {
                this.sentenceDetector = new BreakIteratorSentenceDetector(languageCode);
            }
            if (this.tokenizer == null) {
                this.tokenizer = new BreakIteratorTokenizer(languageCode);
            }
            if (this.entityRecognizers == null) {
                this.entityRecognizers = defaultEntityRecognizers(languageCode);
            }
            if (this.entitySanitizers == null) {
                this.entitySanitizers = new ArrayList<>();
            }
            if (this.entityTypes == null) {
                this.entityTypes = new HashSet<>();
            }
            return new NerPipeline(this.sentenceDetector, this.tokenizer, this.entityRecognizers, this.entitySanitizers, this.statsReporter, this.duplicateEntityStrategy, languageCode, this.entityOrder, this.zoo, this.entityTypes);
        }

        /**
         * Builds the default recognizer list: one OpenNLP recognizer backed by every model
         * manifest found in the bundled model directory for the language.
         */
        private static List<EntityRecognizer> defaultEntityRecognizers(LanguageCode languageCode) {
            final String modelDirectory = resolveModelDirectory(languageCode);
            NerPipeline.LOGGER.info("Using model directory {}", modelDirectory);
            LocalModelLoader localModelLoader = new LocalModelLoader(new TrueModelValidator(), modelDirectory);

            // Group by entity type, then language. Manifests sharing a type are accumulated
            // rather than replacing each other, so no model found on disk is silently dropped.
            final Map<String, Map<LanguageCode, Set<StandardModelManifest>>> modelsByType = new HashMap<>();
            for (StandardModelManifest manifest : ModelManifestUtils.getModelManifests(modelDirectory)) {
                Map<LanguageCode, Set<StandardModelManifest>> byLanguage = modelsByType.get(manifest.getType());
                if (byLanguage == null) {
                    byLanguage = new HashMap<>();
                    modelsByType.put(manifest.getType(), byLanguage);
                }
                Set<StandardModelManifest> manifests = byLanguage.get(languageCode);
                if (manifests == null) {
                    manifests = new HashSet<>();
                    byLanguage.put(languageCode, manifests);
                }
                manifests.add(manifest);
            }

            final EntityRecognizer recognizer = new OpenNLPEntityRecognizer(new OpenNLPEntityRecognizerConfiguration.Builder().withEntityModelLoader(localModelLoader).withEntityModels(modelsByType).build());
            final List<EntityRecognizer> recognizers = new ArrayList<>();
            recognizers.add(recognizer);
            return recognizers;
        }

        /**
         * Resolves the absolute path of the classpath directory {@code /models/<alpha3>/}.
         *
         * @throws IllegalStateException if the directory is not on the classpath
         */
        private static String resolveModelDirectory(LanguageCode languageCode) {
            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
            final String resourcePath = "/models/" + languageCode.getAlpha3().toString().toLowerCase(Locale.ROOT) + "/";
            final URL resource = NerPipeline.class.getResource(resourcePath);
            if (resource == null) {
                throw new IllegalStateException("No model directory found on the classpath at " + resourcePath);
            }
            File directory;
            try {
                // Going through the URI decodes percent-escapes (e.g. %20 for a space),
                // which URL.getFile() leaves in place.
                directory = new File(resource.toURI());
            } catch (URISyntaxException | IllegalArgumentException ignored) {
                // Not a plain file: URI (or not a valid URI at all): keep the raw URL path.
                directory = new File(resource.getFile());
            }
            return directory.getAbsolutePath();
        }
    }

    /** Stores the fully resolved components; use {@link NerPipelineBuilder} to create instances. */
    private NerPipeline(SentenceDetector sentenceDetector, Tokenizer tokenizer, List<EntityRecognizer> entityRecognizers, List<EntitySanitizer> entitySanitizers, StatsReporter statsReporter, DuplicateEntityStrategy duplicateEntityStrategy, LanguageCode languageCode, EntityOrder entityOrder, IdylNLPModelZoo zoo, Set<String> entityTypes) {
        this.sentenceDetector = sentenceDetector;
        this.tokenizer = tokenizer;
        this.entityRecognizers = entityRecognizers;
        this.entitySanitizers = entitySanitizers;
        this.statsReporter = statsReporter;
        this.duplicateEntityStrategy = duplicateEntityStrategy;
        this.languageCode = languageCode;
        this.entityOrder = entityOrder;
        this.zoo = zoo;
        this.entityTypes = entityTypes;
    }

    /**
     * Extracts entities from {@code text}.
     *
     * <p>The decompiler header on the original method states that it was renamed from
     * {@code run}; this method restores that name. It presumably satisfies the single method
     * of {@link Pipeline} (TODO confirm against the interface), which is why no
     * {@code @Override} is asserted here.
     *
     * @param text the text to process
     * @return a response holding the entities, the summed extraction time reported by the
     *         recognizers, and a success flag; when a recognizer fails the flag is {@code false}
     *         and the entity set is {@code null}
     */
    public EntityExtractionResponse run(String text) {
        Set<Entity> entities = new HashSet<>();
        boolean successful = true;
        long extractionTime = 0L;
        try {
            for (String sentence : this.sentenceDetector.sentDetect(text)) {
                final String[] tokens = this.tokenizer.tokenize(sentence);
                for (EntityRecognizer recognizer : this.entityRecognizers) {
                    LOGGER.debug("Processing tokenized text with entity recognizer {}.", recognizer.toString());
                    final EntityExtractionRequest request = new EntityExtractionRequest(tokens);
                    request.setDuplicateEntityStrategy(this.duplicateEntityStrategy);
                    final EntityExtractionResponse response = recognizer.extractEntities(request);
                    entities.addAll(response.getEntities());
                    extractionTime += response.getExtractionTime();
                }
                // NOTE(review): 'entities' accumulates across sentences, so the counter grows by
                // the running total on every sentence, and the post-processing below is re-applied
                // to earlier sentences' entities. Left unchanged; confirm whether this is intended.
                if (this.statsReporter != null) {
                    this.statsReporter.increment("extraction.requests", entities.size());
                }
                for (EntitySanitizer sanitizer : this.entitySanitizers) {
                    entities = sanitizer.sanitizeEntities(entities);
                }
                if (this.duplicateEntityStrategy == DuplicateEntityStrategy.USE_HIGHEST_CONFIDENCE) {
                    entities = removeDuplicateEntities(entities);
                }
                entities = EntityComparator.sort(entities, this.entityOrder);
            }
        } catch (ModelLoaderException | EntityFinderException e) {
            LOGGER.error("Unable to process through the Idyl pipeline.", e);
            // Failure is signalled through the flag; the entity set is deliberately null.
            entities = null;
            successful = false;
        }
        return new EntityExtractionResponse(entities, extractionTime, successful);
    }

    /**
     * Decompiler-assigned alias of {@link #run(String)}, kept so existing callers of this name
     * keep working.
     *
     * @param str the text to process
     * @return the result of {@link #run(String)}
     */
    public EntityExtractionResponse m1run(String str) {
        return run(str);
    }

    /**
     * Collapses entities whose text is equal ignoring case, keeping the one with the highest
     * confidence in each group.
     *
     * <p>The result preserves the order in which each distinct text first appears in
     * {@code set}. When several entities of a group share the highest confidence, the first
     * one encountered is kept. A group always contributes exactly one non-null entity, even
     * when all of its confidences are zero or negative.
     *
     * @param set the entities to de-duplicate; must not be {@code null}
     * @return a new, insertion-ordered set with one entity per distinct (case-insensitive) text
     */
    public static Set<Entity> removeDuplicateEntities(Set<Entity> set) {
        final Map<String, Entity> bestByText = new LinkedHashMap<>();
        for (Entity candidate : set) {
            final String key = caseInsensitiveKey(candidate.getText());
            final Entity incumbent = bestByText.get(key);
            // Replacing the value of an existing key keeps its original position in the map.
            if (incumbent == null || candidate.getConfidence() > incumbent.getConfidence()) {
                bestByText.put(key, candidate);
            }
        }
        return new LinkedHashSet<>(bestByText.values());
    }

    /**
     * Folds {@code text} per code point with upper-casing followed by lower-casing, the
     * character comparison that {@link String#equalsIgnoreCase(String)} documents, so that
     * texts equal ignoring case map to the same key.
     */
    private static String caseInsensitiveKey(String text) {
        final StringBuilder folded = new StringBuilder(text.length());
        int index = 0;
        while (index < text.length()) {
            final int codePoint = text.codePointAt(index);
            folded.appendCodePoint(Character.toLowerCase(Character.toUpperCase(codePoint)));
            index += Character.charCount(codePoint);
        }
        return folded.toString();
    }

    /** @return the sentence detector used to split input text */
    public SentenceDetector getSentenceDetector() {
        return this.sentenceDetector;
    }

    /** @return the tokenizer applied to each sentence */
    public Tokenizer getTokenizer() {
        return this.tokenizer;
    }

    /** @return the entity recognizers run over every tokenized sentence */
    public List<EntityRecognizer> getEntityRecognizers() {
        return this.entityRecognizers;
    }

    /**
     * Misspelled accessor retained for existing callers; same as {@link #getEntitySanitizers()}.
     *
     * @return the sanitizers applied to extracted entities
     */
    public List<EntitySanitizer> getEntitySanitiziers() {
        return this.entitySanitizers;
    }

    /** @return the sanitizers applied to extracted entities */
    public List<EntitySanitizer> getEntitySanitizers() {
        return this.entitySanitizers;
    }

    /** @return the stats reporter, or {@code null} when none was configured */
    public StatsReporter getStatsReporter() {
        return this.statsReporter;
    }

    /** @return the strategy used for duplicate entities */
    public DuplicateEntityStrategy getDuplicateEntityStrategy() {
        return this.duplicateEntityStrategy;
    }

    /** @return the language code the pipeline was built for */
    public LanguageCode getLanguageCode() {
        return this.languageCode;
    }

    /** @return the ordering applied to the returned entities */
    public EntityOrder getEntityOrder() {
        return this.entityOrder;
    }

    /** @return the model zoo, or {@code null} when none was configured */
    public IdylNLPModelZoo getZoo() {
        return this.zoo;
    }

    /** @return the entity types configured for this pipeline */
    public Set<String> getEntityTypes() {
        return this.entityTypes;
    }
}
