package eu.monnetproject.translation.sources.iate;

import eu.monnetproject.config.Configurator;
import eu.monnetproject.framework.services.Services;
import eu.monnetproject.label.LabelExtractorFactory;
import eu.monnetproject.lang.Language;
import eu.monnetproject.lang.LanguageCodeFormatException;
import eu.monnetproject.lang.Script;
import eu.monnetproject.lemon.model.LexicalEntry;
import eu.monnetproject.lemon.model.LexicalForm;
import eu.monnetproject.lemon.model.LexicalSense;
import eu.monnetproject.lemon.model.Lexicon;
import eu.monnetproject.lemon.model.Text;
import eu.monnetproject.ontology.AnnotationProperty;
import eu.monnetproject.ontology.Class;
import eu.monnetproject.ontology.DatatypeProperty;
import eu.monnetproject.ontology.Entity;
import eu.monnetproject.ontology.Individual;
import eu.monnetproject.ontology.ObjectProperty;
import eu.monnetproject.ontology.Ontology;
import eu.monnetproject.ontology.OntologySerializer;
import eu.monnetproject.translation.Chunk;
import eu.monnetproject.translation.Label;
import eu.monnetproject.translation.Tokenizer;
import eu.monnetproject.translation.TokenizerFactory;
import eu.monnetproject.translation.TranslationPhraseChunker;
import eu.monnetproject.translation.TranslationPhraseChunkerFactory;
import eu.monnetproject.translation.monitor.Messages;
import eu.monnetproject.translation.util.CLSim;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;

/* loaded from: input_file:eu/monnetproject/translation/sources/iate/IATEIndexer.class */
/**
 * Command-line indexer that reads ontology files from a configured folder,
 * lexicalizes them, chunks the labels of the source-language lexicon and hands
 * every chunk to an {@link IATESourceWithCache} via {@code indexCandidates}.
 *
 * <p>Configuration is read from the
 * {@code eu.monnetproject.translation.sources.iate.indexer} config and uses the
 * keys {@code ontologiesFolder}, {@code sourceLanguage}, {@code targetLanguage}
 * and {@code use}.
 */
public class IATEIndexer {
    /** File-name suffixes accepted by {@link #prepareOntologyFile(File)}. */
    private static final String[] ONTOLOGY_SUFFIXES = {"rdf", "owl", "ttl", "xml", "nt"};

    private OntologySerializer ontoSerializer;
    private LabelExtractorFactory lef;
    private Language sourceLanguage;
    private Language targetLanguage;
    private TokenizerFactory tokenizerFactory;
    private Iterable<TranslationPhraseChunkerFactory> chunkerFactories;
    /** Set by {@link #main(String[])} only; stays {@code null} otherwise. */
    private IATESourceWithCache translationSource = null;
    /** Entity URIs to restrict indexing to; an empty list means "no restriction". */
    private final List<URI> scopes = new ArrayList<URI>();
    /** String form of {@link #scopes}; always empty in this class. */
    private final String[] scopeStrs = new String[0];

    /**
     * Immutable bundle of everything {@link #prepareOntologyFile(File)} derives
     * from a single ontology file.
     */
    public static class PreparedOntology {
        /** All lexica produced by lexicalizing the ontology. */
        public final Collection<Lexicon> sourceLexica;
        /** The lexicon in the indexer's source language. */
        public final Lexicon sourceLexicon;
        /** The lexicon to be filled with translations (a blank lexicon when built by the indexer). */
        public final Lexicon targetLexicon;
        /** The pre-existing lexicon in the target language (used as reference). */
        public final Lexicon referenceLexicon;
        /** The ontology the lexica were derived from. */
        public final Ontology ontology;
        /** Simple name of the file the ontology was read from. */
        public final String fileName;

        /**
         * @param sourceLexica     all lexica produced for the ontology
         * @param sourceLexicon    lexicon in the source language
         * @param targetLexicon    lexicon that receives translations
         * @param referenceLexicon existing lexicon in the target language
         * @param ontology         the ontology itself
         * @param fileName         name of the originating file
         */
        public PreparedOntology(Collection<Lexicon> sourceLexica, Lexicon sourceLexicon, Lexicon targetLexicon, Lexicon referenceLexicon, Ontology ontology, String fileName) {
            this.sourceLexica = sourceLexica;
            this.sourceLexicon = sourceLexicon;
            this.targetLexicon = targetLexicon;
            this.referenceLexicon = referenceLexicon;
            this.ontology = ontology;
            this.fileName = fileName;
        }
    }

    /**
     * Creates an indexer around the given services. Languages and the
     * translation source are not set here; {@link #main(String[])} assigns them.
     *
     * @param ontologySerializer    reads ontology files
     * @param labelExtractorFactory passed to the lexicalizer
     * @param tokenizerFactory      supplies a tokenizer per script
     * @param chunkerFactories      supplies the phrase chunkers
     */
    public IATEIndexer(OntologySerializer ontologySerializer, LabelExtractorFactory labelExtractorFactory, TokenizerFactory tokenizerFactory, Iterable<TranslationPhraseChunkerFactory> chunkerFactories) {
        this.ontoSerializer = ontologySerializer;
        this.lef = labelExtractorFactory;
        this.tokenizerFactory = tokenizerFactory;
        this.chunkerFactories = chunkerFactories;
    }

    /**
     * Entry point: indexes every ontology file found in the configured folder.
     * Nothing is indexed unless the {@code use} property parses as {@code true}.
     *
     * @param args ignored
     * @throws IOException              if the ontology folder cannot be listed or a file cannot be read
     * @throws IllegalArgumentException if a required configuration property is missing
     */
    public static void main(String[] args) throws IOException {
        CLSim clSim = (CLSim) Services.get(CLSim.class);
        Properties config = Configurator.getConfig("eu.monnetproject.translation.sources.iate.indexer");
        File ontologiesFolder = new File(requireProperty(config, "ontologiesFolder"));
        IATEIndexer indexer = new IATEIndexer(
                (OntologySerializer) Services.get(OntologySerializer.class),
                (LabelExtractorFactory) Services.get(LabelExtractorFactory.class),
                (TokenizerFactory) Services.get(TokenizerFactory.class),
                Services.getAll(TranslationPhraseChunkerFactory.class));
        indexer.sourceLanguage = Language.getByIso639_1(requireProperty(config, "sourceLanguage").trim());
        indexer.targetLanguage = Language.getByIso639_1(requireProperty(config, "targetLanguage").trim());
        boolean use = Boolean.parseBoolean(config.getProperty("use"));
        // URI.create never returns null; it throws IllegalArgumentException on bad input.
        for (String scope : indexer.scopeStrs) {
            indexer.scopes.add(URI.create(scope));
        }
        if (!use) {
            return;
        }

        indexer.translationSource = new IATESourceWithCache(indexer.sourceLanguage, indexer.targetLanguage, config);
        try {
            indexer.indexFolder(ontologiesFolder, clSim);
        } finally {
            // Release both resources even when indexing aborts with an exception.
            try {
                indexer.translationSource.close();
            } finally {
                clSim.close();
            }
        }
    }

    /** Returns the value of a mandatory property or fails with a message naming the key. */
    private static String requireProperty(Properties config, String key) {
        String value = config.getProperty(key);
        if (value == null) {
            throw new IllegalArgumentException("Missing required configuration property: " + key);
        }
        return value;
    }

    /** Prepares and indexes every ontology file directly inside {@code folder}. */
    private void indexFolder(File folder, CLSim clSim) throws IOException {
        // listFiles() yields null (not an empty array) for non-directories and I/O errors.
        File[] candidates = folder.listFiles();
        if (candidates == null) {
            throw new IOException("Cannot list ontology files in " + folder + " (not a directory or not readable)");
        }
        for (File candidate : candidates) {
            PreparedOntology prepared = prepareOntologyFile(candidate);
            if (prepared == null) {
                continue;
            }
            doIndexing(prepared.ontology, Collections.singletonList(prepared.sourceLexicon), this.scopes, clSim);
            try {
                // NOTE(review): services are looked up again after every file; the
                // reason is not evident from this class - confirm it is still needed.
                this.ontoSerializer = (OntologySerializer) Services.get(OntologySerializer.class);
                this.lef = (LabelExtractorFactory) Services.get(LabelExtractorFactory.class);
                this.tokenizerFactory = (TokenizerFactory) Services.get(TokenizerFactory.class);
                this.chunkerFactories = Services.getAll(TranslationPhraseChunkerFactory.class);
            } catch (Exception e) {
                e.printStackTrace();
                // Stop here: the caller closes the translation source, so further
                // files must not be indexed into it.
                break;
            }
        }
    }

    /** Collects the non-null chunkers the factories provide for the ontology and language. */
    private List<TranslationPhraseChunker> getChunkers(Iterable<TranslationPhraseChunkerFactory> factories, Ontology ontology, Language language) {
        List<TranslationPhraseChunker> chunkers = new ArrayList<TranslationPhraseChunker>();
        for (TranslationPhraseChunkerFactory factory : factories) {
            TranslationPhraseChunker chunker = factory.getPhraseChunker(ontology, language);
            if (chunker != null) {
                chunkers.add(chunker);
            }
        }
        return chunkers;
    }

    /**
     * Indexes translation candidates for every entry of every given lexicon.
     * A lexicon is skipped (with a warning) when no tokenizer or no chunker is
     * available for its language.
     *
     * @param ontology ontology the lexicon senses refer to
     * @param lexica   lexica whose entries are indexed
     * @param scopeUris entity URIs to restrict indexing to; {@code null} or empty means all
     * @param clSim    passed through to {@code indexCandidates}
     */
    protected void doIndexing(Ontology ontology, Collection<Lexicon> lexica, Collection<URI> scopeUris, CLSim clSim) {
        for (Lexicon lexicon : lexica) {
            Language language = Language.get(lexicon.getLanguage());
            Script[] knownScripts = Script.getKnownScriptsForLanguage(language);
            // Fall back to Latin when the language has no known script.
            Script script = (knownScripts == null || knownScripts.length == 0) ? Script.LATIN : knownScripts[0];
            Tokenizer tokenizer = this.tokenizerFactory.getTokenizer(script);
            if (tokenizer == null) {
                Messages.warning("Skipping translations from " + language + " as no tokenizer available");
                continue;
            }
            List<TranslationPhraseChunker> chunkers = getChunkers(this.chunkerFactories, ontology, language);
            if (chunkers.isEmpty()) {
                Messages.warning("Skipping translations from " + language + " as no chunker available");
                continue;
            }
            // Fetch the entries once instead of once per iteration for the progress line.
            Collection<LexicalEntry> entries = lexicon.getEntrys();
            int total = entries.size();
            int index = 0;
            for (LexicalEntry entry : entries) {
                System.out.println("Entry " + index + " out of " + total);
                index++;
                indexEntry(ontology, entry, scopeUris, clSim, tokenizer, chunkers);
            }
        }
    }

    /** Indexes the label of one entry once for each sense that resolves to an in-scope entity. */
    private void indexEntry(Ontology ontology, LexicalEntry entry, Collection<URI> scopeUris, CLSim clSim, Tokenizer tokenizer, List<TranslationPhraseChunker> chunkers) {
        Collection<LexicalSense> senses = entry.getSenses();
        if (senses == null) {
            Messages.translationFail(entry.getURI(), "entry has no senses");
            return; // nothing to iterate; continuing would dereference null
        }
        for (LexicalSense sense : senses) {
            if (sense.getReference() == null) {
                Messages.warning("Sense with null reference for " + entry.getURI());
            } else if (getBestEntity(ontology.getEntities(sense.getReference()), scopeUris) == null) {
                Messages.warning("Sense for " + entry.getURI() + " (" + sense.getReference() + ") not found in ontology ");
            } else {
                Label label = getLabel(entry, tokenizer);
                if (label == null) {
                    Messages.warning("No usable label for " + entry.getURI() + ", skipping entry");
                    // The label depends only on the entry, so the remaining senses
                    // would fail in exactly the same way.
                    return;
                }
                indexLabel(label, tokenizer, chunkers, clSim);
            }
        }
    }

    /** Chunks the label with every chunker and indexes each resulting chunk. */
    private void indexLabel(Label label, Tokenizer tokenizer, List<TranslationPhraseChunker> chunkers, CLSim clSim) {
        eu.monnetproject.translation.sources.common.ChunkListImpl chunks = new eu.monnetproject.translation.sources.common.ChunkListImpl();
        for (TranslationPhraseChunker chunker : chunkers) {
            chunks.addAll(chunker.chunk(tokenizer.tokenize(label.asString())));
        }
        Iterator<Chunk> chunkIterator = chunks.iterator();
        while (chunkIterator.hasNext()) {
            this.translationSource.indexCandidates(chunkIterator.next(), clSim);
        }
    }

    /**
     * Builds a label from the written representation of the entry's canonical form.
     *
     * @return the label, or {@code null} when the entry has no canonical form, no
     *         written representation, or a language code that cannot be parsed
     */
    private Label getLabel(LexicalEntry lexicalEntry, Tokenizer tokenizer) {
        LexicalForm canonicalForm = lexicalEntry.getCanonicalForm();
        if (canonicalForm == null) {
            return null;
        }
        Text writtenRep = canonicalForm.getWrittenRep();
        if (writtenRep == null) {
            return null;
        }
        try {
            return new SimpleLabel(writtenRep.value, Language.get(writtenRep.language), tokenizer);
        } catch (LanguageCodeFormatException e) {
            // Still reported as "no label", but no longer silently.
            Messages.warning("Ignoring label of " + lexicalEntry.getURI() + ": bad language code \"" + writtenRep.language + "\" (" + e.getMessage() + ")");
            return null;
        }
    }

    /**
     * Picks the preferred entity among the candidates that have a URI and are in
     * scope. A class wins immediately; otherwise the preference order is object
     * property, datatype property, annotation property, individual, anything
     * else. Among candidates of the same kind the last one iterated wins, except
     * for the "anything else" kind where the first one is kept.
     *
     * @param candidates entities to choose from
     * @param scopeUris  URIs to restrict to; {@code null} or empty means no restriction
     * @return the preferred entity, or {@code null} if no candidate qualifies
     */
    private Entity getBestEntity(Collection<Entity> candidates, Collection<URI> scopeUris) {
        Entity best = null;
        for (Entity candidate : candidates) {
            if (candidate.getURI() == null) {
                continue;
            }
            boolean inScope = scopeUris == null || scopeUris.isEmpty() || scopeUris.contains(candidate.getURI());
            if (!inScope) {
                continue;
            }
            if (candidate instanceof Class) {
                return candidate;
            }
            if (candidate instanceof ObjectProperty) {
                best = candidate;
            } else if ((candidate instanceof DatatypeProperty) && (best == null || !(best instanceof ObjectProperty))) {
                best = candidate;
            } else if ((candidate instanceof AnnotationProperty) && (best == null || !((best instanceof ObjectProperty) || (best instanceof DatatypeProperty)))) {
                best = candidate;
            } else if ((candidate instanceof Individual) && (best == null || !((best instanceof ObjectProperty) || (best instanceof DatatypeProperty) || (best instanceof AnnotationProperty)))) {
                best = candidate;
            } else if (best == null) {
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Reads and lexicalizes one ontology file.
     *
     * @param file candidate file; skipped unless its name ends with one of
     *             {@code rdf}, {@code owl}, {@code ttl}, {@code xml}, {@code nt}
     * @return the prepared ontology, or {@code null} when the file is skipped or
     *         lacks a lexicon in the source or the target language
     * @throws IOException if the file cannot be opened, read or closed
     */
    protected PreparedOntology prepareOntologyFile(File file) throws IOException {
        if (!hasOntologySuffix(file.getName())) {
            Messages.warning("Skipping " + file.getName());
            return null;
        }
        Messages.info("Reading " + file);
        Ontology ontology;
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm the ontology files are stored in that encoding.
        FileReader reader = new FileReader(file);
        try {
            ontology = this.ontoSerializer.read(reader, file.toURI());
        } finally {
            reader.close(); // previously leaked one file handle per ontology
        }
        SimpleLexicalizer lexicalizer = new SimpleLexicalizer(this.lef);
        Collection<Lexicon> lexica = lexicalizer.lexicalize(ontology);
        Lexicon sourceLexicon = null;
        Lexicon referenceLexicon = null;
        for (Lexicon lexicon : lexica) {
            Language lexiconLanguage = Language.get(lexicon.getLanguage());
            if (lexiconLanguage.equals(this.sourceLanguage)) {
                sourceLexicon = lexicon;
            } else if (lexiconLanguage.equals(this.targetLanguage)) {
                referenceLexicon = lexicon;
            }
        }
        if (sourceLexicon == null || referenceLexicon == null) {
            Messages.warning("No source lexicon created or no references available");
            return null;
        }
        Lexicon blankTargetLexicon = lexicalizer.getBlankLexicon(ontology, this.targetLanguage);
        Messages.info("Translating " + ontology.getEntities().size() + " entities");
        return new PreparedOntology(lexica, sourceLexicon, blankTargetLexicon, referenceLexicon, ontology, file.getName());
    }

    /** Tells whether the file name ends with one of the accepted ontology suffixes. */
    private static boolean hasOntologySuffix(String fileName) {
        for (String suffix : ONTOLOGY_SUFFIXES) {
            if (fileName.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    }
}
