package fr.erias.iamsystem_java.speed;

import fr.erias.iamsystem_java.annotation.IAnnotation;
import fr.erias.iamsystem_java.keywords.Entity;
import fr.erias.iamsystem_java.keywords.IKeyword;
import fr.erias.iamsystem_java.keywords.Terminology;
import fr.erias.iamsystem_java.matcher.Matcher;
import fr.erias.iamsystem_java.matcher.MatcherBuilder;
import fr.erias.iamsystem_java.matcher.strategy.EMatchingStrategy;
import fr.erias.iamsystem_java.tokenize.ETokenizer;
import fr.erias.iamsystem_java.tokenize.TokenizerFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.codec.EncoderException;

/* loaded from: input_file:fr/erias/iamsystem_java/speed/Main.class */
/**
 * Speed benchmark: builds a {@link Matcher} from a tab-separated terminology file, annotates every
 * file yielded by {@code TxtFiles} over a fixed folder, writes one line per annotation to an output
 * file and prints the annotation count, used memory and elapsed time to standard output.
 *
 * <p>All paths are absolute and machine-specific; they must be edited before running elsewhere.
 */
public class Main {
    /** Folder holding the terminology resources. */
    private static final String FOLDER = "/media/cossin/5980c25d-cf59-4fca-b649-c8c2f241fb1c/home/cossin15072019/Documents/DetectTerms/Detector/IAMsystemTerminos/src/main/resources/UMLS/";

    /** Terminology file loaded into the matcher. */
    private static final String FILENAME = FOLDER + "full_umls.tsv";

    /** Folder whose files are annotated by {@link #speedTest()}. */
    private static final String ARTICLES_FOLDER = "/media/cossin/5980c25d-cf59-4fca-b649-c8c2f241fb1c/workspace/DBpedia/wikipedia_articles";

    /** File receiving one {@code fileName<TAB>annotation} line per annotation. */
    private static final String OUTPUT_FILE = "/home/cossin/workspace/iamsystem_python/examples/wikipedia/java_window.txt";

    /** Matches a single double quote at the very start or the very end of a string. */
    private static final Pattern SURROUNDING_QUOTES = Pattern.compile("^\"|\"$");

    private static final double BYTES_PER_MEGABYTE = 1048576.0d;
    private static final long NANOS_PER_MILLI = 1000000L;

    private final Matcher matcher;

    /**
     * Builds the matcher used by the benchmark from {@link #FILENAME}.
     *
     * <p>The constructor is explicit because loading the terminology throws a checked
     * {@link IOException}, which a field initializer alone is not allowed to propagate.
     *
     * @throws IOException if the terminology file cannot be read
     */
    public Main() throws IOException {
        this.matcher = new MatcherBuilder()
                .tokenizer(TokenizerFactory.getTokenizer(ETokenizer.FRENCH))
                .keywords(getTerminology(FILENAME))
                .removeNestedAnnot(true)
                .strategy(EMatchingStrategy.WindowStrategy)
                .w(3)
                .build();
    }

    /**
     * Loads a tab-separated terminology file that starts with a header line. Column 1 and column 0
     * are passed, in that order, to the {@link Entity} constructor.
     *
     * @param str path of the terminology file
     * @return the keywords read from the file
     * @throws IOException if the file cannot be opened or read, or contains a row with fewer than
     *     two columns
     */
    public static Iterable<IKeyword> getTerminology(String str) throws IOException {
        // Close the stream here as well, so that it is released by the method that opened it.
        try (InputStream in = new FileInputStream(new File(str))) {
            return getUMLS(in, "\t", 1, 0, true);
        }
    }

    /**
     * Reads a delimited, UTF-8 encoded terminology from a stream. The stream is closed when this
     * method returns, whether normally or with an exception.
     *
     * @param inputStream stream to read; closed by this method
     * @param str column separator; interpreted as a regular expression by {@link String#split}
     * @param i index of the column passed as first argument to the {@link Entity} constructor
     * @param i2 index of the column passed as second argument to the {@link Entity} constructor
     * @param z {@code true} to discard the first line (header)
     * @return the terminology holding one entity per non-empty data line
     * @throws IOException if reading fails or a non-empty line has too few columns
     */
    public static Iterable<IKeyword> getUMLS(InputStream inputStream, String str, int i, int i2, boolean z) throws IOException {
        Terminology terminology = new Terminology();
        int requiredColumns = Math.max(i, i2) + 1;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            int lineNumber = 0;
            if (z) {
                reader.readLine();
                lineNumber++;
            }
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.isEmpty()) {
                    // An empty line (typically a trailing newline) carries no entity.
                    continue;
                }
                String[] columns = line.split(str);
                if (columns.length < requiredColumns) {
                    throw new IOException("Malformed terminology line " + lineNumber + ": expected at least "
                            + requiredColumns + " columns but found " + columns.length);
                }
                terminology.addKeyword(new Entity(removeQuotes(columns[i]), removeQuotes(columns[i2])));
            }
        }
        return terminology;
    }

    /**
     * Runs the benchmark with the hard-coded paths; command-line arguments are ignored.
     *
     * @param strArr unused
     * @throws IOException if the terminology, an input file or the output file cannot be accessed
     * @throws EncoderException kept in the signature for compatibility with existing callers
     */
    public static void main(String[] strArr) throws IOException, EncoderException {
        new Main().speedTest();
    }

    /** Removes one leading and one trailing double quote from {@code str}, when present. */
    private static String removeQuotes(String str) {
        return SURROUNDING_QUOTES.matcher(str).replaceAll("");
    }

    /**
     * Annotates every file returned by {@code TxtFiles} for {@link #ARTICLES_FOLDER}, writes the
     * annotations to {@link #OUTPUT_FILE} and prints per-file counts, the total count, the used
     * heap in megabytes and the elapsed time in milliseconds.
     *
     * @throws IOException if an input file cannot be read or the output file cannot be written
     */
    public void speedTest() throws IOException {
        Iterator<File> files = new TxtFiles(new File(ARTICLES_FOLDER)).getFileIterator();
        int annotationCount = 0;
        long startNanos;
        try (PrintWriter out = new PrintWriter(new File(OUTPUT_FILE))) {
            startNanos = System.nanoTime();
            System.out.println(startNanos);
            while (files.hasNext()) {
                File file = files.next();
                // Input files are decoded with the platform default charset.
                List<IAnnotation> annotations = this.matcher.annot(Files.readString(file.toPath(), Charset.defaultCharset()));
                for (IAnnotation annotation : annotations) {
                    out.write(file.getName() + "\t" + annotation.toString());
                    out.write("\n");
                }
                System.out.println(file.getName() + "\t" + annotations.size());
                annotationCount += annotations.size();
            }
            // PrintWriter never throws on write; surface any swallowed failure explicitly.
            if (out.checkError()) {
                throw new IOException("Failed to write annotations to " + OUTPUT_FILE);
            }
        }
        System.out.println("Number of annotations: " + annotationCount);
        long endNanos = System.nanoTime();
        System.out.println(endNanos);
        Runtime runtime = Runtime.getRuntime();
        System.out.println("MB: " + ((runtime.totalMemory() - runtime.freeMemory()) / BYTES_PER_MEGABYTE));
        System.out.println("Time: " + ((endNanos - startNanos) / NANOS_PER_MILLI));
    }
}
