package org.clulab.processors.bionlp.ner;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.clulab.processors.bionlp.BioNLPProcessor;
import org.clulab.processors.bionlp.BioNLPProcessor$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.SeqLike;
import scala.collection.TraversableOnce;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ListBuffer;
import scala.collection.mutable.StringBuilder;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: KBGenerator.scala */
/* loaded from: input_file:org/clulab/processors/bionlp/ner/KBGenerator$.class */
public final class KBGenerator$ {
    // Singleton instance. Declared as null here; the private constructor stores `this` into it
    // (decompiled shape of a Scala `object`).
    public static final KBGenerator$ MODULE$ = null;
    // Logger handed out by logger(); set once in the constructor.
    private final Logger logger;
    // Lazily created processor; only valid once bitmap$0 is true. Read it through processor().
    private BioNLPProcessor processor;
    // Set to true after `processor` has been initialised by processor$lzycompute().
    private volatile boolean bitmap$0;

    // Instantiates the singleton when the class is loaded; the constructor publishes it via MODULE$.
    static {
        new KBGenerator$();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private BioNLPProcessor processor$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (!this.bitmap$0) {
                this.processor = new BioNLPProcessor(BioNLPProcessor$.MODULE$.$lessinit$greater$default$1(), false, false, false, false, BioNLPProcessor$.MODULE$.$lessinit$greater$default$6(), BioNLPProcessor$.MODULE$.$lessinit$greater$default$7());
                this.bitmap$0 = true;
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.processor;
        }
    }

    /**
     * Accessor for the logger created in the constructor.
     *
     * @return the logger used by this object
     */
    public Logger logger() {
        return logger;
    }

    /**
     * Returns the lazily created {@link BioNLPProcessor}.
     *
     * <p>Fast path: once {@code bitmap$0} is set, the cached field is returned without locking;
     * otherwise initialisation is delegated to {@code processor$lzycompute()}.
     *
     * @return the shared processor instance
     */
    public BioNLPProcessor processor() {
        if (!this.bitmap$0) {
            return processor$lzycompute();
        }
        return this.processor;
    }

    /**
     * Command-line entry point: loads the KB configuration and runs two passes over its entries.
     *
     * <p>The per-entry work is done by the callbacks {@code KBGenerator$$anonfun$main$1} and
     * {@code KBGenerator$$anonfun$main$2}, which are not visible here. NOTE(review): presumably
     * the first pass prepares or cleans the output location and the second calls
     * {@link #convertKB} — confirm against the Scala source.
     *
     * @param strArr at least three arguments: {@code [0]} is the configuration file passed to
     *               {@link #loadConfig}; {@code [1]} and {@code [2]} are handed to the callbacks
     *               (presumably the input and output directories)
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     */
    public void main(String[] strArr) {
        // Fail with a usage message instead of a bare ArrayIndexOutOfBoundsException.
        if (strArr == null || strArr.length < 3) {
            int supplied = (strArr == null) ? 0 : strArr.length;
            throw new IllegalArgumentException("Usage: KBGenerator <config file> <input dir> <output dir> (got " + supplied + " argument(s))");
        }
        String configFile = strArr[0];
        String inputDir = strArr[1];
        String outputDir = strArr[2];

        Seq<KBEntry> entries = loadConfig(configFile);
        logger().info("Will convert a total of {} KBs:", entries.size());

        // First pass sees only the third argument; second pass sees the second and third.
        entries.foreach(new KBGenerator$$anonfun$main$1(outputDir));
        entries.foreach(new KBGenerator$$anonfun$main$2(inputDir, outputDir));
    }

    /**
     * Reads the configuration file line by line and collects the resulting KB entries.
     *
     * <p>Each line is handed to {@code KBGenerator$$anonfun$loadConfig$1}, which receives the
     * buffer that is returned here (presumably it parses the line and appends a {@code KBEntry};
     * the callback is not visible in this file). The file is decoded with the fallback system
     * codec.
     *
     * @param str path of the configuration file
     * @return the collected entries as an immutable list
     */
    public Seq<KBEntry> loadConfig(String str) {
        ListBuffer<KBEntry> entries = new ListBuffer<>();
        scala.io.BufferedSource source = Source$.MODULE$.fromFile(str, Codec$.MODULE$.fallbackSystemCodec());
        try {
            // foreach drains the lazy line iterator completely, so closing afterwards is safe.
            source.getLines().foreach(new KBGenerator$$anonfun$loadConfig$1(entries));
        } finally {
            // The original never closed the source, leaking the file handle.
            source.close();
        }
        return entries.toList();
    }

    /**
     * Builds the output path for an entry: {@code <dir><separator><neLabel>.tsv.gz}.
     *
     * @param kBEntry entry whose {@code neLabel()} names the output file
     * @param str     directory the file is placed in
     * @return the path of the gzipped TSV output file
     */
    public String mkOutputFile(KBEntry kBEntry, String str) {
        String fileName = kBEntry.neLabel() + ".tsv.gz";
        return str + File.separator + fileName;
    }

    /**
     * Converts one KB: reads {@code <str>/<kbName>.tsv.gz}, tokenizes the first column of every
     * accepted row, and appends the sorted, de-duplicated results to the entry's output file.
     *
     * <p>Blank lines and lines starting with {@code #} are skipped, as are rows rejected by
     * {@link #containsValidSpecies}. The output file (see {@link #mkOutputFile}) is opened in
     * append mode; a header comment is written only when the file did not exist beforehand.
     *
     * <p>NOTE(review): both the reader and the writer use the platform default charset, as in
     * the original code — confirm whether UTF-8 should be forced.
     *
     * @param kBEntry entry describing the KB (name, label, species filter)
     * @param str     directory containing the input {@code .tsv.gz} file
     * @param str2    directory the output file is written to
     */
    public void convertKB(KBEntry kBEntry, String str, String str2) {
        logger().info("Converting {}...", kBEntry.kbName());

        String inputPath = str + File.separator + kBEntry.kbName() + ".tsv.gz";
        int acceptedLines = 0;
        ArrayBuffer tokenizedNames = new ArrayBuffer();

        // Each stream is its own resource so nothing leaks if a wrapping constructor or a later
        // read throws (the original only closed the reader on the success path).
        try (FileInputStream fileIn = new FileInputStream(inputPath);
             GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
             BufferedReader reader = new BufferedReader(new InputStreamReader(gzipIn))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                // Columns are taken from the untrimmed line, exactly as before.
                String[] columns = line.split("\t");
                if (containsValidSpecies(kBEntry, columns)) {
                    acceptedLines++;
                    tokenizedNames.$plus$eq(Predef$.MODULE$.refArrayOps(tokenizeResourceLine(columns[0])).mkString(" "));
                }
            }
        }

        String outputPath = mkOutputFile(kBEntry, str2);
        // Must be checked before opening the stream, since opening creates the file.
        boolean isNewFile = !new File(outputPath).exists();

        // Filter with the (externally defined) predicate, then sort and de-duplicate.
        Seq distinctNames = (Seq) ((SeqLike) ((SeqLike) tokenizedNames.toSeq().filter(new KBGenerator$$anonfun$1())).sorted(Ordering$String$.MODULE$)).distinct();

        try (FileOutputStream fileOut = new FileOutputStream(outputPath, true);
             GZIPOutputStream gzipOut = new GZIPOutputStream(fileOut);
             PrintWriter writer = new PrintWriter(gzipOut)) {
            if (isNewFile) {
                writer.println("# Created by " + getClass().getName() + " on " + now() + ".");
            }
            writer.print(distinctNames.mkString("\n"));
            writer.println();
        }

        logger().info("Done. Read {} lines ({} distinct) from {}", acceptedLines, distinctNames.size(), kBEntry.kbName());
    }

    /**
     * Formats the current date and time as {@code yyyy/MM/dd HH:mm:ss}.
     *
     * @return the timestamp string for the moment of the call
     */
    public String now() {
        // A fresh formatter per call: SimpleDateFormat is not safe to share between threads.
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        Date currentTime = new Date();
        return timestampFormat.format(currentTime);
    }

    /**
     * Tokenizes one piece of resource text with the shared processor.
     *
     * <p>Steps: preprocess the text, run the processor's tokenizer (the variant without sentence
     * splitting) over it, post-process the resulting tokens, and map each token to a string via
     * {@code KBGenerator$$anonfun$tokenizeResourceLine$1} (not visible here — presumably it
     * extracts the token's word form).
     *
     * @param str the text to tokenize
     * @return one string per post-processed token
     */
    public String[] tokenizeResourceLine(String str) {
        BioNLPProcessor nlp = processor();

        Annotation document = new Annotation(nlp.preprocessText(str));
        nlp.tokenizerWithoutSentenceSplitting().annotate(document);

        // Copy the annotated Java token list into a CoreLabel[] via the Scala converters.
        List annotatedTokens = (List) document.get(CoreAnnotations.TokensAnnotation.class);
        CoreLabel[] rawTokens = (CoreLabel[]) ((TraversableOnce) JavaConverters$.MODULE$.asScalaBufferConverter(annotatedTokens).asScala()).toArray(ClassTag$.MODULE$.apply(CoreLabel.class));

        return (String[]) Predef$.MODULE$.refArrayOps(nlp.postprocessTokens(rawTokens)).map(new KBGenerator$$anonfun$tokenizeResourceLine$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
    }

    /**
     * Decides whether a KB row passes the entry's species filter.
     *
     * <p>An entry with no species restriction accepts every row. Otherwise the second column
     * ({@code strArr[1]}) must be one of the entry's valid species. Rows with fewer than two
     * columns are rejected rather than raising {@code ArrayIndexOutOfBoundsException}; this
     * happens, for example, when the species column is empty, because {@code String.split}
     * drops trailing empty fields.
     *
     * @param kBEntry entry whose {@code validSpecies()} defines the filter
     * @param strArr  the tab-separated columns of one KB row
     * @return {@code true} if the row should be kept
     */
    public boolean containsValidSpecies(KBEntry kBEntry, String[] strArr) {
        if (kBEntry.validSpecies().isEmpty()) {
            return true;
        }
        // No species column present: the row cannot match a non-empty filter.
        if (strArr.length < 2) {
            return false;
        }
        return kBEntry.validSpecies().contains(strArr[1]);
    }

    /**
     * Private constructor, invoked once from the static initializer. Publishes the instance
     * through {@code MODULE$} and creates the logger.
     */
    private KBGenerator$() {
        MODULE$ = this;
        // NOTE(review): the logger is named after BioNLPProcessor, not this class — confirm that is intended.
        this.logger = LoggerFactory.getLogger(BioNLPProcessor.class);
    }
}
