package org.chrisjr.topic_annotator.corpora;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TargetStringToFeatures;
import cc.mallet.pipe.TokenSequence2FeatureSequence;
import cc.mallet.types.InstanceList;
import java.io.File;
import java.io.PrintWriter;
import scala.Predef$;
import scala.collection.GenSeq$;
import scala.collection.Map;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableOnce;
import scala.collection.generic.GenericTraversableTemplate;
import scala.collection.immutable.Iterable$;
import scala.collection.mutable.StringBuilder;
import scala.runtime.RichInt$;

/* compiled from: CorpusConversions.scala */
/* loaded from: input_file:org/chrisjr/topic_annotator/corpora/CorpusConversions$.class */
/**
 * Singleton holder for the corpus export routines compiled from the Scala
 * object {@code CorpusConversions}. Callers reach the methods through
 * {@link #MODULE$}.
 *
 * <p>Each method takes a {@code Corpus} and writes a representation of it to
 * disk: a tab-separated vocabulary statistics table, a set of
 * {@code .ldac}/{@code .vocab}/{@code .dmap} files, or a pickled MALLET
 * {@link InstanceList}.
 */
public final class CorpusConversions$ {
    /**
     * The single instance of this class. Initialized directly so the field is
     * a properly assigned {@code static final} (a final field cannot legally be
     * assigned from inside the constructor).
     */
    public static final CorpusConversions$ MODULE$ = new CorpusConversions$();

    /** Not instantiable from outside; use {@link #MODULE$}. */
    private CorpusConversions$() {
    }

    /**
     * Writes a tab-separated table of per-word statistics to the file named
     * {@code str}, encoded as UTF-8.
     *
     * <p>The first line is the header
     * {@code word, tfOverall, tfMaxima, df, tfidf, logent} joined by tabs. The
     * remaining output is produced by a closure that is invoked once for every
     * index in {@code [0, vocabulary map size)} and receives the tab separator,
     * the writer, and the six column maps in header order (presumably emitting
     * one row per index; the closure body is not visible here).
     *
     * @param corpus the corpus to score
     * @param str    path of the output file
     */
    public void toVocab(Corpus corpus, String str) {
        CorpusScorer corpusScorer = new CorpusScorer(corpus, CorpusScorer$.MODULE$.$lessinit$greater$default$2());

        // One map per output column, all derived from the scorer before the
        // output file is opened.
        Map words = ((TraversableOnce) corpusScorer.vocab().toSeq().map(new CorpusConversions$$anonfun$1(), Seq$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.conforms());
        Map tfOverall = corpusScorer.tfOverall().mapValues(new CorpusConversions$$anonfun$2());
        Map tfMaxima = corpusScorer.tfMaxima().mapValues(new CorpusConversions$$anonfun$3());
        Map df = corpusScorer.df().mapValues(new CorpusConversions$$anonfun$4());
        Map tfidf = ((TraversableOnce) corpusScorer.tfidf().map(new CorpusConversions$$anonfun$5(corpusScorer), Iterable$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.conforms());
        Map logent = ((TraversableOnce) corpusScorer.logent().map(new CorpusConversions$$anonfun$6(corpusScorer), Seq$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.conforms());

        Seq headers = Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"word", "tfOverall", "tfMaxima", "df", "tfidf", "logent"}));
        Seq columns = Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Map[]{words, tfOverall, tfMaxima, df, tfidf, logent}));

        PrintWriter printWriter = new PrintWriter(str, "UTF-8");
        try {
            printWriter.println(headers.mkString("\t"));
            RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), words.size()).foreach$mVc$sp(new CorpusConversions$$anonfun$toVocab$1("\t", printWriter, columns));
        } finally {
            // Close even when a row closure throws, so the handle is not leaked.
            printWriter.close();
        }
    }

    /**
     * Writes three sibling files next to {@code file}, each named by appending
     * a suffix to {@code file}'s name: {@code .ldac}, {@code .vocab} and
     * {@code .dmap}.
     *
     * <ul>
     *   <li>{@code .ldac}: fed by a closure applied to every entry of the
     *       scorer's {@code tfs()} sequence.</li>
     *   <li>{@code .vocab}: fed by a closure applied to every entry of the
     *       scorer's {@code vocabArray()}.</li>
     *   <li>{@code .dmap}: fed by a closure applied to a value mapped from each
     *       document of the corpus.</li>
     * </ul>
     *
     * <p>NOTE(review): unlike {@link #toVocab}, these writers use the platform
     * default charset; confirm whether UTF-8 is intended before changing it,
     * since readers of these files may rely on the current encoding.
     *
     * @param corpus the corpus to export
     * @param file   base file whose parent directory and name determine the
     *               output locations
     * @return the {@code .ldac} file
     */
    public File toLDAC(Corpus corpus, File file) {
        File vocabFile = mkFile$1(file, ".vocab");
        File ldacFile = mkFile$1(file, ".ldac");
        File dmapFile = mkFile$1(file, ".dmap");

        CorpusScorer corpusScorer = new CorpusScorer(corpus, CorpusScorer$.MODULE$.$lessinit$greater$default$2());
        Seq<String> vocabArray = corpusScorer.vocabArray();
        Seq<scala.collection.mutable.Map<Object, Object>> tfs = corpusScorer.tfs();

        // Each writer is closed in a finally block so that a failure while
        // writing one file does not leak its handle.
        PrintWriter ldacWriter = new PrintWriter(ldacFile);
        try {
            tfs.foreach(new CorpusConversions$$anonfun$toLDAC$1(ldacWriter));
        } finally {
            ldacWriter.close();
        }

        PrintWriter vocabWriter = new PrintWriter(vocabFile);
        try {
            vocabArray.foreach(new CorpusConversions$$anonfun$toLDAC$2(vocabWriter));
        } finally {
            vocabWriter.close();
        }

        PrintWriter dmapWriter = new PrintWriter(dmapFile);
        try {
            ((GenericTraversableTemplate) corpus.documents().map(new CorpusConversions$$anonfun$toLDAC$3(), GenSeq$.MODULE$.canBuildFrom())).foreach(new CorpusConversions$$anonfun$toLDAC$4(dmapWriter));
        } finally {
            dmapWriter.close();
        }

        return ldacFile;
    }

    /**
     * Builds a MALLET {@link InstanceList} whose pipe is a
     * {@link TokenSequence2FeatureSequence}, hands every document of the corpus
     * to a closure that receives that list (presumably adding the document as
     * an instance), and pickles the list to {@code file}.
     *
     * @param corpus the corpus to export
     * @param file   destination of the pickled instance list
     * @return {@code file}, unchanged
     */
    public File toMalletInstances(Corpus corpus, File file) {
        InstanceList instanceList = new InstanceList(new TokenSequence2FeatureSequence());
        corpus.documents().foreach(new CorpusConversions$$anonfun$toMalletInstances$1(instanceList));
        Util$.MODULE$.pickle(file, instanceList, Predef$.MODULE$.conforms());
        return file;
    }

    /**
     * Same flow as {@link #toMalletInstances}, except the instance list's pipe
     * is a {@link SerialPipes} chain of {@link TargetStringToFeatures} followed
     * by {@link TokenSequence2FeatureSequence}.
     *
     * @param corpus the corpus to export
     * @param file   destination of the pickled instance list
     * @return {@code file}, unchanged
     */
    public File toDmrInstances(Corpus corpus, File file) {
        Pipe[] stages = new Pipe[]{new TargetStringToFeatures(), new TokenSequence2FeatureSequence()};
        InstanceList instanceList = new InstanceList(new SerialPipes(stages));
        corpus.documents().foreach(new CorpusConversions$$anonfun$toDmrInstances$1(instanceList));
        Util$.MODULE$.pickle(file, instanceList, Predef$.MODULE$.conforms());
        return file;
    }

    /**
     * Returns a file in the same parent directory as {@code file} whose name is
     * {@code file}'s name with {@code str} appended.
     */
    private final File mkFile$1(File file, String str) {
        return new File(file.getParentFile(), file.getName() + str);
    }
}
