package org.cleartk.timeml.tlink;

import java.io.File;
import java.io.IOException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.corpus.timeml.PlainTextTLINKGoldAnnotator;
import org.cleartk.corpus.timeml.TimeMLGoldAnnotator;
import org.cleartk.corpus.timeml.TreebankAligningAnnotator;
import org.cleartk.timeml.TimeMLViewName;
import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
import org.cleartk.util.cr.FilesCollectionReader;
import org.uimafit.factory.UimaContextFactory;
import org.uimafit.pipeline.SimplePipeline;

/* loaded from: input_file:org/cleartk/timeml/tlink/VerbClauseTemporalTrain.class */
/**
 * Command-line entry point that trains the {@link VerbClauseTemporalAnnotator} model.
 *
 * <p>It copies the TimeBank files into a temporary directory while applying a set of textual
 * corrections, runs the gold-annotation/feature-writing pipeline over the {@code wsj_*.tml}
 * files of that copy, trains and packages the classifier, and finally removes every plain file
 * from the training directory except the packaged model jar.
 *
 * <p>Usage: {@code VerbClauseTemporalTrain timebank-dir treebank-dir}
 */
public class VerbClauseTemporalTrain {
    /**
     * Logs the given message followed by the usage line at SEVERE level and terminates the JVM
     * with exit status 1.
     *
     * @param str description of what was wrong with the command line
     * @throws Exception if the UIMA context (used only to obtain a logger) cannot be created
     */
    private static void error(String str) throws Exception {
        UimaContextFactory.createUimaContext(new Object[0]).getLogger().log(Level.SEVERE, String.format("%s\nusage: VerbClauseTemporalTrain timebank-dir treebank-dir", str));
        System.exit(1);
    }

    /**
     * Trains and packages the verb-clause temporal relation model.
     *
     * @param strArr exactly two arguments: the TimeBank directory and the TreeBank directory
     * @throws Exception if argument validation, the pipeline, training or cleanup fails
     */
    public static void main(String[] strArr) throws Exception {
        // error(...) calls System.exit, so execution only continues past this chain when the
        // arguments are acceptable.
        if (strArr.length != 2) {
            error("wrong number of arguments");
        } else if (!new File(strArr[0]).exists()) {
            error("TimeBank directory not found: " + strArr[0]);
        } else if (!new File(strArr[1]).exists()) {
            error("TreeBank directory not found: " + strArr[1]);
        }
        String timeBankDir = strArr[0];
        String treeBankDir = strArr[1];

        File cleanedTimeBankDir = getCleanedTimeBankDir(timeBankDir);
        boolean pipelineSucceeded = false;
        try {
            SimplePipeline.runPipeline(FilesCollectionReader.getCollectionReaderWithPatterns(cleanedTimeBankDir.getPath(), TimeMLViewName.TIMEML, new String[]{"wsj_.*[.]tml"}), new AnalysisEngineDescription[]{TimeMLGoldAnnotator.getDescriptionNoTLINKs(), PlainTextTLINKGoldAnnotator.getDescription(), TreebankAligningAnnotator.getDescription(treeBankDir), DefaultSnowballStemmer.getDescription("English"), VerbClauseTemporalAnnotator.FACTORY.getWriterDescription()});
            pipelineSucceeded = true;
        } finally {
            // Always remove the temporary copy, even when the pipeline fails. A cleanup failure
            // is only propagated when the pipeline itself succeeded, so that it never hides the
            // original pipeline exception.
            try {
                FileUtils.deleteRecursive(cleanedTimeBankDir);
            } catch (IOException e) {
                if (pipelineSucceeded) {
                    throw e;
                }
            }
        }

        File trainingDirectory = VerbClauseTemporalAnnotator.FACTORY.getTrainingDirectory();
        JarClassifierBuilder.trainAndPackage(trainingDirectory, new String[0]);

        // Keep only the packaged model jar (and any sub-directories); the remaining plain files
        // in the training directory are removed on a best-effort basis.
        File modelJarFile = JarClassifierBuilder.getModelJarFile(trainingDirectory);
        File[] trainingFiles = trainingDirectory.listFiles();
        if (trainingFiles != null) { // listFiles() returns null if the directory cannot be read
            for (File file : trainingFiles) {
                if (!file.isDirectory() && !file.equals(modelJarFile)) {
                    file.delete();
                }
            }
        }
    }

    /**
     * Creates a temporary directory containing a corrected copy of every non-hidden file found
     * directly inside the given TimeBank directory.
     *
     * <p>Each file is read as text, its ampersand escapes are repaired, runs of {@code ---} are
     * removed, and the file-specific corrections of {@link #fixTextByFileName(String, String)}
     * are applied before it is written under the same name into the new directory. The caller is
     * responsible for deleting the returned directory.
     *
     * @param str path of the original TimeBank directory
     * @return the newly created temporary directory holding the cleaned files
     * @throws IOException if the source directory cannot be listed, the temporary directory
     *         cannot be created, or a file cannot be read or written
     */
    public static File getCleanedTimeBankDir(String str) throws IOException {
        File[] timeBankFiles = new File(str).listFiles();
        if (timeBankFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read
            throw new IOException("cannot list files of TimeBank directory: " + str);
        }

        // createTempFile is used only to reserve a unique name; the file is then replaced by a
        // directory of the same name.
        File cleanedDir = File.createTempFile("TimeBank", "Cleaned");
        if (!cleanedDir.delete() || !cleanedDir.mkdir()) {
            throw new IOException("cannot create temporary directory: " + cleanedDir);
        }

        for (File file : timeBankFiles) {
            String name = file.getName();
            if (file.isHidden() || name.startsWith(".")) {
                continue;
            }
            String text = FileUtils.file2String(file);
            // The order of these replacements matters: the first one also rewrites the "amp"
            // inside already-correct "&amp;" entities into "&&amp;;", which the third one then
            // collapses back to "&amp;".
            text = text.replaceAll("\\bamp\\b", "&amp;");
            text = text.replaceAll("SampP", "S&amp;P");
            text = text.replaceAll("&&amp;;", "&amp;");
            text = text.replaceAll("---", "");
            FileUtils.saveString2File(fixTextByFileName(name, text), new File(cleanedDir, name));
        }
        return cleanedDir;
    }

    /**
     * Applies hand-written corrections to the text of specific TimeBank files, selected by exact
     * file name. Text belonging to any other file name is returned unchanged.
     *
     * <p>NOTE(review): the corrections presumably make the TimeBank text match the corresponding
     * TreeBank text so that the two can be aligned - confirm against the aligning annotator.
     *
     * @param str file name (without directory), e.g. {@code wsj_0032.tml}
     * @param str2 full text of that file
     * @return the corrected text
     */
    public static String fixTextByFileName(String str, String str2) {
        if (str.equals("wsj_0032.tml")) {
            str2 = str2.replace("the <TIMEX3 tid=\"t18\"", "<TIMEX3 tid=\"t18\"");
        } else if (str.equals("wsj_0159.tml")) {
            str2 = str2.replace("Acquisition has <EVENT eid=\"e11\"", "DD Acquisition has <EVENT eid=\"e11\"").replace("Acquisition <EVENT eid=\"e20\"", "DD Acquisition <EVENT eid=\"e20\"");
        } else if (str.equals("wsj_0266.tml")) {
            str2 = str2.replace("BRUCE R. BENT", "");
        } else if (str.equals("wsj_0344.tml")) {
            str2 = str2.replace(" 30</TIMEX3>.", "</TIMEX3>");
        } else if (str.equals("wsj_0376.tml")) {
            str2 = str2.replace("roughly off", "off roughly");
        } else if (str.equals("wsj_0586.tml")) {
            // Removes everything from an '@' up to the end of its line, provided the removed
            // span does not run across a "</HL>" tag.
            str2 = str2.replaceAll("(?m)@((?!</HL>).)*?$", "");
        } else if (str.equals("wsj_0612.tml")) {
            str2 = str2.replace("@ <ENAMEX TYPE=\"ORGANIZATION\">CORPORATES", "<ENAMEX TYPE=\"ORGANIZATION\">").replace("@ <ENAMEX TYPE=\"ORGANIZATION\">EUROBONDS", "<ENAMEX TYPE=\"ORGANIZATION\">");
        } else if (str.equals("wsj_0667.tml")) {
            str2 = str2.replace("1988</TIMEX3>.", "</TIMEX3>");
        } else if (str.equals("wsj_0675.tml")) {
            str2 = str2.replace("Markets</ENAMEX>", "Markets</ENAMEX> --").replace("19.29</CARDINAL>.", "</CARDINAL>");
        } else if (str.equals("wsj_0781.tml")) {
            str2 = str2.replace("not definitely", "definitely not");
        } else if (str.equals("wsj_1003.tml")) {
            str2 = str2.replace("a shhha55 cents a share,   ents a share, but  ssa share", "a share").replace("steel business, <EVENT eid=\"e109\"", "Armco, hampered by lower volume in its specialty steel business, <EVENT eid=\"e109\"");
        }
        return str2;
    }
}
