package de.versley.exml.pipe;

import de.versley.exml.annotators.Annotator;
import de.versley.exml.async.Consumer;
import de.versley.exml.async.Pipeline;
import de.versley.exml.config.GlobalConfig;
import de.versley.exml.importers.Importer;
import exml.io.DocumentWriter;
import exml.tueba.TuebaDocument;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.zip.GZIPOutputStream;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.tools.ant.util.FileUtils;

/* loaded from: input_file:de/versley/exml/pipe/TextToEXML.class */
/**
 * Command-line tool that imports input files, sends them through the annotator
 * pipeline described by a {@link GlobalConfig} and writes the resulting
 * documents with {@link DocumentWriter}.
 *
 * <p>The first positional argument is either a single input file (the optional
 * second argument is the output file, default stdout) or an input directory
 * (the second argument, the output directory, is then mandatory).
 */
public class TextToEXML {
    /** File names matching this pattern are copied verbatim instead of being imported. */
    static final Pattern SPECIAL_FILES = Pattern.compile("offsets(?:_[a-z]+)?\\.txt");

    /** Command-line options understood by {@link #main(String[])}. */
    static final Options options = new Options();

    /** Configuration file that is looked up in (and, if absent, written to) the working directory. */
    private static final String CONFIG_FNAME = "exmlpipe_config.yaml";

    static {
        options.addOption("lang", true, "language (default:de)");
        options.addOption("pipeline", true, "pipeline (default: mate)");
        options.addOption("noclobber", false, "don't overwrite existing target files (default:no)");
        options.addOption("gz", false, "write gzip-compressed output");
    }

    /**
     * Imports a file with the first importer that accepts its name and manages to read it.
     *
     * <p>An {@link IOException} raised by an importer is printed and the next
     * importer is tried, so a failed import is reported to the caller as
     * {@code null}, exactly like a file no importer matches.
     *
     * @param str path of the file to import
     * @param globalConfig configuration that supplies the importers
     * @return the imported document, or {@code null} if no importer produced one
     * @throws IOException declared for callers; importer failures are handled as described above
     */
    public static TuebaDocument importFile(String str, GlobalConfig globalConfig) throws IOException {
        for (Importer importer : globalConfig.createImporters()) {
            if (importer.matchFilename(str) == null) {
                continue;
            }
            try {
                return importer.importFile(str);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return null;
    }

    /**
     * Program entry point: parses the command line, builds the pipeline and
     * annotates either a single file or a whole directory.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        CommandLine commandLine = null;
        try {
            commandLine = new PosixParser().parse(options, strArr);
        } catch (ParseException e) {
            new HelpFormatter().printHelp("TextToEXML SourceFile [DestFile]", options);
            System.exit(1);
        }
        // getGlobalConfig also verifies that at least one positional argument is present.
        GlobalConfig globalConfig = getGlobalConfig(commandLine);
        Pipeline<TuebaDocument> pipeline = new Pipeline<>();
        for (Annotator annotator : globalConfig.createAnnotators()) {
            pipeline.addStage(annotator);
        }
        pipeline.loadModels();

        List<?> positional = commandLine.getArgList();
        String str = (String) positional.get(0);
        File file = new File(str);
        if (file.isDirectory()) {
            if (positional.size() < 2) {
                System.err.println("If the first argument is a directory, you need to specify an output directory!");
                System.exit(1);
            }
            annotateDirectory(globalConfig, pipeline, file, new File((String) positional.get(1)),
                    commandLine.hasOption("noclobber"), commandLine.hasOption("gz"));
        } else {
            try {
                TuebaDocument document = importFile(str, globalConfig);
                if (document == null) {
                    System.err.println("No importer found for " + str);
                    System.exit(1);
                } else {
                    System.err.format("Input %s with %d tokens\n", str, document.size());
                }
                final OutputStream sink;
                final boolean closeSink;
                if (positional.size() > 1) {
                    String outName = (String) positional.get(1);
                    // In single-file mode compression is selected by the file extension.
                    sink = openOutput(new File(outName), outName.endsWith(".gz"));
                    closeSink = true;
                } else {
                    sink = System.out;
                    closeSink = false;
                    System.err.println("writing to stdout");
                }
                pipeline.process(document, annotated -> writeToStream(annotated, sink, closeSink));
            } catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }
        pipeline.close();
    }

    /**
     * Annotates every importable file of a directory, writing uncompressed output.
     *
     * @see #annotateDirectory(GlobalConfig, Pipeline, File, File, boolean, boolean)
     */
    public static void annotateDirectory(GlobalConfig globalConfig, Pipeline<TuebaDocument> pipeline, File file, File file2, boolean z) {
        annotateDirectory(globalConfig, pipeline, file, file2, z, false);
    }

    /**
     * Annotates every importable file of a directory.
     *
     * <p>Files matching {@link #SPECIAL_FILES} are copied unchanged. Every other
     * file is imported with the first importer that accepts its name and sent
     * through the pipeline; the result is written to
     * {@code <name>.exml.xml} (plus {@code .gz} when compressing), where
     * {@code <name>} is the value returned by the importer's
     * {@code matchFilename}.
     *
     * @param globalConfig configuration that supplies the importers
     * @param pipeline pipeline used to annotate the imported documents
     * @param file input directory
     * @param file2 output directory; created if it does not exist
     * @param z "noclobber": skip inputs whose plain or compressed output already exists
     * @param z2 write gzip-compressed output
     * @throws IllegalArgumentException if the entries of {@code file} cannot be listed
     */
    public static void annotateDirectory(GlobalConfig globalConfig, Pipeline<TuebaDocument> pipeline, File file, File file2, boolean z, boolean z2) {
        File[] entries = file.listFiles();
        if (entries == null) {
            // listFiles() yields null for non-directories and on I/O errors.
            throw new IllegalArgumentException("Cannot list input directory: " + file);
        }
        if (!file2.exists() && !file2.mkdirs()) {
            System.err.println("Could not create output directory: " + file2);
        }
        List<Importer> importers = globalConfig.createImporters();
        for (File source : entries) {
            String sourceName = source.getName();
            if (SPECIAL_FILES.matcher(sourceName).matches()) {
                copySpecialFile(source, new File(file2, sourceName));
                continue;
            }

            String baseName = null;
            TuebaDocument document = null;
            for (Importer importer : importers) {
                baseName = importer.matchFilename(sourceName);
                if (baseName == null) {
                    continue;
                }
                try {
                    TuebaDocument imported = importer.importFile(source.getPath());
                    if (imported != null) {
                        document = imported;
                        System.err.format("Input %s with %d tokens\n", baseName, document.size());
                        break;
                    }
                } catch (IOException e) {
                    // Best effort: a later importer may still be able to read the file.
                    e.printStackTrace();
                }
            }
            if (baseName == null || document == null) {
                System.err.println("No importer for file: " + sourceName);
                continue;
            }

            File plainTarget = new File(file2, baseName + ".exml.xml");
            File gzTarget = new File(plainTarget.getAbsolutePath() + ".gz");
            if (z && plainTarget.exists()) {
                try {
                    int tokens = TuebaDocument.loadDocument(plainTarget.getPath()).size();
                    System.err.format("%s is a valid exml file with %d tokens, skipping\n", plainTarget.getName(), tokens);
                } catch (IOException e) {
                    // noclobber still wins: the existing file is never overwritten.
                    System.err.format("%s exists but could not be read (%s), leaving it untouched\n", plainTarget.getName(), e);
                }
                continue;
            }
            if (z && gzTarget.exists()) {
                System.err.format("%s is a compressed exml file, skipping\n", gzTarget.getName());
                continue;
            }

            File target = z2 ? gzTarget : plainTarget;
            System.err.println("Processing: " + plainTarget.getName());
            try {
                pipeline.process(document, annotated -> writeToFile(annotated, target, z2));
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /** Copies a special file to the output directory; a failed copy terminates the program. */
    private static void copySpecialFile(File source, File target) {
        System.err.println("Copying special file:" + source.toString());
        try {
            FileUtils.getFileUtils().copyFile(source, target);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /** Opens {@code target} for writing, wrapped in a gzip stream when {@code gzip} is set. */
    private static OutputStream openOutput(File target, boolean gzip) throws IOException {
        OutputStream raw = new FileOutputStream(target);
        if (!gzip) {
            return raw;
        }
        try {
            return new GZIPOutputStream(raw);
        } catch (IOException e) {
            raw.close();
            throw e;
        }
    }

    /** Writes a document to a freshly opened file; the file is closed even if writing fails. */
    private static void writeToFile(TuebaDocument document, File target, boolean gzip) {
        try (OutputStream out = openOutput(target, gzip)) {
            DocumentWriter.writeDocument(document, out);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a document to an already open stream and flushes it.
     *
     * <p>When {@code closeWhenDone} is set the stream is closed afterwards;
     * for a {@link GZIPOutputStream} this is what emits the gzip trailer, so
     * omitting it leaves a truncated archive. NOTE(review): this assumes the
     * pipeline hands each processed document to the consumer once.
     */
    private static void writeToStream(TuebaDocument document, OutputStream out, boolean closeWhenDone) {
        try {
            DocumentWriter.writeDocument(document, out);
            out.flush();
        } catch (XMLStreamException | IOException e) {
            e.printStackTrace();
        } finally {
            if (closeWhenDone) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Loads the configuration file from the working directory (or the
     * defaults if there is none), applies the {@code lang} and
     * {@code pipeline} overrides and, if no configuration file existed,
     * saves the result. Exits with a usage message when no positional
     * argument was given.
     */
    private static GlobalConfig getGlobalConfig(CommandLine commandLine) {
        boolean configExists = new File(CONFIG_FNAME).exists();
        GlobalConfig config = configExists ? GlobalConfig.load(CONFIG_FNAME) : GlobalConfig.fromDefaults();
        if (commandLine.hasOption("lang")) {
            config.language = commandLine.getOptionValue("lang");
        }
        if (commandLine.hasOption("pipeline")) {
            config.default_pipeline = commandLine.getOptionValue("pipeline");
        }
        if (!configExists) {
            config.saveAs(CONFIG_FNAME);
        }
        if (commandLine.getArgList().size() < 1) {
            System.err.println("Not enough arguments.");
            new HelpFormatter().printHelp("TextToEXML [-noclobber] [-pipeline pipeline] SourceDir DestDir", options);
            System.exit(1);
        }
        return config;
    }
}
