package org.codelibs.elasticsearch.vi.nlp.tokenizer.tools;

import java.io.File;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.elasticsearch.vi.nlp.utils.FileIterator;
import org.codelibs.elasticsearch.vi.nlp.utils.TextFileFilter;
import org.codelibs.elasticsearch.vi.nlp.utils.UTF8FileUtility;

/* loaded from: input_file:org/codelibs/elasticsearch/vi/nlp/tokenizer/tools/TokenizedToRawConverter.class */
/**
 * Command-line style utility that turns tokenized text back into raw text.
 *
 * <p>For every input line, underscores are replaced with spaces, whitespace runs are
 * collapsed, the spaces that tokenization left around punctuation, parentheses and
 * curly quotes are removed, and the content between straight double quotes is trimmed.
 *
 * <p>This class only exposes static methods and cannot be instantiated.
 */
public class TokenizedToRawConverter {
    private static final Logger logger = LogManager.getLogger(TokenizedToRawConverter.class);

    // The patterns below are compiled once instead of on every processed line.
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern SPACE_BEFORE_PERIOD = Pattern.compile("\\s+\\.");
    private static final Pattern SPACE_BEFORE_COMMA = Pattern.compile("\\s+,");
    private static final Pattern SPACE_BEFORE_EXCLAMATION = Pattern.compile("\\s+\\!");
    private static final Pattern SPACE_BEFORE_QUESTION = Pattern.compile("\\s+\\?");
    private static final Pattern SPACE_BEFORE_COLON = Pattern.compile("\\s+:");
    private static final Pattern SPACE_BEFORE_CLOSE_PAREN = Pattern.compile("\\s+\\)");
    private static final Pattern SPACE_AFTER_OPEN_PAREN = Pattern.compile("\\(\\s+");
    private static final Pattern SPACE_BEFORE_CLOSE_CURLY_QUOTE = Pattern.compile("\\s+”");
    private static final Pattern SPACE_AFTER_OPEN_CURLY_QUOTE = Pattern.compile("“\\s+");

    private TokenizedToRawConverter() {
    }

    /**
     * Collapses whitespace runs to a single space and removes the whitespace that
     * precedes {@code . , ! ? : )} and {@code ”}, and that follows {@code (} and {@code “}.
     *
     * @param str the line to clean up
     * @return the line with the spacing around punctuation normalized
     */
    private static String postProcess(String str) {
        String result = WHITESPACE_RUN.matcher(str).replaceAll(" ");
        result = SPACE_BEFORE_PERIOD.matcher(result).replaceAll(".");
        result = SPACE_BEFORE_COMMA.matcher(result).replaceAll(",");
        result = SPACE_BEFORE_EXCLAMATION.matcher(result).replaceAll("!");
        result = SPACE_BEFORE_QUESTION.matcher(result).replaceAll("?");
        result = SPACE_BEFORE_COLON.matcher(result).replaceAll(":");
        result = SPACE_BEFORE_CLOSE_PAREN.matcher(result).replaceAll(")");
        result = SPACE_AFTER_OPEN_PAREN.matcher(result).replaceAll("(");
        result = SPACE_BEFORE_CLOSE_CURLY_QUOTE.matcher(result).replaceAll("”");
        result = SPACE_AFTER_OPEN_CURLY_QUOTE.matcher(result).replaceAll("“");
        return result;
    }

    /**
     * Trims the text found between pairs of straight double quotes, so that
     * {@code say " hello " now} becomes {@code say "hello" now}.
     *
     * <p>Segments at even positions (outside quotes) are copied unchanged; segments at odd
     * positions (inside quotes) are trimmed and re-wrapped. If the final segment is a
     * quoted one, the quotation was never closed in the input, so no closing quote is
     * appended.
     *
     * @param str the line to process
     * @return the line with whitespace inside straight double quotes trimmed
     */
    private static String postProcessQuotation(String str) {
        StringBuilder result = new StringBuilder(str.length());
        // A negative limit keeps trailing empty segments. Without it, a line that ends
        // with a closing quote loses that quote, because the empty segment after it is
        // discarded and the quoted segment is then mistaken for an unterminated one.
        String[] segments = str.split("\"", -1);
        for (int i = 0; i < segments.length; i++) {
            if (i % 2 == 0) {
                result.append(segments[i]);
            } else {
                result.append('"');
                result.append(segments[i].trim());
                if (i < segments.length - 1) {
                    result.append('"');
                }
            }
        }
        return result.toString();
    }

    /**
     * Converts one tokenized file into a raw-text file, line by line.
     *
     * <p>Each line has its underscores replaced by spaces, is passed through the spacing
     * and quotation clean-up, and is written followed by a newline character.
     *
     * @param str path of the tokenized input file
     * @param str2 path of the raw output file
     */
    public static void convertFile(String str, String str2) {
        String[] lines = UTF8FileUtility.getLines(str);
        UTF8FileUtility.createWriter(str2);
        try {
            for (String line : lines) {
                String raw = postProcessQuotation(postProcess(line.replace('_', ' ')));
                UTF8FileUtility.write(raw + "\n");
            }
        } finally {
            // Always release the writer, even if processing or writing a line fails.
            UTF8FileUtility.closeWriter();
        }
    }

    /**
     * Converts every file selected by {@link TextFileFilter} under the input directory,
     * writing each result to a file of the same name in the output directory, and logs
     * the number of files converted.
     *
     * @param str path of the directory holding the tokenized files
     * @param str2 path of the directory that receives the raw files
     */
    public static void convertDirectory(String str, String str2) {
        // NOTE(review): assumes FileIterator.listFiles never returns null - confirm.
        File[] listFiles = FileIterator.listFiles(new File(str), new TextFileFilter());
        for (File file : listFiles) {
            convertFile(file.getAbsolutePath(), str2 + File.separator + file.getName());
        }
        logger.info("Converted {} files.", listFiles.length);
    }

    /**
     * Converts the tokenized corpus directory into its raw counterpart, using fixed
     * relative paths.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        convertDirectory("data/VTB-20090712/VTB-20090712-TOK", "data/VTB-20090712/VTB-20090712-RAW");
    }
}
