package org.codelibs.elasticsearch.vi.nlp.tokenizer.tools;

import java.io.File;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.elasticsearch.vi.nlp.fsm.IConstants;
import org.codelibs.elasticsearch.vi.nlp.utils.FileIterator;
import org.codelibs.elasticsearch.vi.nlp.utils.TextFileFilter;
import org.codelibs.elasticsearch.vi.nlp.utils.UTF8FileUtility;

/* loaded from: input_file:org/codelibs/elasticsearch/vi/nlp/tokenizer/tools/TaggedToTokenizedConverter.class */
/**
 * Utility that converts tagged corpus files ({@code .pos}) into tokenized text files
 * ({@code .txt}).
 *
 * <p>Every input line is split on whitespace; from each item only the text before its first
 * {@code '/'} is kept. The kept pieces are joined with single spaces, the marker strings
 * {@code *E*}, {@code *T*} (and their one-sided forms) are removed, and the result is written as
 * one output line.
 *
 * <p>All reading and writing goes through the static helpers of {@link UTF8FileUtility}; this
 * class only has static methods and cannot be instantiated.
 */
public class TaggedToTokenizedConverter {
    private static final Logger logger = LogManager.getLogger(TaggedToTokenizedConverter.class);

    /** File name suffix used to select the tagged input files. */
    private static final String TAGGED_FILE_EXTENSION = ".pos";

    /** File name suffix appended to every generated output file. */
    private static final String TOKENIZED_FILE_EXTENSION = ".txt";

    /**
     * Literal marker strings stripped from every output line. The order matters: the two-sided
     * forms are removed first, then the one-sided leftovers.
     */
    private static final String[] MARKERS = {"*E*", "*T*", "*E", "E*", "*T", "T*"};

    private TaggedToTokenizedConverter() {
    }

    /**
     * Removes every occurrence of the {@link #MARKERS} strings from a line, one marker after the
     * other in array order.
     *
     * @param str the line to clean
     * @return the line with all markers replaced by {@link IConstants#EMPTY_STRING}
     */
    private static String postProcess(String str) {
        String cleaned = str;
        for (String marker : MARKERS) {
            // Literal replacement: the markers contain '*', which would otherwise need regex escaping.
            cleaned = cleaned.replace(marker, IConstants.EMPTY_STRING);
        }
        return cleaned;
    }

    /**
     * Converts one tagged file into a tokenized file.
     *
     * @param str path of the tagged input file
     * @param str2 path of the tokenized output file
     */
    public static void convertFile(String str, String str2) {
        String[] lines = UTF8FileUtility.getLines(str);
        UTF8FileUtility.createWriter(str2);
        try {
            for (String line : lines) {
                StringBuilder words = new StringBuilder();
                for (String taggedItem : line.split("\\s+")) {
                    String[] parts = taggedItem.split("/");
                    // An item made only of '/' characters splits into an empty array and is skipped.
                    // NOTE(review): an item that starts with '/' yields an empty first part here,
                    // so only the separating space is appended for it.
                    if (parts.length > 0) {
                        words.append(parts[0]).append(" ");
                    }
                }
                UTF8FileUtility.write(postProcess(words.toString().trim()) + "\n");
            }
        } finally {
            // Always release the shared writer, even when processing a line fails.
            UTF8FileUtility.closeWriter();
        }
    }

    /**
     * Converts every file with the tagged extension found under a directory, writing each result
     * into the output directory under the same base name with the tokenized extension. The base
     * name is the part of the file name before its first {@code '.'}.
     *
     * @param str path of the directory holding the tagged files
     * @param str2 path of the directory that receives the tokenized files
     */
    public static void convertDirectory(String str, String str2) {
        File[] taggedFiles = FileIterator.listFiles(new File(str), new TextFileFilter(TAGGED_FILE_EXTENSION));
        if (taggedFiles == null) {
            // FileIterator's contract is not visible here; java.io.File#listFiles returns null
            // for a path that is not a readable directory, so guard against the same outcome.
            logger.warn("Could not list files in {}", str);
            return;
        }
        for (File taggedFile : taggedFiles) {
            String name = taggedFile.getName();
            int dot = name.indexOf('.');
            // Without a usable dot the whole name is the base name; either way the result
            // goes into the output directory.
            String baseName = dot > 0 ? name.substring(0, dot) : name;
            convertFile(taggedFile.getAbsolutePath(), str2 + File.separator + baseName + TOKENIZED_FILE_EXTENSION);
        }
        logger.info("Converted {} files.", taggedFiles.length);
    }

    /**
     * Runs the conversion on the hard-coded corpus directories.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        convertDirectory("data/VTB-20090712/VTB-20090712-POS", "data/VTB-20090712/VTB-20090712-TOK");
    }
}
