package org.fbk.cit.hlt.core.lsa.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.io.FolderScanner;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/util/FileConverter.class */
/**
 * Command-line utility that converts every file found under an input folder
 * into one line of a single output file.
 *
 * <p>Each output line consists of the constant
 * {@code SchemaSymbols.ATTVAL_FALSE_0}, a running identifier ({@code 775 + n}
 * for the n-th file, counting from 1) and the space-separated converted tokens
 * of the file, the three fields being separated by
 * {@code StringTable.HORIZONTAL_TABULATION}.
 *
 * <p>NOTE(review): the referenced constants look like literals that a
 * decompiler mapped onto unrelated library fields (presumably {@code "0"},
 * {@code "\t"} and {@code "O"}) -- confirm against the original sources before
 * replacing them.
 */
public class FileConverter {
    static Logger logger = Logger.getLogger(FileConverter.class.getName());

    /** Matches tokens that consist solely of whitespace; compiled once instead of per token. */
    private static final Pattern WHITESPACE_ONLY = Pattern.compile("\\s+");

    /**
     * Reads the whole content of a file into a string.
     *
     * <p>The file is decoded by {@link FileReader}, i.e. with the platform
     * default charset.
     *
     * @param file the file to read
     * @return the complete text of the file
     * @throws IOException if the file cannot be opened or read
     */
    private static final String getText(File file) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                text.append(buffer, 0, count);
            }
            return text.toString();
        } finally {
            // Close even when a read fails so the file handle is never leaked.
            reader.close();
        }
    }

    /**
     * Splits a text into lower-cased tokens using the {@link Locale#US} word
     * {@link BreakIterator}. Segments made up only of whitespace are dropped;
     * every other segment (including punctuation) is kept.
     *
     * @param str the text to tokenize
     * @return the tokens in order of appearance
     */
    private static final List<String> parseText(String str) {
        List<String> tokens = new ArrayList<String>();
        BreakIterator boundaries = BreakIterator.getWordInstance(Locale.US);
        boundaries.setText(str);
        int start = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
            // Lower-case with the same locale used for tokenizing, so the result
            // does not depend on the JVM default locale (e.g. Turkish dotless i).
            String token = str.substring(start, end).toLowerCase(Locale.US);
            if (token.length() > 0 && !WHITESPACE_ONLY.matcher(token).matches()) {
                tokens.add(token);
            }
            start = end;
        }
        return tokens;
    }

    /**
     * Turns each token into a six-field record joined by {@code "&&"}:
     * the token position, the token twice, and three times
     * {@code AbstractBottomUpParser.OTHER}.
     *
     * @param list the tokens to convert
     * @return one record string per token, in the same order
     */
    private static final List<String> convertText(List<String> list) {
        List<String> records = new ArrayList<String>(list.size());
        for (int i = 0; i < list.size(); i++) {
            String token = list.get(i);
            records.add(i + "&&" + token + "&&" + token
                    + "&&" + AbstractBottomUpParser.OTHER
                    + "&&" + AbstractBottomUpParser.OTHER
                    + "&&" + AbstractBottomUpParser.OTHER);
        }
        return records;
    }

    /**
     * Joins the elements of a list with single spaces.
     *
     * @param list the elements to join
     * @return the joined string; empty if the list is empty
     */
    private static final String listToString(List<String> list) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < list.size(); i++) {
            if (i != 0) {
                sb.append(" ");
            }
            sb.append(list.get(i));
        }
        return sb.toString();
    }

    /**
     * Entry point. Configures log4j from the file named by the
     * {@code log-config} system property ({@code log-config.txt} if unset),
     * then converts every file returned by a {@code FolderScanner} over the
     * input folder and writes one line per file to the output file.
     *
     * @param strArr two arguments: the input folder and the output file
     * @throws Exception if a file cannot be read or the output cannot be written
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        if (strArr.length != 2) {
            System.out.println("Usage: java -mx512M org.fbk.cit.hlt.core.lsa.util.FileConverter in out");
            System.exit(1);
        }
        FolderScanner folderScanner = new FolderScanner(new File(strArr[0]));
        PrintWriter printWriter = new PrintWriter(new FileWriter(strArr[1]));
        try {
            // Running 1-based file counter; the written identifier is 775 + counter.
            // NOTE(review): the meaning of the 775 offset is not visible here.
            int fileCount = 1;
            while (folderScanner.hasNext()) {
                Object[] batch = folderScanner.next();
                for (int j = 0; j < batch.length; j++) {
                    logger.debug(fileCount + StringTable.HORIZONTAL_TABULATION + batch[j]);
                    String line = listToString(convertText(parseText(getText((File) batch[j]))));
                    printWriter.print(SchemaSymbols.ATTVAL_FALSE_0);
                    printWriter.print(StringTable.HORIZONTAL_TABULATION);
                    printWriter.print(775 + fileCount);
                    printWriter.print(StringTable.HORIZONTAL_TABULATION);
                    printWriter.println(line);
                    fileCount++;
                }
            }
            // PrintWriter swallows IOExceptions; checkError() flushes and reports
            // them so a truncated output file does not go unnoticed.
            if (printWriter.checkError()) {
                throw new IOException("Error while writing to " + strArr[1]);
            }
        } finally {
            // Always release the output file, even if a conversion fails midway.
            printWriter.close();
        }
    }
}
