package org.tribuo.data.text;

import com.oracle.labs.mlrg.olcut.config.ConfigurationManager;
import com.oracle.labs.mlrg.olcut.config.Option;
import com.oracle.labs.mlrg.olcut.config.Options;
import com.oracle.labs.mlrg.olcut.util.LabsLogFormatter;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.Logger;

/* loaded from: input_file:org/tribuo/data/text/SplitTextData.class */
public class SplitTextData {
    private static final Logger logger = Logger.getLogger(SplitTextData.class.getName());

    /* loaded from: input_file:org/tribuo/data/text/SplitTextData$Line.class */
    private static class Line {
        public final String label;
        public final String text;

        Line(String str, String str2) {
            this.label = str;
            this.text = str2;
        }

        public String toString() {
            return this.label + "##" + this.text;
        }
    }

    /* loaded from: input_file:org/tribuo/data/text/SplitTextData$TrainTestSplitOptions.class */
    public static class TrainTestSplitOptions implements Options {

        @Option(charName = 's', longName = "split-fraction", usage = "Split fraction.")
        public float splitFraction;

        @Option(charName = 'i', longName = "input-file", usage = "Input data file in standard text format.")
        public Path inputPath;

        @Option(charName = 't', longName = "training-output-file", usage = "Output training data file.")
        public Path trainPath;

        @Option(charName = 'v', longName = "validation-output-file", usage = "Output validation data file.")
        public Path validationPath;

        @Option(charName = 'r', longName = "rng-seed", usage = "Seed for the RNG.")
        public long seed = 1;

        public String getOptionsDescription() {
            return "Splits a standard text format dataset in two.";
        }
    }

    public static void main(String[] strArr) throws IOException {
        for (Handler handler : Logger.getLogger("").getHandlers()) {
            handler.setLevel(Level.ALL);
            handler.setFormatter(new LabsLogFormatter());
            try {
                handler.setEncoding("utf-8");
            } catch (UnsupportedEncodingException | SecurityException e) {
                logger.severe("Error setting output encoding");
            }
        }
        TrainTestSplitOptions trainTestSplitOptions = new TrainTestSplitOptions();
        ConfigurationManager configurationManager = new ConfigurationManager(strArr, trainTestSplitOptions);
        if (trainTestSplitOptions.inputPath == null || trainTestSplitOptions.trainPath == null || trainTestSplitOptions.validationPath == null || trainTestSplitOptions.splitFraction < 0.0d || trainTestSplitOptions.splitFraction > 1.0d) {
            System.out.println("Incorrect arguments");
            System.out.println(configurationManager.usage());
            return;
        }
        int i = 0;
        int i2 = 0;
        int i3 = 0;
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(trainTestSplitOptions.inputPath.toFile()), StandardCharsets.UTF_8));
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(trainTestSplitOptions.trainPath.toFile())), StandardCharsets.UTF_8));
        PrintWriter printWriter2 = new PrintWriter(new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(trainTestSplitOptions.validationPath.toFile())), StandardCharsets.UTF_8));
        ArrayList arrayList = new ArrayList();
        while (bufferedReader.ready()) {
            i++;
            String trim = bufferedReader.readLine().trim();
            if (trim.isEmpty()) {
                i3++;
            } else {
                String[] split = trim.split("##");
                if (split.length != 2) {
                    i3++;
                    logger.warning(String.format("Bad line in %s at %d: %s", trainTestSplitOptions.inputPath, Integer.valueOf(i), trim.substring(Math.min(50, trim.length()))));
                } else {
                    arrayList.add(new Line(split[0].trim().toUpperCase(), split[1]));
                    i2++;
                }
            }
        }
        bufferedReader.close();
        logger.info("Found " + i2 + " valid examples, " + i3 + " invalid examples out of " + i + " lines.");
        int round = Math.round(trainTestSplitOptions.splitFraction * i2);
        logger.info("Outputting " + round + " training examples, and " + (i2 - round) + " testing examples, with a " + trainTestSplitOptions.splitFraction + " split.");
        Collections.shuffle(arrayList, new Random(trainTestSplitOptions.seed));
        for (int i4 = 0; i4 < round; i4++) {
            printWriter.println(arrayList.get(i4));
        }
        for (int i5 = round; i5 < i2; i5++) {
            printWriter2.println(arrayList.get(i5));
        }
        printWriter.close();
        printWriter2.close();
    }
}
