package org.maochen.nlp.ml.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.vector.FeatNamedVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/ml/util/TrainingDataUtils.class */
/**
 * Static helpers for preparing training data: balancing label frequencies, dropping
 * low-information features, randomly splitting a data set, and reading delimited
 * sequence files.
 */
public class TrainingDataUtils {
    private static final Logger LOG = LoggerFactory.getLogger(TrainingDataUtils.class);

    /**
     * Down-samples the data so that no label occurs more often than the rarest label.
     *
     * <p>Tuples are visited in list order and, for each label, only the first {@code min}
     * occurrences are kept, where {@code min} is the occurrence count of the least frequent
     * label. The input list is not modified.
     *
     * @param list the tuples to balance
     * @return a new list holding the retained tuples in their original relative order;
     *         empty when {@code list} is empty
     * @throws NullPointerException if {@code list} is null or a tuple has a null label
     *         (the grouping collector rejects null keys)
     */
    public static List<Tuple> createBalancedTrainingData(List<Tuple> list) {
        Map<String, Long> labelCounts = list.stream()
                .collect(Collectors.groupingBy(tuple -> tuple.label, Collectors.counting()));
        // Size of the smallest class; every label is capped at this many tuples.
        long cap = labelCounts.values().stream().mapToLong(Long::longValue).min().orElse(0L);

        Map<String, Long> taken = new HashMap<>();
        List<Tuple> balanced = new ArrayList<>();
        for (Tuple tuple : list) {
            long soFar = taken.getOrDefault(tuple.label, 0L);
            if (soFar < cap) {
                taken.put(tuple.label, soFar + 1);
                balanced.add(tuple);
            }
        }
        return balanced;
    }

    /**
     * Selects the features whose value distribution is considered uninformative.
     *
     * @param map feature key mapped to a histogram of (value, occurrence count)
     * @param i   total number of tuples the histograms were built from
     * @return keys of the features accepted by {@link #isLowInformation(Collection, int)}
     */
    private static Set<String> getSingleValFeat(Map<String, Map<Double, Integer>> map, int i) {
        return map.entrySet().stream()
                .filter(entry -> isLowInformation(entry.getValue().values(), i))
                .map(Map.Entry::getKey)
                .collect(Collectors.toSet());
    }

    /**
     * Decides whether a feature's histogram marks it as low-information: either it has a
     * single distinct value seen exactly once or in every tuple, or it has exactly two
     * distinct values and one of them was seen only once.
     */
    private static boolean isLowInformation(Collection<Integer> counts, int total) {
        if (counts.size() == 1) {
            int only = counts.iterator().next();
            return only == 1 || only == total;
        }
        if (counts.size() == 2) {
            return counts.contains(1);
        }
        return false;
    }

    /**
     * Removes low-information features from every tuple, in place.
     *
     * <p>A feature is identified by its name when the tuple's vector is a
     * {@link FeatNamedVector}, otherwise by its position (as a decimal string). For each
     * feature a histogram of its values over all tuples is built, the features selected by
     * {@link #getSingleValFeat(Map, int)} are dropped, and each tuple's vector (and
     * {@code featsName}, for named vectors) is replaced by the reduced version.
     *
     * @param list the tuples whose vectors are rewritten
     * @throws NumberFormatException if a positional (unnamed) vector is processed while a
     *         removable feature key is not an integer, e.g. when named and unnamed vectors
     *         are mixed in one list
     */
    public static void reduceDimension(List<Tuple> list) {
        Map<String, Map<Double, Integer>> histograms = new HashMap<>();
        for (Tuple tuple : list) {
            double[] values = tuple.vector.getVector();
            String[] keys = featureKeys(tuple, values.length);
            for (int k = 0; k < values.length; k++) {
                histograms.computeIfAbsent(keys[k], key -> new HashMap<>())
                        .merge(values[k], 1, Integer::sum);
            }
        }

        Set<String> removable = getSingleValFeat(histograms, list.size());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Single value feats: ");
            LOG.debug(removable.toString().replaceAll(", ", System.lineSeparator()));
        }

        // Parsed lazily (and only once) so that lists made purely of named vectors never
        // attempt to interpret feature names as positions.
        Set<Integer> positionalDrops = null;
        for (Tuple tuple : list) {
            double[] values = tuple.vector.getVector();
            if (tuple.vector instanceof FeatNamedVector) {
                FeatNamedVector named = (FeatNamedVector) tuple.vector;
                String[] names = named.featsName;
                int[] kept = IntStream.range(0, values.length)
                        .filter(k -> !removable.contains(names[k]))
                        .toArray();
                tuple.vector.setVector(IntStream.of(kept).mapToDouble(k -> values[k]).toArray());
                named.featsName = IntStream.of(kept).mapToObj(k -> names[k]).toArray(String[]::new);
            } else {
                if (positionalDrops == null) {
                    positionalDrops = removable.stream()
                            .map(Integer::parseInt)
                            .collect(Collectors.toSet());
                }
                Set<Integer> drops = positionalDrops;
                tuple.vector.setVector(IntStream.range(0, values.length)
                        .filter(k -> !drops.contains(k))
                        .mapToDouble(k -> values[k])
                        .toArray());
            }
        }
    }

    /**
     * Returns the per-position feature keys of a tuple: the vector's own names for a
     * {@link FeatNamedVector}, otherwise the positions {@code "0".."width-1"}.
     */
    private static String[] featureKeys(Tuple tuple, int width) {
        if (tuple.vector instanceof FeatNamedVector) {
            return ((FeatNamedVector) tuple.vector).featsName;
        }
        return IntStream.range(0, width).mapToObj(String::valueOf).toArray(String[]::new);
    }

    /**
     * Randomly splits the data into a sampled part and the remainder.
     *
     * <p>A proportion above 0.5 is mirrored to {@code 1 - d}, so the left list is always the
     * smaller share: it holds {@code floor(min(d, 1 - d) * list.size())} distinct, randomly
     * chosen tuples. Every position of the list, including the last one, is eligible. Both
     * result lists keep the relative order of the input, which is not modified.
     *
     * @param list the tuples to split
     * @param d    the requested proportion, within {@code [0.0, 1.0]}
     * @return a pair of (sampled tuples, remaining tuples)
     * @throws IllegalArgumentException if {@code d} is below 0.0 or above 1.0
     */
    public static Pair<List<Tuple>, List<Tuple>> splitData(List<Tuple> list, double d) {
        if (d < 0.0d || d > 1.0d) {
            throw new IllegalArgumentException("Proportion should between 0.0 - 1.0");
        }
        double fraction = d > 0.5d ? 1.0d - d : d;
        int total = list.size();
        int sampleSize = (int) Math.floor(fraction * total);

        // sampleSize never exceeds total / 2, so drawing until enough distinct indices
        // have been collected always terminates.
        Set<Integer> picked = new HashSet<>();
        while (picked.size() < sampleSize) {
            picked.add((int) (Math.random() * total));
        }

        List<Tuple> sampled = new ArrayList<>(sampleSize);
        List<Tuple> remainder = new ArrayList<>(total - sampleSize);
        int position = 0;
        for (Tuple tuple : list) {
            if (picked.contains(position)) {
                sampled.add(tuple);
            } else {
                remainder.add(tuple);
            }
            position++;
        }
        return new ImmutablePair<>(sampled, remainder);
    }

    /**
     * Reads sequences from a delimited text stream.
     *
     * <p>Each non-blank line is trimmed and split with {@code str}; the column at index
     * {@code i} becomes the tuple's label and the remaining columns, in order, are passed to
     * a {@link FeatNamedVector}. A blank line closes the current sequence (consecutive blank
     * lines therefore produce empty sequences). A final sequence that is not followed by a
     * blank line is included as long as it has at least one entry. Sequence ids and the
     * entry ids within a sequence both count up from 0.
     *
     * <p>The stream is read with the platform default charset and is closed before
     * returning. If reading fails, the error is logged and the sequences completed so far
     * are returned.
     *
     * @param inputStream the source to read; closed by this method
     * @param str         the column delimiter, interpreted as a regular expression by
     *                    {@link String#split(String)}
     * @param i           zero-based index of the label column
     * @return the sequences read, in file order
     * @throws ArrayIndexOutOfBoundsException if a line has no column at index {@code i}
     */
    public static List<SequenceTuple> readSeqFile(InputStream inputStream, String str, int i) {
        List<SequenceTuple> sequences = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            SequenceTuple current = newSequence(0);
            int entryId = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    sequences.add(current);
                    // One sequence is added per blank line, so the next id equals the count.
                    current = newSequence(sequences.size());
                    entryId = 0;
                    continue;
                }
                String[] columns = trimmed.split(str);
                String[] features = IntStream.range(0, columns.length)
                        .filter(col -> col != i)
                        .mapToObj(col -> columns[col])
                        .toArray(String[]::new);
                current.entries.add(new Tuple(entryId++, new FeatNamedVector(features), columns[i]));
            }
            // Input without a trailing blank line: do not lose the last sequence.
            if (!current.entries.isEmpty()) {
                sequences.add(current);
            }
        } catch (IOException e) {
            LOG.error("Failed to read sequence data", e);
        }
        return sequences;
    }

    /** Creates an empty sequence with the given id and a fresh entry list. */
    private static SequenceTuple newSequence(int id) {
        SequenceTuple sequence = new SequenceTuple();
        sequence.entries = new ArrayList<>();
        sequence.id = id;
        return sequence;
    }
}
