package ai.h2o.targetencoding;

import ai.h2o.targetencoding.TargetEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.junit.After;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import water.TestUtil;
import water.fvec.Frame;
import water.fvec.TestFrameBuilder;
import water.fvec.Vec;
import water.util.IcedHashMapGeneric;
import water.util.TwoDimTable;

/* loaded from: input_file:ai/h2o/targetencoding/TargetEncodingKFoldStrategyTest.class */
public class TargetEncodingKFoldStrategyTest extends TestUtil {
    private Frame fr = null;

    @BeforeClass
    public static void setup() {
        stall_till_cloudsize(1);
    }

    @Test
    public void prepareEncodingMapForKFoldCaseTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b"})).withDataForCol(1, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3})).build();
        IcedHashMapGeneric prepareEncodingMap = new TargetEncoder(new String[]{"ColA"}).prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame frame = prepareEncodingMap.get("ColA");
        Vec vec = vec(new int[]{0, 2, 1});
        assertVecEquals(vec, frame.vec(2), 1.0E-5d);
        Vec vec2 = vec(new int[]{1, 2, 1});
        assertVecEquals(vec2, frame.vec(3), 1.0E-5d);
        vec.remove();
        vec2.remove();
        encodingMapCleanUp(prepareEncodingMap);
    }

    @Test
    public void prepareEncodingMapForKFoldCaseWithSomeOfTheTEValuesRepresentedOnlyInOneFold_Test() {
    }

    @Test
    public void targetEncoderKFoldHoldoutApplyingTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(1, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d, 4.0d})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3, 2})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(this.fr, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        Vec vec = vec(new int[]{1, 1, 1, 1, 0});
        assertVecEquals(vec, applyTargetEncoding.vec(4), 1.0E-5d);
        vec.remove();
        applyTargetEncoding.delete();
        encodingMapCleanUp(prepareEncodingMap);
    }

    @Test
    public void getUniqueValuesOfTheFoldColumnTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"fold_column"}).withVecTypes(new byte[]{3}).withDataForCol(0, ar(new long[]{1, 2, 2, 3, 2})).build();
        long[] uniqueValuesOfTheFoldColumn = new TargetEncoder(new String[]{""}).getUniqueValuesOfTheFoldColumn(this.fr, 0);
        Arrays.sort(uniqueValuesOfTheFoldColumn);
        Assert.assertArrayEquals(ar(new long[]{1, 2, 3}), uniqueValuesOfTheFoldColumn);
    }

    @Test
    public void targetEncoderKFoldHoldout_WithNonFirstColumnToEncode_ApplyingTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColA2", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(1, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(2, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d, 4.0d})).withDataForCol(3, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(4, ar(new long[]{1, 2, 2, 3, 2})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA2"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(this.fr, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        Vec vec = vec(new int[]{1, 1, 1, 1, 0});
        assertVecEquals(vec, applyTargetEncoding.vec(5), 1.0E-5d);
        vec.remove();
        encodingMapCleanUp(prepareEncodingMap);
        applyTargetEncoding.delete();
    }

    @Test
    public void targetEncoderKFoldHoldoutApplyingWithoutFoldColumnTest() {
    }

    @Test
    public void encodingWasCreatedWithFoldsCheckTest() {
    }

    @Test
    public void targetEncoderKFoldHoldoutApplyingWithNoiseTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(1, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d, 4.0d})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3, 2})).build();
        Frame build = new TestFrameBuilder().withName("testFrame2").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "c", "b", "a"})).withDataForCol(1, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d, 4.0d})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3, 2})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(build, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        printOutFrameAsTable(applyTargetEncoding, false, applyTargetEncoding.numRows());
        Vec dvec = dvec(new double[]{1.0d, 1.0d, 0.8d, 1.0d, 0.0d});
        assertVecEquals(dvec, applyTargetEncoding.vec(4), 0.01d);
        dvec.remove();
        encodingMapCleanUp(prepareEncodingMap);
        applyTargetEncoding.delete();
        build.delete();
    }

    @Test
    public void targetEncoderKFoldHoldoutApplyingWithCustomNoiseTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(1, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d, 4.0d})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3, 2})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(this.fr, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.02d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        System.out.println("Result table" + applyTargetEncoding.toTwoDimTable().toString());
        Vec vec = vec(new int[]{1, 1, 1, 1, 0});
        assertVecEquals(vec, applyTargetEncoding.vec(4), 0.02d);
        vec.remove();
        encodingMapCleanUp(prepareEncodingMap);
        applyTargetEncoding.delete();
    }

    @Test
    @Ignore
    public void targetEncoderKFoldHoldoutApplyingWithBlendedAvgTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 3, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a", "c"})).withDataForCol(1, ard(new double[]{1.0d, 1.0d, 4.0d, 7.0d, 4.0d, 9.0d})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6", "6", "2"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3, 2, 2})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(this.fr, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", true, 0.0d, true, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        Vec vec = applyTargetEncoding.vec(4);
        Assert.assertEquals(0.855d, vec.at(0L), 0.001d);
        Assert.assertEquals(0.724d, vec.at(1L), 0.001d);
        Assert.assertEquals(0.855d, vec.at(2L), 0.001d);
        Assert.assertEquals(0.856d, vec.at(4L), 0.001d);
        vec.remove();
        encodingMapCleanUp(prepareEncodingMap);
        applyTargetEncoding.delete();
    }

    @Test
    public void manualHighCardinalityKFoldTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "fold_column"}).withVecTypes(new byte[]{4, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "c", "c", "a", "d", "d", "d", "d", "e", "e", "a", "f", "f"})).withDataForCol(1, ar(new String[]{"2", "6", "6", "6", "6", "6", "2", "6", "6", "6", "6", "2", "2", "2", "2"})).withDataForCol(2, ar(new long[]{1, 2, 1, 2, 1, 3, 2, 2, 1, 3, 1, 2, 3, 3, 2})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColB", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(this.fr, "ColB", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        printOutFrameAsTable(applyTargetEncoding, false, 100L);
        Vec vec = applyTargetEncoding.vec(3);
        Vec dvec = dvec(new double[]{0.5d, 1.0d, 1.0d, 1.0d, 1.0d, 0.0d, 1.0d, 1.0d, 0.66666d, 0.66666d, 0.0d, 1.0d, 0.0d, 0.0d, 0.0d});
        assertVecEquals(dvec, vec, 1.0E-5d);
        dvec.remove();
        encodingMapCleanUp(prepareEncodingMap);
        applyTargetEncoding.delete();
    }

    @Test
    public void endToEndTest() {
        Frame build = new TestFrameBuilder().withName("trainingFrame").withColNames(new String[]{"ColA", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "c", "d", "e", "b", "b"})).withDataForCol(1, ar(new String[]{"2", "6", "6", "6", "6", "2", "2"})).withDataForCol(2, ar(new long[]{1, 2, 2, 3, 1, 2, 1})).build();
        Frame build2 = new TestFrameBuilder().withName("validFrame").withColNames(new String[]{"ColA", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(1, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(2, ar(new long[]{1, 2, 1, 2, 1})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(build, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(build2, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        printOutFrameAsTable(applyTargetEncoding, false, 100L);
        Vec vec = applyTargetEncoding.vec(3);
        Vec dvec = dvec(new double[]{0.5714285d, 0.0d, 0.5d, 0.0d, 0.5714285d});
        assertVecEquals(dvec, vec, 1.0E-5d);
        build.delete();
        build2.delete();
        dvec.remove();
        encodingMapCleanUp(prepareEncodingMap);
        applyTargetEncoding.delete();
    }

    @Test
    public void KFoldHoldoutMultipleTEColumnsWithFoldColumnTest() {
        TestFrameBuilder withDataForCol = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB", "ColC", "fold_column"}).withVecTypes(new byte[]{4, 4, 4, 3}).withDataForCol(0, ar(new String[]{"a", "b", "b", "b", "a"})).withDataForCol(1, ar(new String[]{"d", "e", "d", "e", "e"})).withDataForCol(2, ar(new String[]{"2", "6", "6", "6", "6"})).withDataForCol(3, ar(new long[]{1, 2, 2, 3, 2}));
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{"ColA", "ColB"});
        this.fr = withDataForCol.withName("testFrame").build();
        IcedHashMapGeneric prepareEncodingMap = targetEncoder.prepareEncodingMap(this.fr, "ColC", "fold_column");
        Frame applyTargetEncoding = targetEncoder.applyTargetEncoding(this.fr, "ColC", prepareEncodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        Frame sort = applyTargetEncoding.sort(new int[]{2});
        Vec vec = sort.vec(4);
        Frame sort2 = applyTargetEncoding.sort(new int[]{0});
        Vec vec2 = sort2.vec(5);
        Frame build = withDataForCol.withName("testFrameA").build();
        TargetEncoder targetEncoder2 = new TargetEncoder(new String[]{"ColA"});
        IcedHashMapGeneric prepareEncodingMap2 = targetEncoder2.prepareEncodingMap(build, "ColC", "fold_column");
        Frame applyTargetEncoding2 = targetEncoder2.applyTargetEncoding(build, "ColC", prepareEncodingMap2, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        assertVecEquals(applyTargetEncoding2.vec(4), vec, 1.0E-5d);
        Frame build2 = withDataForCol.withName("testFrameB").build();
        TargetEncoder targetEncoder3 = new TargetEncoder(new String[]{"ColB"});
        IcedHashMapGeneric prepareEncodingMap3 = targetEncoder3.prepareEncodingMap(build2, "ColC", "fold_column");
        Frame applyTargetEncoding3 = targetEncoder3.applyTargetEncoding(build2, "ColC", prepareEncodingMap3, TargetEncoder.DataLeakageHandlingStrategy.KFold, "fold_column", false, 0.0d, false, TargetEncoder.DEFAULT_BLENDING_PARAMS, 1234L);
        Frame sort3 = applyTargetEncoding3.sort(new int[]{0});
        assertVecEquals(sort3.vec("ColB_te"), vec2, 1.0E-5d);
        sort2.delete();
        sort.delete();
        sort3.delete();
        encodingMapCleanUp(prepareEncodingMap);
        encodingMapCleanUp(prepareEncodingMap2);
        encodingMapCleanUp(prepareEncodingMap3);
        build.delete();
        build2.delete();
        applyTargetEncoding.delete();
        applyTargetEncoding2.delete();
        applyTargetEncoding3.delete();
    }

    @Test
    public void targetEncoderGetOutOfFoldDataTest() {
        this.fr = new TestFrameBuilder().withName("testFrame").withColNames(new String[]{"ColA", "ColB"}).withVecTypes(new byte[]{3, 3}).withDataForCol(0, ard(new double[]{5.0d, 6.0d, 7.0d, 9.0d})).withDataForCol(1, ard(new double[]{1.0d, 2.0d, 3.0d, 1.0d})).build();
        TargetEncoder targetEncoder = new TargetEncoder(new String[]{""});
        Frame outOfFoldData = targetEncoder.getOutOfFoldData(this.fr, "ColB", 1L);
        TwoDimTable twoDimTable = outOfFoldData.toTwoDimTable();
        Assert.assertEquals(outOfFoldData.numRows(), 2L);
        Assert.assertEquals(6L, twoDimTable.get(5, 0));
        Assert.assertEquals(7L, twoDimTable.get(6, 0));
        Frame outOfFoldData2 = targetEncoder.getOutOfFoldData(this.fr, "ColB", 2L);
        TwoDimTable twoDimTable2 = outOfFoldData2.toTwoDimTable();
        Assert.assertEquals(5L, twoDimTable2.get(5, 0));
        Assert.assertEquals(7L, twoDimTable2.get(6, 0));
        Assert.assertEquals(9L, twoDimTable2.get(7, 0));
        outOfFoldData.delete();
        outOfFoldData2.delete();
    }

    @After
    public void afterEach() {
        if (this.fr != null) {
            this.fr.delete();
        }
    }

    private void encodingMapCleanUp(Map<String, Frame> map) {
        Iterator<Map.Entry<String, Frame>> it = map.entrySet().iterator();
        while (it.hasNext()) {
            it.next().getValue().delete();
        }
    }
}
