package com.whylogs.core.statistics.datatypes;

import com.google.protobuf.ByteString;
import com.whylogs.core.message.StringsMessage;
import com.whylogs.core.statistics.NumberTracker;
import com.whylogs.core.utils.sketches.ThetaSketch;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import org.apache.datasketches.ArrayOfStringsSerDe;
import org.apache.datasketches.frequencies.ItemsSketch;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.theta.Union;

/* loaded from: input_file:com/whylogs/core/statistics/datatypes/StringTracker.class */
/**
 * Tracks summary statistics for a stream of string values.
 *
 * <p>Each accepted value increments a counter and is fed into a frequent-items sketch, a theta
 * sketch union, a character-position tracker, and two {@link NumberTracker}s: one for the string
 * length and one for the number of tokens produced by the configured tokenizer.
 *
 * <p>Instances created through {@link #fromProtobuf(StringsMessage)} or {@link #builder()} may
 * have {@code null} components (items, length, tokenLength, charPosTracker). {@link #merge} and
 * {@link #toProtobuf()} tolerate that state; {@link #update(String, String)} does not and will
 * throw a {@link NullPointerException} if a component it writes to is missing.
 */
public final class StringTracker {
    /**
     * Default tokenizer: splits the input on single space characters.
     *
     * <p>NOTE(review): this field is public and non-final, so it can be reassigned at runtime;
     * it is left that way to avoid breaking any caller that relies on it.
     */
    public static Function<String, List<String>> TOKENIZER = str -> Arrays.asList(str.split(" "));

    /** Shared serializer used to (de)serialize the frequent-items sketch. */
    public static final ArrayOfStringsSerDe ARRAY_OF_STRINGS_SER_DE = new ArrayOfStringsSerDe();

    /** Map size passed to the frequent-items sketch created by the no-arg constructor. */
    public static final int MAX_FREQUENT_ITEM_SIZE = 128;

    private long count;
    private final ItemsSketch<String> items;
    private final Union thetaSketch;
    private final NumberTracker length;
    private final NumberTracker tokenLength;
    private final CharPosTracker charPosTracker;
    private Function<String, List<String>> tokenizer;

    /**
     * Fluent builder for {@link StringTracker}.
     *
     * <p>Every component is optional; components that are never set are passed to the tracker as
     * {@code null} (or {@code 0} for the count). When no tokenizer is supplied the tracker uses
     * {@link StringTracker#TOKENIZER}.
     */
    public static class StringTrackerBuilder {
        private long count;
        private ItemsSketch<String> items;
        private Union thetaSketch;
        private NumberTracker length;
        private NumberTracker tokenLength;
        private CharPosTracker charPosTracker;
        // Distinguishes "tokenizer never configured" from "tokenizer explicitly configured".
        private boolean tokenizer$set;
        private Function<String, List<String>> tokenizer$value;

        StringTrackerBuilder() {
        }

        /** Sets the number of values already tracked. */
        public StringTrackerBuilder count(long j) {
            this.count = j;
            return this;
        }

        /** Sets the frequent-items sketch; may be {@code null}. */
        public StringTrackerBuilder items(ItemsSketch<String> itemsSketch) {
            this.items = itemsSketch;
            return this;
        }

        /** Sets the theta sketch union. */
        public StringTrackerBuilder thetaSketch(Union union) {
            this.thetaSketch = union;
            return this;
        }

        /** Sets the tracker for string lengths; may be {@code null}. */
        public StringTrackerBuilder length(NumberTracker numberTracker) {
            this.length = numberTracker;
            return this;
        }

        /** Sets the tracker for token counts; may be {@code null}. */
        public StringTrackerBuilder tokenLength(NumberTracker numberTracker) {
            this.tokenLength = numberTracker;
            return this;
        }

        /** Sets the character-position tracker; may be {@code null}. */
        public StringTrackerBuilder charPosTracker(CharPosTracker charPosTracker) {
            this.charPosTracker = charPosTracker;
            return this;
        }

        /**
         * Sets the tokenizer used to compute token counts.
         *
         * @param function the tokenizer; a {@code null} value results in the default tokenizer
         *     being used by the built tracker
         */
        public StringTrackerBuilder tokenizer(Function<String, List<String>> function) {
            this.tokenizer$value = function;
            this.tokenizer$set = true;
            return this;
        }

        /**
         * Creates the tracker from the values collected so far.
         *
         * @return a new {@link StringTracker}
         */
        public StringTracker build() {
            Function<String, List<String>> effectiveTokenizer =
                    this.tokenizer$set ? this.tokenizer$value : StringTracker.TOKENIZER;
            return new StringTracker(
                    this.count,
                    this.items,
                    this.thetaSketch,
                    this.length,
                    this.tokenLength,
                    this.charPosTracker,
                    effectiveTokenizer);
        }

        @Override
        public String toString() {
            return "StringTracker.StringTrackerBuilder(count=" + this.count + ", items=" + this.items + ", thetaSketch=" + this.thetaSketch + ", length=" + this.length + ", tokenLength=" + this.tokenLength + ", charPosTracker=" + this.charPosTracker + ", tokenizer$value=" + this.tokenizer$value + ")";
        }
    }

    /** Creates an empty tracker with freshly allocated components and the default tokenizer. */
    public StringTracker() {
        this.count = 0L;
        this.items = new ItemsSketch<>(MAX_FREQUENT_ITEM_SIZE);
        this.thetaSketch = Union.builder().buildUnion();
        this.length = new NumberTracker();
        this.tokenLength = new NumberTracker();
        this.charPosTracker = new CharPosTracker();
        this.tokenizer = TOKENIZER;
    }

    /**
     * Tracks a value, passing {@code null} as the second argument to the character-position
     * tracker.
     *
     * @param str the value to track; {@code null} is ignored
     */
    public void update(String str) {
        update(str, null);
    }

    /**
     * Tracks a value in every component of this tracker.
     *
     * @param str the value to track; {@code null} is ignored and leaves the tracker unchanged
     * @param str2 forwarded unchanged as the second argument of {@code CharPosTracker.update}
     * @throws NullPointerException if any component of this tracker is {@code null}, which can
     *     be the case for trackers produced by the builder or by {@link #fromProtobuf}
     */
    public void update(String str, String str2) {
        if (str == null) {
            return;
        }
        this.count++;
        this.thetaSketch.update(str);
        this.items.update(str);
        this.charPosTracker.update(str, str2);
        this.length.track(Integer.valueOf(str.length()));
        this.tokenLength.track(Integer.valueOf(this.tokenizer.apply(str).size()));
    }

    /**
     * Tracks a value, optionally replacing this tracker's tokenizer first.
     *
     * <p>The replacement is persistent: a non-null {@code function} stays in effect for all
     * subsequent updates, not only for this call.
     *
     * @param str the value to track; {@code null} is ignored (the tokenizer is still replaced)
     * @param str2 forwarded unchanged as the second argument of {@code CharPosTracker.update}
     * @param function the new tokenizer, or {@code null} to keep the current one
     */
    public void update(String str, String str2, Function<String, List<String>> function) {
        if (function != null) {
            this.tokenizer = function;
        }
        update(str, str2);
    }

    /**
     * Combines this tracker with another one into a new tracker.
     *
     * <p>The frequent-items sketch is copied through a serialize/deserialize round trip before
     * merging, so this tracker's own sketch is not the object being merged into. The result keeps
     * this tracker's tokenizer.
     *
     * <p>Known limitation (unchanged from earlier behavior): when this tracker has no
     * {@code length}, {@code tokenLength} or {@code charPosTracker}, the corresponding component
     * of the result is {@code null} even if {@code stringTracker} has one.
     *
     * @param stringTracker the tracker to merge in; when {@code null}, {@code this} is returned
     * @return a new tracker holding the merged state, or {@code this} if the argument is null
     */
    public StringTracker merge(StringTracker stringTracker) {
        if (stringTracker == null) {
            return this;
        }

        ItemsSketch<String> mergedItems = null;
        if (this.items != null) {
            mergedItems = copyItems(this.items);
            mergedItems.merge(stringTracker.items);
        } else if (stringTracker.items != null) {
            mergedItems = copyItems(stringTracker.items);
        }

        Union mergedTheta = Union.builder().buildUnion();
        mergedTheta.update(this.thetaSketch.getResult());
        mergedTheta.update(stringTracker.thetaSketch.getResult());

        NumberTracker mergedLength =
                this.length == null ? null : this.length.merge(stringTracker.length);
        NumberTracker mergedTokenLength =
                this.tokenLength == null ? null : this.tokenLength.merge(stringTracker.tokenLength);
        CharPosTracker mergedCharPos =
                this.charPosTracker == null
                        ? null
                        : this.charPosTracker.merge(stringTracker.charPosTracker);

        return builder()
                .count(this.count + stringTracker.count)
                .items(mergedItems)
                .thetaSketch(mergedTheta)
                .length(mergedLength)
                .tokenLength(mergedTokenLength)
                .charPosTracker(mergedCharPos)
                // Carry the tokenizer over; otherwise the merged tracker would silently fall back
                // to the default and tokenize later updates differently from this tracker.
                .tokenizer(this.tokenizer)
                .build();
    }

    /** Returns an independent copy of a frequent-items sketch via its serialized form. */
    private static ItemsSketch<String> copyItems(ItemsSketch<String> sketch) {
        byte[] serialized = sketch.toByteArray(ARRAY_OF_STRINGS_SER_DE);
        return ItemsSketch.getInstance(WritableMemory.wrap(serialized), ARRAY_OF_STRINGS_SER_DE);
    }

    /**
     * Serializes this tracker into a protobuf message builder.
     *
     * <p>Components that are {@code null} (items, length, tokenLength, charPosTracker) are left
     * unset on the message instead of causing a {@link NullPointerException}.
     *
     * @return a builder populated with this tracker's state
     */
    public StringsMessage.Builder toProtobuf() {
        StringsMessage.Builder message = StringsMessage.newBuilder()
                .setCount(this.count)
                .setCompactTheta(ThetaSketch.serialize(this.thetaSketch));
        // fromProtobuf leaves these three null when the source message has no length field,
        // so a deserialized tracker must be able to skip them when written back out.
        if (this.length != null) {
            message.setLength(this.length.toProtobuf());
        }
        if (this.tokenLength != null) {
            message.setTokenLength(this.tokenLength.toProtobuf());
        }
        if (this.charPosTracker != null) {
            message.setCharPosTracker(this.charPosTracker.toProtobuf());
        }
        if (this.items != null) {
            message.setItems(ByteString.copyFrom(this.items.toByteArray(ARRAY_OF_STRINGS_SER_DE)));
        }
        return message;
    }

    /**
     * Rebuilds a tracker from its protobuf representation.
     *
     * <p>The frequent-items sketch is only restored when the serialized payload is longer than 8
     * bytes; otherwise the tracker's items are {@code null}. The length, token-length and
     * character-position trackers are only restored when the message reports
     * {@code hasLength()}; otherwise all three are {@code null}.
     *
     * @param stringsMessage the message to read
     * @return a tracker using the default tokenizer
     */
    public static StringTracker fromProtobuf(StringsMessage stringsMessage) {
        ItemsSketch<String> restoredItems = null;
        byte[] itemBytes = stringsMessage.getItems().toByteArray();
        // NOTE(review): 8 bytes is presumably the size of an empty sketch's preamble - confirm.
        if (itemBytes.length > 8) {
            restoredItems = ItemsSketch.getInstance(Memory.wrap(itemBytes), ARRAY_OF_STRINGS_SER_DE);
        }

        StringTrackerBuilder trackerBuilder = builder()
                .count(stringsMessage.getCount())
                .items(restoredItems)
                .thetaSketch(ThetaSketch.deserialize(stringsMessage.getCompactTheta()));

        if (stringsMessage.hasLength()) {
            trackerBuilder
                    .length(NumberTracker.fromProtobuf(stringsMessage.getLength()))
                    .tokenLength(NumberTracker.fromProtobuf(stringsMessage.getTokenLength()))
                    .charPosTracker(CharPosTracker.fromProtobuf(stringsMessage.getCharPosTracker()));
        }
        return trackerBuilder.build();
    }

    /** Returns a new, empty builder. */
    public static StringTrackerBuilder builder() {
        return new StringTrackerBuilder();
    }

    /** Returns the number of non-null values tracked. */
    public long getCount() {
        return this.count;
    }

    /** Returns the frequent-items sketch; may be {@code null}. */
    public ItemsSketch<String> getItems() {
        return this.items;
    }

    /** Returns the theta sketch union. */
    public Union getThetaSketch() {
        return this.thetaSketch;
    }

    /** Returns the tracker for string lengths; may be {@code null}. */
    public NumberTracker getLength() {
        return this.length;
    }

    /** Returns the tracker for token counts; may be {@code null}. */
    public NumberTracker getTokenLength() {
        return this.tokenLength;
    }

    /** Returns the character-position tracker; may be {@code null}. */
    public CharPosTracker getCharPosTracker() {
        return this.charPosTracker;
    }

    /** Returns the tokenizer currently used to compute token counts. */
    public Function<String, List<String>> getTokenizer() {
        return this.tokenizer;
    }

    /**
     * Full constructor used by the builder.
     *
     * <p>A {@code null} tokenizer falls back to {@link #TOKENIZER}, mirroring how
     * {@link #update(String, String, Function)} treats a {@code null} tokenizer as "no change".
     */
    private StringTracker(long j, ItemsSketch<String> itemsSketch, Union union, NumberTracker numberTracker, NumberTracker numberTracker2, CharPosTracker charPosTracker, Function<String, List<String>> function) {
        this.count = j;
        this.items = itemsSketch;
        this.thetaSketch = union;
        this.length = numberTracker;
        this.tokenLength = numberTracker2;
        this.charPosTracker = charPosTracker;
        this.tokenizer = function != null ? function : TOKENIZER;
    }

    /**
     * Compiler-generated (synthetic) accessor preserved from the decompiled class. It is no
     * longer called from this file and is kept only so the class's member set stays unchanged.
     */
    static /* synthetic */ Function<String, List<String>> access$000() {
        return TOKENIZER;
    }
}
