/*
 * Decompiled with CFR 0.152.
 */
package cc.unitmesh.rag.splitter;

import cc.unitmesh.rag.splitter.CharacterTextSplitterKt;
import cc.unitmesh.rag.splitter.TextSplitter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.List;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Text splitter that cuts text on a single separator string and then packs the
 * resulting pieces back into chunks bounded by the configured chunk size.
 *
 * <p>Chunk size, chunk overlap and the keep-separator flag are read through
 * {@code getChunkSize()}, {@code getChunkOverlap()} and {@code getKeepSeparator()};
 * none of these are declared in this class.
 */
@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000\u001c\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\u0005\n\u0002\u0010 \n\u0002\b\u0005\u0018\u0000 \u000e2\u00020\u0001:\u0001\u000eB\u0005\u00a2\u0006\u0002\u0010\u0002J\"\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u00040\n2\f\u0010\u000b\u001a\b\u0012\u0004\u0012\u00020\u00040\n2\u0006\u0010\u0003\u001a\u00020\u0004J\u0016\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u00040\n2\u0006\u0010\r\u001a\u00020\u0004H\u0016R\u001a\u0010\u0003\u001a\u00020\u0004X\u0084\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0005\u0010\u0006\"\u0004\b\u0007\u0010\b\u00a8\u0006\u000f"}, d2={"Lcc/unitmesh/rag/splitter/CharacterTextSplitter;", "Lcc/unitmesh/rag/splitter/TextSplitter;", "()V", "separator", "", "getSeparator", "()Ljava/lang/String;", "setSeparator", "(Ljava/lang/String;)V", "mergeSplits", "", "splits", "splitText", "text", "Companion", "cocoa-core"})
public final class CharacterTextSplitter extends TextSplitter {
    /** Singleton instance of the (empty) companion object. */
    @NotNull
    public static final Companion Companion = new Companion(null);

    /** Separator used to cut the input text; defaults to a blank line. */
    @NotNull
    private String separator = "\n\n";

    private static final Logger log = LoggerFactory.getLogger(CharacterTextSplitter.class);

    /**
     * Returns the separator currently used to cut the input text.
     *
     * @return the separator string, never {@code null}
     */
    @NotNull
    protected final String getSeparator() {
        return this.separator;
    }

    /**
     * Replaces the separator used to cut the input text.
     *
     * @param string the new separator; must not be {@code null}
     */
    protected final void setSeparator(@NotNull String string) {
        Intrinsics.checkNotNullParameter(string, "<set-?>");
        this.separator = string;
    }

    /**
     * Splits {@code text} into chunks.
     *
     * <p>The text is first cut by {@code CharacterTextSplitterKt.splitTextWithRegex} using the
     * current separator and keep-separator flag; the pieces are then packed by
     * {@link #mergeSplits(List, String)}.
     *
     * @param text the text to split; must not be {@code null}
     * @return the merged chunks
     */
    @Override
    @NotNull
    public List<String> splitText(@NotNull String text) {
        Intrinsics.checkNotNullParameter(text, "text");
        List<String> splits = CharacterTextSplitterKt.splitTextWithRegex(text, this.separator, this.getKeepSeparator());
        // With keep-separator enabled the pieces are re-joined with an empty string
        // (presumably because they already carry the separator - confirm in splitTextWithRegex).
        String mergedSeparator = this.getKeepSeparator() ? "" : this.separator;
        return this.mergeSplits(splits, mergedSeparator);
    }

    /**
     * Packs consecutive {@code splits} into chunks joined by {@code separator}.
     *
     * <p>Pieces are appended to a sliding window. When adding the next piece would push the
     * window past {@code getChunkSize()}, the window is emitted as a chunk and pieces are then
     * dropped from its front until its length is at most {@code getChunkOverlap()} and the next
     * piece fits (or the window is empty). A single piece longer than the chunk size is still
     * emitted as its own chunk; a warning is logged the next time the window overflows.
     *
     * <p>Chunks that would be the empty string are not emitted, so an empty {@code splits}
     * list yields an empty result.
     *
     * @param splits    the pieces to merge, in order; must not be {@code null}
     * @param separator the string placed between pieces inside a chunk; must not be {@code null}
     * @return the merged chunks, in order
     */
    @NotNull
    public final List<String> mergeSplits(@NotNull List<String> splits, @NotNull String separator) {
        Intrinsics.checkNotNullParameter(splits, "splits");
        Intrinsics.checkNotNullParameter(separator, "separator");
        int separatorLength = separator.length();
        List<String> docs = new ArrayList<>();
        // Deque rather than ArrayList: pieces are evicted from the front in O(1).
        Deque<String> currentDoc = new ArrayDeque<>();
        // Length of currentDoc once joined with the separator.
        int total = 0;
        for (String d : splits) {
            int length = d.length();
            if (total + length + (currentDoc.isEmpty() ? 0 : separatorLength) > this.getChunkSize()) {
                if (total > this.getChunkSize()) {
                    log.warn("Created a chunk of size {}, which is longer than the specified {}", total, this.getChunkSize());
                }
                if (!currentDoc.isEmpty()) {
                    CharacterTextSplitter.addChunk(docs, separator, currentDoc);
                    // Keep evicting from the front while the window is larger than the allowed
                    // overlap, or while the next piece still does not fit. The emptiness guard
                    // prevents removing from an empty window (e.g. with a negative overlap).
                    while (!currentDoc.isEmpty()
                            && (total > this.getChunkOverlap()
                                || total + length + separatorLength > this.getChunkSize() && total > 0)) {
                        String evicted = currentDoc.removeFirst();
                        // A separator is only accounted for while other pieces remain.
                        total -= evicted.length() + (currentDoc.isEmpty() ? 0 : separatorLength);
                    }
                }
            }
            currentDoc.addLast(d);
            total += length + (currentDoc.size() > 1 ? separatorLength : 0);
        }
        // Flush whatever is left in the window.
        CharacterTextSplitter.addChunk(docs, separator, currentDoc);
        return docs;
    }

    /** Joins {@code parts} with {@code separator} and appends the result to {@code docs} unless it is empty. */
    private static void addChunk(List<String> docs, String separator, Iterable<String> parts) {
        String doc = String.join(separator, parts);
        if (!doc.isEmpty()) {
            docs.add(doc);
        }
    }

    /** Companion object holder; it declares no members of its own. */
    @Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000\u0014\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u0016\u0010\u0003\u001a\n \u0005*\u0004\u0018\u00010\u00040\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0006"}, d2={"Lcc/unitmesh/rag/splitter/CharacterTextSplitter$Companion;", "", "()V", "log", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "cocoa-core"})
    public static final class Companion {
        private Companion() {
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

