package com.llmagent.data.document.splitter;

import com.llmagent.data.document.Document;
import com.llmagent.data.document.DocumentSplitter;
import com.llmagent.data.document.id.DocumentIdGenerator;
import com.llmagent.data.segment.TextSegment;
import com.llmagent.util.StringUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:com/llmagent/data/document/splitter/RecursiveCharacterTextSplitter.class */
/**
 * Splits text into chunks by trying a prioritized list of separators.
 *
 * <p>The first separator that occurs in the text is used to cut it into pieces. Pieces that
 * are still at least {@code chunkSize} long are split again using only the separators that
 * come after the one just used; shorter pieces are merged back together (re-joined with the
 * separator) into chunks of at most {@code chunkSize}, carrying up to {@code chunkOverlap}
 * of trailing content from one chunk into the next.
 *
 * <p>All sizes are measured with {@link String#length()}, i.e. in UTF-16 code units.
 * Separators are matched literally, never as regular expressions. Separators at the very end
 * of a piece are dropped, because trailing empty pieces are discarded and the separator is
 * only re-inserted between merged pieces.
 */
public class RecursiveCharacterTextSplitter implements DocumentSplitter {
    /** Separators used by the two-argument constructor, from coarsest to finest. */
    private static final List<String> DEFAULT_SEPARATORS =
            Collections.unmodifiableList(Arrays.asList("\n\n", "\n", "。", ".", " ", ""));

    private final List<String> separators;
    private final int chunkSize;
    private final int chunkOverlap;

    /**
     * Creates a splitter that uses the default separators
     * ({@code "\n\n"}, {@code "\n"}, {@code "。"}, {@code "."}, {@code " "}, {@code ""}).
     *
     * @param chunkSize    maximum length of a merged chunk; must be positive
     * @param chunkOverlap maximum length carried over between consecutive chunks; must not be negative
     * @throws IllegalArgumentException if an argument is out of range
     */
    public RecursiveCharacterTextSplitter(int chunkSize, int chunkOverlap) {
        this(DEFAULT_SEPARATORS, chunkSize, chunkOverlap);
    }

    /**
     * Creates a splitter with a custom separator list.
     *
     * @param separators   literal separators in priority order (coarsest first); an empty string
     *                     means "split between every character" and ends the search
     * @param chunkSize    maximum length of a merged chunk; must be positive
     * @param chunkOverlap maximum length carried over between consecutive chunks; must not be negative
     * @throws IllegalArgumentException if {@code separators} is null, empty or contains null,
     *                                  or if a numeric argument is out of range
     */
    public RecursiveCharacterTextSplitter(List<String> separators, int chunkSize, int chunkOverlap) {
        if (separators == null || separators.isEmpty()) {
            throw new IllegalArgumentException("separators must not be null or empty");
        }
        // Explicit loop: List.contains(null) throws on null-hostile lists.
        for (String separator : separators) {
            if (separator == null) {
                throw new IllegalArgumentException("separators must not contain null");
            }
        }
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive, got " + chunkSize);
        }
        if (chunkOverlap < 0) {
            throw new IllegalArgumentException("chunkOverlap must not be negative, got " + chunkOverlap);
        }
        // Defensive copy so later changes to the caller's list cannot affect this splitter.
        this.separators = Collections.unmodifiableList(new ArrayList<>(separators));
        this.chunkSize = chunkSize;
        this.chunkOverlap = chunkOverlap;
    }

    /**
     * Splits the document's text and wraps every chunk in a {@link TextSegment} that carries an
     * id obtained from {@code documentIdGenerator} and the document's metadata.
     *
     * @param document            the document to split; may be null
     * @param documentIdGenerator generator asked for an id once per produced chunk
     * @return one segment per chunk, or an empty list when the document is null or
     *         {@code StringUtil.noText} reports that it has no text
     */
    @Override // com.llmagent.data.document.DocumentSplitter
    public List<TextSegment> split(Document document, DocumentIdGenerator documentIdGenerator) {
        if (document == null || StringUtil.noText(document.text())) {
            return Collections.emptyList();
        }
        List<String> chunks = splitText(document.text());
        List<TextSegment> segments = new ArrayList<>(chunks.size());
        for (String chunk : chunks) {
            segments.add(new TextSegment(documentIdGenerator.generateId(document), chunk, document.metadata()));
        }
        return segments;
    }

    /**
     * Splits raw text into chunks using this splitter's separators and size limits.
     *
     * @param str the text to split; may be null
     * @return the chunks in document order; empty when {@code str} is null or empty
     */
    public List<String> splitText(String str) {
        if (str == null || str.isEmpty()) {
            return new ArrayList<>();
        }
        return splitRecursively(str, this.separators);
    }

    /**
     * Splits {@code text} with the first applicable separator from {@code candidates} and
     * recurses into over-long pieces with the separators that follow it.
     */
    private List<String> splitRecursively(String text, List<String> candidates) {
        // Fall back to the last candidate when none occurs in the text.
        String separator = candidates.get(candidates.size() - 1);
        List<String> finerSeparators = Collections.emptyList();
        for (int idx = 0; idx < candidates.size(); idx++) {
            String candidate = candidates.get(idx);
            if (candidate.isEmpty()) {
                // Character-level split is the finest possible; nothing left to recurse with.
                separator = candidate;
                break;
            }
            if (text.contains(candidate)) {
                separator = candidate;
                finerSeparators = candidates.subList(idx + 1, candidates.size());
                break;
            }
        }

        List<String> chunks = new ArrayList<>();
        List<String> pending = new ArrayList<>();
        for (String piece : splitLiteral(text, separator)) {
            if (piece.length() < this.chunkSize) {
                pending.add(piece);
                continue;
            }
            // Flush the small pieces collected so far to keep chunks in document order.
            if (!pending.isEmpty()) {
                chunks.addAll(mergeSplits(pending, separator));
                pending.clear();
            }
            if (finerSeparators.isEmpty()) {
                // No separator can shorten this piece further; emit it as-is rather than
                // recursing forever on the same input.
                chunks.add(piece);
            } else {
                chunks.addAll(splitRecursively(piece, finerSeparators));
            }
        }
        if (!pending.isEmpty()) {
            chunks.addAll(mergeSplits(pending, separator));
        }
        return chunks;
    }

    /**
     * Splits {@code text} on a literal separator. An empty separator splits between code points
     * so that surrogate pairs stay intact. Trailing empty pieces are dropped.
     */
    private static List<String> splitLiteral(String text, String separator) {
        if (separator.isEmpty()) {
            List<String> pieces = new ArrayList<>(text.length());
            int pos = 0;
            while (pos < text.length()) {
                int width = Character.charCount(text.codePointAt(pos));
                pieces.add(text.substring(pos, pos + width));
                pos += width;
            }
            return pieces;
        }
        // Quote the separator: String.split interprets its argument as a regex, and an
        // unquoted "." would match every character.
        return Arrays.asList(text.split(Pattern.quote(separator)));
    }

    /**
     * Greedily joins consecutive pieces with {@code str} into chunks no longer than
     * {@code chunkSize}. After a chunk is emitted, pieces are dropped from its front until at
     * most {@code chunkOverlap} remains and the next piece fits; what remains starts the next chunk.
     *
     * @param list pieces to merge, in order
     * @param str  separator re-inserted between merged pieces
     * @return the merged chunks
     */
    private List<String> mergeSplits(List<String> list, String str) {
        int separatorLength = str.length();
        List<String> chunks = new ArrayList<>();
        List<String> window = new ArrayList<>();
        // Length of the window's pieces joined by the separator.
        int total = 0;
        for (String piece : list) {
            int pieceLength = piece.length();
            if (total + pieceLength + (window.isEmpty() ? 0 : separatorLength) > this.chunkSize) {
                if (total > this.chunkSize) {
                    System.out.println("Warning: Created a chunk of size " + total + ", which is longer than the specified " + this.chunkSize);
                }
                if (!window.isEmpty()) {
                    chunks.add(String.join(str, window));
                    // Shrink from the front while the window exceeds the overlap budget, or
                    // while the incoming piece still would not fit and there is something to drop.
                    while (total > this.chunkOverlap
                            || (total + pieceLength + (window.isEmpty() ? 0 : separatorLength) > this.chunkSize
                                    && total > 0)) {
                        total -= window.get(0).length() + (window.size() > 1 ? separatorLength : 0);
                        window.remove(0);
                    }
                }
            }
            window.add(piece);
            total += pieceLength + (window.size() > 1 ? separatorLength : 0);
        }
        if (!window.isEmpty()) {
            chunks.add(String.join(str, window));
        }
        return chunks;
    }
}
