package cn.yantu.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

/* loaded from: input_file:cn/yantu/util/TextSplitter.class */
public class TextSplitter {
    private int chunkSize;
    private int chunkOverlap;
    private List<String> separators;
    private boolean stripWhitespace;

    /* loaded from: input_file:cn/yantu/util/TextSplitter$TextSplitterBuilder.class */
    public static class TextSplitterBuilder {
        private int chunkSize;
        private int chunkOverlap;
        private List<String> separators;
        private boolean stripWhitespace;

        TextSplitterBuilder() {
        }

        public TextSplitterBuilder chunkSize(int i) {
            this.chunkSize = i;
            return this;
        }

        public TextSplitterBuilder chunkOverlap(int i) {
            this.chunkOverlap = i;
            return this;
        }

        public TextSplitterBuilder separators(List<String> list) {
            this.separators = list;
            return this;
        }

        public TextSplitterBuilder stripWhitespace(boolean z) {
            this.stripWhitespace = z;
            return this;
        }

        public TextSplitter build() {
            return new TextSplitter(this.chunkSize, this.chunkOverlap, this.separators, this.stripWhitespace);
        }

        public String toString() {
            return "TextSplitter.TextSplitterBuilder(chunkSize=" + this.chunkSize + ", chunkOverlap=" + this.chunkOverlap + ", separators=" + this.separators + ", stripWhitespace=" + this.stripWhitespace + ")";
        }
    }

    public TextSplitter(int i, int i2, List<String> list, boolean z) {
        if (i2 > i) {
            throw new RuntimeException("chunkSize should be smaller than chunkOverlap!");
        }
        this.chunkSize = i;
        this.chunkOverlap = i2;
        this.separators = list;
        this.stripWhitespace = z;
    }

    public static List<String> split(String str, int i, int i2) {
        TextSplitter build = builder().chunkSize(i).chunkOverlap(i2).separators(Arrays.asList("\\n", "。", "")).stripWhitespace(true).build();
        return build.splitText(str, build.getSeparators());
    }

    public List<String> splitText(String str, List<String> list) {
        ArrayList arrayList = new ArrayList();
        String str2 = list.get(list.size() - 1);
        List<String> arrayList2 = new ArrayList();
        int i = 0;
        while (true) {
            if (i >= list.size()) {
                break;
            }
            String str3 = list.get(i);
            if (str3.isEmpty()) {
                str2 = str3;
                break;
            }
            if (str.contains(str3)) {
                str2 = str3;
                arrayList2 = list.subList(i + 1, list.size());
                break;
            }
            i++;
        }
        List<String> splitTextWithRegex = splitTextWithRegex(str, str2);
        ArrayList arrayList3 = new ArrayList();
        for (String str4 : splitTextWithRegex) {
            if (str4.length() < this.chunkSize) {
                arrayList3.add(str4);
            } else {
                if (!arrayList3.isEmpty()) {
                    arrayList.addAll(mergeSplits(arrayList3, ""));
                    arrayList3.clear();
                }
                if (arrayList2.isEmpty()) {
                    arrayList.add(str4);
                } else {
                    arrayList.addAll(splitText(str4, arrayList2));
                }
            }
        }
        if (!arrayList3.isEmpty()) {
            arrayList.addAll(mergeSplits(arrayList3, ""));
        }
        return arrayList;
    }

    private List<String> splitTextWithRegex(String str, String str2) {
        LinkedList linkedList = new LinkedList();
        if (str2.isEmpty()) {
            linkedList.addAll(Arrays.asList(str.split(str2)));
        } else {
            String[] split = str.split(str2);
            if (split.length > 0) {
                linkedList.add(split[0]);
            }
            for (int i = 1; i < split.length; i++) {
                linkedList.add(str2 + split[i]);
            }
        }
        linkedList.removeIf((v0) -> {
            return v0.isEmpty();
        });
        return linkedList;
    }

    private List<String> mergeSplits(List<String> list, String str) {
        int length = str.length();
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        int i = 0;
        for (String str2 : list) {
            int length2 = str2.length();
            int i2 = i + length2;
            if (!arrayList2.isEmpty()) {
                i2 += length;
            }
            if (i2 > this.chunkSize && !arrayList2.isEmpty()) {
                String joinDocs = joinDocs(arrayList2, str);
                if (joinDocs != null) {
                    arrayList.add(joinDocs);
                }
                int i3 = i + length2;
                if (!arrayList2.isEmpty()) {
                    i3 += length;
                }
                while (true) {
                    if (i <= this.chunkOverlap && (i3 <= this.chunkSize || i <= 0)) {
                        break;
                    }
                    i -= arrayList2.get(0).length();
                    if (arrayList2.size() > 1) {
                        i -= length;
                    }
                    arrayList2.remove(0);
                    i3 = i + length2;
                    if (!arrayList2.isEmpty()) {
                        i3 += length;
                    }
                }
            }
            arrayList2.add(str2);
            i += length2;
            if (arrayList2.size() > 1) {
                i += length;
            }
        }
        String joinDocs2 = joinDocs(arrayList2, str);
        if (joinDocs2 != null) {
            arrayList.add(joinDocs2);
        }
        return arrayList;
    }

    private String joinDocs(List<String> list, String str) {
        StringBuilder sb = new StringBuilder(list.get(0));
        for (int i = 1; i < list.size(); i++) {
            sb.append(str);
            sb.append(list.get(i));
        }
        String sb2 = sb.toString();
        if (this.stripWhitespace) {
            sb2 = sb2.strip();
        }
        if (sb2.isEmpty()) {
            return null;
        }
        return sb2;
    }

    public static TextSplitterBuilder builder() {
        return new TextSplitterBuilder();
    }

    public int getChunkSize() {
        return this.chunkSize;
    }

    public int getChunkOverlap() {
        return this.chunkOverlap;
    }

    public List<String> getSeparators() {
        return this.separators;
    }

    public boolean isStripWhitespace() {
        return this.stripWhitespace;
    }

    public void setChunkSize(int i) {
        this.chunkSize = i;
    }

    public void setChunkOverlap(int i) {
        this.chunkOverlap = i;
    }

    public void setSeparators(List<String> list) {
        this.separators = list;
    }

    public void setStripWhitespace(boolean z) {
        this.stripWhitespace = z;
    }

    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof TextSplitter)) {
            return false;
        }
        TextSplitter textSplitter = (TextSplitter) obj;
        if (!textSplitter.canEqual(this) || getChunkSize() != textSplitter.getChunkSize() || getChunkOverlap() != textSplitter.getChunkOverlap() || isStripWhitespace() != textSplitter.isStripWhitespace()) {
            return false;
        }
        List<String> separators = getSeparators();
        List<String> separators2 = textSplitter.getSeparators();
        return separators == null ? separators2 == null : separators.equals(separators2);
    }

    protected boolean canEqual(Object obj) {
        return obj instanceof TextSplitter;
    }

    public int hashCode() {
        int chunkSize = (((((1 * 59) + getChunkSize()) * 59) + getChunkOverlap()) * 59) + (isStripWhitespace() ? 79 : 97);
        List<String> separators = getSeparators();
        return (chunkSize * 59) + (separators == null ? 43 : separators.hashCode());
    }

    public String toString() {
        return "TextSplitter(chunkSize=" + getChunkSize() + ", chunkOverlap=" + getChunkOverlap() + ", separators=" + getSeparators() + ", stripWhitespace=" + isStripWhitespace() + ")";
    }
}
