/*
 * Decompiled with CFR 0.152.
 */
package com.knuddels.jtokkit;

import com.knuddels.jtokkit.GptBytePairEncoding;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.GptBytePairEncodingParams;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Static factory for the predefined byte-pair encodings ({@code r50k_base},
 * {@code p50k_base}, {@code p50k_edit}, {@code cl100k_base}) and for encodings
 * built from caller-supplied parameters.
 *
 * <p>Each predefined encoding is assembled from a split pattern, a rank table
 * read from a {@code .tiktoken} classpath resource, and a fixed map of special
 * tokens. The class is not instantiable.
 */
final class EncodingFactory {
    private static final String ENDOFTEXT = "<|endoftext|>";
    private static final String FIM_PREFIX = "<|fim_prefix|>";
    private static final String FIM_MIDDLE = "<|fim_middle|>";
    private static final String FIM_SUFFIX = "<|fim_suffix|>";
    private static final String ENDOFPROMPT = "<|endofprompt|>";

    /** Split pattern shared by the r50k_base, p50k_base and p50k_edit encodings. */
    private static final String X50K_PATTERN = "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+";

    /** Split pattern used by the cl100k_base encoding. */
    private static final String CL100K_PATTERN = "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+";

    private static final Map<String, Integer> SPECIAL_TOKENS_X50K_BASE;
    private static final Map<String, Integer> SPECIAL_TOKENS_P50K_EDIT;
    private static final Map<String, Integer> SPECIAL_TOKENS_CL100K_BASE;

    static {
        // Each table is built in its own typed map and published as an unmodifiable view.
        Map<String, Integer> x50kBase = new HashMap<>();
        x50kBase.put(ENDOFTEXT, 50256);
        SPECIAL_TOKENS_X50K_BASE = Collections.unmodifiableMap(x50kBase);

        Map<String, Integer> p50kEdit = new HashMap<>();
        p50kEdit.put(ENDOFTEXT, 50256);
        p50kEdit.put(FIM_PREFIX, 50281);
        p50kEdit.put(FIM_MIDDLE, 50282);
        p50kEdit.put(FIM_SUFFIX, 50283);
        SPECIAL_TOKENS_P50K_EDIT = Collections.unmodifiableMap(p50kEdit);

        Map<String, Integer> cl100kBase = new HashMap<>();
        cl100kBase.put(ENDOFTEXT, 100257);
        cl100kBase.put(FIM_PREFIX, 100258);
        cl100kBase.put(FIM_MIDDLE, 100259);
        cl100kBase.put(FIM_SUFFIX, 100260);
        cl100kBase.put(ENDOFPROMPT, 100276);
        SPECIAL_TOKENS_CL100K_BASE = Collections.unmodifiableMap(cl100kBase);
    }

    private EncodingFactory() {
    }

    /**
     * Creates the {@code r50k_base} encoding from the bundled
     * {@code r50k_base.tiktoken} rank table.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the rank table resource is missing, unreadable or malformed
     */
    public static Encoding r50kBase() {
        return fromPredefinedParameters("r50k_base", X50K_PATTERN, "/com/knuddels/jtokkit/r50k_base.tiktoken", SPECIAL_TOKENS_X50K_BASE);
    }

    /**
     * Creates the {@code p50k_base} encoding from the bundled
     * {@code p50k_base.tiktoken} rank table.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the rank table resource is missing, unreadable or malformed
     */
    public static Encoding p50kBase() {
        return fromPredefinedParameters("p50k_base", X50K_PATTERN, "/com/knuddels/jtokkit/p50k_base.tiktoken", SPECIAL_TOKENS_X50K_BASE);
    }

    /**
     * Creates the {@code p50k_edit} encoding. It reads the same
     * {@code p50k_base.tiktoken} rank table as {@link #p50kBase()} but registers
     * the additional fill-in-the-middle special tokens.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the rank table resource is missing, unreadable or malformed
     */
    public static Encoding p50kEdit() {
        return fromPredefinedParameters("p50k_edit", X50K_PATTERN, "/com/knuddels/jtokkit/p50k_base.tiktoken", SPECIAL_TOKENS_P50K_EDIT);
    }

    /**
     * Creates the {@code cl100k_base} encoding from the bundled
     * {@code cl100k_base.tiktoken} rank table.
     *
     * @return a new encoding instance
     * @throws IllegalStateException if the rank table resource is missing, unreadable or malformed
     */
    public static Encoding cl100kBase() {
        return fromPredefinedParameters("cl100k_base", CL100K_PATTERN, "/com/knuddels/jtokkit/cl100k_base.tiktoken", SPECIAL_TOKENS_CL100K_BASE);
    }

    /**
     * Creates an encoding from caller-supplied parameters.
     *
     * @param parameters the parameters handed to the {@link GptBytePairEncoding} constructor
     * @return a new encoding instance
     */
    public static Encoding fromParameters(GptBytePairEncodingParams parameters) {
        return new GptBytePairEncoding(parameters);
    }

    /**
     * Builds one of the predefined encodings.
     *
     * @param name          the encoding name passed to the parameter object
     * @param patternString the regular expression used to split input text
     * @param fileName      classpath location of the rank table
     * @param specialTokens special token strings mapped to their ids
     * @return a new encoding instance
     * @throws IllegalStateException if the rank table resource is missing, unreadable or malformed
     */
    private static Encoding fromPredefinedParameters(String name, String patternString, String fileName, Map<String, Integer> specialTokens) {
        Pattern regex;
        try {
            regex = Pattern.compile(patternString, Pattern.UNICODE_CHARACTER_CLASS);
        } catch (IllegalArgumentException exception) {
            // Fall back to the flag-less pattern when the runtime rejects the flag.
            // NOTE(review): presumably needed for runtimes such as Android that do not
            // support UNICODE_CHARACTER_CLASS - confirm.
            regex = Pattern.compile(patternString);
        }
        GptBytePairEncodingParams params = new GptBytePairEncodingParams(name, regex, loadMergeableRanks(fileName), specialTokens);
        return fromParameters(params);
    }

    /**
     * Reads a rank table from the classpath. Every line must consist of a
     * Base64-encoded token, whitespace, and a decimal rank.
     *
     * <p>The returned map is keyed by {@code byte[]}, and arrays hash by identity,
     * so it cannot be queried by array content; it is only suitable for iterating
     * over its entries.
     *
     * @param fileName classpath location of the rank table, resolved against this class
     * @return a mutable map from decoded token bytes to rank
     * @throws IllegalStateException if the resource is missing, cannot be read, or
     *                               contains a line that cannot be parsed
     */
    private static Map<byte[], Integer> loadMergeableRanks(String fileName) {
        try (InputStream in = EncodingFactory.class.getResourceAsStream(fileName)) {
            if (in == null) {
                throw new IllegalStateException("Could not find " + fileName + " in resources");
            }
            Map<byte[], Integer> mergeableRanks = new HashMap<>();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    String[] parts = line.split("\\s+", 2);
                    if (parts.length != 2) {
                        throw new IllegalStateException("Invalid line in " + fileName + ": " + line);
                    }
                    try {
                        byte[] token = Base64.getDecoder().decode(parts[0].getBytes(StandardCharsets.UTF_8));
                        int rank = Integer.parseInt(parts[1]);
                        mergeableRanks.put(token, rank);
                    } catch (IllegalArgumentException e) {
                        // Covers both invalid Base64 and a non-numeric rank
                        // (NumberFormatException is an IllegalArgumentException);
                        // rethrow with the file and line so the bad entry can be located.
                        throw new IllegalStateException("Invalid line in " + fileName + ": " + line, e);
                    }
                }
            }
            return mergeableRanks;
        } catch (IOException e) {
            throw new IllegalStateException("Could not load " + fileName + " from resources", e);
        }
    }
}

