package org.icij.datashare.text.nlp;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.icij.datashare.function.ThrowingFunctions;
import org.icij.datashare.reflect.EnumTypeToken;
import org.icij.datashare.text.Document;
import org.icij.datashare.text.Language;
import org.icij.datashare.text.NamedEntity;

/* loaded from: input_file:org/icij/datashare/text/nlp/Pipeline.class */
public interface Pipeline {
    /** Default character encoding: UTF-8. */
    Charset DEFAULT_ENCODING = StandardCharsets.UTF_8;

    /** Default target stages: a single-element immutable list holding {@link NlpStage#NER}. */
    List<NlpStage> DEFAULT_TARGET_STAGES = Collections.singletonList(NlpStage.NER);

    /** Default entity categories: PERSON, ORGANIZATION and LOCATION (fixed-size list). */
    List<NamedEntity.Category> DEFAULT_ENTITIES =
            Arrays.asList(
                    NamedEntity.Category.PERSON,
                    NamedEntity.Category.ORGANIZATION,
                    NamedEntity.Category.LOCATION);

    /** Default value of the caching flag. */
    boolean DEFAULT_CACHING = true;

    /* loaded from: input_file:org/icij/datashare/text/nlp/Pipeline$Property.class */
    /**
     * Keys of the configuration properties understood by a pipeline.
     *
     * <p>The textual key of each constant is given by {@link #getName()}.
     */
    public enum Property {
        STAGES,
        ENTITIES,
        CACHING,
        LANGUAGE,
        ENCODING;

        /**
         * Curried builder producing a {@link Properties} object from, in order, the list of
         * stages, the list of entity categories and the caching flag.
         *
         * <p>The resulting properties hold three entries, keyed by the names of
         * {@link #STAGES}, {@link #ENTITIES} and {@link #CACHING}. The two lists are rendered
         * through {@code ThrowingFunctions.joinComma}; the flag through
         * {@link String#valueOf(Object)}.
         */
        public static Function<List<NlpStage>, Function<List<NamedEntity.Category>, Function<Boolean, Properties>>> build =
                stages -> entities -> caching -> {
                    Properties properties = new Properties();
                    properties.setProperty(STAGES.getName(), ThrowingFunctions.joinComma.apply(stages));
                    // Each lambda parameter needs its own name: the entity categories, not
                    // the stages, must end up under the ENTITIES key.
                    properties.setProperty(ENTITIES.getName(), ThrowingFunctions.joinComma.apply(entities));
                    properties.setProperty(CACHING.getName(), String.valueOf(caching));
                    return properties;
                };

        /**
         * Returns the property key for this constant: the constant name lower-cased, with
         * underscores replaced by hyphens.
         *
         * @return the key, e.g. {@code "stages"} for {@link #STAGES}
         */
        public String getName() {
            // Locale.ROOT keeps the key stable regardless of the JVM default locale
            // (a Turkish locale would otherwise lower-case 'I' to a dotless 'ı').
            return name().toLowerCase(Locale.ROOT).replace('_', '-');
        }
    }

    /* loaded from: input_file:org/icij/datashare/text/nlp/Pipeline$Type.class */
    /**
     * Known pipeline kinds, each carrying a numeric code and a bit mask derived from it.
     */
    public enum Type implements EnumTypeToken {
        // Explicit casts are required: int literals are not implicitly narrowed to short
        // when passed as constructor arguments.
        TEST((short) -1),
        CORENLP((short) 0),
        GATENLP((short) 1),
        IXAPIPE((short) 2),
        MITIE((short) 3),
        OPENNLP((short) 4),
        EMAIL((short) 5);

        /** Class name computed once per constant via {@code buildClassName(Pipeline.class, this)}. */
        private final String className = buildClassName(Pipeline.class, this);

        /** Numeric code of this type. */
        public final short code;

        /** Bit mask computed as {@code 1 << code}. */
        public final int mask;

        /**
         * @param s numeric code of the constant
         */
        Type(short s) {
            this.code = s;
            // Java only uses the low five bits of an int shift distance, so for
            // TEST (code -1) this evaluates to 1 << 31.
            this.mask = 1 << s;
        }

        /**
         * Looks up the constant whose {@link #code} equals the given value.
         *
         * @param i code to look for
         * @return the matching constant
         * @throws IllegalArgumentException if no constant has that code
         */
        public static Type fromCode(int i) {
            for (Type type : values()) {
                if (type.code == i) {
                    return type;
                }
            }
            throw new IllegalArgumentException("cannot find code " + i);
        }

        /**
         * @return the class name computed for this constant at construction time
         */
        @Override // org.icij.datashare.reflect.EnumTypeToken
        public String getClassName() {
            return this.className;
        }

        /**
         * Resolves a constant through {@code EnumTypeToken.parse}.
         *
         * @param str textual representation of the type
         * @return the resolved constant
         * @throws IllegalArgumentException if {@code EnumTypeToken.parse} yields no result
         */
        public static Type parse(String str) {
            return (Type) EnumTypeToken.parse(Type.class, str)
                    .orElseThrow(() -> new IllegalArgumentException("unknown pipeline type: " + str));
        }

        /**
         * Resolves a constant from a class name through {@code EnumTypeToken.parseClassName}.
         *
         * @param str class name to resolve
         * @return the result of {@code EnumTypeToken.parseClassName} for this enum
         */
        public static Optional<Type> fromClassName(String str) {
            return EnumTypeToken.parseClassName(Pipeline.class, Type.class, str);
        }

        /**
         * Parses a comma-separated list of constant names.
         *
         * <p>Whitespace around each name is ignored, so {@code "CORENLP, OPENNLP"} is accepted.
         *
         * @param str comma-separated constant names; may be {@code null} or empty
         * @return the parsed constants; a new empty {@link HashSet} when {@code str} is
         *     {@code null} or empty
         * @throws IllegalArgumentException if a name matches no constant
         */
        public static Set<Type> parseAll(String str) {
            if (str == null || str.isEmpty()) {
                return new HashSet<>();
            }
            return Arrays.stream(str.split(","))
                    .map(String::trim)
                    .map(Type::valueOf)
                    .collect(Collectors.toSet());
        }
    }

    /**
     * Collects the given types into a new mutable {@link HashSet}.
     *
     * @param typeArr types to include; duplicates collapse into one element
     * @return a new set containing the given types
     */
    static Set<Type> set(Type... typeArr) {
        // Diamond instead of the raw HashSet avoids an unchecked conversion.
        return new HashSet<>(Arrays.asList(typeArr));
    }

    /**
     * @return the {@link Type} of this pipeline
     */
    Type getType();

    /**
     * Initializes the pipeline for the given language.
     *
     * @param language language to initialize for
     * @return NOTE(review): presumably {@code true} when initialization succeeded — confirm
     *     against implementations
     * @throws InterruptedException if the calling thread is interrupted
     */
    boolean initialize(Language language) throws InterruptedException;

    /**
     * Processes a document and returns the named entities produced.
     *
     * @param document document to process
     * @return the named entities produced for the document
     * @throws InterruptedException if the calling thread is interrupted
     */
    List<NamedEntity> process(Document document) throws InterruptedException;

    /**
     * Processes a document with two additional integer arguments.
     *
     * @param document document to process
     * @param i NOTE(review): meaning not visible here (name looks decompiler-generated);
     *     presumably bounds a portion of the document content — confirm
     * @param i2 NOTE(review): see {@code i} — confirm against implementations
     * @return the named entities produced
     * @throws InterruptedException if the calling thread is interrupted
     */
    List<NamedEntity> process(Document document, int i, int i2) throws InterruptedException;

    /**
     * Terminates the pipeline for the given language.
     *
     * @param language language to terminate for
     * @throws InterruptedException if the calling thread is interrupted
     */
    void terminate(Language language) throws InterruptedException;

    /**
     * Tells whether this pipeline supports the given stage for the given language.
     *
     * @param nlpStage stage to check
     * @param language language to check
     * @return {@code true} if the stage/language combination is supported
     */
    boolean supports(NlpStage nlpStage, Language language);

    /**
     * @return the entity categories targeted by this pipeline
     */
    List<NamedEntity.Category> getTargetEntities();

    /**
     * @return the stages of this pipeline
     */
    List<NlpStage> getStages();

    /**
     * @return the caching flag of this pipeline
     */
    boolean isCaching();

    /**
     * @return the character encoding of this pipeline
     */
    Charset getEncoding();

    /**
     * Returns the part-of-speech tag set for the given language, if any.
     *
     * @param language language to look up
     * @return the tag set name, or an empty {@link Optional} when there is none
     */
    Optional<String> getPosTagSet(Language language);
}
