package io.annot8.components.gazetteers.processors;

import io.annot8.api.annotations.Annotation;
import io.annot8.api.annotations.Group;
import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.bounds.SpanBounds;
import io.annot8.common.data.content.Text;
import io.annot8.components.base.processors.AbstractTextProcessor;
import io.annot8.components.gazetteers.processors.AhoCorasick.Settings;
import io.annot8.utils.text.PluralUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.ahocorasick.trie.Emit;
import org.ahocorasick.trie.Trie;

/* loaded from: input_file:io/annot8/components/gazetteers/processors/AhoCorasick.class */
public abstract class AhoCorasick<S extends Settings> extends AbstractProcessorDescriptor<Processor, S> {

    /* loaded from: input_file:io/annot8/components/gazetteers/processors/AhoCorasick$Processor.class */
    /**
     * Text processor that looks up gazetteer terms in a {@link Text} content using an
     * Aho-Corasick {@link Trie}, creates one span annotation per match and groups together
     * annotations whose matched keywords share the same set of aliases.
     */
    public static class Processor extends AbstractTextProcessor {
        /** Matches exactly one horizontal whitespace character; compiled once instead of per character. */
        private static final java.util.regex.Pattern HORIZONTAL_WHITESPACE =
                java.util.regex.Pattern.compile("\\h");

        private final Trie trie;
        private final Gazetteer gazetteer;
        private final Settings settings;

        /**
         * Creates the processor and eagerly builds the search trie.
         *
         * <p>Note: {@link #buildTrie(Gazetteer, Settings)} is overridable and is invoked from this
         * constructor, so overriding implementations must not rely on subclass state.
         *
         * @param gazetteer source of keywords, aliases and additional data
         * @param settings matching and annotation options
         */
        public Processor(Gazetteer gazetteer, Settings settings) {
            this.gazetteer = gazetteer;
            this.settings = settings;
            this.trie = buildTrie(gazetteer, settings);
        }

        /**
         * Builds a whole-word trie from every value of the gazetteer.
         *
         * @param gazetteer supplies the keywords via {@code getValues()}
         * @param settings controls case sensitivity and whether plural forms are added
         * @return the built trie
         */
        protected Trie buildTrie(Gazetteer gazetteer, Settings settings) {
            Trie.TrieBuilder trieBuilder = Trie.builder().onlyWholeWords();
            if (!settings.isCaseSensitive()) {
                trieBuilder = trieBuilder.ignoreCase();
            }
            final boolean includePlurals = settings.isPlurals();
            for (String keyword : gazetteer.getValues()) {
                trieBuilder = trieBuilder.addKeyword(keyword);
                if (includePlurals) {
                    trieBuilder = trieBuilder.addKeyword(PluralUtils.pluralise(keyword));
                }
            }
            return trieBuilder.build();
        }

        /**
         * Annotates every gazetteer match found in the text and groups matches that are aliases
         * of one another.
         *
         * <p>(The decompiler reports that the access modifier of this method was changed from
         * {@code protected}.)
         *
         * @param text the content to search and annotate
         */
        public void process(Text text) {
            String data = (String) text.getData();
            TransformedString searchable =
                    this.settings.isExactWhitespace() ? noopString(data) : normaliseString(data);
            Map<Integer, Integer> toOriginalIndex = searchable.getMapping();
            Collection<Emit> emits = this.trie.parseText(searchable.getTransformedString());

            // Annotations keyed by the (normalised) alias set of the keyword that produced them.
            Map<String, List<Annotation>> annotationsByAliasKey = new HashMap<>();
            for (Emit emit : emits) {
                // emit.getEnd() is used as an inclusive index, so the exclusive end of the span is
                // one past it; both mappings carry an entry for the end-of-text position.
                int begin = toOriginalIndex.get(emit.getStart());
                int end = toOriginalIndex.get(emit.getEnd() + 1);

                Annotation.Builder builder = (Annotation.Builder) text.getAnnotations().create()
                        .withBounds(new SpanBounds(begin, end))
                        .withType(this.settings.getType());
                if (this.settings.getSubType() != null) {
                    builder = (Annotation.Builder) builder.withProperty("subtype", this.settings.getSubType());
                }
                if (this.settings.isAdditionalData()) {
                    for (Map.Entry<String, Object> entry : this.gazetteer.getAdditionalData(emit.getKeyword()).entrySet()) {
                        builder = (Annotation.Builder) builder.withProperty(entry.getKey(), entry.getValue());
                    }
                }

                String aliasKey = generateKey(this.gazetteer.getAliases(emit.getKeyword()));
                annotationsByAliasKey
                        .computeIfAbsent(aliasKey, key -> new ArrayList<>())
                        .add((Annotation) builder.save());
            }

            // Only alias sets that matched more than once are worth grouping.
            for (List<Annotation> aliases : annotationsByAliasKey.values()) {
                if (aliases.size() > 1) {
                    Group.Builder groupBuilder =
                            (Group.Builder) text.getItem().getGroups().create().withType("group/aliases");
                    aliases.forEach(annotation -> groupBuilder.withAnnotation("alias", annotation));
                    groupBuilder.save();
                }
            }
        }

        /** Builds an order- and case-insensitive key for a set of aliases: lower-cased, sorted, joined by '|'. */
        private String generateKey(Collection<String> collection) {
            return collection.stream()
                    .map(String::toLowerCase)
                    .sorted()
                    .collect(Collectors.joining("|"));
        }

        /**
         * Wraps a string without transforming it, using an identity index mapping.
         *
         * <p>The mapping covers {@code 0..str.length()} inclusive: the final entry is the exclusive
         * end position needed when a match finishes on the last character of the text.
         *
         * @param str the text to wrap
         * @return a transformed string whose transformed text equals the original
         */
        protected static TransformedString noopString(String str) {
            Map<Integer, Integer> identity = new HashMap<>();
            for (int i = 0; i <= str.length(); i++) {
                identity.put(i, i);
            }
            return new TransformedString(str, str, identity);
        }

        /**
         * Collapses every run of horizontal whitespace into a single space and records, for each
         * index of the normalised text (plus its end position), the corresponding index in the
         * original text.
         *
         * @param str the text to normalise
         * @return the original text, the normalised text and the normalised-to-original index map
         */
        protected static TransformedString normaliseString(String str) {
            StringBuilder normalised = new StringBuilder(str.length());
            Map<Integer, Integer> toOriginalIndex = new HashMap<>();
            boolean previousWasWhitespace = false;
            for (int i = 0; i < str.length(); i++) {
                // When a whitespace character is dropped the builder length does not advance, so
                // the next iteration overwrites this entry with the later original index.
                toOriginalIndex.put(normalised.length(), i);
                char current = str.charAt(i);
                boolean whitespace = isHorizontalWhitespace(current);
                if (!whitespace || !previousWasWhitespace) {
                    normalised.append(whitespace ? ' ' : current);
                }
                previousWasWhitespace = whitespace;
            }
            // End-of-text entry so that matches ending on the last character can be mapped back.
            toOriginalIndex.put(normalised.length(), str.length());
            return new TransformedString(str, normalised.toString(), toOriginalIndex);
        }

        /** Returns whether the single character matches the regex class {@code \h}. */
        private static boolean isHorizontalWhitespace(char c) {
            return HORIZONTAL_WHITESPACE.matcher(String.valueOf(c)).matches();
        }
    }

    /* loaded from: input_file:io/annot8/components/gazetteers/processors/AhoCorasick$Settings.class */
    /**
     * Configuration for the Aho-Corasick gazetteer processor: how text is compared and how the
     * resulting annotations are typed and populated.
     */
    public static class Settings implements io.annot8.api.settings.Settings {
        // Matching behaviour
        private boolean caseSensitive = false;
        private boolean exactWhitespace = false;
        private boolean plurals = true;

        // Annotation output
        private String type = "entity/unknown";
        private String subType = null;
        private boolean additionalData = false;

        /**
         * Checks that the settings are usable.
         *
         * @return {@code true} when an annotation type has been set and is not empty
         */
        public boolean validate() {
            if (this.type == null) {
                return false;
            }
            return !this.type.isEmpty();
        }

        @Description("Should comparisons be done case-sensitively?")
        public boolean isCaseSensitive() {
            return this.caseSensitive;
        }

        /** @param caseSensitive whether comparisons are case-sensitive */
        public void setCaseSensitive(boolean caseSensitive) {
            this.caseSensitive = caseSensitive;
        }

        @Description("Should whitespace in document be preserved?")
        public boolean isExactWhitespace() {
            return this.exactWhitespace;
        }

        /** @param exactWhitespace whether document whitespace is preserved when matching */
        public void setExactWhitespace(boolean exactWhitespace) {
            this.exactWhitespace = exactWhitespace;
        }

        @Description("The annotation type")
        public String getType() {
            return this.type;
        }

        /** @param type the type given to created annotations */
        public void setType(String type) {
            this.type = type;
        }

        @Description("The annotation subtype, or null")
        public String getSubType() {
            return this.subType;
        }

        /** @param subType the subtype of created annotations, or {@code null} for none */
        public void setSubType(String subType) {
            this.subType = subType;
        }

        @Description("Should additional data be added to annotations as properties?")
        public boolean isAdditionalData() {
            return this.additionalData;
        }

        /** @param additionalData whether additional data is added to annotations as properties */
        public void setAdditionalData(boolean additionalData) {
            this.additionalData = additionalData;
        }

        @Description("Should we accept plurals as matches?")
        public boolean isPlurals() {
            return this.plurals;
        }

        /** @param plurals whether plurals are accepted as matches */
        public void setPlurals(boolean plurals) {
            this.plurals = plurals;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:io/annot8/components/gazetteers/processors/AhoCorasick$TransformedString.class */
    /**
     * Immutable holder pairing an original string with a transformed version of it and an
     * index mapping between the two. The mapping is stored and returned as given (no copy).
     */
    public static class TransformedString {
        private final String source;
        private final String result;
        private final Map<Integer, Integer> indexMapping;

        /**
         * @param str the original string
         * @param str2 the transformed string
         * @param map the index mapping associated with the two strings
         */
        public TransformedString(String str, String str2, Map<Integer, Integer> map) {
            source = str;
            result = str2;
            indexMapping = map;
        }

        /** @return the original string passed to the constructor */
        public String getOriginalString() {
            return source;
        }

        /** @return the transformed string passed to the constructor */
        public String getTransformedString() {
            return result;
        }

        /** @return the index mapping passed to the constructor (same instance) */
        public Map<Integer, Integer> getMapping() {
            return indexMapping;
        }
    }

    /**
     * Describes what this processor consumes and produces: it processes {@link Text} content,
     * creates span-bounded annotations of the configured type, and creates alias groups.
     *
     * @return the capabilities derived from the current settings
     */
    public Capabilities capabilities() {
        Settings settings = (Settings) getSettings();
        return new SimpleCapabilities.Builder()
                .withProcessesContent(Text.class)
                .withCreatesAnnotations(settings.getType(), SpanBounds.class)
                // Must match the group type actually created by Processor.process.
                .withCreatesGroups("group/aliases")
                .build();
    }
}
