package org.icij.datashare.text;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonGetter;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import me.xuender.unidecode.Unidecode;
import org.icij.datashare.Entity;
import org.icij.datashare.function.ThrowingFunction;
import org.icij.datashare.function.ThrowingFunctions;
import org.icij.datashare.text.indexing.IndexId;
import org.icij.datashare.text.indexing.IndexParent;
import org.icij.datashare.text.indexing.IndexRoot;
import org.icij.datashare.text.indexing.IndexType;
import org.icij.datashare.text.nlp.Annotations;
import org.icij.datashare.text.nlp.NlpStage;
import org.icij.datashare.text.nlp.NlpTag;
import org.icij.datashare.text.nlp.Pipeline;

/**
 * A mention of a named entity (person, organization, location, ...) found in a document.
 *
 * <p>The {@link #getId() id} is a hash of the document id, the offsets, the extractor and the
 * normalized mention, joined with {@code "|"}. {@link #equals(Object)} and {@link #hashCode()}
 * rely on that id only.
 *
 * <p>All fields are assigned once at construction time except the {@code hidden} flag, which can
 * be toggled through {@link #hide()} and {@link #unhide()}.
 *
 * <p>NOTE(review): {@code LOGGER} and {@code HASHER} are referenced unqualified and are not
 * declared in this class; presumably they are constants inherited from {@link Entity} - confirm.
 */
@IndexType("NamedEntity")
@JsonIgnoreProperties(ignoreUnknown = true)
public final class NamedEntity implements Entity {
    private static final long serialVersionUID = 1946532866377498L;

    /** Mention as passed to the constructor (never null nor empty). */
    private final String mention;

    /** Result of {@link #normalize(String)} applied to {@link #mention}; part of the id. */
    private final String mentionNorm;

    @IndexId
    @JsonIgnore
    private final String id;

    /** Never null: a null category given to the constructor is replaced by {@link Category#UNKNOWN}. */
    private final Category category;

    @IndexParent
    @JsonIgnore
    private final String documentId;

    @IndexRoot
    @JsonIgnore
    private final String rootDocument;

    /** Private copy of the offsets given at construction time. */
    private final List<Long> offsets = new LinkedList<>();
    private final Pipeline.Type extractor;
    private final Language extractorLanguage;
    private final String partsOfSpeech;

    /** Nullable: deserialization may leave it unset when the {@code isHidden} property is absent. */
    private Boolean hidden;

    /**
     * Category of a named entity, each with a short abbreviation used when parsing tagger labels.
     */
    public enum Category implements Serializable {
        PERSON("PERS"),
        ORGANIZATION("ORG"),
        LOCATION("LOC"),
        EMAIL("MAIL"),
        DATE("DATE"),
        MONEY("MON"),
        NUMBER("NUM"),
        NONE("NONE"),
        UNKNOWN("UNK");

        private static final long serialVersionUID = -1596432856473673L;
        private final String abbreviation;

        /**
         * Parses every label with {@link #parse(String)} and drops the ones resolving to
         * {@link #UNKNOWN}.
         */
        // NOTE(review): public and non-final, so it can be reassigned by any caller; kept as is to
        // leave the public interface untouched.
        public static ThrowingFunction<List<String>, List<Category>> parseAll = labels ->
                labels.stream()
                        .map(Category::parse)
                        .filter(parsed -> parsed != UNKNOWN)
                        .collect(Collectors.toList());

        Category(String str) {
            this.abbreviation = str;
        }

        public String getAbbreviation() {
            return this.abbreviation;
        }

        /**
         * Resolves a label to a category.
         *
         * <p>Resolution order, on the trimmed label:
         * <ol>
         *   <li>null, blank, {@code "0"} or {@code "O"} give {@link #NONE};</li>
         *   <li>a case-insensitive match on a constant name gives that constant;</li>
         *   <li>otherwise the label is passed through {@code removePattFrom("^I-")} (presumably
         *       stripping a leading {@code I-} prefix - confirm against ThrowingFunctions) and
         *       compared, ignoring case, with each abbreviation and with its first three
         *       characters;</li>
         *   <li>anything else gives {@link #UNKNOWN}.</li>
         * </ol>
         *
         * @param str label to resolve, may be null
         * @return the matching category, never null
         */
        public static Category parse(String str) {
            if (str == null) {
                return NONE;
            }
            // Trim once so that every step below sees the same text: previously only the
            // "0"/"O" test was trimmed and padded labels such as " person " ended up UNKNOWN.
            String label = str.trim();
            if (label.isEmpty() || label.equals("0") || label.equals("O")) {
                return NONE;
            }
            try {
                return valueOf(label.toUpperCase(Locale.ROOT));
            } catch (IllegalArgumentException notAConstantName) {
                // Not a constant name: fall back to abbreviation matching.
                String candidate = ThrowingFunctions.removePattFrom.apply("^I-").apply(label);
                for (Category category : values()) {
                    String abbreviation = category.getAbbreviation();
                    String shortForm = abbreviation.substring(0, Math.min(abbreviation.length(), 3));
                    if (candidate.equalsIgnoreCase(abbreviation) || candidate.equalsIgnoreCase(shortForm)) {
                        return category;
                    }
                }
                return UNKNOWN;
            }
        }
    }

    /**
     * Creates a visible ({@code hidden == false}) entity without parts of speech.
     *
     * @param category entity category; null is stored as {@link Category#UNKNOWN}
     * @param str      the mention text, must be neither null nor empty
     * @param list     offsets of the mention, must not be null; copied
     * @param str2     id of the document holding the mention
     * @param str3     id of the root document
     * @param type     pipeline that extracted the mention, must not be null
     * @param language language used by the extractor
     * @return the new entity
     * @throws IllegalArgumentException if the mention is null or empty
     * @throws NullPointerException     if {@code list} or {@code type} is null
     */
    public static NamedEntity create(Category category, String str, List<Long> list, String str2, String str3, Pipeline.Type type, Language language) {
        return new NamedEntity(category, str, list, str2, str3, type, language, false, null);
    }

    /**
     * Builds one entity per {@link NlpStage#NER} tag of the annotations, skipping the ones whose
     * category is {@link Category#UNKNOWN}.
     *
     * @param str         text the tag positions refer to
     * @param annotations annotations holding the NER tags and the document metadata
     * @return the entities, in the order of the NER tags
     */
    public static List<NamedEntity> allFrom(String str, Annotations annotations) {
        return annotations.get(NlpStage.NER).stream()
                .map(nerTag -> from(str, nerTag, annotations))
                .filter(entity -> entity.category != Category.UNKNOWN)
                .collect(Collectors.toList());
    }

    /**
     * Builds the entity described by a single tag.
     *
     * <p>The mention is the substring of {@code str} between the tag's begin and end positions,
     * passed through {@code ThrowingFunctions.removeNewLines}. The entity gets the tag's begin
     * position as its only offset.
     *
     * @param str         text the tag positions refer to
     * @param nlpTag      tag delimiting the mention and carrying its category
     * @param annotations source of the document id, root id, pipeline type and language
     * @return the new entity
     */
    public static NamedEntity from(String str, NlpTag nlpTag, Annotations annotations) {
        String mention = ThrowingFunctions.removeNewLines.apply(str.substring(nlpTag.getBegin(), nlpTag.getEnd()));
        // The part-of-speech lookup is only logged; its result is not stored on the entity.
        // NOTE(review): binarySearch assumes the POS tags are sorted per NlpTag.comparator - confirm.
        List<NlpTag> posTags = annotations.get(NlpStage.POS);
        int posTagIndex = Collections.binarySearch(posTags, nlpTag, NlpTag.comparator);
        // binarySearch returns an index >= 0 on a hit, so index 0 is a valid match too.
        if (posTagIndex >= 0) {
            LOGGER.info(posTagIndex + ", " + posTags.get(posTagIndex));
        }
        return create(nlpTag.getCategory(), mention, Arrays.asList(Long.valueOf(nlpTag.getBegin())), annotations.documentId, annotations.rootId, annotations.pipelineType, annotations.language);
    }

    /**
     * Sole constructor, also used by Jackson for deserialization.
     *
     * @throws IllegalArgumentException if the mention is null or empty
     * @throws NullPointerException     if the offsets or the extractor is null
     */
    @JsonCreator
    private NamedEntity(@JsonProperty("category") Category category, @JsonProperty("mention") String mention, @JsonProperty("offsets") List<Long> offsets, @JsonProperty("documentId") String documentId, @JsonProperty("rootDocument") String rootDocument, @JsonProperty("extractor") Pipeline.Type extractor, @JsonProperty("extractorLanguage") Language extractorLanguage, @JsonProperty("isHidden") Boolean hidden, @JsonProperty("partOfSpeech") String partsOfSpeech) {
        if (mention == null || mention.isEmpty()) {
            throw new IllegalArgumentException("Mention is undefined");
        }
        // Both were already dereferenced below; fail fast with a message instead of a bare NPE.
        Objects.requireNonNull(extractor, "extractor is undefined");
        Objects.requireNonNull(offsets, "offsets are undefined");
        this.mentionNorm = normalize(mention);
        this.id = HASHER.hash(String.join("|", documentId, String.valueOf(offsets), extractor.toString(), this.mentionNorm));
        this.category = Optional.ofNullable(category).orElse(Category.UNKNOWN);
        this.mention = mention;
        this.documentId = documentId;
        this.rootDocument = rootDocument;
        this.offsets.addAll(offsets);
        this.extractor = extractor;
        this.extractorLanguage = extractorLanguage;
        this.hidden = hidden;
        this.partsOfSpeech = partsOfSpeech;
    }

    @Override // org.icij.datashare.Entity
    @JsonIgnore
    public String getId() {
        return this.id;
    }

    public String getMention() {
        return this.mention;
    }

    public Category getCategory() {
        return this.category;
    }

    @JsonIgnore
    public String getDocumentId() {
        return this.documentId;
    }

    @JsonIgnore
    public String getRootDocument() {
        return this.rootDocument;
    }

    /** @return the length of the normalized mention */
    public int getMentionNormTextLength() {
        return this.mentionNorm.length();
    }

    /**
     * @return the internal offsets list itself, not a copy
     */
    // NOTE(review): exposes the internal mutable list; left as is because callers may rely on it.
    public List<Long> getOffsets() {
        return this.offsets;
    }

    public Pipeline.Type getExtractor() {
        return this.extractor;
    }

    public Language getExtractorLanguage() {
        return this.extractorLanguage;
    }

    /** @return the hidden flag, possibly null when it was never set */
    @JsonGetter("isHidden")
    public Boolean isHidden() {
        return this.hidden;
    }

    /**
     * Marks this entity as hidden.
     *
     * @return this entity
     */
    public NamedEntity hide() {
        this.hidden = true;
        return this;
    }

    /**
     * Marks this entity as not hidden.
     *
     * @return this entity
     */
    public NamedEntity unhide() {
        this.hidden = false;
        return this;
    }

    public String getPartsOfSpeech() {
        return this.partsOfSpeech;
    }

    @Override
    public String toString() {
        return "NamedEntity{mention='" + this.mention + "', id='" + this.id + "', category=" + this.category + ", offsets=" + this.offsets + "}";
    }

    /** Two entities are equal when they are of the same class and share the same id. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return this.id.equals(((NamedEntity) obj).id);
    }

    @Override
    public int hashCode() {
        return Objects.hash(this.id);
    }

    /**
     * Normalizes a mention: applies {@code Unidecode.decode} (presumably an ASCII
     * transliteration - see the Unidecode library), trims, collapses every whitespace run into a
     * single space and lowercases.
     *
     * <p>Lowercasing uses {@link Locale#ROOT} so that the result, which feeds the entity id, does
     * not depend on the JVM default locale.
     *
     * @param str text to normalize
     * @return the normalized text
     */
    @JsonIgnore
    public static String normalize(String str) {
        return Unidecode.decode(str).trim().replaceAll("(\\s+)", " ").toLowerCase(Locale.ROOT);
    }
}
