package io.annot8.components.opennlp.processors;

import io.annot8.api.annotations.Annotation;
import io.annot8.api.annotations.Group;
import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.ComponentTags;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.api.exceptions.BadConfigurationException;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.bounds.SpanBounds;
import io.annot8.common.data.content.Text;
import io.annot8.common.data.utils.SortUtils;
import io.annot8.components.base.text.processors.AbstractTextProcessor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.util.Span;

@ComponentDescription("Annotate phrase chunks identified by OpenNLP's chunker")
@ComponentTags({"opennlp", "phrases"})
@ComponentName("OpenNLP Phrase Chunks")
@SettingsClass(Settings.class)
/* loaded from: input_file:io/annot8/components/opennlp/processors/PhraseChunks.class */
public class PhraseChunks extends AbstractProcessorDescriptor<Processor, Settings> {

    /* loaded from: input_file:io/annot8/components/opennlp/processors/PhraseChunks$Processor.class */
    public static class Processor extends AbstractTextProcessor {
        private ChunkerME phraseChunker;
        private final Set<String> prepositions = Set.of((Object[]) new String[]{"about", "above", "across", "against", "amid", "around", "at", "atop", "behind", "below", "beneath", "beside", "between", "beyond", "by", "for", "from", "down", "in", "including", "inside", "into", "mid", "near", "of", "off", "on", "onto", "opposite", "out", "outside", "over", "round", "through", "throughout", "to", "under", "underneath", "with", "within", "without"});

        public Processor(InputStream inputStream) {
            try {
                this.phraseChunker = new ChunkerME(new ChunkerModel(inputStream));
            } catch (IOException e) {
                throw new BadConfigurationException("Cannot read Chunker model", e);
            }
        }

        /* JADX INFO: Access modifiers changed from: protected */
        public void process(Text text) {
            text.getAnnotations().getByBoundsAndType(SpanBounds.class, "grammar/sentence").forEach(annotation -> {
                SpanBounds bounds = annotation.getBounds();
                ArrayList<Annotation> arrayList = new ArrayList();
                Stream sorted = text.getBetween(bounds.getBegin(), bounds.getEnd()).filter(annotation -> {
                    return "grammar/wordToken".equals(annotation.getType());
                }).filter(annotation2 -> {
                    return annotation2.getBounds() instanceof SpanBounds;
                }).sorted(SortUtils.SORT_BY_SPANBOUNDS);
                Objects.requireNonNull(arrayList);
                sorted.forEach((v1) -> {
                    r1.add(v1);
                });
                String[] strArr = new String[arrayList.size()];
                String[] strArr2 = new String[arrayList.size()];
                int i = 0;
                for (Annotation annotation3 : arrayList) {
                    String str = (String) text.getText(annotation3).orElse("");
                    String str2 = (String) annotation3.getProperties().get("pos", String.class).orElse("UNK");
                    strArr[i] = str;
                    strArr2[i] = str2;
                    i++;
                }
                for (Span span : this.phraseChunker.chunkAsSpans(strArr, strArr2)) {
                    List list = (List) text.getBetween(((SpanBounds) ((Annotation) arrayList.get(span.getStart())).getBounds(SpanBounds.class).get()).getBegin(), ((SpanBounds) ((Annotation) arrayList.get(span.getEnd() - 1)).getBounds(SpanBounds.class).get()).getEnd()).filter(annotation4 -> {
                        return "grammar/wordToken".equals(annotation4.getType());
                    }).collect(Collectors.toList());
                    int size = list.size() - 1;
                    int size2 = list.size() - 2;
                    while (true) {
                        if (size2 <= 1) {
                            break;
                        }
                        String str3 = (String) ((Annotation) list.get(size2)).getProperties().get("pos", String.class).orElse("UNK");
                        String str4 = (String) text.getText((Annotation) list.get(size2)).orElse("");
                        if (!"IN".equals(str3) && !",".equals(str3) && !this.prepositions.contains(str4)) {
                            size = size2;
                            break;
                        } else {
                            size = size2 - 1;
                            size2--;
                        }
                    }
                    Group.Builder builder = (Group.Builder) ((Group.Builder) ((Group.Builder) text.getItem().getGroups().create().withType("group/grammar/phrase")).withProperty("probability", Double.valueOf(span.getProb()))).withProperty("subtype", span.getType());
                    int i2 = 0;
                    while (i2 < list.size()) {
                        builder = i2 == size ? builder.withAnnotation("grammar/head", (Annotation) list.get(i2)) : builder.withAnnotation("grammar/constituent", (Annotation) list.get(i2));
                        i2++;
                    }
                    builder.save();
                }
            });
        }
    }

    /* loaded from: input_file:io/annot8/components/opennlp/processors/PhraseChunks$Settings.class */
    public static class Settings implements io.annot8.api.settings.Settings {
        private File model;

        public boolean validate() {
            return true;
        }

        @Description("OpenNLP Phrase Chunk Model (or null to use default)")
        public File getModel() {
            return this.model;
        }

        public void setModel(File file) {
            this.model = file;
        }
    }

    /**
     * Builds the processor, loading the chunker model from the configured file or, when no file
     * is configured, from the {@code en-chunker.bin} resource resolved relative to {@code POS}.
     *
     * <p>The model stream is opened here and closed again once the processor has been
     * constructed.
     *
     * @param context the component context (not used by this method)
     * @param settings supplies the optional model file
     * @return a processor backed by the loaded chunker model
     * @throws BadConfigurationException if the default resource is missing, or the model file
     *     cannot be opened or closed
     */
    public Processor createComponent(Context context, Settings settings) {
        File modelFile = settings.getModel();
        // A null resource is legal in try-with-resources; it is simply not closed.
        try (InputStream modelStream =
                modelFile == null
                        ? POS.class.getResourceAsStream("en-chunker.bin")
                        : new FileInputStream(modelFile)) {
            if (modelStream == null) {
                // getResourceAsStream signals a missing resource with null, not an exception.
                throw new BadConfigurationException(
                        "Could not read Chunker model: default resource en-chunker.bin not found");
            }
            return new Processor(modelStream);
        } catch (IOException e) {
            // Keep the cause so the underlying I/O failure is visible to whoever handles this.
            throw new BadConfigurationException("Could not read Chunker model", e);
        }
    }

    /**
     * Declares what this component consumes and produces: it processes {@link Text} content,
     * reads {@code grammar/sentence} and {@code grammar/wordToken} annotations with
     * {@link SpanBounds}, and creates {@code group/grammar/phrase} groups.
     *
     * @return the capabilities built from those declarations
     */
    @Override
    public Capabilities capabilities() {
        return new SimpleCapabilities.Builder()
                .withProcessesContent(Text.class)
                .withProcessesAnnotations("grammar/sentence", SpanBounds.class)
                .withProcessesAnnotations("grammar/wordToken", SpanBounds.class)
                .withCreatesGroups("group/grammar/phrase")
                .build();
    }
}
