package com.mware.core.ingest.dataworker;

import com.google.common.base.Charsets;
import com.google.common.io.CharStreams;
import com.mware.core.model.properties.BcSchema;
import com.mware.core.model.schema.Concept;
import com.mware.core.model.termMention.TermMentionBuilder;
import com.mware.core.util.BcLogger;
import com.mware.core.util.BcLoggerFactory;
import com.mware.ge.Element;
import com.mware.ge.Property;
import com.mware.ge.Vertex;
import com.mware.ge.values.storable.TextValue;
import com.mware.ge.values.storable.Value;
import com.mware.ge.values.storable.Values;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:com/mware/core/ingest/dataworker/RegexDataWorker.class */
public abstract class RegexDataWorker extends DataWorker {
    private static final BcLogger LOGGER = BcLoggerFactory.getLogger(RegexDataWorker.class);
    private final Pattern pattern;

    public RegexDataWorker(String str) {
        this.pattern = Pattern.compile(str, 8);
    }

    protected abstract Concept getConcept();

    @Override // com.mware.core.ingest.dataworker.DataWorker
    public void prepare(DataWorkerPrepareData dataWorkerPrepareData) throws Exception {
        super.prepare(dataWorkerPrepareData);
        LOGGER.debug("Extractor prepared for entity type [%s] with regular expression: %s", getConcept().getName(), this.pattern.toString());
    }

    @Override // com.mware.core.ingest.dataworker.DataWorker
    public void execute(InputStream inputStream, DataWorkerData dataWorkerData) throws Exception {
        LOGGER.debug("Extracting pattern [%s] from provided text", this.pattern);
        Matcher matcher = this.pattern.matcher(CharStreams.toString(new InputStreamReader(inputStream, Charsets.UTF_8)));
        Vertex vertex = (Vertex) dataWorkerData.getElement();
        ArrayList arrayList = new ArrayList();
        while (matcher.find()) {
            arrayList.add(new TermMentionBuilder().outVertex(vertex).propertyKey(dataWorkerData.getProperty().getKey()).propertyName(dataWorkerData.getProperty().getName()).start(matcher.start()).end(matcher.end()).title(matcher.group()).conceptName(getConcept().getName()).visibilityJson(dataWorkerData.getElementVisibilityJson()).process(getClass().getName()).save(getGraph(), getVisibilityTranslator(), getUser(), getAuthorizations()));
        }
        applyTermMentionFilters(vertex, arrayList);
        pushTextUpdated(dataWorkerData);
    }

    @Override // com.mware.core.ingest.dataworker.DataWorker
    public boolean isHandled(Element element, Property property) {
        if (property == null || property.getName().equals(BcSchema.RAW.getPropertyName())) {
            return false;
        }
        Value value = property.getMetadata().getValue(BcSchema.MIME_TYPE.getPropertyName());
        return !value.eq(Values.NO_VALUE) && ((TextValue) value).stringValue().startsWith("text");
    }
}
