package io.annot8.components.text.processors;

import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.components.annotations.SettingsClass;
import io.annot8.api.context.Context;
import io.annot8.api.data.Content;
import io.annot8.api.data.Item;
import io.annot8.api.settings.Description;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.content.Text;
import io.annot8.common.data.properties.EmptyImmutableProperties;
import io.annot8.components.base.text.processors.AbstractTextProcessor;
import java.util.regex.Pattern;

@ComponentName("Clean Text")
@ComponentDescription("Clean up Text content")
@SettingsClass(Settings.class)
/* loaded from: input_file:io/annot8/components/text/processors/Clean.class */
public class Clean extends AbstractProcessorDescriptor<Processor, Settings> {

    /* loaded from: input_file:io/annot8/components/text/processors/Clean$Processor.class */
    /**
     * Processor that applies the clean-up steps enabled in {@link Settings} to a Text content and,
     * when the text actually changes, stores the result as a new Text content on the same Item.
     */
    public static class Processor extends AbstractTextProcessor {
        // The expressions are fixed, so they are compiled once rather than on every process() call.

        /** A newline that has horizontal whitespace on one or both sides of it. */
        private static final Pattern TRIM_LINES = Pattern.compile("(\\h+\\n\\h+|\\h+\\n|\\n\\h+)");

        /** A newline, with optional horizontal whitespace around it, between two non-whitespace characters. */
        private static final Pattern SINGLE_NEW_LINES = Pattern.compile("(?<=\\S)\\h*\\n\\h*(?=\\S)");

        /** Two or more newlines in a row, each optionally surrounded by horizontal whitespace. */
        private static final Pattern REPEATED_NEW_LINES = Pattern.compile("(\\h*\\n\\h*){2,}");

        /** A tab that has spaces or tabs on one or both sides of it. */
        private static final Pattern REPEATED_TABS = Pattern.compile("([ \t]+\t[ \t]+|[ \t]+\t|\t[ \t]+)");

        /** Two or more consecutive space characters. */
        private static final Pattern REPEATED_SPACES = Pattern.compile(" {2,}");

        /** Dash variants that are replaced with a plain hyphen. */
        private static final Pattern SMART_DASHES = Pattern.compile("[–—―]");

        /** Single-quote variants that are replaced with an apostrophe. */
        private static final Pattern SMART_SINGLE_QUOTES = Pattern.compile("[‘’‛′]");

        /** Double-quote variants that are replaced with a straight double quote. */
        private static final Pattern SMART_DOUBLE_QUOTES = Pattern.compile("[“”„″]");

        /** A hyphen immediately followed by a newline (a word split across two lines). */
        private static final String SPLIT_LINES = "-\n";

        private final Settings settings;

        /**
         * Creates a processor driven by the given settings.
         *
         * @param settings flags selecting which clean-up steps run and how the result is stored
         */
        public Processor(Settings settings) {
            this.settings = settings;
        }

        /**
         * Cleans the data of the given Text content. If cleaning leaves the text unchanged, nothing
         * is created or removed. Otherwise a new Text content holding the cleaned text is saved on
         * the same Item, and the source content is removed when the settings ask for it.
         *
         * @param text the Text content to clean
         */
        public void process(Text text) {
            // Read the data once so the clean-up and the comparison see the same value.
            String original = (String) text.getData();
            String cleaned = clean(original);

            // Exact comparison: any difference at all means new content has to be produced.
            if (cleaned.equals(original)) {
                return;
            }

            Item item = text.getItem();
            ((Content.Builder) item.createContent(Text.class)
                    .withDescription("Cleaned content from " + text.getId())
                    .withData(cleaned)
                    .withProperties(this.settings.isCopyProperties()
                            ? text.getProperties()
                            : EmptyImmutableProperties.getInstance()))
                    .save();

            if (this.settings.isRemoveSourceContent()) {
                item.removeContent(text);
            }
        }

        /** Applies, in a fixed order, every clean-up step enabled in the settings and returns the result. */
        private String clean(String data) {
            // Carriage returns are always dropped so the remaining steps only deal with "\n".
            String cleaned = data.replace("\r", "");

            if (this.settings.isTrim()) {
                cleaned = cleaned.strip();
            }
            if (this.settings.isTrimLines()) {
                cleaned = TRIM_LINES.matcher(cleaned).replaceAll("\n");
            }
            if (this.settings.isReplaceSmartCharacters()) {
                cleaned = SMART_DASHES.matcher(cleaned).replaceAll("-");
                cleaned = cleaned.replace('‗', '_');
                cleaned = SMART_SINGLE_QUOTES.matcher(cleaned).replaceAll("'");
                cleaned = cleaned.replace('‚', ',');
                cleaned = SMART_DOUBLE_QUOTES.matcher(cleaned).replaceAll("\"");
                cleaned = cleaned.replace("…", "...");
            }
            if (this.settings.isRemoveSingleNewLines()) {
                // Re-join words hyphenated across a line break before collapsing single newlines.
                cleaned = cleaned.replace(SPLIT_LINES, "-");
                cleaned = SINGLE_NEW_LINES.matcher(cleaned).replaceAll(" ");
            }
            if (this.settings.isRemoveRepeatedNewLines()) {
                cleaned = REPEATED_NEW_LINES.matcher(cleaned).replaceAll("\n\n");
            }
            if (this.settings.isRemoveRepeatedWhitespace()) {
                cleaned = REPEATED_TABS.matcher(cleaned).replaceAll("\t");
                cleaned = REPEATED_SPACES.matcher(cleaned).replaceAll(" ");
            }
            return cleaned;
        }
    }

    /* loaded from: input_file:io/annot8/components/text/processors/Clean$Settings.class */
    /**
     * Configuration for the Clean processor. Each flag switches one clean-up step, or one aspect
     * of how the cleaned content is stored, on or off. Every flag defaults to {@code true} except
     * {@code removeSourceContent}, which defaults to {@code false}.
     */
    public static class Settings implements io.annot8.api.settings.Settings {
        private boolean removeSourceContent = false;
        private boolean trim = true;
        private boolean trimLines = true;
        private boolean removeSingleNewLines = true;
        private boolean removeRepeatedNewLines = true;
        private boolean replaceSmartCharacters = true;
        private boolean removeRepeatedWhitespace = true;
        private boolean copyProperties = true;

        /**
         * Reports whether these settings are usable.
         *
         * @return always {@code true}; every combination of flags is accepted
         */
        public boolean validate() {
            return true;
        }

        /** @return whether the source Content is removed once cleaned content has been created */
        // The advertised default matches the field initialiser above, which is false.
        @Description(value = "Should the source Content be removed after successful processing?", defaultValue = "false")
        public boolean isRemoveSourceContent() {
            return removeSourceContent;
        }

        /** @param removeSourceContent whether to remove the source Content after processing */
        public void setRemoveSourceContent(boolean removeSourceContent) {
            this.removeSourceContent = removeSourceContent;
        }

        /** @return whether whitespace at the start and end of the whole text is stripped */
        @Description(value = "Should the text be trimmed to remove unnecessary whitespace at the start and end?", defaultValue = "true")
        public boolean isTrim() {
            return trim;
        }

        /** @param trim whether to strip whitespace at the start and end of the whole text */
        public void setTrim(boolean trim) {
            this.trim = trim;
        }

        /** @return whether whitespace at the start and end of each line is removed */
        @Description(value = "Should the lines be trimmed to remove unnecessary whitespace at the start and end of each line?", defaultValue = "true")
        public boolean isTrimLines() {
            return trimLines;
        }

        /** @param trimLines whether to remove whitespace at the start and end of each line */
        public void setTrimLines(boolean trimLines) {
            this.trimLines = trimLines;
        }

        /** @return whether single new lines within text are removed */
        @Description(value = "Should single new lines within text be removed?", defaultValue = "true")
        public boolean isRemoveSingleNewLines() {
            return removeSingleNewLines;
        }

        /** @param removeSingleNewLines whether to remove single new lines within text */
        public void setRemoveSingleNewLines(boolean removeSingleNewLines) {
            this.removeSingleNewLines = removeSingleNewLines;
        }

        /** @return whether runs of repeated new lines are reduced to two new lines */
        @Description(value = "Should repeated new lines within text be removed? This will reduce repeated new lines to 2 new lines.", defaultValue = "true")
        public boolean isRemoveRepeatedNewLines() {
            return removeRepeatedNewLines;
        }

        /** @param removeRepeatedNewLines whether to reduce repeated new lines to two new lines */
        public void setRemoveRepeatedNewLines(boolean removeRepeatedNewLines) {
            this.removeRepeatedNewLines = removeRepeatedNewLines;
        }

        /** @return whether smart characters such as curly quotes are replaced with simpler ones */
        @Description(value = "Should smart characters (e.g. curly quotes) be replaced with their simpler representations?", defaultValue = "true")
        public boolean isReplaceSmartCharacters() {
            return replaceSmartCharacters;
        }

        /** @param replaceSmartCharacters whether to replace smart characters with simpler ones */
        public void setReplaceSmartCharacters(boolean replaceSmartCharacters) {
            this.replaceSmartCharacters = replaceSmartCharacters;
        }

        /** @return whether repeated whitespace is replaced with a single space or tab */
        @Description(value = "Should we remove repeated white space characters, and replace with a single space or tab?", defaultValue = "true")
        public boolean isRemoveRepeatedWhitespace() {
            return removeRepeatedWhitespace;
        }

        /** @param removeRepeatedWhitespace whether to collapse repeated whitespace */
        public void setRemoveRepeatedWhitespace(boolean removeRepeatedWhitespace) {
            this.removeRepeatedWhitespace = removeRepeatedWhitespace;
        }

        /** @return whether properties of the source Content are copied to the cleaned Content */
        @Description(value = "Should properties be copied from the source Content to the cleaned Content?", defaultValue = "true")
        public boolean isCopyProperties() {
            return copyProperties;
        }

        /** @param copyProperties whether to copy the source Content's properties to the cleaned Content */
        public void setCopyProperties(boolean copyProperties) {
            this.copyProperties = copyProperties;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the processor for this descriptor.
     *
     * @param context the component context; not used here
     * @param settings the settings handed to the new processor
     * @return a new {@link Processor} configured with {@code settings}
     */
    public Processor createComponent(Context context, Settings settings) {
        Processor processor = new Processor(settings);
        return processor;
    }

    /**
     * Describes what this component does to an Item: it processes Text content, creates Text
     * content, and additionally deletes Text content when the settings request removal of the
     * source content.
     *
     * @return the capabilities built from the current settings
     */
    public Capabilities capabilities() {
        SimpleCapabilities.Builder builder =
                new SimpleCapabilities.Builder()
                        .withProcessesContent(Text.class)
                        .withCreatesContent(Text.class);

        boolean deletesSource = ((Settings) getSettings()).isRemoveSourceContent();
        return deletesSource
                ? builder.withDeletesContent(Text.class).build()
                : builder.build();
    }
}
