/*
 * Decompiled with CFR 0.152.
 */
package org.datacleaner.beans.transform;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.inject.Named;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.TextCategory;
import org.datacleaner.components.convert.ConvertToStringTransformer;

@Named(value="Remove substring")
@Description(value="Subtracts one or more substrings from a base text, i.e. [\"Hello world\",\"World\"] would yield \"Hello\".")
@Categorized(value={TextCategory.class})
public class RemoveSubstringTransformer
implements Transformer {
    @Configured(value="Base text column")
    @Description(value="Column containing the text to subtract from")
    InputColumn<String> baseColumn;
    @Configured(value="Substring columns")
    @Description(value="Columns containing the substrings to remove from the base text")
    InputColumn<?>[] substringColumns;
    @Configured(value="Match whole words only", required=false)
    @Description(value="If set, only whole words (surrounded by whitespace or punctuation) will be removed.\n This prevents removing partial words.")
    boolean wholeWordsOnly = false;
    @Configured
    @Description(value="Should substring matching be case-sensitive or not?")
    boolean caseSensitive = true;

    public OutputColumns getOutputColumns() {
        return new OutputColumns(String.class, this.baseColumn.getName() + " (substring removed)", new String[0]);
    }

    public String[] transform(InputRow inputRow) {
        String subtractedString = (String)inputRow.getValue(this.baseColumn);
        for (InputColumn<?> inputColumn : this.substringColumns) {
            Object value = inputRow.getValue(inputColumn);
            if (value instanceof List) {
                for (Object element : (List)value) {
                    subtractedString = this.subtract(subtractedString, element);
                }
                continue;
            }
            subtractedString = this.subtract(subtractedString, value);
        }
        return new String[]{subtractedString};
    }

    private String subtract(String subtractedString, Object element) {
        if (element == null || subtractedString == null) {
            return subtractedString;
        }
        String substring = this.caseSensitive ? ConvertToStringTransformer.transformValue((Object)element) : ConvertToStringTransformer.transformValue((Object)element).toLowerCase();
        String resultingString = subtractedString;
        if (this.caseSensitive && !this.wholeWordsOnly) {
            return resultingString.replace(substring, "");
        }
        String matchedString = this.caseSensitive ? resultingString : resultingString.toLowerCase();
        Pattern substringPattern = this.wholeWordsOnly ? Pattern.compile("\\b" + Pattern.quote(substring) + "\\b") : Pattern.compile(Pattern.quote(substring));
        Matcher matcher = substringPattern.matcher(matchedString);
        while (matcher.find()) {
            int start = matcher.start();
            int end = matcher.end();
            resultingString = resultingString.substring(0, start) + resultingString.substring(end);
            matchedString = this.caseSensitive ? resultingString : resultingString.toLowerCase();
            matcher = substringPattern.matcher(matchedString);
        }
        return resultingString;
    }
}

