package org.datacleaner.beans.stringpattern;

import java.io.Serializable;
import java.text.DecimalFormatSymbols;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import javax.inject.Named;
import org.datacleaner.api.Analyzer;
import org.datacleaner.api.ColumnProperty;
import org.datacleaner.api.Concurrent;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.ExternalDocumentation;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.Provided;
import org.datacleaner.result.AnnotatedRowsResult;
import org.datacleaner.result.Crosstab;
import org.datacleaner.result.CrosstabDimension;
import org.datacleaner.result.CrosstabNavigator;
import org.datacleaner.storage.RowAnnotation;
import org.datacleaner.storage.RowAnnotationFactory;
import org.datacleaner.util.NullTolerableComparator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Analyzer that feeds the values of one string column into
 * {@link DefaultPatternFinder} instances and reports the discovered token
 * patterns as crosstabs.
 *
 * <p>When a group column is configured, one pattern finder is kept per distinct
 * group value (including {@code null}) and the result holds one crosstab per
 * group. Otherwise a single finder, stored under the {@code null} key, is used.
 *
 * <p>The class is annotated {@code @Concurrent(true)}; the per-group finder
 * lookup is therefore performed entirely under this instance's monitor, since
 * the finders live in a plain {@link HashMap}.
 */
@ExternalDocumentation({@ExternalDocumentation.DocumentationLink(title = "Kasper's Source: Pattern Finder 2.0", url = "http://kasper.eobjects.org/2010/09/pattern-finder-20-latest-feature-in.html", type = ExternalDocumentation.DocumentationType.TECH, version = "2.0")})
@Named("Pattern finder")
@Description("The Pattern Finder will inspect your String values and generate and match string patterns that suit your data.\nIt can be used for a lot of purposes but is excellent for verifying or getting ideas about the format of the string-values in a column.")
@Concurrent(true)
/* loaded from: input_file:org/datacleaner/beans/stringpattern/PatternFinderAnalyzer.class */
public class PatternFinderAnalyzer implements Analyzer<PatternFinderResult> {
    private static final Logger logger = LoggerFactory.getLogger(PatternFinderAnalyzer.class);

    /**
     * Orders pattern entries by row count, highest first; entries with equal
     * counts are ordered by the pattern's symbolic string so the ordering is
     * deterministic.
     */
    private static final Comparator<Map.Entry<TokenPattern, RowAnnotation>> BY_MATCH_COUNT_DESCENDING =
            new Comparator<Map.Entry<TokenPattern, RowAnnotation>>() {
                @Override
                public int compare(Map.Entry<TokenPattern, RowAnnotation> left,
                        Map.Entry<TokenPattern, RowAnnotation> right) {
                    // Arguments are swapped on purpose: larger counts sort first.
                    int byCount = Integer.compare(right.getValue().getRowCount(), left.getValue().getRowCount());
                    if (byCount != 0) {
                        return byCount;
                    }
                    return left.getKey().toSymbolicString().compareTo(right.getKey().toSymbolicString());
                }
            };

    // Display names of the configured properties.
    public static final String PROPERTY_COLUMN = "Column";
    public static final String PROPERTY_GROUP_COLUMN = "Group column";
    public static final String PROPERTY_DISCRIMINATE_TEXT_CASE = "Discriminate text case";
    public static final String PROPERTY_DISCRIMINATE_NEGATIVE_NUMBERS = "Discriminate negative numbers";
    public static final String PROPERTY_DISCRIMINATE_DECIMALS = "Discriminate decimals";
    public static final String PROPERTY_ENABLE_MIXED_TOKENS = "Enable mixed tokens";
    public static final String PROPERTY_IGNORE_REPEATED_SPACES = "Ignore repeated spaces";

    // Category and dimension names used in the result crosstab.
    public static final String MEASURE_SAMPLE = "Sample";
    public static final String MEASURE_MATCH_COUNT = "Match count";
    public static final String DIMENSION_NAME_MEASURES = "Measures";
    public static final String DIMENSION_NAME_PATTERN = "Pattern";

    /** Column whose values are analyzed. */
    @ColumnProperty(escalateToMultipleJobs = true)
    @Configured(order = 1, value = PROPERTY_COLUMN)
    InputColumn<String> column;

    /** Optional column whose value selects the pattern finder a row is sent to. */
    @Configured(required = false, order = 2, value = PROPERTY_GROUP_COLUMN)
    @Description("Optional column to group patterns by")
    InputColumn<String> groupColumn;

    /** Name of the predefined token; only used when the regexes below are also set. */
    @Configured(required = false, value = "Predefined token name", order = 10)
    String predefinedTokenName;

    /** Regexes of the predefined token; only used when the name above is also set. */
    @Configured(required = false, value = "Predefined token regexes", order = 11)
    String[] predefinedTokenPatterns;

    /** Pattern finders keyed by group value; the {@code null} key is used for ungrouped analysis. */
    private Map<String, DefaultPatternFinder> _patternFinders;

    /** Tokenizer configuration built by {@link #init()} and shared by all pattern finders. */
    private TokenizerConfiguration _configuration;

    @Provided
    RowAnnotationFactory _rowAnnotationFactory;

    @Configured(required = false, order = 3, value = PROPERTY_DISCRIMINATE_TEXT_CASE)
    @Description("Separate text tokens based on case")
    Boolean discriminateTextCase = true;

    @Configured(required = false, order = 4, value = PROPERTY_DISCRIMINATE_NEGATIVE_NUMBERS)
    @Description("Separate number tokens based on negativity")
    Boolean discriminateNegativeNumbers = false;

    @Configured(required = false, order = 5, value = PROPERTY_DISCRIMINATE_DECIMALS)
    @Description("Separate number tokens for decimals")
    Boolean discriminateDecimals = true;

    @Configured(required = false, order = 6, value = PROPERTY_ENABLE_MIXED_TOKENS)
    @Description("Use '?'-tokens for mixed text and numbers")
    Boolean enableMixedTokens = true;

    @Configured(required = false, order = 7, value = PROPERTY_IGNORE_REPEATED_SPACES)
    @Description("Ignore whitespace differences")
    Boolean ignoreRepeatedSpaces = false;

    @Configured(required = false, value = "Upper case patterns expand in size", order = 8)
    @Description("Auto-adjust/expand uppercase text tokens")
    boolean upperCaseExpandable = false;

    @Configured(required = false, value = "Lower case patterns expand in size", order = 9)
    @Description("Auto-adjust/expand lowercase text tokens")
    boolean lowerCaseExpandable = true;

    // The three separator defaults below come from the JVM's default-locale DecimalFormatSymbols.
    @Configured(required = false, order = 12)
    Character decimalSeparator = Character.valueOf(DecimalFormatSymbols.getInstance().getDecimalSeparator());

    @Configured(required = false, order = 13)
    Character thousandsSeparator = Character.valueOf(DecimalFormatSymbols.getInstance().getGroupingSeparator());

    @Configured(required = false, order = 14)
    Character minusSign = Character.valueOf(DecimalFormatSymbols.getInstance().getMinusSign());

    /**
     * Builds the {@link TokenizerConfiguration} from the configured properties
     * and resets the per-group pattern finder map.
     *
     * <p>Nullable wrapper properties that are {@code null} are skipped, so the
     * configuration keeps whatever value it was constructed with for them.
     */
    @Initialize
    public void init() {
        if (this.enableMixedTokens != null) {
            this._configuration = new TokenizerConfiguration(this.enableMixedTokens.booleanValue());
        } else {
            this._configuration = new TokenizerConfiguration();
        }
        this._configuration.setUpperCaseExpandable(this.upperCaseExpandable);
        this._configuration.setLowerCaseExpandable(this.lowerCaseExpandable);
        if (this.discriminateNegativeNumbers != null) {
            this._configuration.setDiscriminateNegativeNumbers(this.discriminateNegativeNumbers.booleanValue());
        }
        if (this.discriminateDecimals != null) {
            this._configuration.setDiscriminateDecimalNumbers(this.discriminateDecimals.booleanValue());
        }
        if (this.discriminateTextCase != null) {
            this._configuration.setDiscriminateTextCase(this.discriminateTextCase.booleanValue());
        }
        if (this.ignoreRepeatedSpaces != null) {
            // "Ignore repeated spaces" is the inverse of discriminating whitespace token length.
            this._configuration.setDistriminateTokenLength(TokenType.WHITESPACE, !this.ignoreRepeatedSpaces.booleanValue());
        }
        if (this.decimalSeparator != null) {
            this._configuration.setDecimalSeparator(this.decimalSeparator);
        }
        if (this.thousandsSeparator != null) {
            this._configuration.setThousandsSeparator(this.thousandsSeparator);
        }
        if (this.minusSign != null) {
            this._configuration.setMinusSign(this.minusSign);
        }
        // A predefined token is only registered when both its name and its regexes are configured.
        if (this.predefinedTokenName != null && this.predefinedTokenPatterns != null) {
            Set<String> tokenRegexes = new HashSet<>();
            for (String regex : this.predefinedTokenPatterns) {
                tokenRegexes.add(regex);
            }
            this._configuration.getPredefinedTokens().add(new PredefinedTokenDefinition(this.predefinedTokenName, tokenRegexes));
        }
        this._patternFinders = new HashMap<>();
    }

    /**
     * Sends the row's value of the analyzed column to the pattern finder of
     * the row's group.
     *
     * @param inputRow the row to read the column (and optional group column) value from
     * @param i        passed through unchanged to the pattern finder
     */
    public void run(InputRow inputRow, int i) {
        String group = this.groupColumn == null ? null : (String) inputRow.getValue(this.groupColumn);
        String value = (String) inputRow.getValue(this.column);
        run(group, value, inputRow, i);
    }

    /** Delegates a single value to the pattern finder registered for {@code group}. */
    private void run(String group, String value, InputRow inputRow, int distinctCount) {
        getPatternFinderForGroup(group).run(inputRow, value, distinctCount);
    }

    /**
     * Returns the pattern finder for the given group, creating and registering
     * it on first use.
     *
     * <p>Both the lookup and the insertion happen under this instance's
     * monitor. The backing map is a plain {@link HashMap}, so reading it
     * outside the lock while another thread inserts would be a data race and
     * could expose a finder that has not been safely published.
     *
     * @param group the group value, may be {@code null}
     * @return the (possibly newly created) pattern finder, never {@code null}
     */
    private DefaultPatternFinder getPatternFinderForGroup(String group) {
        synchronized (this) {
            DefaultPatternFinder finder = this._patternFinders.get(group);
            if (finder == null) {
                finder = new DefaultPatternFinder(this._configuration, this._rowAnnotationFactory);
                this._patternFinders.put(group, finder);
            }
            return finder;
        }
    }

    /**
     * Builds the analysis result.
     *
     * <p>Without a group column, the result wraps the crosstab of the finder
     * stored under the {@code null} key (created on demand if no row was
     * processed). With a group column, the result wraps one crosstab per group,
     * kept in a map sorted with a null-tolerant string comparator.
     *
     * <p>NOTE(review): the decompiler reported that this method was originally
     * named {@code getResult} and renamed it to {@code m4getResult}; the
     * original name is restored here. No {@code @Override} is added because the
     * {@code Analyzer} interface is not visible from this file - confirm.
     *
     * @return the pattern finder result for the configured column(s)
     */
    public PatternFinderResult getResult() {
        if (this.groupColumn == null) {
            Crosstab<Serializable> crosstab = createCrosstab(getPatternFinderForGroup(null));
            return new PatternFinderResult(this.column, crosstab, this._configuration);
        }
        // Kept as a raw TreeMap: the value type expected by PatternFinderResult's
        // constructor is not visible from this file.
        @SuppressWarnings({"rawtypes", "unchecked"})
        TreeMap crosstabsByGroup = new TreeMap(NullTolerableComparator.get(String.class));
        for (Map.Entry<String, DefaultPatternFinder> groupEntry : this._patternFinders.entrySet()) {
            crosstabsByGroup.put(groupEntry.getKey(), createCrosstab(groupEntry.getValue()));
        }
        if (logger.isInfoEnabled()) {
            logger.info("Grouped result contains {} groups", crosstabsByGroup.size());
        }
        return new PatternFinderResult(this.column, this.groupColumn, crosstabsByGroup, this._configuration);
    }

    /**
     * Decompiler-generated alias of {@link #getResult()}, kept so that existing
     * callers of this name continue to work.
     *
     * @return the same result as {@link #getResult()}
     */
    public PatternFinderResult m4getResult() {
        return getResult();
    }

    /**
     * Creates an empty crosstab with a "Measures" dimension (categories
     * "Match count" and "Sample") and an initially empty "Pattern" dimension.
     *
     * @return a new, empty crosstab
     */
    public static Crosstab<Serializable> createCrosstab() {
        CrosstabDimension measures = new CrosstabDimension(DIMENSION_NAME_MEASURES);
        measures.addCategory(MEASURE_MATCH_COUNT);
        measures.addCategory(MEASURE_SAMPLE);
        CrosstabDimension patterns = new CrosstabDimension(DIMENSION_NAME_PATTERN);
        return new Crosstab<>(Serializable.class, new CrosstabDimension[]{measures, patterns});
    }

    /**
     * Fills a fresh crosstab with one "Pattern" category per pattern found by
     * the given finder, ordered by match count descending and then by symbolic
     * pattern string.
     *
     * <p>For each pattern, the match count cell receives the row count of its
     * annotation plus an attached {@link AnnotatedRowsResult}, and the sample
     * cell receives the pattern's sample string.
     *
     * @param defaultPatternFinder the finder whose annotations are reported
     * @return the populated crosstab
     */
    private Crosstab<Serializable> createCrosstab(DefaultPatternFinder defaultPatternFinder) {
        Crosstab<Serializable> crosstab = createCrosstab();
        Set<Map.Entry<TokenPattern, RowAnnotation>> annotationEntries = defaultPatternFinder.getAnnotations().entrySet();
        TreeSet<Map.Entry<TokenPattern, RowAnnotation>> sortedEntries = new TreeSet<>(BY_MATCH_COUNT_DESCENDING);
        sortedEntries.addAll(annotationEntries);
        for (Map.Entry<TokenPattern, RowAnnotation> patternEntry : sortedEntries) {
            TokenPattern tokenPattern = patternEntry.getKey();
            RowAnnotation rowAnnotation = patternEntry.getValue();
            CrosstabNavigator navigate = crosstab.navigate();
            navigate.where(DIMENSION_NAME_PATTERN, tokenPattern.toSymbolicString());
            navigate.where(DIMENSION_NAME_MEASURES, MEASURE_MATCH_COUNT);
            navigate.put(Integer.valueOf(rowAnnotation.getRowCount()), true);
            navigate.attach(new AnnotatedRowsResult(rowAnnotation, this._rowAnnotationFactory, new InputColumn[]{this.column}));
            // Same pattern category, now switching to the sample measure.
            navigate.where(DIMENSION_NAME_MEASURES, MEASURE_SAMPLE);
            navigate.put(tokenPattern.getSampleString(), true);
        }
        return crosstab;
    }

    /** Sets the factory used to create pattern finders and annotated-row results. */
    public void setRowAnnotationFactory(RowAnnotationFactory rowAnnotationFactory) {
        this._rowAnnotationFactory = rowAnnotationFactory;
    }

    /** Sets the column whose values are analyzed. */
    public void setColumn(InputColumn<String> inputColumn) {
        this.column = inputColumn;
    }

    /** Sets the predefined token name; takes effect on the next {@link #init()}. */
    public void setPredefinedTokenName(String str) {
        this.predefinedTokenName = str;
    }

    /** Sets the predefined token regexes; takes effect on the next {@link #init()}. */
    public void setPredefinedTokenPatterns(String[] strArr) {
        this.predefinedTokenPatterns = strArr;
    }

    /** Sets the "Discriminate text case" property; {@code null} leaves the tokenizer's own setting. */
    public void setDiscriminateTextCase(Boolean bool) {
        this.discriminateTextCase = bool;
    }

    /** Sets the "Discriminate negative numbers" property; {@code null} leaves the tokenizer's own setting. */
    public void setDiscriminateNegativeNumbers(Boolean bool) {
        this.discriminateNegativeNumbers = bool;
    }

    /** Sets the "Discriminate decimals" property; {@code null} leaves the tokenizer's own setting. */
    public void setDiscriminateDecimals(Boolean bool) {
        this.discriminateDecimals = bool;
    }

    /** Sets the "Enable mixed tokens" property; {@code null} selects the no-argument tokenizer configuration. */
    public void setEnableMixedTokens(Boolean bool) {
        this.enableMixedTokens = bool;
    }

    /** Sets whether upper case text tokens are expandable. */
    public void setUpperCaseExpandable(boolean z) {
        this.upperCaseExpandable = z;
    }

    /** Sets whether lower case text tokens are expandable. */
    public void setLowerCaseExpandable(boolean z) {
        this.lowerCaseExpandable = z;
    }

    /** Sets the decimal separator; {@code null} leaves the tokenizer's own setting. */
    public void setDecimalSeparator(Character ch) {
        this.decimalSeparator = ch;
    }

    /** Sets the "Ignore repeated spaces" property; {@code null} leaves the tokenizer's own setting. */
    public void setIgnoreRepeatedSpaces(Boolean bool) {
        this.ignoreRepeatedSpaces = bool;
    }

    /** Sets the minus sign; {@code null} leaves the tokenizer's own setting. */
    public void setMinusSign(Character ch) {
        this.minusSign = ch;
    }

    /** Sets the thousands separator; {@code null} leaves the tokenizer's own setting. */
    public void setThousandsSeparator(Character ch) {
        this.thousandsSeparator = ch;
    }

    /** Sets the optional group column; {@code null} disables grouping. */
    public void setGroupColumn(InputColumn<String> inputColumn) {
        this.groupColumn = inputColumn;
    }
}
