/*
 * Decompiled with CFR 0.152.
 */
package org.datacleaner.beans.stringpattern;

import java.io.Serializable;
import java.text.DecimalFormatSymbols;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import javax.inject.Named;
import org.datacleaner.api.Analyzer;
import org.datacleaner.api.AnalyzerResult;
import org.datacleaner.api.ColumnProperty;
import org.datacleaner.api.Concurrent;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.ExternalDocumentation;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.Provided;
import org.datacleaner.beans.stringpattern.DefaultPatternFinder;
import org.datacleaner.beans.stringpattern.PatternFinderResult;
import org.datacleaner.beans.stringpattern.PredefinedTokenDefinition;
import org.datacleaner.beans.stringpattern.TokenPattern;
import org.datacleaner.beans.stringpattern.TokenType;
import org.datacleaner.beans.stringpattern.TokenizerConfiguration;
import org.datacleaner.result.AnnotatedRowsResult;
import org.datacleaner.result.Crosstab;
import org.datacleaner.result.CrosstabDimension;
import org.datacleaner.result.CrosstabNavigator;
import org.datacleaner.storage.RowAnnotation;
import org.datacleaner.storage.RowAnnotationFactory;
import org.datacleaner.util.NullTolerableComparator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Analyzer that tokenizes the string values of a column and reports the
 * token patterns found, optionally partitioned by the value of a second
 * ("group") column.
 *
 * <p>One {@link DefaultPatternFinder} is kept per distinct group value
 * ({@code null} is a valid group key and is the only key used when no group
 * column is configured). The result is a crosstab - or one crosstab per group -
 * holding, for every pattern, its match count and a sample value.
 *
 * <p>The class is declared {@code @Concurrent(true)}. The per-group finder map
 * is a plain {@link HashMap} (it must accept a {@code null} key), so every
 * access to it is guarded by this instance's monitor.
 */
@Named(value="Pattern finder")
@Description(value="The Pattern Finder will inspect your String values and generate and match string patterns that suit your data.\nIt can be used for a lot of purposes but is excellent for verifying or getting ideas about the format of the string-values in a column.")
@ExternalDocumentation(value={@ExternalDocumentation.DocumentationLink(title="Kasper's Source: Pattern Finder 2.0", url="http://kasper.eobjects.org/2010/09/pattern-finder-20-latest-feature-in.html", type=ExternalDocumentation.DocumentationType.TECH, version="2.0")})
@Concurrent(value=true)
public class PatternFinderAnalyzer
implements Analyzer<PatternFinderResult> {
    private static final Logger logger = LoggerFactory.getLogger(PatternFinderAnalyzer.class);

    // Display names of the configured properties.
    public static final String PROPERTY_COLUMN = "Column";
    public static final String PROPERTY_GROUP_COLUMN = "Group column";
    public static final String PROPERTY_DISCRIMINATE_TEXT_CASE = "Discriminate text case";
    public static final String PROPERTY_DISCRIMINATE_NEGATIVE_NUMBERS = "Discriminate negative numbers";
    public static final String PROPERTY_DISCRIMINATE_DECIMALS = "Discriminate decimals";
    public static final String PROPERTY_ENABLE_MIXED_TOKENS = "Enable mixed tokens";
    public static final String PROPERTY_IGNORE_REPEATED_SPACES = "Ignore repeated spaces";

    // Category and dimension names used in the result crosstab.
    public static final String MEASURE_SAMPLE = "Sample";
    public static final String MEASURE_MATCH_COUNT = "Match count";
    public static final String DIMENSION_NAME_MEASURES = "Measures";
    public static final String DIMENSION_NAME_PATTERN = "Pattern";

    @Configured(order=1, value="Column")
    @ColumnProperty(escalateToMultipleJobs=true)
    InputColumn<String> column;
    @Configured(required=false, order=2, value="Group column")
    @Description(value="Optional column to group patterns by")
    InputColumn<String> groupColumn;
    @Configured(required=false, order=3, value="Discriminate text case")
    @Description(value="Separate text tokens based on case")
    boolean discriminateTextCase = true;
    @Configured(required=false, order=4, value="Discriminate negative numbers")
    @Description(value="Separate number tokens based on negativity")
    boolean discriminateNegativeNumbers = false;
    @Configured(required=false, order=5, value="Discriminate decimals")
    @Description(value="Separate number tokens for decimals")
    boolean discriminateDecimals = true;
    @Configured(required=false, order=6, value="Enable mixed tokens")
    @Description(value="Use '?'-tokens for mixed text and numbers")
    boolean enableMixedTokens = true;
    @Configured(required=false, order=7, value="Ignore repeated spaces")
    @Description(value="Ignore whitespace differences")
    boolean ignoreRepeatedSpaces = false;
    @Configured(required=false, value="Upper case patterns expand in size", order=8)
    @Description(value="Auto-adjust/expand uppercase text tokens")
    boolean upperCaseExpandable = false;
    @Configured(required=false, value="Lower case patterns expand in size", order=9)
    @Description(value="Auto-adjust/expand lowercase text tokens")
    boolean lowerCaseExpandable = true;
    @Configured(required=false, value="Predefined token name", order=10)
    String predefinedTokenName;
    @Configured(required=false, value="Predefined token regexes", order=11)
    String[] predefinedTokenPatterns;
    // The three symbol properties default to the symbols of the JVM's default locale.
    @Configured(required=false, order=12)
    Character decimalSeparator = Character.valueOf(DecimalFormatSymbols.getInstance().getDecimalSeparator());
    @Configured(required=false, order=13)
    Character thousandsSeparator = Character.valueOf(DecimalFormatSymbols.getInstance().getGroupingSeparator());
    @Configured(required=false, order=14)
    Character minusSign = Character.valueOf(DecimalFormatSymbols.getInstance().getMinusSign());

    /** Pattern finders keyed by group value (null key allowed); guarded by {@code this}. */
    private Map<String, DefaultPatternFinder> _patternFinders;
    /** Tokenizer settings assembled from the configured properties by {@link #init()}. */
    private TokenizerConfiguration _configuration;
    @Provided
    RowAnnotationFactory _rowAnnotationFactory;

    /**
     * Orders pattern entries by row count, highest first; entries with equal
     * counts are ordered by the pattern's symbolic string.
     */
    private static final class MatchCountComparator
    implements Comparator<Map.Entry<TokenPattern, RowAnnotation>> {
        @Override
        public int compare(Map.Entry<TokenPattern, RowAnnotation> o1, Map.Entry<TokenPattern, RowAnnotation> o2) {
            // Arguments are swapped to get descending order; Integer.compare
            // avoids the overflow pitfalls of comparing by subtraction.
            int result = Integer.compare(o2.getValue().getRowCount(), o1.getValue().getRowCount());
            if (result == 0) {
                result = o1.getKey().toSymbolicString().compareTo(o2.getKey().toSymbolicString());
            }
            return result;
        }
    }

    /**
     * Builds the tokenizer configuration from the configured properties and
     * resets the per-group pattern finder map. Symbol overrides (decimal
     * separator, thousands separator, minus sign) are only applied when
     * non-null, and a predefined token is only registered when both its name
     * and its regex array are set.
     */
    @Initialize
    public void init() {
        this._configuration = new TokenizerConfiguration(this.enableMixedTokens);
        this._configuration.setUpperCaseExpandable(this.upperCaseExpandable);
        this._configuration.setLowerCaseExpandable(this.lowerCaseExpandable);
        this._configuration.setDiscriminateNegativeNumbers(this.discriminateNegativeNumbers);
        this._configuration.setDiscriminateDecimalNumbers(this.discriminateDecimals);
        this._configuration.setDiscriminateTextCase(this.discriminateTextCase);
        // "Ignore repeated spaces" is the inverse of discriminating whitespace token length.
        this._configuration.setDistriminateTokenLength(TokenType.WHITESPACE, !this.ignoreRepeatedSpaces);
        if (this.decimalSeparator != null) {
            this._configuration.setDecimalSeparator(this.decimalSeparator);
        }
        if (this.thousandsSeparator != null) {
            this._configuration.setThousandsSeparator(this.thousandsSeparator);
        }
        if (this.minusSign != null) {
            this._configuration.setMinusSign(this.minusSign);
        }
        if (this.predefinedTokenName != null && this.predefinedTokenPatterns != null) {
            HashSet<String> tokenRegexes = new HashSet<String>();
            for (String predefinedTokenPattern : this.predefinedTokenPatterns) {
                tokenRegexes.add(predefinedTokenPattern);
            }
            this._configuration.getPredefinedTokens().add(new PredefinedTokenDefinition(this.predefinedTokenName, tokenRegexes));
        }
        this._patternFinders = new HashMap<String, DefaultPatternFinder>();
    }

    /**
     * Feeds one row to the pattern finder of the row's group.
     *
     * @param row the row to read the column value (and, if configured, the
     *            group value) from
     * @param distinctCount passed through unchanged to the pattern finder
     */
    @Override
    public void run(InputRow row, int distinctCount) {
        String group = this.groupColumn == null ? null : (String)row.getValue(this.groupColumn);
        String value = (String)row.getValue(this.column);
        this.run(group, value, row, distinctCount);
    }

    /** Dispatches a value to the pattern finder registered for {@code group}. */
    private void run(String group, String value, InputRow row, int distinctCount) {
        DefaultPatternFinder patternFinder = this.getPatternFinderForGroup(group);
        patternFinder.run(row, value, distinctCount);
    }

    /**
     * Returns the pattern finder for the given group, creating and registering
     * it on first use.
     *
     * <p>The whole lookup runs under this instance's monitor. The backing map
     * is a plain {@link HashMap} (kept because {@code null} is a legal group
     * key), and a {@code HashMap} must not be read while another thread may be
     * inserting into it.
     *
     * @param group the group value; may be {@code null}
     * @return the finder for that group, never {@code null}
     */
    private DefaultPatternFinder getPatternFinderForGroup(String group) {
        synchronized (this) {
            DefaultPatternFinder patternFinder = this._patternFinders.get(group);
            if (patternFinder == null) {
                patternFinder = new DefaultPatternFinder(this._configuration, this._rowAnnotationFactory);
                this._patternFinders.put(group, patternFinder);
            }
            return patternFinder;
        }
    }

    /**
     * Builds the analyzer result.
     *
     * <p>Without a group column the result wraps a single crosstab for the
     * {@code null} group (an empty finder is created if no row was ever
     * processed). With a group column the result holds one crosstab per
     * observed group value, in a map sorted by a null-tolerant string
     * comparator.
     *
     * @return the result for the configured column (and group column, if any)
     */
    @Override
    public PatternFinderResult getResult() {
        if (this.groupColumn == null) {
            Crosstab<Serializable> crosstab = this.createCrosstab(this.getPatternFinderForGroup(null));
            return new PatternFinderResult(this.column, crosstab, this._configuration);
        }

        // Copy the map under the lock so that iteration and crosstab building
        // (which call into other classes) happen outside of it.
        Map<String, DefaultPatternFinder> snapshot;
        synchronized (this) {
            snapshot = new HashMap<String, DefaultPatternFinder>(this._patternFinders);
        }

        TreeMap<String, Crosstab<?>> crosstabs = new TreeMap<String, Crosstab<?>>(NullTolerableComparator.get(String.class));
        for (Map.Entry<String, DefaultPatternFinder> entry : snapshot.entrySet()) {
            crosstabs.put(entry.getKey(), this.createCrosstab(entry.getValue()));
        }
        logger.info("Grouped result contains {} groups", (Object)Integer.valueOf(crosstabs.size()));
        return new PatternFinderResult(this.column, this.groupColumn, crosstabs, this._configuration);
    }

    /**
     * Creates an empty result crosstab with two dimensions: "Measures"
     * (pre-populated with the "Match count" and "Sample" categories) and
     * "Pattern" (initially without categories).
     *
     * @return a new, empty crosstab
     */
    public static Crosstab<Serializable> createCrosstab() {
        CrosstabDimension measuresDimension = new CrosstabDimension(DIMENSION_NAME_MEASURES);
        measuresDimension.addCategory(MEASURE_MATCH_COUNT);
        measuresDimension.addCategory(MEASURE_SAMPLE);
        CrosstabDimension patternDimension = new CrosstabDimension(DIMENSION_NAME_PATTERN);
        return new Crosstab<Serializable>(Serializable.class, new CrosstabDimension[]{measuresDimension, patternDimension});
    }

    /**
     * Fills a fresh crosstab with the patterns of one pattern finder. Patterns
     * are added in order of descending match count (ties broken by symbolic
     * string). For each pattern the match count cell receives the row count
     * plus an attached annotated-rows result, and the sample cell receives the
     * pattern's sample string.
     *
     * @param patternFinder the finder whose annotations are reported
     * @return the populated crosstab
     */
    private Crosstab<Serializable> createCrosstab(DefaultPatternFinder patternFinder) {
        Crosstab<Serializable> crosstab = PatternFinderAnalyzer.createCrosstab();
        Set<Map.Entry<TokenPattern, RowAnnotation>> entrySet = patternFinder.getAnnotations().entrySet();
        TreeSet<Map.Entry<TokenPattern, RowAnnotation>> sortedEntrySet = new TreeSet<Map.Entry<TokenPattern, RowAnnotation>>(new MatchCountComparator());
        sortedEntrySet.addAll(entrySet);
        for (Map.Entry<TokenPattern, RowAnnotation> entry : sortedEntrySet) {
            TokenPattern pattern = entry.getKey();
            RowAnnotation annotation = entry.getValue();

            CrosstabNavigator<Serializable> nav = crosstab.navigate();
            nav.where(DIMENSION_NAME_PATTERN, pattern.toSymbolicString());

            // Match count cell, with the annotated rows attached for drill-down.
            nav.where(DIMENSION_NAME_MEASURES, MEASURE_MATCH_COUNT);
            nav.put(Integer.valueOf(annotation.getRowCount()), true);
            nav.attach((AnalyzerResult)AnnotatedRowsResult.createIfSampleRowsAvailable(annotation, this._rowAnnotationFactory, new InputColumn[]{this.column}));

            // Sample cell for the same pattern.
            nav.where(DIMENSION_NAME_MEASURES, MEASURE_SAMPLE);
            nav.put((Serializable)pattern.getSampleString(), true);
        }
        return crosstab;
    }

    /** Sets the row annotation factory handed to newly created pattern finders. */
    public void setRowAnnotationFactory(RowAnnotationFactory rowAnnotationFactory) {
        this._rowAnnotationFactory = rowAnnotationFactory;
    }

    /** Sets the column whose values are analyzed. */
    public void setColumn(InputColumn<String> column) {
        this.column = column;
    }

    /** Sets the name of the optional predefined token. */
    public void setPredefinedTokenName(String predefinedTokenName) {
        this.predefinedTokenName = predefinedTokenName;
    }

    /** Sets the regexes of the optional predefined token. */
    public void setPredefinedTokenPatterns(String[] predefinedTokenPatterns) {
        this.predefinedTokenPatterns = predefinedTokenPatterns;
    }

    /** Sets the "Discriminate text case" property. */
    public void setDiscriminateTextCase(boolean discriminateTextCase) {
        this.discriminateTextCase = discriminateTextCase;
    }

    /** Sets the "Discriminate negative numbers" property. */
    public void setDiscriminateNegativeNumbers(boolean discriminateNegativeNumbers) {
        this.discriminateNegativeNumbers = discriminateNegativeNumbers;
    }

    /** Sets the "Discriminate decimals" property. */
    public void setDiscriminateDecimals(boolean discriminateDecimals) {
        this.discriminateDecimals = discriminateDecimals;
    }

    /** Sets the "Enable mixed tokens" property. */
    public void setEnableMixedTokens(boolean enableMixedTokens) {
        this.enableMixedTokens = enableMixedTokens;
    }

    /** Sets the "Upper case patterns expand in size" property. */
    public void setUpperCaseExpandable(boolean upperCaseExpandable) {
        this.upperCaseExpandable = upperCaseExpandable;
    }

    /** Sets the "Lower case patterns expand in size" property. */
    public void setLowerCaseExpandable(boolean lowerCaseExpandable) {
        this.lowerCaseExpandable = lowerCaseExpandable;
    }

    /** Sets the decimal separator; {@code null} leaves the tokenizer's own setting untouched. */
    public void setDecimalSeparator(Character decimalSeparator) {
        this.decimalSeparator = decimalSeparator;
    }

    /** Sets the "Ignore repeated spaces" property. */
    public void setIgnoreRepeatedSpaces(boolean ignoreRepeatedSpaces) {
        this.ignoreRepeatedSpaces = ignoreRepeatedSpaces;
    }

    /** Sets the minus sign; {@code null} leaves the tokenizer's own setting untouched. */
    public void setMinusSign(Character minusSign) {
        this.minusSign = minusSign;
    }

    /** Sets the thousands separator; {@code null} leaves the tokenizer's own setting untouched. */
    public void setThousandsSeparator(Character thousandsSeparator) {
        this.thousandsSeparator = thousandsSeparator;
    }

    /** Sets the optional column used to group patterns; {@code null} disables grouping. */
    public void setGroupColumn(InputColumn<String> groupColumn) {
        this.groupColumn = groupColumn;
    }
}

