/*
 * Decompiled with CFR 0.152.
 */
package org.eobjects.datacleaner.lucene;

import java.io.IOException;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.eobjects.analyzer.beans.api.Categorized;
import org.eobjects.analyzer.beans.api.Configured;
import org.eobjects.analyzer.beans.api.Convertable;
import org.eobjects.analyzer.beans.api.Description;
import org.eobjects.analyzer.beans.api.Initialize;
import org.eobjects.analyzer.beans.api.NumberProperty;
import org.eobjects.analyzer.beans.api.OutputColumns;
import org.eobjects.analyzer.beans.api.TransformerBean;
import org.eobjects.analyzer.beans.stringpattern.DefaultTokenizer;
import org.eobjects.analyzer.beans.stringpattern.Token;
import org.eobjects.analyzer.beans.stringpattern.TokenType;
import org.eobjects.analyzer.beans.stringpattern.TokenizerConfiguration;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.data.InputRow;
import org.eobjects.analyzer.util.StringUtils;
import org.eobjects.datacleaner.lucene.LuceneSearchCategory;
import org.eobjects.datacleaner.lucene.LuceneTransformer;
import org.eobjects.datacleaner.lucene.SearchIndex;
import org.eobjects.datacleaner.lucene.SearchIndexConverter;

/**
 * Transformer that replaces misspelled words in a string column with the
 * spelling found in a field of a Lucene search index.
 *
 * <p>For each row the whole input value is first looked up with a fuzzy query.
 * If that yields a hit, the indexed value is returned as-is. Otherwise the
 * input is tokenized and every text token is looked up individually; tokens
 * of any other type are copied to the output unchanged.
 */
@TransformerBean(value="Typo correction using search index")
@Description(value="Uses a search index field containing correct spellings of words to search/replace for typos and minor mistakes within strings.")
@Categorized(value={LuceneSearchCategory.class})
public class TypoCorrectionTransformer
implements LuceneTransformer<String> {
    /** Upper bound applied to the edit distance passed to {@link FuzzyQuery}. */
    private static final int MAX_EDIT_DISTANCE = 2;

    @Configured
    @Description(value="Column containing search term(s) to fire.")
    InputColumn<String> searchInput;
    @Configured
    @Convertable(value=SearchIndexConverter.class)
    @Description(value="Search index to fire searches on.")
    SearchIndex searchIndex;
    @Configured
    @Description(value="Search field name")
    String searchField;
    /** Number of edits allowed per text token when searching. */
    @Configured
    @NumberProperty(negative=false)
    int fuzzFactor = 1;
    private DefaultTokenizer tokenizer;
    private IndexSearcher indexSearcher;

    /**
     * Describes the single output column, named after the input column with
     * the suffix {@code " (typos corrected)"}.
     *
     * @return the output column definition
     */
    public OutputColumns getOutputColumns() {
        return new OutputColumns(this.searchInput.getName() + " (typos corrected)", new String[0]);
    }

    /**
     * Obtains the index searcher from the configured search index and builds
     * the tokenizer, with text-case discrimination switched off.
     */
    @Initialize
    public void init() {
        this.indexSearcher = this.searchIndex.getSearcher();
        TokenizerConfiguration tokenizerConfiguration = new TokenizerConfiguration(false);
        tokenizerConfiguration.setDiscriminateTextCase(false);
        this.tokenizer = new DefaultTokenizer(tokenizerConfiguration);
    }

    /**
     * Produces the typo-corrected version of the row's search input value.
     *
     * @param row the row to read the input value from
     * @return a one-element array holding the corrected string; the element is
     *         {@code null} when the input value is null or empty
     * @throws IllegalStateException if searching or reading the index fails
     */
    public String[] transform(InputRow row) {
        String searchText = row.getValue(this.searchInput);
        if (StringUtils.isNullOrEmpty(searchText)) {
            return new String[1];
        }

        List<Token> tokens = this.tokenizer.tokenize(searchText);

        // The allowed edit distance for the full-text lookup scales with the
        // number of text tokens (searchToken caps it at MAX_EDIT_DISTANCE).
        int textTokens = 0;
        for (Token token : tokens) {
            if (token.getType() == TokenType.TEXT) {
                ++textTokens;
            }
        }

        String fullResult = this.searchToken(searchText, textTokens * this.fuzzFactor);
        if (fullResult != null) {
            return new String[]{fullResult};
        }

        // No hit for the complete value: correct the text tokens one by one.
        StringBuilder result = new StringBuilder();
        for (Token token : tokens) {
            String original = token.getString();
            String replacement = null;
            if (token.getType() == TokenType.TEXT) {
                replacement = this.searchToken(original, this.fuzzFactor);
            }
            // Keep the input's own spelling when nothing was found or when the
            // hit differs from the input only by letter case.
            if (replacement == null || original.equalsIgnoreCase(replacement)) {
                result.append(original);
            } else {
                result.append(replacement);
            }
        }
        return new String[]{result.toString()};
    }

    /**
     * Runs a fuzzy query for {@code inputToken} against the configured search
     * field and returns the stored field value of the top hit.
     *
     * @param inputToken the text to look up
     * @param fuzzFactor the requested edit distance; values above
     *        {@link #MAX_EDIT_DISTANCE} are reduced to that limit
     * @return the search field value of the best matching document, or
     *         {@code null} when the search yields no hit
     * @throws IllegalStateException if searching the index or fetching the
     *         matching document fails
     */
    private String searchToken(String inputToken, int fuzzFactor) {
        int maxEdits = Math.min(fuzzFactor, MAX_EDIT_DISTANCE);
        FuzzyQuery query = new FuzzyQuery(new Term(this.searchField, inputToken), maxEdits);

        TopDocs searchResult;
        try {
            searchResult = this.indexSearcher.search(query, 1);
        }
        catch (IOException e) {
            throw new IllegalStateException("Searching index threw exception", e);
        }
        if (searchResult == null || searchResult.totalHits == 0) {
            return null;
        }
        ScoreDoc[] hits = searchResult.scoreDocs;
        if (hits == null || hits.length == 0) {
            // Defensive: never index into an empty hit list.
            return null;
        }

        Document document;
        try {
            document = this.indexSearcher.doc(hits[0].doc);
        }
        catch (Exception e) {
            // Broad catch kept on purpose so every failure here still surfaces
            // as IllegalStateException, as callers have seen so far.
            throw new IllegalStateException("Fetching document from index threw exception", e);
        }
        return document.get(this.searchField);
    }
}

