package org.fbk.cit.hlt.core.lsa;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.core.analysis.stemmer.Stemmer;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/TermSet.class */
/**
 * A set of terms loaded from a tab-separated text source.
 *
 * <p>Each non-empty line that does not start with {@code #} is split with
 * {@link #tabPattern}; the value found in the configured column is optionally
 * lowercased and optionally stemmed, then added to the set. Loading stops as
 * soon as the set holds {@code maxSize} terms.
 */
public class TermSet {
    /** Default for the lowercase option used by the constructors. */
    public static final boolean DEFAULT_LOWERCASE = true;

    /** Default zero-based column index used by the constructors. */
    public static final int DEFAULT_COLUMN = 0;

    /** The terms collected so far. */
    protected Set<String> set;

    /** Upper bound on the number of terms {@link #read(Reader)} will collect. */
    int maxSize;

    /** Zero-based index of the column a term is taken from. */
    int column;

    /** Whether terms are trimmed and lowercased before being stored. */
    boolean lowercase;

    /** Optional stemmer applied to each term; {@code null} means no stemming. */
    Stemmer stemmer;

    static Logger logger = Logger.getLogger(TermSet.class.getName());

    /** Column separator; presumably a tab character (defined by StringTable) - confirm. */
    protected static Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);

    /** Creates an empty term set with a maximum size of {@link Integer#MAX_VALUE}. */
    public TermSet() {
        this(Integer.MAX_VALUE);
    }

    /**
     * Creates an empty term set with the default lowercase and column settings.
     *
     * @param i maximum number of terms that {@link #read(Reader)} will collect
     */
    public TermSet(int i) {
        this.maxSize = i;
        this.lowercase = DEFAULT_LOWERCASE;
        this.column = DEFAULT_COLUMN;
        this.set = new HashSet<String>();
    }

    /** @return the stemmer applied while reading, or {@code null} if none is set */
    public Stemmer getStemmer() {
        return this.stemmer;
    }

    /** @param stemmer the stemmer to apply while reading; {@code null} disables stemming */
    public void setStemmer(Stemmer stemmer) {
        this.stemmer = stemmer;
    }

    /** @return whether terms are lowercased while reading */
    public boolean getLowercase() {
        return this.lowercase;
    }

    /** @param z {@code true} to trim and lowercase terms while reading */
    public void setLowercase(boolean z) {
        this.lowercase = z;
    }

    /** @return the zero-based column index terms are taken from */
    public int getColumn() {
        return this.column;
    }

    /** @param i the zero-based column index terms are taken from */
    public void setColumn(int i) {
        this.column = i;
    }

    /** @return the maximum number of terms collected by {@link #read(Reader)} */
    public int getMaxSize() {
        return this.maxSize;
    }

    /** @param i the maximum number of terms collected by {@link #read(Reader)} */
    public void setMaxSize(int i) {
        this.maxSize = i;
    }

    /**
     * Tests membership using the string exactly as given; no lowercasing or
     * stemming is applied to the argument.
     *
     * @param str the term to look up
     * @return {@code true} if the set contains {@code str}
     */
    public boolean contains(String str) {
        return this.set.contains(str);
    }

    /** @return the number of terms currently in the set */
    public int size() {
        return this.set.size();
    }

    /**
     * Sets the stemmer and then reads terms from {@code reader}.
     *
     * @param reader  the source of tab-separated lines; closed before returning
     * @param stemmer the stemmer to store and apply; may be {@code null}
     * @throws IOException if reading fails
     */
    public void read(Reader reader, Stemmer stemmer) throws IOException {
        this.stemmer = stemmer;
        read(reader);
    }

    /**
     * Reads terms from {@code reader} and adds them to this set.
     *
     * <p>Lines are trimmed; empty lines and lines starting with {@code #} are
     * skipped, as are lines with too few columns. Reading stops at end of input
     * or once the set holds {@code maxSize} terms. The reader is always closed,
     * including when an exception is thrown.
     *
     * @param reader the source of tab-separated lines; closed before returning
     * @throws IOException if reading fails
     */
    public void read(Reader reader) throws IOException {
        logger.debug("reading term set...");
        LineNumberReader lineNumberReader = new LineNumberReader(reader);
        try {
            String line;
            // Check the size limit first so no line is consumed once the set is full.
            while (this.set.size() < this.maxSize && (line = lineNumberReader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() == 0 || trimmed.startsWith("#")) {
                    continue;
                }
                String[] fields = tabPattern.split(trimmed);
                if (fields.length <= this.column) {
                    continue;
                }
                String term = fields[this.column];
                if (this.lowercase) {
                    // NOTE: the field is trimmed only on this path, and the
                    // default locale is used for lowercasing (kept as-is so
                    // previously stored terms still match).
                    term = term.trim().toLowerCase();
                }
                if (this.stemmer != null) {
                    term = this.stemmer.stem(term);
                }
                this.set.add(term);
            }
        } finally {
            lineNumberReader.close();
        }
        logger.debug(this.set.size() + " terms read");
    }

    /**
     * Writes every term on its own line to {@code writer}, then closes it.
     *
     * @param writer the destination; closed before returning
     * @throws IOException if the underlying writer reported an error
     */
    public void write(Writer writer) throws IOException {
        logger.info("writing term set...");
        PrintWriter printWriter = new PrintWriter(writer);
        try {
            for (String term : this.set) {
                printWriter.println(term);
            }
            // PrintWriter swallows I/O errors; checkError() flushes the stream
            // and reports whether any write failed.
            if (printWriter.checkError()) {
                throw new IOException("error while writing term set");
            }
        } finally {
            printWriter.close();
        }
    }

    /** @return all terms, each followed by a newline, in the set's iteration order */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (String term : this.set) {
            builder.append(term);
            builder.append("\n");
        }
        return builder.toString();
    }

    /**
     * Command-line entry point: loads a term set from a UTF-8 file and logs it.
     *
     * <p>Arguments: {@code file size column lowercase}. The log4j configuration
     * file is taken from the {@code log-config} system property, falling back
     * to {@code log-config.txt}.
     *
     * @param strArr the command-line arguments
     * @throws Exception if the arguments cannot be parsed or the file cannot be read
     */
    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "log-config.txt";
        }
        PropertyConfigurator.configure(property);
        if (strArr.length != 4) {
            System.out.println("Usage: java -mx1024M org.fbk.cit.hlt.core.lsa.TermSet file size column lowercase");
            System.exit(1);
        }
        TermSet termSet = new TermSet();
        termSet.setMaxSize(Integer.parseInt(strArr[1]));
        termSet.setColumn(Integer.parseInt(strArr[2]));
        termSet.setLowercase(Boolean.parseBoolean(strArr[3]));
        termSet.read(new InputStreamReader(new FileInputStream(strArr[0]), "UTF-8"));
        logger.info("set:\n" + termSet.toString());
        logger.info("size: " + termSet.size());
        logger.info("max size: " + termSet.getMaxSize());
        logger.info("column: " + termSet.getColumn());
        logger.info("lowercase: " + termSet.getLowercase());
    }
}
