package com.gengoai.hermes.tools;

import com.gengoai.LogUtils;
import com.gengoai.application.Application;
import com.gengoai.application.Option;
import com.gengoai.hermes.AnnotatableType;
import com.gengoai.hermes.Document;
import com.gengoai.hermes.corpus.Corpus;
import com.gengoai.hermes.corpus.DocumentCollection;
import com.gengoai.hermes.corpus.SearchResults;
import com.gengoai.hermes.format.DocFormatParameters;
import com.gengoai.hermes.format.DocFormatProvider;
import com.gengoai.hermes.format.DocFormatService;
import com.gengoai.string.Strings;
import com.gengoai.string.TableFormatter;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Logger;

@Application.Description(
        "===========================================================\n"
        + "           Application for working with corpora.\n"
        + "===========================================================\n"
        + "                         Operations\n"
        + "---------------------------------------------------------\n"
        + "INFO -  Displays the number of documents and completed AnnotatableType for the corpus.\n"
        + "QUERY - Queries the corpus with the given query returning the top 10 results.\n"
        + "GET - Gets the given document (or a random one if *rnd* is given) in Json format.\n"
        + "IMPORT - Imports the documents from the input document collection into the corpus.\n"
        + "ANNOTATE - Annotates the corpus with the given annotatable types.\n"
        + "FORMATS - List the available document formats and their parameters.\n"
        + "SPLIT - Assigns a random split with the given % as TRAIN and the remaining as TEST\n"
        + "EXPORT - Exports the corpus to the location/format given by docFormat.\n"
        + "---------------------------------------------------------\n"
        + "\n"
        + "                     Command Line Arguments ")
/**
 * Command line tool for inspecting and manipulating a corpus.
 *
 * <p>The first positional argument names the operation (INFO, QUERY, GET, IMPORT, ANNOTATE,
 * FORMATS, SPLIT or EXPORT); operations that need a value (QUERY, GET, SPLIT) read it from the
 * second positional argument. The corpus and the document collection are supplied through the
 * {@code corpus} and {@code docFormat} options.
 */
public class CorpusApp extends HermesCLI {
    private static final Logger log = Logger.getLogger(CorpusApp.class.getName());

    @Option(description = "The specification or location the corpus or document collection to process.", name = "docFormat", aliases = {"df"})
    private String documentCollectionSpec;

    @Option(description = "The specification or location to save the output of the processing.", name = "corpus", aliases = {"c"})
    private String corpusLocation;

    @Option(description = "Annotations to add", defaultValue = "Annotation.TOKEN,Annotation.SENTENCE,Attribute.PART_OF_SPEECH,Attribute.LEMMA,Relation.DEPENDENCY,Annotation.PHRASE_CHUNK,Annotation.ENTITY,Attribute.CATEGORY", aliases = {"t"})
    private String[] types;

    /**
     * Program entry point.
     *
     * @param args the raw command line arguments
     * @throws Exception if the selected operation fails
     */
    public static void main(String[] args) throws Exception {
        new CorpusApp().run(args);
    }

    /**
     * ANNOTATE: annotates the corpus with the types named by the {@code types} option.
     * Logs an error and exits with status -1 when no types are set.
     *
     * @throws Exception if opening, annotating or closing the corpus fails
     */
    private void corpusAnnotate() throws Exception {
        if (this.types == null) {
            LogUtils.logSevere(log, "No AnnotatableTypes Given!");
            System.exit(-1);
        }
        try (Corpus corpus = getCorpus()) {
            corpus.annotate(stringToAnnotatableType());
        }
    }

    /**
     * EXPORT: exports the corpus using the {@code docFormat} option value as the target.
     *
     * @throws Exception if opening, exporting or closing the corpus fails
     */
    private void corpusExport() throws Exception {
        try (Corpus corpus = getCorpus()) {
            corpus.export(this.documentCollectionSpec);
        }
    }

    /**
     * GET: prints the JSON form of one document to standard out.
     *
     * <p>The document id is the second positional argument; the special value {@code *rnd*}
     * (case-insensitive) selects a random id from the corpus instead. A message is written to
     * standard error when the document does not exist or the corpus has no documents.
     *
     * @throws Exception if opening, reading or closing the corpus fails
     */
    private void corpusGetDocument() throws Exception {
        ensurePositionalArgument(1, "No Document Id Given!");
        // Positional argument 0 is the operation name; the id is argument 1.
        String requestedId = getPositionalArgs()[1];
        try (Corpus corpus = getCorpus()) {
            String documentId = requestedId;
            if (requestedId.equalsIgnoreCase("*rnd*")) {
                List<String> ids = corpus.getIds();
                if (ids.isEmpty()) {
                    // Random.nextInt rejects a bound of zero, so bail out explicitly.
                    System.err.println("The corpus is empty; there is no document to select.");
                    return;
                }
                documentId = ids.get(new Random().nextInt(ids.size()));
            }
            Document document = corpus.getDocument(documentId);
            if (document == null) {
                System.err.println(documentId + " does not exists in the corpus.");
            } else {
                System.out.println(document.toJson());
            }
        }
    }

    /**
     * INFO: prints the corpus location, its document count and its completed annotatable types.
     *
     * @throws Exception if opening, reading or closing the corpus fails
     */
    private void corpusInfo() throws Exception {
        try (Corpus corpus = getCorpus()) {
            // Read everything first so a failure does not leave a half-printed report.
            long documentCount = corpus.size();
            Set<AnnotatableType> completed = corpus.getCompleted();
            System.out.println("                 Corpus Information");
            System.out.println("========================================================");
            System.out.println("Corpus: " + this.corpusLocation);
            System.out.println("# of Documents: " + documentCount);
            System.out.println("========================================================");
            System.out.println("              Completed AnnotatableTypes");
            System.out.println("------------------------------------------------------");
            for (AnnotatableType completedType : completed) {
                System.out.println(completedType);
            }
            System.out.println("========================================================");
        }
    }

    /**
     * QUERY: runs the query given as the second positional argument and prints the total hit
     * count followed by the id and the first 255 characters of up to ten matching documents.
     *
     * @throws Exception if opening, querying or closing the corpus fails
     */
    private void corpusQuery() throws Exception {
        ensurePositionalArgument(1, "No Query Given!");
        // Positional argument 0 is the operation name; the query is argument 1.
        String queryText = getPositionalArgs()[1];
        try (Corpus corpus = getCorpus()) {
            SearchResults results = corpus.query(queryText);
            System.out.println("                     Query Results");
            System.out.println("========================================================");
            System.out.println("Corpus: " + this.corpusLocation);
            System.out.println("Query: " + queryText);
            System.out.println("Total hits: " + results.size());
            System.out.println("========================================================");
            System.out.println("                      Top 10 Results");
            System.out.println("------------------------------------------------------");
            // The query is run a second time for the listing rather than reusing `results`:
            // whether a SearchResults can still be streamed after size() has been called is
            // not visible from this class - TODO confirm and reuse `results` if it is safe.
            corpus.query(queryText).stream().limit(10L).forEach(document -> {
                System.out.print("Document ID: ");
                System.out.println(document.getId());
                System.out.print("Content: ");
                System.out.print(document.substring(0, Math.min(255, document.length())));
                System.out.println("...");
                System.out.println("===============");
            });
            System.out.println("========================================================");
        }
    }

    /**
     * SPLIT: assigns a random split to the corpus, passing the second positional argument
     * (parsed as a double) to {@code Corpus.assignRandomSplit}.
     *
     * @throws NumberFormatException if the argument is not a parsable double
     * @throws Exception if opening, updating or closing the corpus fails
     */
    private void corpusSplit() throws Exception {
        ensurePositionalArgument(1, "No Split Percentage Given!");
        double trainPortion = Double.parseDouble(getPositionalArgs()[1]);
        try (Corpus corpus = getCorpus()) {
            corpus.assignRandomSplit(trainPortion);
        }
    }

    /**
     * Verifies that a positional argument exists at the given index; otherwise logs the
     * message at severe level and terminates the JVM with status -1.
     *
     * @param i   zero-based index of the required positional argument
     * @param str the message to log when the argument is missing
     */
    private void ensurePositionalArgument(int i, String str) {
        if (getPositionalArgs().length <= i) {
            LogUtils.logSevere(log, str);
            System.exit(-1);
        }
    }

    /**
     * Opens the corpus at the location given by the {@code corpus} option. Logs an error and
     * terminates the JVM with status -1 when the option is null or blank.
     *
     * @return the opened corpus; the caller is responsible for closing it
     * @throws IOException if the corpus cannot be opened
     */
    private Corpus getCorpus() throws IOException {
        if (Strings.isNullOrBlank(this.corpusLocation)) {
            LogUtils.logSevere(log, "No Corpus Specified!");
            System.exit(-1);
        }
        return Corpus.open(this.corpusLocation);
    }

    /**
     * Creates the document collection described by the {@code docFormat} option. Logs an error
     * and terminates the JVM with status -1 when the option is null or blank.
     *
     * @return the created collection; the caller is responsible for closing it
     */
    private DocumentCollection getDocumentCollection() {
        if (Strings.isNullOrBlank(this.documentCollectionSpec)) {
            LogUtils.logSevere(log, "No Document Collection Specified!");
            System.exit(-1);
        }
        return DocumentCollection.create(this.documentCollectionSpec);
    }

    /**
     * IMPORT: adds every document of the {@code docFormat} collection to the corpus.
     *
     * @throws Exception if opening, importing or closing either resource fails
     */
    private void importDocuments() throws Exception {
        // Resources are closed in reverse order: the corpus first, then the collection.
        try (DocumentCollection documents = getDocumentCollection();
             Corpus corpus = getCorpus()) {
            corpus.addAll(documents);
        }
    }

    /**
     * FORMATS: prints one table per available document format provider listing its default
     * parameter names (sorted) and the simple class name of each parameter's type.
     *
     * @throws Exception if a provider or its parameters cannot be read
     */
    private void listFormats() throws Exception {
        System.out.println("                 Document Formats");
        System.out.println("========================================================");
        System.out.println();
        for (DocFormatProvider provider : DocFormatService.getProviders()) {
            TableFormatter table = new TableFormatter();
            table.title(provider.getName().toUpperCase());
            table.header(Arrays.asList("ParameterName", "ParameterType"));
            DocFormatParameters defaults = provider.getDefaultFormatParameters();
            // TreeSet gives a stable, alphabetical row order.
            Set<String> sortedNames = new TreeSet<>(defaults.parameterNames());
            for (String parameterName : sortedNames) {
                table.content(Arrays.asList(parameterName, defaults.getParam(parameterName).type.getSimpleName()));
            }
            table.print(System.out);
            System.out.println();
        }
        System.out.println("========================================================");
    }

    /**
     * Dispatches to the operation named by the first positional argument (matched
     * case-insensitively). An unknown operation is logged at severe level.
     *
     * @throws Exception if the selected operation fails
     */
    protected void programLogic() throws Exception {
        ensurePositionalArgument(0, "No Operation Given!");
        String operation = getPositionalArgs()[0];
        // Locale.ROOT keeps the match independent of the default locale (e.g. Turkish 'i').
        switch (operation.toUpperCase(Locale.ROOT)) {
            case "INFO":
                corpusInfo();
                break;
            case "QUERY":
                corpusQuery();
                break;
            case "IMPORT":
                importDocuments();
                break;
            case "GET":
                corpusGetDocument();
                break;
            case "FORMATS":
                listFormats();
                break;
            case "ANNOTATE":
                corpusAnnotate();
                break;
            case "SPLIT":
                corpusSplit();
                break;
            case "EXPORT":
                corpusExport();
                break;
            default:
                LogUtils.logSevere(log, "Invalid Operation: {0}", operation);
                break;
        }
    }

    /**
     * Resolves each entry of the {@code types} option with {@code AnnotatableType.valueOf}.
     *
     * @return the resolved types, in the same order as the option values
     */
    private AnnotatableType[] stringToAnnotatableType() {
        AnnotatableType[] resolved = new AnnotatableType[this.types.length];
        for (int index = 0; index < resolved.length; index++) {
            resolved[index] = AnnotatableType.valueOf(this.types[index]);
        }
        return resolved;
    }
}
