/*
 * Decompiled with CFR 0.152.
 */
package com.mdfromhtml.markdown.transform;

import com.api.json.JSON;
import com.api.json.JSONArray;
import com.api.json.JSONArtifact;
import com.api.json.JSONObject;
import com.mdfromhtml.core.MDfromHTMLUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

/**
 * Splits capture files into individual JSON documents, filters the captured
 * pages they contain and writes each document to a numbered
 * {@code htmljson_NNNN.json} file (or {@code .json.rejected} when every
 * captured page was filtered out).
 *
 * <p>Command line arguments, all optional and prompted for when absent:
 * <ol>
 * <li>input directory containing the capture files and RejectStrings.txt</li>
 * <li>existing output directory for the generated files</li>
 * <li>starting file suffix (positive integer)</li>
 * <li>verbose flag ({@code true}/{@code false})</li>
 * </ol>
 */
public class ExtractHTMLJSON {
    /** Extension of the capture files read from the input directory. */
    String _ext = "text";
    /** Directory the capture files are read from. */
    Path _inputPath = null;
    /** True while the program is waiting on the user to confirm the run. */
    boolean _interactive = false;
    /** Output directory; getParams stores it with a trailing separator. */
    String _outputPath = ".";
    /** When true, progress messages are written to standard output. */
    boolean _thumbsucker = false;
    /** Prefix of every generated file name. */
    String _filePrefix = "htmljson_";
    /** Numeric suffix used for the next generated file. */
    int _fileCounter = 1;
    /** Lower-cased strings whose presence in html or content rejects a page. */
    List<String> _filters = new ArrayList<String>();
    /** URLs (without trailing slash) already accepted, used to drop duplicates. */
    Set<String> _processedURLs = new HashSet<String>();

    /**
     * Program entry point: gathers parameters, processes every matching file
     * in the input directory and exits with 0 on success or -1 on failure.
     *
     * @param args see the class description
     */
    public static void main(String[] args) {
        int exitVal = 0;
        ExtractHTMLJSON pgm = new ExtractHTMLJSON();
        if (pgm.getParams(args)) {
            if (pgm._thumbsucker) {
                System.out.println("\nFiles ending with ." + pgm._ext + " will be read from " + pgm._inputPath + "\nand the generated htmljson files (.json) saved in " + pgm._outputPath);
                System.out.println("\nFilter strings used to check html for bad pages:");
                for (String filter : pgm._filters) {
                    System.out.println(filter);
                }
                System.out.println();
            }
            if (pgm._interactive) {
                String answer = MDfromHTMLUtils.prompt("Press q to quit or press Enter to continue...");
                // Only an empty answer (plain Enter) continues; a null answer is
                // treated like Enter, as the other prompts in getParams do.
                if (answer == null || answer.length() == 0) {
                    pgm._interactive = false;
                }
            }
            if (!pgm._interactive) {
                try {
                    List<?> files = MDfromHTMLUtils.listSourceFiles(pgm._inputPath, pgm._ext);
                    for (Object entry : files) {
                        exitVal = pgm.doWork((Path) entry);
                        if (exitVal != 0) {
                            // Stop at the first file that could not be processed.
                            break;
                        }
                    }
                }
                catch (Exception e) {
                    System.out.println("Error: Can not reference files with extension " + pgm._ext + " in directory " + pgm._inputPath + " reason: " + e.getLocalizedMessage());
                    exitVal = -1;
                }
            }
            if (pgm._thumbsucker) {
                System.out.println();
            }
        } else {
            exitVal = -1;
        }
        if (pgm._thumbsucker) {
            System.out.println("Goodbye");
        }
        System.exit(exitVal);
    }

    /**
     * Reads one capture file line by line, accumulating lines until one that
     * starts with "}" is seen, and hands each accumulated chunk to
     * {@link #saveFile(String)}. Any trailing lines after the last "}" are
     * also passed to saveFile.
     *
     * @param file the capture file to process
     * @return 0 on success, -1 if an exception interrupted processing
     */
    int doWork(Path file) {
        int exitVal = 0;
        BufferedReader br = null;
        try {
            String fqFileName = file.toString();
            if (this._thumbsucker) {
                System.out.println("Processing: " + fqFileName);
            }
            br = MDfromHTMLUtils.openTextFile(fqFileName);
            StringBuilder sb = new StringBuilder();
            String line = br.readLine();
            int linenum = 0;
            while (line != null) {
                ++linenum;
                try {
                    sb.append(line).append("\n");
                    if (line.startsWith("}")) {
                        // A line starting with "}" closes the current document.
                        this.saveFile(sb.toString());
                        sb = new StringBuilder();
                    }
                    line = br.readLine();
                }
                catch (OutOfMemoryError oome) {
                    // Best-effort recovery: drop the oversized document and skip
                    // forward to the next line that starts with "{".
                    sb = new StringBuilder();
                    System.out.println("Error reading line " + linenum);
                    line = br.readLine();
                    ++linenum;
                    while (line != null) {
                        if (line.startsWith("{")) {
                            sb.append(line).append("\n");
                            // The outer loop counts the line read here.
                            line = br.readLine();
                            break;
                        }
                        line = br.readLine();
                        ++linenum;
                        // readLine returns null at end of file, so guard before use.
                        if (line != null && line.contains("\"url\":")) {
                            System.out.println("Skipping: " + line);
                        }
                    }
                    System.out.println("Resuming at line " + linenum);
                }
            }
            if (sb.length() > 0) {
                try {
                    this.saveFile(sb.toString());
                }
                catch (Exception e) {
                    System.out.println("\n\nError: " + e.getLocalizedMessage() + "\n");
                    System.out.println(sb.toString());
                    System.out.println("\n\nEnd Error: " + e.getLocalizedMessage() + "\n");
                }
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            exitVal = -1;
        }
        finally {
            // Close the reader on every path so a failure does not leak it.
            if (br != null) {
                try {
                    MDfromHTMLUtils.closeTextFile(br);
                }
                catch (Exception e) {
                    e.printStackTrace();
                    exitVal = -1;
                }
            }
        }
        return exitVal;
    }

    /**
     * Parses the supplied text as JSON, filters it and writes it to the next
     * numbered output file. The file counter advances on every call, including
     * calls whose content fails to parse.
     *
     * @param jsonContent text expected to hold a single JSON object
     */
    void saveFile(String jsonContent) {
        String outputFileName = this._outputPath + this._filePrefix + MDfromHTMLUtils.padLeft(this._fileCounter++, 4, '0') + ".json";
        JSONObject obj = null;
        try {
            JSONArtifact parsed = JSON.parse(jsonContent);
            if (!(parsed instanceof JSONObject)) {
                System.out.println("Error: got a non-JSONObject from parse: " + parsed);
                return;
            }
            obj = (JSONObject) parsed;
        }
        catch (IOException e) {
            System.out.println("Error: Can not transform to JSON: " + e.getLocalizedMessage() + "\n" + jsonContent);
            return;
        }
        catch (ClassCastException cce) {
            // NOTE(review): kept from the original; presumably guards against a
            // cast failure inside the parser - confirm whether it can occur.
            System.out.println("Error: Can not parse to JSON: " + cce.getLocalizedMessage() + "\n" + jsonContent);
            return;
        }
        try {
            // filterContent returns true when nothing usable remains.
            String target = this.filterContent(obj) ? outputFileName + ".rejected" : outputFileName;
            MDfromHTMLUtils.saveJSONFile(target, obj);
        }
        catch (Exception e) {
            System.out.println("Can not save file " + outputFileName + "  Error: " + e.getLocalizedMessage());
        }
    }

    /**
     * Removes unwanted entries from the object's "captureArray" and records
     * why under a "rejected" key. An entry is removed when its url was already
     * processed, when its html has no "&lt;body" tag, or when its html or
     * content contains one of the filter strings.
     *
     * <p>Entries lacking a url or html are reported in "rejected" but are left
     * in the array, as in the original implementation. The "Content at N"
     * messages use the current file counter, which saveFile has already
     * advanced past the number used in the output file name.
     *
     * @param jsonObj the parsed document; modified in place
     * @return true when the document should be rejected (null input, no
     *         captureArray, or no entries left after filtering)
     */
    boolean filterContent(JSONObject jsonObj) {
        boolean result = true;
        if (jsonObj == null) {
            return result;
        }
        JSONArray rejectedURLs = new JSONArray();
        JSONArray captureArray = (JSONArray) jsonObj.get("captureArray");
        if (captureArray != null) {
            Iterator<?> it = captureArray.iterator();
            while (it.hasNext()) {
                JSONObject htmlObj = (JSONObject) it.next();
                String url = (String) htmlObj.get("url");
                // Normalize so "http://x/" and "http://x" count as the same page.
                if (url != null && url.endsWith("/")) {
                    url = url.substring(0, url.length() - 1);
                }
                if (this._processedURLs.contains(url)) {
                    rejectedURLs.add(newRejection(url, "duplicate url"));
                    it.remove();
                    continue;
                }
                String html = (String) htmlObj.get("html");
                String lowerHtml = html == null ? null : html.toLowerCase();
                if (lowerHtml != null && !lowerHtml.contains("<body")) {
                    rejectedURLs.add(newRejection(url, "no <body tag in html"));
                    it.remove();
                    continue;
                }
                if (url == null || lowerHtml == null) {
                    String reason = "Filter: Content at " + this._fileCounter + " does not have a url or html elements.";
                    rejectedURLs.add(newRejection(null, reason));
                    System.out.println(reason);
                    continue;
                }
                this._processedURLs.add(url);
                String content = (String) htmlObj.get("content");
                String lowerContent = content == null ? "" : content.toLowerCase();
                for (String filter : this._filters) {
                    String where;
                    if (lowerHtml.contains(filter)) {
                        where = "HTML";
                    } else if (lowerContent.contains(filter)) {
                        where = "Content";
                    } else {
                        continue;
                    }
                    // The first matching filter decides; later ones are not checked.
                    String reason = "Filter: \"" + filter + "\" found in " + where;
                    System.out.println(reason + " for URL " + url);
                    rejectedURLs.add(newRejection(url, reason));
                    it.remove();
                    break;
                }
            }
            result = captureArray.size() <= 0;
        } else {
            String reason = "Filter: Content at " + this._fileCounter + " does not have a captureArray. message: " + jsonObj.get("message");
            rejectedURLs.add(newRejection(null, reason));
            System.out.println(reason);
        }
        if (rejectedURLs.size() > 0) {
            jsonObj.put("rejected", rejectedURLs);
        }
        return result;
    }

    /** Builds a rejection record; the "url" key is omitted when url is null. */
    private static JSONObject newRejection(String url, String reason) {
        JSONObject rejected = new JSONObject();
        if (url != null) {
            rejected.put("url", url);
        }
        rejected.put("reason", reason);
        return rejected;
    }

    /** Prompts the user and returns the fallback for a null or empty answer. */
    private static String promptWithDefault(String message, String fallback) {
        String answer = MDfromHTMLUtils.prompt(message);
        return (answer == null || answer.length() == 0) ? fallback : answer;
    }

    /**
     * Resolves the run parameters from the command line, prompting for any of
     * the first three that are missing, then loads the filter strings from
     * RejectStrings.txt in the input directory (blank lines and lines starting
     * with "#" are skipped; the rest are trimmed and lower-cased).
     *
     * @param args command line arguments, see the class description
     * @return true when all parameters are valid; false when the user quit or
     *         a parameter was invalid
     */
    boolean getParams(String[] args) {
        String inputPath = "./data/";
        String outputPath = "./data/htmljson";
        try {
            if (args.length >= 1) {
                inputPath = args[0];
            } else {
                this._interactive = true;
                this._thumbsucker = true;
                inputPath = promptWithDefault("Enter the fully qualified path to directory containing " + this._ext + " html capture files, or q to exit (" + inputPath + "):", inputPath);
                if ("q".equalsIgnoreCase(inputPath)) {
                    return false;
                }
            }
            if (!inputPath.endsWith(File.separator)) {
                inputPath = inputPath + File.separator;
            }
            this._inputPath = FileSystems.getDefault().getPath(inputPath);
        }
        catch (InvalidPathException ipe) {
            // Report the value actually used; args[0] does not exist when the
            // path was entered at the prompt.
            System.out.println("Error: " + inputPath + " is not a valid directory to form a path.");
            return false;
        }
        if (args.length >= 2) {
            outputPath = args[1];
        } else {
            this._interactive = true;
            this._thumbsucker = true;
            outputPath = promptWithDefault("Enter the fully qualified path to the htmljson output directory, or q to exit (" + outputPath + "):", outputPath);
            if ("q".equalsIgnoreCase(outputPath)) {
                return false;
            }
        }
        if (!outputPath.endsWith(File.separator)) {
            outputPath = outputPath + File.separator;
        }
        File testOutput = new File(outputPath);
        if (!testOutput.exists()) {
            System.out.println("Error: The output directory \"" + outputPath + "\" must exist.");
            return false;
        }
        if (!testOutput.isDirectory()) {
            System.out.println("Error: The output directory \"" + outputPath + "\" must be a directory.");
            return false;
        }
        this._outputPath = outputPath;
        String suffixText;
        if (args.length >= 3) {
            // The third argument is the starting file suffix; it is validated
            // exactly like a value typed at the prompt.
            suffixText = args[2];
        } else {
            suffixText = promptWithDefault("Enter the starting file suffix or q to quit (" + this._fileCounter + "):", "" + this._fileCounter);
            if ("q".equalsIgnoreCase(suffixText)) {
                return false;
            }
        }
        try {
            int startingSuffix = Integer.parseInt(suffixText.trim());
            if (startingSuffix < 1) {
                System.out.println("File suffix must be a positive number.");
                return false;
            }
            this._fileCounter = startingSuffix;
        }
        catch (NumberFormatException nfe) {
            System.out.println("File suffix must be a positive number. Got \"" + suffixText + "\"");
            return false;
        }
        if (args.length >= 4) {
            this._thumbsucker = Boolean.parseBoolean(args[3]);
        }
        try {
            List<String> rawFilters = MDfromHTMLUtils.loadTextFile(this._inputPath.resolve("RejectStrings.txt").toString());
            List<String> newFilters = new ArrayList<String>();
            for (String filter : rawFilters) {
                if (filter.startsWith("#")) {
                    continue;
                }
                String trimmed = filter.trim();
                if (trimmed.length() == 0) {
                    continue;
                }
                // Lower-cased because filterContent compares against lower-cased text.
                newFilters.add(trimmed.toLowerCase());
            }
            this._filters = newFilters;
        }
        catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }
}

