/*
 * Decompiled with CFR 0.152.
 */
package org.forester.applications;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.io.parsers.FastaParser;
import org.forester.sequence.MolecularSequence;
import org.forester.util.EasyWriter;
import org.forester.util.ForesterUtil;

public class aaa {
    public static final Pattern GN_PATTERN = Pattern.compile("GN=(\\S+)\\s");
    public static final Pattern RANGE_PATTERN = Pattern.compile("\\[(\\d+-\\d+)\\]");
    public static final int MIN_LENGTH = 85;

    /*
     * WARNING - void declaration
     */
    public static void main(String[] args) {
        try {
            EasyWriter out = (EasyWriter)ForesterUtil.createEasyWriter("aaa_out");
            System.out.println("STARTING...");
            ArrayList<MolecularSequence> too_short = new ArrayList<MolecularSequence>();
            List<MolecularSequence> orig = FastaParser.parse(new FileInputStream("C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20_2.fasta"));
            int initial_number = orig.size();
            ArrayList<String> new_seqs = new ArrayList<String>();
            for (MolecularSequence seq : orig) {
                if (seq.getLength() < 85) {
                    too_short.add(seq);
                    continue;
                }
                Matcher matcher = GN_PATTERN.matcher(seq.getIdentifier());
                String gn = "";
                if (matcher.find()) {
                    gn = matcher.group(1);
                } else {
                    System.out.println("ERROR: no gene for: " + seq.getIdentifier());
                    System.exit(-1);
                }
                new_seqs.add(">" + gn + "|" + seq.getIdentifier() + "\n" + seq.getMolecularSequenceAsString());
            }
            HashSet<String> gn_ra_set = new HashSet<String>();
            HashSet<String> mol_seq_set = new HashSet<String>();
            Collections.sort(new_seqs);
            int unique_counter = 0;
            ArrayList<String> duplicate_gn_ra = new ArrayList<String>();
            ArrayList<String> duplicate_mol_seq = new ArrayList<String>();
            ArrayList<String> new_seqs_unique = new ArrayList<String>();
            for (String seq : new_seqs) {
                void var16_19;
                Matcher matcher_ra = RANGE_PATTERN.matcher(seq);
                Matcher matcher_gn = GN_PATTERN.matcher(seq);
                String string = "";
                if (matcher_ra.find()) {
                    String string2 = matcher_ra.group(1);
                } else {
                    System.out.println("ERROR: no range for: " + seq);
                    System.exit(-1);
                }
                matcher_gn.find();
                String gn = matcher_gn.group(1);
                String gn_ra = gn + "_" + (String)var16_19;
                if (!gn_ra_set.contains(gn_ra)) {
                    gn_ra_set.add(gn_ra);
                    String mol_seq = seq.split("\n")[1];
                    if (!mol_seq_set.contains(mol_seq)) {
                        mol_seq_set.add(mol_seq);
                        new_seqs_unique.add(seq);
                        ++unique_counter;
                        continue;
                    }
                    duplicate_mol_seq.add(seq);
                    continue;
                }
                duplicate_gn_ra.add(seq);
            }
            String prev_gn = "___";
            boolean is_first = true;
            ArrayList<String> seqs_from_same_protein = new ArrayList<String>();
            for (String string : new_seqs_unique) {
                Matcher matcher_gn = GN_PATTERN.matcher(string);
                matcher_gn.find();
                String gn = matcher_gn.group(1);
                if (!prev_gn.equals(gn) && !is_first) {
                    aaa.doit(seqs_from_same_protein, out);
                    seqs_from_same_protein = new ArrayList();
                }
                prev_gn = gn;
                is_first = false;
                seqs_from_same_protein.add(string);
            }
            aaa.doit(seqs_from_same_protein, out);
            out.println("");
            out.println("");
            out.println("Removed because same GN and region:");
            for (String string : duplicate_gn_ra) {
                out.println(string);
            }
            out.println("");
            out.println("");
            out.println("Removed because identical mol sequence:");
            for (String string : duplicate_mol_seq) {
                out.println(string);
            }
            out.println("");
            out.println("");
            out.println("Removed because too short:");
            for (MolecularSequence molecularSequence : too_short) {
                out.println(molecularSequence.toString());
            }
            out.println("");
            out.println("");
            out.println("initial:" + initial_number);
            out.println("ignored because shorter than 85aa: " + too_short.size());
            out.println("unique   : " + unique_counter);
            out.println("unique   : " + new_seqs_unique.size());
            out.println("duplicate because gn and range same: " + duplicate_gn_ra.size());
            out.println("duplicate because mol seq same     : " + duplicate_mol_seq.size());
            out.flush();
            out.close();
            System.out.println("DONE ");
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static void doit(List<String> same_protein_seqs, EasyWriter out) throws IOException {
        int count = same_protein_seqs.size();
        if (count == 1) {
            out.println(same_protein_seqs.get(0));
        } else {
            int c = 1;
            for (String s2 : same_protein_seqs) {
                out.println(new StringBuffer(s2).insert(s2.indexOf("|"), "__" + c + "_OF_" + count).toString());
                ++c;
            }
        }
    }
}

