/*
 * Decompiled with CFR 0.152.
 */
package org.biojava.nbio.genome.homology;

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.biojava.nbio.alignment.Alignments;
import org.biojava.nbio.alignment.SimpleGapPenalty;
import org.biojava.nbio.core.alignment.matrices.SimpleSubstitutionMatrix;
import org.biojava.nbio.core.alignment.template.SequencePair;
import org.biojava.nbio.core.sequence.CDSSequence;
import org.biojava.nbio.core.sequence.ChromosomeSequence;
import org.biojava.nbio.core.sequence.DNASequence;
import org.biojava.nbio.core.sequence.GeneSequence;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.biojava.nbio.core.sequence.Strand;
import org.biojava.nbio.core.sequence.TranscriptSequence;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompound;
import org.biojava.nbio.core.sequence.compound.AminoAcidCompoundSet;
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface;
import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface;
import org.biojava.nbio.core.sequence.loader.UniprotProxySequenceReader;
import org.biojava.nbio.genome.GeneFeatureHelper;
import org.biojava.nbio.genome.homology.BlastHomologyHits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Writes GFF3 {@code match} lines that project the best UniProt BLAST hit of a gene onto
 * the CDS regions of each of its transcripts.
 *
 * <p>For every transcript the predicted protein is locally aligned against the UniProt
 * protein; each CDS whose peptide has aligned residues yields one GFF3 line. The first line
 * written for a transcript additionally carries an {@code Alias} and a {@code Note}
 * attribute assembled from the UniProt keywords and the Pfam, CAZy, BRENDA and GO database
 * references; the same notes are added to the gene via {@code GeneSequence.addNote}.
 */
public class GFF3FromUniprotBlastHits {
    private static final Logger logger = LoggerFactory.getLogger(GFF3FromUniprotBlastHits.class);

    /**
     * Loads the BLAST matches from {@code xmlBlastHits} with
     * {@link BlastHomologyHits#getMatches} and delegates to
     * {@link #process(LinkedHashMap, LinkedHashMap, OutputStream)}.
     *
     * @param xmlBlastHits BLAST result file handed to {@code BlastHomologyHits.getMatches}
     * @param ecutoff cutoff value handed to {@code BlastHomologyHits.getMatches}
     * @param geneSequenceHashMap genes keyed by the id used as first token of a hit key
     * @param gff3Output stream the GFF3 lines are written to; it is not closed here
     * @throws Exception if the BLAST matches cannot be loaded
     */
    public void process(File xmlBlastHits, double ecutoff, LinkedHashMap<String, GeneSequence> geneSequenceHashMap, OutputStream gff3Output) throws Exception {
        LinkedHashMap<String, ArrayList<String>> hits = BlastHomologyHits.getMatches(xmlBlastHits, ecutoff);
        this.process(hits, geneSequenceHashMap, gff3Output);
    }

    /**
     * Writes GFF3 match lines for every entry of {@code hits}.
     *
     * <p>The gene id is the part of the hit key before the first space. Only the first
     * UniProt accession of each hit list is used. A failure while handling one entry is
     * logged and the remaining entries are still processed.
     *
     * @param hits query key to list of UniProt accessions, best hit first
     * @param geneSequenceHashMap genes keyed by gene id
     * @param gff3Output stream the GFF3 lines are written to; it is not closed here
     * @throws Exception declared for interface compatibility; per-entry failures are caught
     */
    public void process(LinkedHashMap<String, ArrayList<String>> hits, LinkedHashMap<String, GeneSequence> geneSequenceHashMap, OutputStream gff3Output) throws Exception {
        int size = hits.size();
        int index = 0;
        for (Map.Entry<String, ArrayList<String>> hit : hits.entrySet()) {
            String accessionid = hit.getKey();
            ++index;
            // Progress output only, hence not logged as an error.
            logger.info(accessionid + " " + index + "/" + size);
            try {
                String id = accessionid.split(" ")[0];
                GeneSequence geneSequence = geneSequenceHashMap.get(id);
                if (geneSequence == null) {
                    logger.error("Not found " + id);
                    continue;
                }
                ArrayList<String> uniprotProteinHits = hit.getValue();
                if (uniprotProteinHits == null || uniprotProteinHits.isEmpty()) {
                    logger.warn("No UniProt hits listed for {}", accessionid);
                    continue;
                }
                String uniprotBestHit = uniprotProteinHits.get(0);
                UniprotProxySequenceReader<AminoAcidCompound> uniprotSequence = new UniprotProxySequenceReader<AminoAcidCompound>(uniprotBestHit, AminoAcidCompoundSet.getAminoAcidCompoundSet());
                ProteinSequence proteinSequence = new ProteinSequence(uniprotSequence);
                String hitSequence = proteinSequence.getSequenceAsString();
                for (TranscriptSequence transcriptSequence : geneSequence.getTranscripts().values()) {
                    writeTranscriptMatches(geneSequence, transcriptSequence, proteinSequence, uniprotBestHit, hitSequence, gff3Output);
                }
            }
            catch (Exception e) {
                // Best effort: one failing accession must not stop the run, but it is a real failure.
                logger.error("Accession Id: {}", accessionid, e);
            }
        }
    }

    /**
     * Aligns the predicted protein of one transcript against the UniProt protein and writes
     * one GFF3 line per CDS that has aligned residues.
     */
    private void writeTranscriptMatches(GeneSequence geneSequence, TranscriptSequence transcriptSequence, ProteinSequence proteinSequence, String uniprotBestHit, String hitSequence, OutputStream gff3Output) throws Exception {
        ProteinSequence predictedProtein = transcriptSequence.getProteinSequence();
        String predictedProteinSequence = predictedProtein.getSequenceAsString();
        List<ProteinSequence> cdsProteinList = transcriptSequence.getProteinCDSSequences();
        List<CDSSequence> cdsSequenceList = new ArrayList<CDSSequence>(transcriptSequence.getCDSSequences().values());

        String testSequence = concatenate(cdsProteinList);
        if (!translationsAgree(testSequence, predictedProteinSequence)) {
            DNASequence codingSequence = transcriptSequence.getDNACodingSequence();
            logger.info("Coding Sequence: {}", codingSequence.getSequenceAsString());
            logger.info("Sequence agreement error");
            logger.info("CDS seq={}", testSequence);
            logger.info("PRE seq={}", predictedProteinSequence);
            logger.info("UNI seq={}", hitSequence);
        }

        SequencePair<ProteinSequence, AminoAcidCompound> alignment = Alignments.getPairwiseAlignment(predictedProtein, proteinSequence, Alignments.PairwiseSequenceAlignerType.LOCAL, new SimpleGapPenalty(), SimpleSubstitutionMatrix.getBlosum62());

        // Offset of the current CDS peptide within the concatenated CDS peptides.
        int proteinIndex = 0;
        boolean firstLine = true;
        for (int i = 0; i < cdsProteinList.size(); ++i) {
            int peptideLength = cdsProteinList.get(i).getSequenceAsString().length();

            // First residue of this peptide (scanning forward) that has a target index.
            Integer startIndex = null;
            int offsetStartIndex = 0;
            for (int s = 0; s < peptideLength; ++s) {
                startIndex = alignment.getIndexInTargetForQueryAt(proteinIndex + s);
                if (startIndex != null) {
                    startIndex = startIndex + 1;
                    offsetStartIndex = s;
                    break;
                }
            }

            // Last residue of this peptide (scanning backward) that has a target index.
            Integer endIndex = null;
            int offsetEndIndex = 0;
            for (int e = 0; e < peptideLength; ++e) {
                endIndex = alignment.getIndexInTargetForQueryAt(proteinIndex + peptideLength - 1 - e);
                if (endIndex != null) {
                    endIndex = endIndex + 1;
                    offsetEndIndex = e;
                    break;
                }
            }
            proteinIndex += peptideLength;

            // equals(), not ==: these are boxed Integers and == would compare object identity.
            if (startIndex == null || endIndex == null || startIndex.equals(endIndex)) {
                continue;
            }

            CDSSequence cdsSequence = cdsSequenceList.get(i);
            String hitLabel = transcriptSequence.getStrand() == Strand.POSITIVE
                    ? uniprotBestHit + "_" + startIndex + "_" + endIndex
                    : uniprotBestHit + "_" + endIndex + "_" + startIndex;
            // Trim the CDS by three bases per peptide residue skipped at either end.
            int dnaBeginIndex = cdsSequence.getBioBegin() + 3 * offsetStartIndex;
            int dnaEndIndex = cdsSequence.getBioEnd() - 3 * offsetEndIndex;
            String scaffold = geneSequence.getParentChromosomeSequence().getAccession().getID();

            StringBuilder line = new StringBuilder();
            line.append(scaffold).append("\t")
                    .append(geneSequence.getSource()).append("_UNIPROT\tmatch\t")
                    .append(dnaBeginIndex).append("\t")
                    .append(dnaEndIndex).append("\t.\t")
                    .append(transcriptSequence.getStrand().getStringRepresentation()).append("\t.\t")
                    .append("Name=").append(hitLabel);
            if (firstLine) {
                // Only the first line of a transcript carries the alias and the notes.
                line.append(";Alias=").append(uniprotBestHit).append(buildNotes(proteinSequence, geneSequence));
                firstLine = false;
            }
            line.append("\n");
            gff3Output.write(line.toString().getBytes());
        }
    }

    /** Joins the sequence strings of the given peptides in list order. */
    private static String concatenate(List<ProteinSequence> peptides) {
        StringBuilder joined = new StringBuilder();
        for (ProteinSequence peptide : peptides) {
            joined.append(peptide.getSequenceAsString());
        }
        return joined.toString();
    }

    /**
     * Tells whether the concatenated CDS translation matches the predicted protein, either
     * exactly or with exactly one extra trailing character on the CDS side. Never throws for
     * an empty CDS translation.
     */
    private static boolean translationsAgree(String cdsTranslation, String predicted) {
        return cdsTranslation.equals(predicted)
                || (cdsTranslation.length() == predicted.length() + 1 && cdsTranslation.startsWith(predicted));
    }

    /**
     * Collects keywords and Pfam, CAZy, BRENDA and GO references of the UniProt protein into
     * a {@code ;Note=} attribute fragment and adds each of them as a note to the gene.
     *
     * @return the attribute fragment, or an empty string when there is nothing to report
     */
    private static String buildNotes(ProteinSequence proteinSequence, GeneSequence geneSequence) throws Exception {
        StringBuilder notes = new StringBuilder();

        FeaturesKeyWordInterface featureKeyWords = proteinSequence.getFeaturesKeyWord();
        if (featureKeyWords != null) {
            List<String> keyWords = featureKeyWords.getKeyWords();
            if (keyWords != null) {
                for (String keyWord : keyWords) {
                    if (keyWord.equals("Complete proteome") || keyWord.equals("Direct protein sequencing")) {
                        continue;
                    }
                    appendNote(notes, geneSequence, keyWord);
                }
            }
        }

        DatabaseReferenceInterface databaseReferences = proteinSequence.getDatabaseReferences();
        if (databaseReferences == null) {
            return notes.toString();
        }
        Map<String, List<DBReferenceInfo>> referencesByDatabase = databaseReferences.getDatabaseReferences();
        if (referencesByDatabase == null) {
            return notes.toString();
        }

        appendReferenceIds(notes, geneSequence, referencesByDatabase.get("Pfam"));
        appendReferenceIds(notes, geneSequence, referencesByDatabase.get("CAZy"));

        List<DBReferenceInfo> eccList = referencesByDatabase.get("BRENDA");
        if (eccList != null) {
            for (DBReferenceInfo reference : eccList) {
                // Dots of the id are replaced by underscores before it is emitted.
                appendNote(notes, geneSequence, "EC:" + reference.getId().replace(".", "_"));
            }
        }

        List<DBReferenceInfo> goList = referencesByDatabase.get("GO");
        if (goList != null) {
            for (DBReferenceInfo reference : goList) {
                appendNote(notes, geneSequence, reference.getId());
                Map<String, String> properties = reference.getProperties();
                for (Map.Entry<String, String> property : properties.entrySet()) {
                    if ("evidence".equals(property.getKey())) {
                        continue;
                    }
                    String value = property.getValue();
                    // Values starting with "C:" are left out of the notes.
                    if (value == null || value.startsWith("C:")) {
                        continue;
                    }
                    if (value.endsWith("...")) {
                        value = value.substring(0, value.length() - 3);
                    }
                    appendNote(notes, geneSequence, value);
                }
            }
        }
        return notes.toString();
    }

    /** Appends the id of every reference in {@code references}; a null list is ignored. */
    private static void appendReferenceIds(StringBuilder notes, GeneSequence geneSequence, List<DBReferenceInfo> references) {
        if (references == null) {
            return;
        }
        for (DBReferenceInfo reference : references) {
            appendNote(notes, geneSequence, reference.getId());
        }
    }

    /**
     * Appends one space-prefixed note to the attribute fragment and records it on the gene.
     * The {@code ;Note=} header is written together with the first note, so a fragment is
     * never left with a header but no value.
     */
    private static void appendNote(StringBuilder notes, GeneSequence geneSequence, String note) {
        if (notes.length() == 0) {
            notes.append(";Note=");
        }
        notes.append(" ").append(note);
        geneSequence.addNote(note);
    }

    /**
     * Developer entry point that runs the conversion on hard-coded local files. Any failure
     * is logged instead of being propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            LinkedHashMap<String, ChromosomeSequence> dnaSequenceHashMap = GeneFeatureHelper.loadFastaAddGeneFeaturesFromGlimmerGFF3(new File("/Users/Scooter/scripps/dyadic/analysis/454Scaffolds/454Scaffolds-16.fna"), new File("/Users/Scooter/scripps/dyadic/GlimmerHMM/c1_glimmerhmm-16.gff"));
            LinkedHashMap<String, GeneSequence> geneSequenceList = GeneFeatureHelper.getGeneSequences(dnaSequenceHashMap.values());
            FileOutputStream fo = new FileOutputStream("/Users/Scooter/scripps/dyadic/outputGlimmer/genemark_uniprot_match-16.gff3");
            try {
                LinkedHashMap<String, ArrayList<String>> blasthits = BlastHomologyHits.getMatches(new File("/Users/Scooter/scripps/dyadic/blastresults/c1_glimmer_in_uniprot.xml"), 1.0E-10);
                logger.info("Number of uniprot hits " + blasthits.size());
                GFF3FromUniprotBlastHits gff3FromUniprotBlastHits = new GFF3FromUniprotBlastHits();
                gff3FromUniprotBlastHits.process(blasthits, geneSequenceList, fo);
            }
            finally {
                // Close the output even when loading or processing the hits fails.
                fo.close();
            }
        }
        catch (Exception e) {
            logger.error("Exception: ", e);
        }
    }
}

