package tech.molecules.deep.smiles;

import com.actelion.research.chem.SmilesParser;
import com.actelion.research.chem.StereoMolecule;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.stream.Collectors;
import tech.molecules.leet.chem.ChemUtils;

/* loaded from: input_file:tech/molecules/deep/smiles/RunCreateChemblDataset.class */
/**
 * Command-line utility that turns a file of SMILES strings into CSV training
 * data sets for a "fill in the blinded characters" SMILES task.
 *
 * <p>Each input line is parsed as SMILES, filtered by size and carbon fraction,
 * stored as an ID code, and later expanded into {@link TrainingSample}s. The
 * samples are written to several CSV files in the current working directory,
 * together with a text file listing every character that occurs in the CSV rows.
 */
public class RunCreateChemblDataset {
    public static final int LENGTH_A = 32;
    public static final int LENGTH_B = 96;

    /** Filler character appended/prepended to bring strings to a fixed length. */
    public static final char paddingChar = 'y';

    /** Character that replaces a blinded (hidden) position of the reference SMILES. */
    public static final char blindedChar = 'x';

    /** Defaults used by {@link #main(String[])} when no arguments are supplied. */
    private static final String DEFAULT_INPUT_FILE = "C:\\Temp\\leet_input\\chembl_size90_input_smiles.csv";
    private static final String DEFAULT_TAG = "xx40";
    private static final int DEFAULT_MAX_LENGTH = 40;

    /** Reading stops once more than this many structures have been accepted. */
    private static final int MAX_STRUCTURES = 200000;

    /** Structures with fewer atoms than this are skipped. */
    private static final int MIN_ATOMS = 12;

    /** Minimum share of atoms with atomic number 6 a structure must have. */
    private static final double MIN_CARBON_FRACTION = 0.4d;

    /**
     * One training row: a SMILES input, the blinded reference string and the
     * characters that were removed from the reference.
     *
     * <p>All three strings are expected to be padded already; this class only
     * stores and formats them.
     */
    public static class TrainingSample {
        public final String input_Smiles;
        public final String input_CanonicBlinded;
        public final String output_Canonic;

        /**
         * @param inputSmiles padded SMILES used as model input
         * @param inputCanonicBlinded padded reference SMILES with blinded positions
         * @param outputCanonic padded string of the characters that were blinded
         */
        public TrainingSample(String inputSmiles, String inputCanonicBlinded, String outputCanonic) {
            this.input_Smiles = inputSmiles;
            this.input_CanonicBlinded = inputCanonicBlinded;
            this.output_Canonic = outputCanonic;
        }

        @Override
        public String toString() {
            return "Sample: " + this.input_Smiles + " " + this.input_CanonicBlinded + " -> " + this.output_Canonic;
        }

        /**
         * Formats the sample as one comma separated row (no quoting or escaping
         * is applied, so the fields must not contain commas themselves).
         */
        public String toCSV() {
            return this.input_Smiles + "," + this.input_CanonicBlinded + "," + this.output_Canonic;
        }
    }

    /**
     * Entry point. Without arguments the built-in defaults are used; optionally
     * the input file, the output tag and the maximum SMILES length can be given
     * as first, second and third argument.
     *
     * @param args optional overrides: {@code [inputFile [tag [maxLength]]]}
     * @throws NumberFormatException if the third argument is not an integer
     */
    public static void main(String[] args) {
        int count = args == null ? 0 : args.length;
        String inputFile = count > 0 ? args[0] : DEFAULT_INPUT_FILE;
        String tag = count > 1 ? args[1] : DEFAULT_TAG;
        int maxLength = count > 2 ? Integer.parseInt(args[2]) : DEFAULT_MAX_LENGTH;
        createCSVFiles(inputFile, tag, maxLength);
    }

    /**
     * Reads SMILES lines from a file, keeps the structures that pass the size
     * and carbon-fraction filter, shuffles them and generates all CSV files.
     *
     * <p>Lines that cannot be processed are reported on stderr and skipped.
     *
     * @param inputPath path of the text file with one SMILES per line
     * @param tag label embedded in the names of the generated files
     * @param maxLength maximum SMILES length; also the padded string length
     * @throws RuntimeException wrapping the {@link IOException} if the file
     *         cannot be opened or read
     */
    public static void createCSVFiles(String inputPath, String tag, int maxLength) {
        List<String> idCodes = new ArrayList<>();
        // try-with-resources: the reader is closed even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(inputPath)))) {
            String line;
            while ((line = reader.readLine()) != null && idCodes.size() <= MAX_STRUCTURES) {
                try {
                    StereoMolecule molecule = new StereoMolecule();
                    new SmilesParser().parse(molecule, line);
                    // NOTE(review): 31 is presumably the "all helper arrays incl. CIP" level
                    // of the chemistry library - confirm against its constants.
                    molecule.ensureHelperArrays(31);
                    if (passesFilter(molecule)) {
                        idCodes.add(molecule.getIDCode());
                    }
                } catch (Exception e) {
                    // Best effort: a single unparsable line must not abort the run.
                    e.printStackTrace();
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException from opening the file.
            throw new RuntimeException(e);
        }
        // NOTE(review): the shuffle is unseeded, so the row order differs between runs
        // although the sample generation itself uses fixed seeds.
        Collections.shuffle(idCodes);
        createCSVFiles(idCodes, tag, maxLength);
    }

    /** Returns true if the molecule is large enough and has a sufficient carbon fraction. */
    private static boolean passesFilter(StereoMolecule molecule) {
        int atoms = molecule.getAtoms();
        if (atoms < MIN_ATOMS) {
            return false;
        }
        double carbonFraction = (1.0d * ChemUtils.countAtoms(molecule, Collections.singletonList(6))) / atoms;
        return carbonFraction >= MIN_CARBON_FRACTION;
    }

    /**
     * Generates the training samples for the given structures and writes the
     * CSV files plus the alphabet file into the current working directory.
     *
     * <p>Files written (prefix {@code smilesdata_<tag>_}): {@code 025.csv},
     * {@code 05.csv}, {@code 075.csv}, {@code 1_all.csv},
     * {@code 1_lessThan36.csv}, {@code 1_lessThan64.csv}; additionally
     * {@code <tag>_alphabet.txt}. The {@code 05} and {@code 075} files are
     * currently always empty and only created to keep the set of output files
     * stable.
     *
     * @param structures ID codes of the structures to process
     * @param tag label embedded in the names of the generated files
     * @param maxLength maximum SMILES length; also the padded string length
     * @throws RuntimeException wrapping the {@link IOException} if a CSV file
     *         cannot be written
     */
    public static void createCSVFiles(List<String> structures, String tag, int maxLength) {
        System.out.println("Structures: " + structures.size());

        List<TrainingSample> samples025 = createTrainingSamples(new Random(123L), structures, 0.25d, 1, maxLength);
        // The 0.5 and 0.75 data sets are disabled. Previously the 0.75 samples were
        // computed and thrown away; that wasted pass has been removed.
        List<TrainingSample> samples05 = new ArrayList<>();
        List<TrainingSample> samples075 = new ArrayList<>();
        List<TrainingSample> samplesAll = createTrainingSamples(new Random(126L), structures, 1.0d, 1, maxLength);

        // Length filters look at the SMILES without blinded/padding characters.
        Set<Character> fillers = new HashSet<>();
        fillers.add(blindedChar);
        fillers.add(paddingChar);
        List<TrainingSample> lessThan36 = filterByContentLength(samplesAll, fillers, 36);
        List<TrainingSample> lessThan64 = filterByContentLength(samplesAll, fillers, 64);
        System.out.println("okay :)");

        Set<Character> alphabet = new HashSet<>();
        collectCharacters(samples025, alphabet);
        collectCharacters(samples05, alphabet);
        collectCharacters(samples075, alphabet);
        collectCharacters(samplesAll, alphabet);

        String prefix = "smilesdata_" + tag + "_";
        try {
            exportToCSV(samples025, prefix + "025.csv");
            exportToCSV(samples05, prefix + "05.csv");
            exportToCSV(samples075, prefix + "075.csv");
            exportToCSV(samplesAll, prefix + "1_all.csv");
            exportToCSV(lessThan36, prefix + "1_lessThan36.csv");
            exportToCSV(lessThan64, prefix + "1_lessThan64.csv");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }

        System.out.println("characters: " + alphabet.size());
        System.out.println("Alphabet:\n");
        List<Character> sortedAlphabet = new ArrayList<>(alphabet);
        Collections.sort(sortedAlphabet);
        StringBuilder alphabetText = new StringBuilder();
        for (Character symbol : sortedAlphabet) {
            System.out.print(symbol);
            alphabetText.append(symbol);
        }

        // The alphabet file is a convenience output: failures are reported but not fatal.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(tag + "_alphabet.txt"))) {
            writer.write(alphabetText.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("\nmkay");
    }

    /** Keeps the samples whose input SMILES has fewer than {@code limit} non-filler characters. */
    private static List<TrainingSample> filterByContentLength(List<TrainingSample> samples, Set<Character> fillers, int limit) {
        return samples.stream()
                .filter(sample -> sample.input_Smiles.length() - countCharacters(sample.input_Smiles, fillers) < limit)
                .collect(Collectors.toList());
    }

    /** Adds every character of every CSV row of {@code samples} to {@code alphabet}. */
    private static void collectCharacters(List<TrainingSample> samples, Set<Character> alphabet) {
        for (TrainingSample sample : samples) {
            for (char c : sample.toCSV().toCharArray()) {
                alphabet.add(c);
            }
        }
    }

    /**
     * Counts how many characters of {@code str} are contained in {@code set}.
     *
     * @param str text to scan
     * @param set characters to count
     * @return number of positions in {@code str} holding a character of {@code set}
     */
    public static int countCharacters(String str, Set<Character> set) {
        int hits = 0;
        for (int pos = 0; pos < str.length(); pos++) {
            if (set.contains(str.charAt(pos))) {
                hits++;
            }
        }
        return hits;
    }

    /**
     * Creates training samples for every structure in the list, in list order.
     *
     * @param random source of randomness shared by all structures
     * @param idCodes ID codes of the structures
     * @param blindProbability probability with which each reference character is blinded
     * @param samplesPerStructure number of samples attempted per structure
     * @param maxLength maximum SMILES length; also the padded string length
     * @return all generated samples; structures that fail contribute nothing
     */
    public static List<TrainingSample> createTrainingSamples(Random random, List<String> idCodes, double blindProbability, int samplesPerStructure, int maxLength) {
        List<TrainingSample> samples = new ArrayList<>();
        for (String idCode : idCodes) {
            samples.addAll(createTrainingSamples(random, ChemUtils.parseIDCode(idCode), blindProbability, samplesPerStructure, maxLength));
        }
        return samples;
    }

    /**
     * Creates training samples for a single molecule; failures are reported on
     * stderr and yield an empty list instead of an exception.
     *
     * @param random source of randomness
     * @param molecule structure to generate samples for
     * @param blindProbability probability with which each reference character is blinded
     * @param samplesPerStructure number of samples attempted
     * @param maxLength maximum SMILES length; also the padded string length
     * @return the generated samples, or an empty list if generation failed
     */
    public static List<TrainingSample> createTrainingSamples(Random random, StereoMolecule molecule, double blindProbability, int samplesPerStructure, int maxLength) {
        try {
            return createTrainingSet(molecule, random, maxLength, blindProbability, samplesPerStructure);
        } catch (Exception e) {
            e.printStackTrace();
            return new ArrayList<>();
        }
    }

    /**
     * Writes one CSV row per sample to the given file, replacing existing content.
     *
     * @param samples rows to write (may be empty, which yields an empty file)
     * @param path target file path
     * @throws IOException if the file cannot be created or written
     */
    private static void exportToCSV(List<TrainingSample> samples, String path) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(path)))) {
            for (TrainingSample sample : samples) {
                // Explicit "\n" keeps the output independent of the platform line separator.
                writer.write(sample.toCSV() + "\n");
            }
        }
    }

    /**
     * Builds the training samples for one molecule.
     *
     * <p>A reference SMILES is generated without a random source and padded to
     * {@code maxLength}. For each sample a second SMILES is generated with the
     * given random source (presumably a randomized atom order), padded on both
     * sides at a random split, and paired with a copy of the reference in which
     * each character is replaced by {@link #blindedChar} with probability
     * {@code blindProbability}. The replaced characters, in order and padded,
     * form the expected output.
     *
     * @param molecule structure to generate samples for
     * @param random source of randomness
     * @param maxLength maximum SMILES length; also the padded string length
     * @param blindProbability probability with which each reference character is blinded
     * @param numSamples number of samples attempted; attempts whose generated
     *        SMILES exceeds {@code maxLength} are skipped
     * @return the generated samples (possibly fewer than {@code numSamples})
     * @throws Exception if the reference SMILES is longer than {@code maxLength}
     */
    public static List<TrainingSample> createTrainingSet(StereoMolecule molecule, Random random, int maxLength, double blindProbability, int numSamples) throws Exception {
        List<TrainingSample> samples = new ArrayList<>();
        String reference = new IsomericSmilesGenerator2(molecule, 2).getSmiles();
        if (reference.length() > maxLength) {
            throw new Exception("Too long..");
        }
        String paddedReference = addPadding(reference, paddingChar, maxLength);

        for (int attempt = 0; attempt < numSamples; attempt++) {
            String variant = new IsomericSmilesGenerator2(molecule, 2, random).getSmiles();
            if (variant.length() > maxLength) {
                continue;
            }
            // The order of the random draws below is part of the reproducible output.
            String paddedVariant = addPaddingBothSides(variant, paddingChar, maxLength, random.nextDouble());
            char[] blinded = paddedReference.toCharArray();
            StringBuilder hidden = new StringBuilder();
            // Only real SMILES characters can be blinded, never the trailing padding.
            for (int pos = 0; pos < reference.length(); pos++) {
                if (random.nextDouble() < blindProbability) {
                    hidden.append(blinded[pos]);
                    blinded[pos] = blindedChar;
                }
            }
            samples.add(new TrainingSample(paddedVariant, new String(blinded), addPadding(hidden.toString(), paddingChar, maxLength)));
        }
        return samples;
    }

    /**
     * Right-pads {@code str} with {@code c} up to {@code length} characters.
     *
     * @param str text to pad
     * @param c padding character
     * @param length target length
     * @return the padded text, or {@code str} unchanged if it is already at
     *         least {@code length} characters long
     */
    public static String addPadding(String str, char c, int length) {
        int missing = length - str.length();
        if (missing <= 0) {
            return str;
        }
        StringBuilder padded = new StringBuilder(length).append(str);
        for (int k = 0; k < missing; k++) {
            padded.append(c);
        }
        return padded.toString();
    }

    /**
     * Pads {@code str} with {@code c} on both sides up to {@code length}
     * characters. The left side receives {@code floor(fraction * missing)}
     * padding characters, the right side the remainder.
     *
     * @param str text to pad
     * @param c padding character
     * @param length target length
     * @param fraction share of the padding placed on the left; values outside
     *        {@code [0, 1]} are clamped so the result never exceeds {@code length}
     * @return the padded text, or {@code str} unchanged if it is already at
     *         least {@code length} characters long
     */
    public static String addPaddingBothSides(String str, char c, int length, double fraction) {
        int missing = length - str.length();
        if (missing <= 0) {
            return str;
        }
        // Clamp so that an out-of-range fraction cannot produce an over-long result.
        int left = Math.max(0, Math.min((int) (fraction * missing), missing));
        int right = missing - left;
        StringBuilder padded = new StringBuilder(length);
        for (int k = 0; k < left; k++) {
            padded.append(c);
        }
        padded.append(str);
        for (int k = 0; k < right; k++) {
            padded.append(c);
        }
        return padded.toString();
    }
}
