/*
 * Decompiled with CFR 0.152.
 */
package edu.sysu.pmglab.annotation.database.gene;

import edu.sysu.pmglab.bytecode.ASCIIUtility;
import edu.sysu.pmglab.bytecode.ByteStream;
import edu.sysu.pmglab.bytecode.Bytes;
import edu.sysu.pmglab.bytecode.BytesSplitter;
import edu.sysu.pmglab.container.indexable.DynamicIndexableMap;
import edu.sysu.pmglab.container.indexable.IndexableSet;
import edu.sysu.pmglab.container.indexable.LinkedSet;
import edu.sysu.pmglab.container.list.IntList;
import edu.sysu.pmglab.container.list.List;
import edu.sysu.pmglab.gtb.genome.coordinate.Chromosome;
import edu.sysu.pmglab.io.file.LiveFile;
import edu.sysu.pmglab.io.reader.ReaderStream;
import edu.sysu.pmglab.io.writer.WriterStream;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;

public class GEncodeGTFParser {
    // Location of the input GTF file; handed to LiveFile.of(...) in submit().
    String gtfFile;
    // Destination file that submit() writes the transcript table to.
    File outputKggFile;
    // Prefix checked in getGeneName(): a gene_name starting with "ENSG" is replaced by ".".
    private static final byte[] ENSG = "ENSG".getBytes();
    // Only records whose contig resolves to a chromosome in this set are written by submit().
    HashSet<Chromosome> storedContigName;
    // Splitter for the attribute column; 59 is ASCII ';'.
    private static final BytesSplitter semicolonSplit = new BytesSplitter(59);
    // Shared scratch buffer used by getGeneName() to assemble the composite gene label.
    private static ByteStream cache = new ByteStream();
    // Placeholder value (ASCII '.') stored by parse() for attributes that have a key but no value.
    private static final Bytes EXIST = new Bytes(new byte[]{46});
    // Attribute keys looked up in kvMapInInfo; only element 0 of each list is read in this file.
    private static final List<Bytes> geneIDTagSet = new List<Bytes>(new Bytes[]{new Bytes("gene_id")});
    private static final List<Bytes> geneNameTagSet = new List<Bytes>(new Bytes[]{new Bytes("gene_name")});
    private static final List<Bytes> geneTypeTagSet = new List<Bytes>(new Bytes[]{new Bytes("gene_type")});
    private static final List<Bytes> transcriptNameTagSet = new List<Bytes>(new Bytes[]{new Bytes("transcript_id")});
    private static final List<Bytes> hgncTagSet = new List<Bytes>(new Bytes[]{new Bytes("hgnc_id")});
    // Key/value view of the most recently parsed attribute column; cleared and refilled by every parse() call.
    // NOTE(review): this and the other static buffers are shared by all instances without synchronization.
    private static final DynamicIndexableMap<Bytes, Bytes> kvMapInInfo = new DynamicIndexableMap();
    // Status labels written by KggSeqTranscriptRecord.writeToCache() for the coding start / coding end columns.
    static Bytes CMPL_FLAG = new Bytes("cmpl");
    static Bytes UNKNOWN_FLAG = new Bytes("unk");
    static Bytes INCMPL_FLAG = new Bytes("incmpl");
    // Feature types handled by submit(). The ORDER matters: submit() switches on the index of the
    // feature in this set (0 gene, 1 transcript, 2 start_codon, 3 CDS, 4 exon, 5 stop_codon).
    static IndexableSet<Bytes> indexableTypeSet = new LinkedSet<Bytes>(new Bytes[]{new Bytes("gene"), new Bytes("transcript"), new Bytes("start_codon"), new Bytes("CDS"), new Bytes("exon"), new Bytes("stop_codon")});

    /**
     * Converts the configured GTF file into the transcript table written to {@code outputKggFile}.
     *
     * <p>The GTF is read line by line, one {@link KggSeqTranscriptRecord} is built per transcript,
     * the records are sorted with {@link KggSeqTranscriptRecord#compareTo}, and every record whose
     * contig is contained in {@code storedContigName} is written out.
     *
     * <p>Both streams are closed even when reading, parsing or writing fails.
     *
     * @throws IOException if the input cannot be opened/read or the output cannot be written
     */
    public void submit() throws IOException {
        ReaderStream readerStream = LiveFile.of(this.gtfFile).openAsText();
        try {
            // The output is opened before parsing starts, as in the original implementation.
            WriterStream writerStream = new WriterStream(this.outputKggFile, WriterStream.Option.DEFAULT);
            try {
                List<KggSeqTranscriptRecord> records = GEncodeGTFParser.readTranscripts(readerStream);
                records.sort(KggSeqTranscriptRecord::compareTo);
                this.writeRecords(records, writerStream);
            } finally {
                writerStream.close();
            }
        } finally {
            readerStream.close();
        }
    }

    /**
     * Reads every line of the GTF stream and assembles one record per transcript.
     *
     * <p>A record is completed (added to the result) when the next transcript of the same gene
     * starts, when the next gene line is reached, or at end of input. Blank lines and lines
     * starting with '#' are skipped.
     */
    private static List<KggSeqTranscriptRecord> readTranscripts(ReaderStream readerStream) throws IOException {
        List<KggSeqTranscriptRecord> list = new List<KggSeqTranscriptRecord>();
        KggSeqTranscriptRecord record = new KggSeqTranscriptRecord();
        ByteStream lineBuffer = new ByteStream();
        BytesSplitter tabSplitter = new BytesSplitter(9);
        // True until the first gene line has been seen.
        boolean startGene = true;
        // True between a gene line and the first transcript line that follows it.
        boolean firstRNAInGene = true;
        while (readerStream.readline(lineBuffer) != -1) {
            Bytes line = lineBuffer.toBytes();
            // 35 is ASCII '#': header/comment line. Empty lines carry no fields either.
            if (line.length() == 0 || line.byteAt(0) == 35) {
                lineBuffer.clear();
                continue;
            }
            tabSplitter.init(line);
            Bytes contigName = tabSplitter.next().detach();
            // Second column is not used.
            tabSplitter.next();
            // Third column is the feature type; unknown types yield an index matching no case.
            switch (indexableTypeSet.indexOf(tabSplitter.next())) {
                case 0: {
                    // gene line: flush the last transcript of the previous gene, if it has exons.
                    if (!startGene && !record.exonStartPos.isEmpty()) {
                        list.add(record);
                        record = record.retainContigAndGene();
                    }
                    startGene = false;
                    firstRNAInGene = true;
                    Chromosome chromosome = Chromosome.get(contigName.toString());
                    int indexOfContig = chromosome.getIndex();
                    // Store the chromosome's own name rather than the spelling used in the file.
                    contigName = new Bytes(chromosome.getName());
                    // Skip columns 4-8 to reach the attribute column.
                    for (int i = 3; i < 8; ++i) {
                        tabSplitter.next();
                    }
                    GEncodeGTFParser.parse(tabSplitter.next());
                    Bytes geneName = GEncodeGTFParser.getGeneName();
                    record.setContigName(contigName).setGeneName(geneName).indexOfContig(indexOfContig);
                    break;
                }
                case 1: {
                    // transcript line: the first transcript of a gene reuses the record prepared by
                    // the gene line; every later one flushes its predecessor first.
                    if (!firstRNAInGene) {
                        list.add(record);
                        record = record.retainContigAndGene();
                    }
                    firstRNAInGene = false;
                    int pos = tabSplitter.next().toInt();
                    int end = tabSplitter.next().toInt();
                    tabSplitter.next();
                    // 43 is ASCII '+': strand 0 for '+', 1 for anything else.
                    byte strand = tabSplitter.next().startsWith((byte)43) ? (byte)0 : 1;
                    tabSplitter.next();
                    GEncodeGTFParser.parse(tabSplitter.next());
                    Bytes transcriptName = kvMapInInfo.get(transcriptNameTagSet.fastGet(0)).detach();
                    int indexOfHGNC = GEncodeGTFParser.idOfHGNC();
                    record.setPos(pos).setEnd(end).setTranscriptName(transcriptName).setIdOfHGNC(indexOfHGNC).setStrand(strand);
                    break;
                }
                case 2: {
                    // start_codon: widens the coding range and marks the start as seen.
                    record.setCodingPos(tabSplitter.next().toInt());
                    record.setCodingEnd(tabSplitter.next().toInt());
                    record.startCodon(true);
                    break;
                }
                case 3: {
                    // CDS: widens the coding range.
                    record.setCodingPos(tabSplitter.next().toInt());
                    record.setCodingEnd(tabSplitter.next().toInt());
                    break;
                }
                case 4: {
                    // exon
                    int exonStart = tabSplitter.next().toInt();
                    int exonEnd = tabSplitter.next().toInt();
                    record.addExon(exonStart, exonEnd);
                    break;
                }
                case 5: {
                    // stop_codon: both coordinates may extend either end of the coding range.
                    int stopCodonStart = tabSplitter.next().toInt();
                    int stopCodonEnd = tabSplitter.next().toInt();
                    record.updateCodingEnd(stopCodonStart);
                    record.updateCodingEnd(stopCodonEnd);
                    record.endCodon(true);
                    break;
                }
            }
            lineBuffer.clear();
        }
        // End of input acts like a gene boundary: without this the final transcript of the file
        // would never be added, because no later gene/transcript line triggers the flush.
        if (!startGene && !record.exonStartPos.isEmpty()) {
            list.add(record);
        }
        return list;
    }

    /**
     * Writes every record whose contig is contained in {@code storedContigName}, in list order.
     */
    private void writeRecords(List<KggSeqTranscriptRecord> records, WriterStream writerStream) throws IOException {
        ByteStream outputCache = new ByteStream();
        for (int i = 0; i < records.size(); ++i) {
            KggSeqTranscriptRecord current = (KggSeqTranscriptRecord)records.fastGet(i);
            Chromosome chromosome = Chromosome.get(current.contigName.toString());
            if (!this.storedContigName.contains(chromosome)) {
                continue;
            }
            current.writeToCache(outputCache);
            writerStream.write(outputCache.toBytes());
            outputCache.clear();
        }
    }

    /**
     * Sets the input GTF location from any object, using its {@code toString()} value.
     *
     * @param gtfFile object whose string form identifies the GTF file; must not be null
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setGtfFile(Object gtfFile) {
        String location = gtfFile.toString();
        this.gtfFile = location;
        return this;
    }

    /**
     * Sets the output file from any object, using its {@code toString()} value as the path.
     *
     * @param outputKggFile object whose string form is the output path; must not be null
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setOutputKggFile(Object outputKggFile) {
        String path = outputKggFile.toString();
        this.outputKggFile = new File(path);
        return this;
    }

    /**
     * @return the output file configured via {@code setOutputKggFile}, or null if none was set
     */
    public File getOutputKggFile() {
        return outputKggFile;
    }

    /**
     * Replaces the class-wide set of recognised feature types.
     *
     * <p>{@code submit()} dispatches on the position of a feature type inside this set, so the
     * element order of the replacement determines how each feature is handled.
     *
     * @param indexableTypeSet the new feature-type set
     */
    public static void setIndexableTypeSet(IndexableSet<Bytes> indexableTypeSet) {
        IndexableSet<Bytes> replacement = indexableTypeSet;
        GEncodeGTFParser.indexableTypeSet = replacement;
    }

    /**
     * Sets the chromosomes whose transcripts are kept when the output is written.
     *
     * @param storedContigName chromosomes to keep; the set is stored by reference, not copied
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setStoredContigName(HashSet<Chromosome> storedContigName) {
        HashSet<Chromosome> keep = storedContigName;
        this.storedContigName = keep;
        return this;
    }

    /**
     * Ad-hoc entry point: registers aliases for chrMT, then converts a GTF file at a hard-coded
     * location into a table at another hard-coded location, keeping all chromosomes returned by
     * {@code Chromosome.values()}.
     *
     * @param args ignored
     * @throws IOException if the conversion fails
     */
    public static void main(String[] args) throws IOException {
        Chromosome.get("chrMT").addAlias("NC_012920.1", "MT", "chrM");
        GEncodeGTFParser parser = new GEncodeGTFParser();
        parser.setGtfFile("/Users/wenjiepeng/Desktop/SDFA3.0/annotation/annotation/resource/GEncode/gencode.v47.annotation.gtf.gz");
        parser.setOutputKggFile("/Users/wenjiepeng/Desktop/SDFA3.0/annotation/annotation/resource/GEncode/kggseq.txt");
        HashSet<Chromosome> allChromosomes = new HashSet<Chromosome>(Chromosome.values());
        parser.setStoredContigName(allChromosomes);
        parser.submit();
    }

    /**
     * Parses one GTF attribute column into the shared {@code kvMapInInfo} map.
     *
     * <p>The column is split on ';'; each non-empty, trimmed piece is split on blanks. A piece
     * with a single token is stored with the placeholder value {@code EXIST}; a piece with two
     * tokens is stored as key/value, where a value starting with a double quote is reduced to the
     * text between the first pair of quotes. The map is cleared first, so it only ever reflects
     * the most recent call.
     *
     * @param info the raw attribute column
     * @throws UnsupportedOperationException if a piece contains more than two blank-separated tokens
     */
    private static void parse(Bytes info) {
        kvMapInInfo.clear();
        semicolonSplit.init(info);
        BytesSplitter spaceSplitter = new BytesSplitter(32);
        List<Bytes> tokens = new List<Bytes>();
        while (semicolonSplit.hasNext()) {
            Bytes attribute = semicolonSplit.next().trim();
            if (attribute.length() == 0) {
                continue;
            }
            tokens.clear();
            spaceSplitter.init(attribute);
            while (spaceSplitter.hasNext()) {
                tokens.add(spaceSplitter.next().detach());
            }
            int tokenCount = tokens.size();
            if (tokenCount == 0) {
                continue;
            }
            if (tokenCount == 1) {
                // Flag-style attribute without a value.
                kvMapInInfo.put((Bytes)tokens.fastGet(0), EXIST);
                continue;
            }
            if (tokenCount == 2) {
                Bytes value = (Bytes)tokens.fastGet(1);
                // 34 is ASCII '"': strip the surrounding quotes.
                if (value.startsWith((byte)34)) {
                    Iterator<Bytes> quoted = value.split((byte)34);
                    // First piece is whatever precedes the opening quote; discard it.
                    quoted.next();
                    value = quoted.next().detach();
                }
                kvMapInInfo.put((Bytes)tokens.fastGet(0), value);
                continue;
            }
            throw new UnsupportedOperationException(tokens.fastGet(0) + " has multiple blanks to split");
        }
    }

    /**
     * Builds the composite gene label {@code gene_id;gene_name;gene_type} from the attributes most
     * recently loaded by {@code parse}.
     *
     * <p>If the gene_name value starts with "ENSG", a single '.' is written in its place.
     *
     * @return a detached copy of the assembled label
     * @throws IllegalStateException if gene_id, gene_name or gene_type is missing from the parsed
     *         attributes (previously only checked by {@code assert}, which is disabled by default
     *         and then surfaced as a NullPointerException without context)
     */
    private static Bytes getGeneName() {
        Bytes geneID = kvMapInInfo.get(geneIDTagSet.fastGet(0));
        Bytes geneType = kvMapInInfo.get(geneTypeTagSet.fastGet(0));
        Bytes geneNameID = kvMapInInfo.get(geneNameTagSet.fastGet(0));
        if (geneID == null || geneType == null || geneNameID == null) {
            throw new IllegalStateException("GTF gene record is missing a required attribute:"
                    + (geneID == null ? " gene_id" : "")
                    + (geneNameID == null ? " gene_name" : "")
                    + (geneType == null ? " gene_type" : ""));
        }
        cache.clear();
        cache.write(geneID);
        // 59 is ASCII ';', the separator between the three parts.
        cache.write(59);
        if (geneNameID.startsWith(ENSG)) {
            // 46 is ASCII '.': placeholder used instead of an "ENSG..." name.
            cache.write((byte)46);
        } else {
            cache.write(geneNameID);
        }
        cache.write(59);
        cache.write(geneType);
        return cache.toBytes().detach();
    }

    /**
     * Extracts the numeric HGNC id from the attributes most recently loaded by {@code parse}.
     *
     * <p>The hgnc_id value is split on ':' and the second piece is converted to an int.
     *
     * @return the numeric id, or -1 when no hgnc_id attribute is present
     */
    private static int idOfHGNC() {
        Bytes hgncValue = kvMapInInfo.get(hgncTagSet.fastGet(0));
        if (hgncValue == null) {
            return -1;
        }
        // 58 is ASCII ':'; the piece before it is skipped.
        Iterator<Bytes> parts = hgncValue.split((byte)58);
        parts.next();
        Bytes numericPart = parts.next();
        return numericPart.toInt();
    }

    /**
     * Sets the input GTF location.
     *
     * @param gtfFile location of the GTF file
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setGtfFile(String gtfFile) {
        String location = gtfFile;
        this.gtfFile = location;
        return this;
    }

    /**
     * Mutable accumulator for one transcript: location, strand, coding range, exons and the
     * owning gene label. Instances are filled through the chained setters and serialised as one
     * tab-separated line by {@link #writeToCache(ByteStream)}.
     *
     * <p>Ordering ({@link #compareTo}) is by contig index, then start, then end, then first stored
     * exon start.
     */
    static class KggSeqTranscriptRecord
    implements Comparable<KggSeqTranscriptRecord> {
        // Index of the contig; primary sort key.
        int indexOfContig;
        // Numeric HGNC id, or -1 when unknown; written as the first output column.
        int idOfHGNC;
        Bytes transcriptName;
        Bytes contigName;
        // 0 is written as '+', any other value as '-'.
        byte strand;
        // Transcript start/end as given by the caller; start is written as pos - 1.
        int pos;
        int end;
        // Sentinels meaning "no coding coordinate recorded yet".
        int codingPos = Integer.MAX_VALUE;
        int codingEnd = Integer.MIN_VALUE;
        // Settable but not used by writeToCache, which derives the count from exonStartPos.
        int exonSize;
        // Exon starts are stored already decremented by one (see addExon); ends are stored as given.
        IntList exonStartPos = new IntList();
        IntList exonEndPos = new IntList();
        Bytes geneName;
        // Whether a start / stop codon was reported for this transcript.
        boolean startCodon = false;
        boolean endCodon = false;

        /** Sets the HGNC id. */
        public KggSeqTranscriptRecord setIdOfHGNC(int idOfHGNC) {
            this.idOfHGNC = idOfHGNC;
            return this;
        }

        /** Sets the transcript name. */
        public KggSeqTranscriptRecord setTranscriptName(Bytes transcriptName) {
            this.transcriptName = transcriptName;
            return this;
        }

        /** Sets the contig name. */
        public KggSeqTranscriptRecord setContigName(Bytes contigName) {
            this.contigName = contigName;
            return this;
        }

        /**
         * Widens the coding range in BOTH directions so that it includes {@code coding}
         * (despite the name, the coding start is lowered as well when needed).
         */
        public KggSeqTranscriptRecord updateCodingEnd(int coding) {
            this.codingPos = Math.min(coding, this.codingPos);
            this.codingEnd = Math.max(this.codingEnd, coding);
            return this;
        }

        /** Sets the strand code (0 is written as '+', anything else as '-'). */
        public KggSeqTranscriptRecord setStrand(byte strand) {
            this.strand = strand;
            return this;
        }

        /** Sets the transcript start. */
        public KggSeqTranscriptRecord setPos(int pos) {
            this.pos = pos;
            return this;
        }

        /** Sets the transcript end. */
        public KggSeqTranscriptRecord setEnd(int end) {
            this.end = end;
            return this;
        }

        /** Lowers the coding start to {@code codingPos} if that is smaller than the current value. */
        public KggSeqTranscriptRecord setCodingPos(int codingPos) {
            this.codingPos = Math.min(codingPos, this.codingPos);
            return this;
        }

        /** Raises the coding end to {@code codingEnd} if that is larger than the current value. */
        public KggSeqTranscriptRecord setCodingEnd(int codingEnd) {
            this.codingEnd = Math.max(codingEnd, this.codingEnd);
            return this;
        }

        /** Sets the {@code exonSize} field. */
        public KggSeqTranscriptRecord setExonSize(int exonSize) {
            this.exonSize = exonSize;
            return this;
        }

        /** Sets the gene label. */
        public KggSeqTranscriptRecord setGeneName(Bytes geneName) {
            this.geneName = geneName;
            return this;
        }

        /** Records one exon; the start is stored as {@code start - 1}, the end unchanged. */
        public void addExon(int start, int end) {
            this.exonStartPos.add(start - 1);
            this.exonEndPos.add(end);
        }

        /**
         * Appends this record to {@code cache} as one line of tab-separated columns, in order:
         * HGNC id, transcript name, contig name, strand, {@code pos - 1}, end, coding start,
         * coding end, exon count, exon starts, exon ends, a fixed value, gene label, coding-start
         * status, coding-end status, and one "." per exon. List columns are comma-terminated and
         * the line ends with a tab followed by a newline.
         *
         * <p>When no coding coordinate was recorded, both coding columns fall back to {@code end}
         * and the status is "unk"; otherwise the status is "cmpl" if the matching codon was
         * reported and "incmpl" if not.
         *
         * <p>Side effect: both exon lists are sorted in place.
         *
         * @throws UnsupportedOperationException if the record has no exons
         */
        public void writeToCache(ByteStream cache) {
            cache.write(ASCIIUtility.toASCII(this.idOfHGNC));
            cache.write(9);
            cache.write(this.transcriptName);
            cache.write(9);
            cache.write(this.contigName);
            cache.write(9);
            cache.write(this.strand == 0 ? (byte)43 : 45);
            cache.write(9);
            cache.write(ASCIIUtility.toASCII(this.pos - 1));
            cache.write(9);
            cache.write(ASCIIUtility.toASCII(this.end));
            cache.write(9);
            cache.write(ASCIIUtility.toASCII(this.codingPos == Integer.MAX_VALUE ? this.end : this.codingPos - 1));
            cache.write(9);
            cache.write(ASCIIUtility.toASCII(this.codingEnd == Integer.MIN_VALUE ? this.end : this.codingEnd));
            cache.write(9);
            cache.write(ASCIIUtility.toASCII(this.exonStartPos.size()));
            cache.write(9);
            this.exonStartPos.sort();
            this.writeMultiInt(cache, this.exonStartPos);
            cache.write(9);
            this.exonEndPos.sort();
            this.writeMultiInt(cache, this.exonEndPos);
            cache.write(9);
            // NOTE(review): if a literal "0" column was intended here, confirm that
            // ASCIIUtility.toASCII(48) does not emit the text "48" instead.
            cache.write(ASCIIUtility.toASCII(48));
            cache.write(9);
            cache.write(this.geneName);
            cache.write(9);
            cache.write(this.codingPos == Integer.MAX_VALUE ? UNKNOWN_FLAG : (this.startCodon ? CMPL_FLAG : INCMPL_FLAG));
            cache.write(9);
            cache.write(this.codingEnd == Integer.MIN_VALUE ? UNKNOWN_FLAG : (this.endCodon ? CMPL_FLAG : INCMPL_FLAG));
            cache.write(9);
            this.writeMultiInt(cache, this.exonStartPos.size());
            cache.write(9);
            cache.write((byte)10);
        }

        /**
         * Writes every value of {@code list} followed by a comma.
         *
         * @throws UnsupportedOperationException if {@code list} is empty
         */
        void writeMultiInt(ByteStream cache, IntList list) {
            if (list.isEmpty()) {
                throw new UnsupportedOperationException("No exons.");
            }
            int size = list.size();
            for (int i = 0; i < size; ++i) {
                cache.write(ASCIIUtility.toASCII(list.fastGet(i)));
                cache.write((byte)44);
            }
        }

        /** Writes {@code size} repetitions of ".,". */
        void writeMultiInt(ByteStream cache, int size) {
            for (int i = 0; i < size; ++i) {
                cache.write((byte)46);
                cache.write((byte)44);
            }
        }

        /**
         * Resets the per-transcript coding state and exons so the record can be reused.
         * The codon flags are reset too; otherwise a reused record could report a "cmpl"
         * status inherited from the previous transcript.
         */
        public void clear() {
            this.codingPos = Integer.MAX_VALUE;
            this.codingEnd = Integer.MIN_VALUE;
            this.startCodon = false;
            this.endCodon = false;
            this.exonStartPos.clear();
            this.exonEndPos.clear();
        }

        /** Returns a fresh record that carries over only contig index, contig name and gene label. */
        public KggSeqTranscriptRecord retainContigAndGene() {
            return new KggSeqTranscriptRecord().indexOfContig(this.indexOfContig).setContigName(this.contigName).setGeneName(this.geneName);
        }

        /** Sets the contig index. */
        public KggSeqTranscriptRecord indexOfContig(int indexOfContig) {
            this.indexOfContig = indexOfContig;
            return this;
        }

        /** Sets whether a start codon was reported. */
        public KggSeqTranscriptRecord startCodon(boolean startCodon) {
            this.startCodon = startCodon;
            return this;
        }

        /** Sets whether a stop codon was reported. */
        public KggSeqTranscriptRecord endCodon(boolean endCodon) {
            this.endCodon = endCodon;
            return this;
        }

        /**
         * Orders by contig index, then start, then end, then the first stored exon start.
         * A record without exons sorts before one with exons instead of reading element 0
         * of an empty list.
         */
        @Override
        public int compareTo(KggSeqTranscriptRecord o) {
            int status = Integer.compare(this.indexOfContig, o.indexOfContig);
            if (status != 0) {
                return status;
            }
            status = Integer.compare(this.pos, o.pos);
            if (status != 0) {
                return status;
            }
            status = Integer.compare(this.end, o.end);
            if (status != 0) {
                return status;
            }
            boolean thisHasExons = !this.exonStartPos.isEmpty();
            boolean otherHasExons = !o.exonStartPos.isEmpty();
            if (!thisHasExons || !otherHasExons) {
                return Boolean.compare(thisHasExons, otherHasExons);
            }
            return Integer.compare(this.exonStartPos.fastGet(0), o.exonStartPos.fastGet(0));
        }
    }
}

