package edu.sysu.pmglab.annotation.database.gene;

import edu.sysu.pmglab.bytecode.ASCIIUtility;
import edu.sysu.pmglab.bytecode.ByteStream;
import edu.sysu.pmglab.bytecode.Bytes;
import edu.sysu.pmglab.bytecode.BytesSplitter;
import edu.sysu.pmglab.container.indexable.DynamicIndexableMap;
import edu.sysu.pmglab.container.indexable.IndexableSet;
import edu.sysu.pmglab.container.indexable.LinkedSet;
import edu.sysu.pmglab.container.list.IntList;
import edu.sysu.pmglab.container.list.List;
import edu.sysu.pmglab.gtb.genome.coordinate.Chromosome;
import edu.sysu.pmglab.io.file.LiveFile;
import edu.sysu.pmglab.io.reader.ReaderStream;
import edu.sysu.pmglab.io.writer.WriterStream;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;

/* loaded from: input_file:edu/sysu/pmglab/annotation/database/gene/GEncodeGTFParser.class */
public class GEncodeGTFParser {
    // Path of the input GTF annotation; submit() opens it through LiveFile.
    String gtfFile;
    // Destination file that submit() writes the converted records to.
    File outputKggFile;
    // Bytes of "ENSG"; getGeneName() writes "." instead of a gene_name that starts with this prefix.
    private static final byte[] ENSG;
    // Chromosomes whose records submit() writes; records on any other contig are skipped.
    HashSet<Chromosome> storedContigName;
    // Splits the attribute column on ';' (byte 59) in parse().
    private static final BytesSplitter semicolonSplit;
    // Shared scratch buffer used by getGeneName() to assemble "gene_id;gene_name;gene_type".
    // NOTE(review): static and mutable, so concurrent parsers would share it - confirm single-threaded use.
    private static ByteStream cache;
    // Placeholder value (".") stored by parse() for an attribute that has a key but no value.
    private static final Bytes EXIST;
    // Attribute keys looked up in kvMapInInfo; only element 0 of each list is read in this file.
    private static final List<Bytes> geneIDTagSet;
    private static final List<Bytes> geneNameTagSet;
    private static final List<Bytes> geneTypeTagSet;
    private static final List<Bytes> transcriptNameTagSet;
    private static final List<Bytes> hgncTagSet;
    // Key/value pairs of the most recently parsed attribute column; cleared at the start of every parse().
    private static final DynamicIndexableMap<Bytes, Bytes> kvMapInInfo;
    // Flags written by KggSeqTranscriptRecord.writeToCache() for the start/end of the coding region.
    static Bytes CMPL_FLAG;
    static Bytes UNKNOWN_FLAG;
    static Bytes INCMPL_FLAG;
    // Feature-type names; the index of a type in this set selects the switch case in submit().
    static IndexableSet<Bytes> indexableTypeSet;
    // Decompiler-exposed assertion switch: true when assertions are disabled for this class.
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:edu/sysu/pmglab/annotation/database/gene/GEncodeGTFParser$KggSeqTranscriptRecord.class */
    /**
     * Mutable accumulator for one transcript read from a GTF file.
     *
     * <p>The parser fills a record line by line (transcript, exon, CDS and codon features)
     * and finally serialises it as one tab-separated text line via {@link #writeToCache(ByteStream)}.
     * Records order themselves by contig index, start, end and first stored exon start.
     */
    public static class KggSeqTranscriptRecord implements Comparable<KggSeqTranscriptRecord> {
        int indexOfContig;
        int idOfHGNC;
        Bytes transcriptName;
        Bytes contigName;
        /** 0 is written as '+', any other value as '-'. */
        byte strand;
        int pos;
        int end;
        /** Only assigned through {@link #setExonSize(int)}; not read anywhere in this class. */
        int exonSize;
        Bytes geneName;
        /** Smallest coding coordinate seen so far; {@code Integer.MAX_VALUE} means "not set". */
        int codingPos = Integer.MAX_VALUE;
        /** Largest coding coordinate seen so far; {@code Integer.MIN_VALUE} means "not set". */
        int codingEnd = Integer.MIN_VALUE;
        IntList exonStartPos = new IntList();
        IntList exonEndPos = new IntList();
        /** True once a start_codon feature was seen for this transcript. */
        boolean startCodon = false;
        /** True once a stop_codon feature was seen for this transcript. */
        boolean endCodon = false;

        public KggSeqTranscriptRecord setIdOfHGNC(int i) {
            this.idOfHGNC = i;
            return this;
        }

        public KggSeqTranscriptRecord setTranscriptName(Bytes bytes) {
            this.transcriptName = bytes;
            return this;
        }

        public KggSeqTranscriptRecord setContigName(Bytes bytes) {
            this.contigName = bytes;
            return this;
        }

        /**
         * Widens the coding interval so that it contains {@code i}: the coding start is
         * lowered and the coding end is raised as necessary.
         */
        public KggSeqTranscriptRecord updateCodingEnd(int i) {
            this.codingPos = Math.min(i, this.codingPos);
            this.codingEnd = Math.max(this.codingEnd, i);
            return this;
        }

        public KggSeqTranscriptRecord setStrand(byte b) {
            this.strand = b;
            return this;
        }

        public KggSeqTranscriptRecord setPos(int i) {
            this.pos = i;
            return this;
        }

        public KggSeqTranscriptRecord setEnd(int i) {
            this.end = i;
            return this;
        }

        /** Lowers the coding start to {@code i} if {@code i} is smaller than the current value. */
        public KggSeqTranscriptRecord setCodingPos(int i) {
            this.codingPos = Math.min(i, this.codingPos);
            return this;
        }

        /** Raises the coding end to {@code i} if {@code i} is larger than the current value. */
        public KggSeqTranscriptRecord setCodingEnd(int i) {
            this.codingEnd = Math.max(i, this.codingEnd);
            return this;
        }

        public KggSeqTranscriptRecord setExonSize(int i) {
            this.exonSize = i;
            return this;
        }

        public KggSeqTranscriptRecord setGeneName(Bytes bytes) {
            this.geneName = bytes;
            return this;
        }

        /**
         * Registers one exon. The start is stored as {@code i - 1}, the end unchanged
         * (presumably converting 1-based inclusive GTF coordinates to 0-based half-open ones).
         */
        public void addExon(int i, int i2) {
            this.exonStartPos.add(i - 1);
            this.exonEndPos.add(i2);
        }

        /**
         * Appends this record to {@code byteStream} as one tab-separated line terminated by '\n'.
         *
         * <p>Columns, in order: HGNC id, transcript name, contig name, strand, start - 1, end,
         * coding start - 1 (or end when unset), coding end (or end when unset), exon count,
         * exon starts, exon ends, a constant column, gene name, start-completeness flag,
         * end-completeness flag, and one "." per exon. Both exon lists are sorted in place.
         *
         * @throws UnsupportedOperationException if the record has no exons
         */
        public void writeToCache(ByteStream byteStream) {
            boolean codingStartUnset = this.codingPos == Integer.MAX_VALUE;
            boolean codingEndUnset = this.codingEnd == Integer.MIN_VALUE;

            byteStream.write(ASCIIUtility.toASCII(this.idOfHGNC));
            byteStream.write(9);
            byteStream.write(this.transcriptName);
            byteStream.write(9);
            byteStream.write(this.contigName);
            byteStream.write(9);
            // 43 = '+', 45 = '-'
            byteStream.write(this.strand == 0 ? (byte) 43 : (byte) 45);
            byteStream.write(9);
            byteStream.write(ASCIIUtility.toASCII(this.pos - 1));
            byteStream.write(9);
            byteStream.write(ASCIIUtility.toASCII(this.end));
            byteStream.write(9);
            // Without any coding feature both coding columns collapse onto the transcript end.
            byteStream.write(ASCIIUtility.toASCII(codingStartUnset ? this.end : this.codingPos - 1));
            byteStream.write(9);
            byteStream.write(ASCIIUtility.toASCII(codingEndUnset ? this.end : this.codingEnd));
            byteStream.write(9);
            byteStream.write(ASCIIUtility.toASCII(this.exonStartPos.size()));
            byteStream.write(9);
            this.exonStartPos.sort();
            writeMultiInt(byteStream, this.exonStartPos);
            byteStream.write(9);
            this.exonEndPos.sort();
            writeMultiInt(byteStream, this.exonEndPos);
            byteStream.write(9);
            // NOTE(review): this passes the int 48 to the same toASCII overload used for the other
            // numeric columns, so it presumably emits the text "48" rather than "0" - confirm intent.
            byteStream.write(ASCIIUtility.toASCII(48));
            byteStream.write(9);
            byteStream.write(this.geneName);
            byteStream.write(9);
            byteStream.write(completenessFlag(codingStartUnset, this.startCodon));
            byteStream.write(9);
            byteStream.write(completenessFlag(codingEndUnset, this.endCodon));
            byteStream.write(9);
            writeMultiInt(byteStream, this.exonStartPos.size());
            byteStream.write(9);
            byteStream.write((byte) 10);
        }

        /** Picks the unknown/complete/incomplete flag for one end of the coding region. */
        private static Bytes completenessFlag(boolean boundaryUnset, boolean codonSeen) {
            if (boundaryUnset) {
                return GEncodeGTFParser.UNKNOWN_FLAG;
            }
            return codonSeen ? GEncodeGTFParser.CMPL_FLAG : GEncodeGTFParser.INCMPL_FLAG;
        }

        /**
         * Writes every value of {@code intList} followed by a comma (byte 44).
         *
         * @throws UnsupportedOperationException if {@code intList} is empty
         */
        void writeMultiInt(ByteStream byteStream, IntList intList) {
            if (intList.isEmpty()) {
                throw new UnsupportedOperationException("No exons.");
            }
            int size = intList.size();
            for (int i = 0; i < size; i++) {
                byteStream.write(ASCIIUtility.toASCII(intList.fastGet(i)));
                byteStream.write((byte) 44);
            }
        }

        /** Writes the two bytes ".," (46, 44) {@code i} times. */
        void writeMultiInt(ByteStream byteStream, int i) {
            for (int i2 = 0; i2 < i; i2++) {
                byteStream.write((byte) 46);
                byteStream.write((byte) 44);
            }
        }

        /**
         * Resets all per-transcript coding and exon state so the record can be reused.
         * The codon flags are reset as well; otherwise a reused record would inherit the
         * start/stop codon status of the previous transcript and report a wrong "cmpl" flag.
         */
        public void clear() {
            this.codingPos = Integer.MAX_VALUE;
            this.codingEnd = Integer.MIN_VALUE;
            this.exonStartPos.clear();
            this.exonEndPos.clear();
            this.startCodon = false;
            this.endCodon = false;
        }

        /** Returns a fresh record that carries over only the contig index, contig name and gene name. */
        public KggSeqTranscriptRecord retainContigAndGene() {
            return new KggSeqTranscriptRecord().indexOfContig(this.indexOfContig).setContigName(this.contigName).setGeneName(this.geneName);
        }

        public KggSeqTranscriptRecord indexOfContig(int i) {
            this.indexOfContig = i;
            return this;
        }

        public KggSeqTranscriptRecord startCodon(boolean z) {
            this.startCodon = z;
            return this;
        }

        public KggSeqTranscriptRecord endCodon(boolean z) {
            this.endCodon = z;
            return this;
        }

        /**
         * Orders records by contig index, then start, then end, then by the first stored
         * exon start. Both records are expected to hold at least one exon, because element 0
         * of each exon-start list is read when the other keys tie.
         */
        @Override // java.lang.Comparable
        public int compareTo(KggSeqTranscriptRecord kggSeqTranscriptRecord) {
            int compare = Integer.compare(this.indexOfContig, kggSeqTranscriptRecord.indexOfContig);
            if (compare == 0) {
                compare = Integer.compare(this.pos, kggSeqTranscriptRecord.pos);
                if (compare == 0) {
                    compare = Integer.compare(this.end, kggSeqTranscriptRecord.end);
                    if (compare == 0) {
                        compare = Integer.compare(this.exonStartPos.fastGet(0), kggSeqTranscriptRecord.exonStartPos.fastGet(0));
                    }
                }
            }
            return compare;
        }
    }

    /* JADX WARN: Failed to find 'out' block for switch in B:30:0x00c3. Please report as an issue. */
    /**
     * Converts the configured GTF file into the tab-separated output file.
     *
     * <p>All transcripts are collected first, then sorted with
     * {@link KggSeqTranscriptRecord#compareTo(KggSeqTranscriptRecord)}, and finally every record
     * whose contig is contained in {@code storedContigName} is written. Both streams are closed
     * even when parsing or writing fails.
     *
     * @throws IOException if reading the GTF file or writing the output file fails
     */
    public void submit() throws IOException {
        ReaderStream reader = LiveFile.of(this.gtfFile).openAsText();
        try {
            WriterStream writer = new WriterStream(new File(this.outputKggFile.toString()), WriterStream.Option.DEFAULT);
            try {
                List<KggSeqTranscriptRecord> records = collectTranscripts(reader);
                records.sort(KggSeqTranscriptRecord::compareTo);
                writeRecords(records, writer);
            } finally {
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Reads the GTF line by line and builds one record per transcript.
     * Blank lines and lines starting with '#' are skipped.
     */
    private List<KggSeqTranscriptRecord> collectTranscripts(ReaderStream reader) throws IOException {
        List<KggSeqTranscriptRecord> records = new List<>();
        ByteStream line = new ByteStream();
        BytesSplitter columns = new BytesSplitter((byte) 9);
        KggSeqTranscriptRecord current = new KggSeqTranscriptRecord();
        boolean firstGene = true;
        boolean firstTranscript = true;
        // True between a gene line and the first transcript line that follows it.
        boolean geneHasNoTranscriptYet = true;
        // Number of processed data lines; only used for the diagnostic output below.
        int dataLines = 0;

        while (reader.readline(line) != -1) {
            Bytes row = line.toBytes();
            // Guard against blank lines before looking at the first byte; 35 = '#'.
            if (row.length() == 0 || row.byteAt(0) == 35) {
                line.clear();
                continue;
            }
            columns.init(row);
            Bytes contigField = columns.next().detach();
            columns.next(); // source column, unused
            // Case numbers follow the order of indexableTypeSet installed by the static initializer:
            // gene, transcript, start_codon, CDS, exon, stop_codon. Other feature types are ignored.
            switch (indexableTypeSet.indexOf(columns.next())) {
                case 0: {
                    // A new gene closes the transcript that is still pending, if it has exons.
                    if (!firstGene && !current.exonStartPos.isEmpty()) {
                        records.add(current);
                        current = current.retainContigAndGene();
                    }
                    firstGene = false;
                    geneHasNoTranscriptYet = true;
                    String contig = contigField.toString();
                    Chromosome chromosome = Chromosome.get(contig);
                    if (chromosome.equals(Chromosome.UNKNOWN)) {
                        // NOTE(review): the original repeats the lookup for unknown contigs; kept as is
                        // because it is not visible here whether Chromosome.get has side effects.
                        chromosome = Chromosome.get(contig);
                    }
                    int contigIndex = chromosome.getIndex();
                    Bytes contigName = new Bytes(chromosome.getName());
                    // Skip columns 4-8 (start, end, score, strand, frame) to reach the attributes.
                    for (int skipped = 3; skipped < 8; skipped++) {
                        columns.next();
                    }
                    parse(columns.next());
                    current.setContigName(contigName).setGeneName(getGeneName()).indexOfContig(contigIndex);
                    break;
                }
                case 1: {
                    // A further transcript of the same gene closes the previous transcript.
                    if (!firstTranscript && !geneHasNoTranscriptYet) {
                        records.add(current);
                        current = current.retainContigAndGene();
                    }
                    firstTranscript = false;
                    geneHasNoTranscriptYet = false;
                    int start = columns.next().toInt();
                    int end = columns.next().toInt();
                    columns.next(); // score, unused
                    // 43 = '+'
                    byte strand = columns.next().startsWith((byte) 43) ? (byte) 0 : (byte) 1;
                    columns.next(); // frame, unused
                    parse(columns.next());
                    Bytes transcriptId = kvMapInInfo.get(transcriptNameTagSet.fastGet(0)).detach();
                    int hgncId;
                    try {
                        hgncId = idOfHGNC();
                    } catch (RuntimeException e) {
                        // A malformed hgnc_id must not abort the conversion: report the line and
                        // fall back to -1, the same value used when the attribute is absent.
                        System.out.println(dataLines);
                        System.out.println(line.toBytes());
                        hgncId = -1;
                    }
                    // The strand is set on the fallback path too; previously it was lost there.
                    current.setPos(start).setEnd(end).setTranscriptName(transcriptId).setIdOfHGNC(hgncId).setStrand(strand);
                    break;
                }
                case 2: {
                    current.setCodingPos(columns.next().toInt());
                    current.setCodingEnd(columns.next().toInt());
                    current.startCodon(true);
                    break;
                }
                case 3: {
                    current.setCodingPos(columns.next().toInt());
                    current.setCodingEnd(columns.next().toInt());
                    break;
                }
                case 4: {
                    int exonStart = columns.next().toInt();
                    int exonEnd = columns.next().toInt();
                    current.addExon(exonStart, exonEnd);
                    break;
                }
                case 5: {
                    int codonStart = columns.next().toInt();
                    int codonEnd = columns.next().toInt();
                    current.updateCodingEnd(codonStart);
                    current.updateCodingEnd(codonEnd);
                    current.endCodon(true);
                    break;
                }
                default:
                    break;
            }
            dataLines++;
            line.clear();
        }

        // The last transcript is not followed by another gene/transcript line, so it has to be
        // flushed here; without this the final transcript of the file would be dropped.
        if (!current.exonStartPos.isEmpty()) {
            records.add(current);
        }
        return records;
    }

    /** Writes every record whose contig is contained in {@code storedContigName}. */
    private void writeRecords(List<KggSeqTranscriptRecord> records, WriterStream writer) throws IOException {
        ByteStream buffer = new ByteStream();
        for (int i = 0; i < records.size(); i++) {
            KggSeqTranscriptRecord record = records.fastGet(i);
            if (this.storedContigName.contains(Chromosome.get(record.contigName.toString()))) {
                record.writeToCache(buffer);
                writer.write(buffer.toBytes());
                buffer.clear();
            }
        }
    }

    /**
     * Sets the input GTF path from the string form of {@code obj}.
     *
     * @param obj any object whose {@code toString()} yields the path
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setGtfFile(Object obj) {
        String path = obj.toString();
        gtfFile = path;
        return this;
    }

    /**
     * Sets the output file from the string form of {@code obj}.
     *
     * @param obj any object whose {@code toString()} yields the output path
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setOutputKggFile(Object obj) {
        String path = obj.toString();
        outputKggFile = new File(path);
        return this;
    }

    /**
     * @return the configured output file, or {@code null} if none has been set
     */
    public File getOutputKggFile() {
        return outputKggFile;
    }

    /**
     * Replaces the class-wide set of feature types. The index of a feature type in this
     * set decides which branch of the switch in {@code submit()} handles a GTF line.
     *
     * @param indexableSet the new feature-type set
     */
    public static void setIndexableTypeSet(IndexableSet<Bytes> indexableSet) {
        GEncodeGTFParser.indexableTypeSet = indexableSet;
    }

    /**
     * Sets the chromosomes whose records are kept; {@code submit()} writes only records
     * whose contig is contained in this set.
     *
     * @param hashSet chromosomes to keep
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setStoredContigName(HashSet<Chromosome> hashSet) {
        storedContigName = hashSet;
        return this;
    }

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code GEncodeGTFParser [gtfFile [outputFile]]}. Arguments that are not
     * supplied fall back to the previously hard-coded developer paths, so running without
     * arguments behaves as before.
     *
     * @param strArr optional input GTF path and output path
     * @throws IOException if the conversion fails
     */
    public static void main(String[] strArr) throws IOException {
        String gtfPath = "/Users/wenjiepeng/Desktop/SDFA3.0/annotation/annotation/resource/GEncode/gencode.v47.annotation.gtf.gz";
        String outputPath = "/Users/wenjiepeng/Desktop/SDFA3.0/annotation/annotation/resource/GEncode/kggseq.txt";
        if (strArr != null && strArr.length > 0) {
            gtfPath = strArr[0];
        }
        if (strArr != null && strArr.length > 1) {
            outputPath = strArr[1];
        }
        Chromosome.get("chrMT").addAlias("NC_012920.1", "MT", "chrM");
        new GEncodeGTFParser().setGtfFile(gtfPath).setOutputKggFile(outputPath).setStoredContigName(new HashSet<>(Chromosome.values())).submit();
    }

    /**
     * Parses one attribute column into {@code kvMapInInfo}, replacing its previous content.
     *
     * <p>The column is split on ';'; every non-empty, trimmed entry is split on blanks.
     * A lone key is stored with the {@code EXIST} placeholder; for a key/value pair, a value
     * that starts with a double quote is reduced to the text between the first two quotes.
     *
     * @param bytes the raw attribute column
     * @throws UnsupportedOperationException if an entry splits into more than two tokens
     */
    private static void parse(Bytes bytes) {
        kvMapInInfo.clear();
        semicolonSplit.init(bytes);
        List<Bytes> tokens = new List<>();
        BytesSplitter blankSplit = new BytesSplitter((byte) 32);
        while (semicolonSplit.hasNext()) {
            Bytes entry = semicolonSplit.next().trim();
            if (entry.length() == 0) {
                continue;
            }
            tokens.clear();
            blankSplit.init(entry);
            while (blankSplit.hasNext()) {
                tokens.add(blankSplit.next().detach());
            }

            int tokenCount = tokens.size();
            if (tokenCount == 0) {
                continue;
            }
            if (tokenCount == 1) {
                kvMapInInfo.put(tokens.fastGet(0), EXIST);
                continue;
            }
            if (tokenCount != 2) {
                throw new UnsupportedOperationException(tokens.fastGet(0) + " has multiple blanks to split");
            }

            Bytes value = tokens.fastGet(1);
            // 34 = '"': strip the surrounding quotes by taking the second piece of the split.
            if (value.startsWith((byte) 34)) {
                Iterator<Bytes> quoted = value.split((byte) 34);
                quoted.next();
                value = quoted.next().detach();
            }
            kvMapInInfo.put(tokens.fastGet(0), value);
        }
    }

    /**
     * Builds the gene label "gene_id;gene_name;gene_type" from the attributes of the most
     * recently parsed line. A gene_name that starts with "ENSG" is written as ".".
     *
     * <p>With assertions enabled, a missing gene_id, gene_type or gene_name raises an
     * {@link AssertionError}.
     *
     * @return a detached copy of the assembled label
     */
    private static Bytes getGeneName() {
        cache.clear();
        Bytes geneId = kvMapInInfo.get(geneIDTagSet.fastGet(0));
        Bytes geneType = kvMapInInfo.get(geneTypeTagSet.fastGet(0));
        Bytes geneName = kvMapInInfo.get(geneNameTagSet.fastGet(0));
        if (!$assertionsDisabled) {
            boolean attributeMissing = geneId == null || geneType == null || geneName == null;
            if (attributeMissing) {
                throw new AssertionError();
            }
        }
        cache.write(geneId);
        cache.write(59);
        if (!geneName.startsWith(ENSG)) {
            cache.write(geneName);
        } else {
            cache.write((byte) 46);
        }
        cache.write(59);
        cache.write(geneType);
        Bytes label = cache.toBytes();
        return label.detach();
    }

    /**
     * Extracts the numeric HGNC id from the hgnc_id attribute of the most recently parsed line.
     * The value is split on ':' (byte 58) and the second piece is converted to an int.
     *
     * @return the numeric id, or -1 when the attribute is absent
     */
    private static int idOfHGNC() {
        Bytes hgncValue = kvMapInInfo.get(hgncTagSet.fastGet(0));
        if (hgncValue != null) {
            Iterator<Bytes> pieces = hgncValue.split((byte) 58);
            pieces.next();
            Bytes numericPart = pieces.next();
            return numericPart.toInt();
        }
        return -1;
    }

    /**
     * Sets the path of the input GTF file.
     *
     * @param str path of the GTF file
     * @return this parser, for chaining
     */
    public GEncodeGTFParser setGtfFile(String str) {
        gtfFile = str;
        return this;
    }

    // Initialises the class-wide constants, lookup keys and scratch buffers.
    static {
        $assertionsDisabled = !GEncodeGTFParser.class.desiredAssertionStatus();
        // Pin the charset: the prefix is compared against raw file bytes, so it must not
        // depend on the platform default encoding.
        ENSG = "ENSG".getBytes(StandardCharsets.US_ASCII);
        // 59 = ';'
        semicolonSplit = new BytesSplitter((byte) 59);
        cache = new ByteStream();
        // 46 = '.'
        EXIST = new Bytes(new byte[]{46});
        geneIDTagSet = new List<>(new Bytes[]{new Bytes("gene_id")});
        geneNameTagSet = new List<>(new Bytes[]{new Bytes("gene_name")});
        geneTypeTagSet = new List<>(new Bytes[]{new Bytes("gene_type")});
        transcriptNameTagSet = new List<>(new Bytes[]{new Bytes("transcript_id")});
        hgncTagSet = new List<>(new Bytes[]{new Bytes("hgnc_id")});
        kvMapInInfo = new DynamicIndexableMap<>();
        CMPL_FLAG = new Bytes("cmpl");
        UNKNOWN_FLAG = new Bytes("unk");
        INCMPL_FLAG = new Bytes("incmpl");
        // The order matters: submit() switches on the index of the feature type in this set.
        indexableTypeSet = new LinkedSet(new Bytes[]{new Bytes("gene"), new Bytes("transcript"), new Bytes("start_codon"), new Bytes("CDS"), new Bytes("exon"), new Bytes("stop_codon")});
    }
}
