package edu.sysu.pmglab.gtb.command.program;

import edu.sysu.pmglab.RuntimeProperty;
import edu.sysu.pmglab.bytecode.Bytes;
import edu.sysu.pmglab.ccf.field.IFieldCollection;
import edu.sysu.pmglab.ccf.record.BoxRecord;
import edu.sysu.pmglab.ccf.record.IRecord;
import edu.sysu.pmglab.ccf.toolkit.CCFSlidingPairwiseCalculator;
import edu.sysu.pmglab.ccf.toolkit.listener.InputOutputListener;
import edu.sysu.pmglab.ccf.toolkit.output.CCFOutputOption;
import edu.sysu.pmglab.ccf.type.FieldType;
import edu.sysu.pmglab.ccf.type.basic.VarInt32Box;
import edu.sysu.pmglab.commandParser.CommandOptions;
import edu.sysu.pmglab.commandParser.ICommandProgram;
import edu.sysu.pmglab.commandParser.annotation.option.CustomOption;
import edu.sysu.pmglab.commandParser.annotation.option.Option;
import edu.sysu.pmglab.commandParser.annotation.rule.Counter;
import edu.sysu.pmglab.commandParser.annotation.rule.Rule;
import edu.sysu.pmglab.commandParser.annotation.usage.OptionUsage;
import edu.sysu.pmglab.commandParser.annotation.usage.Parser;
import edu.sysu.pmglab.commandParser.annotation.usage.UsageItem;
import edu.sysu.pmglab.commandParser.validator.range.Float_0_1_RangeValidator;
import edu.sysu.pmglab.container.indexable.IndexableSet;
import edu.sysu.pmglab.container.indexable.LinkedSet;
import edu.sysu.pmglab.container.interval.FloatInterval;
import edu.sysu.pmglab.container.interval.IntInterval;
import edu.sysu.pmglab.container.intervaltree.inttree.IntIntervalTree;
import edu.sysu.pmglab.container.list.IntList;
import edu.sysu.pmglab.container.list.List;
import edu.sysu.pmglab.gtb.GTBManager;
import edu.sysu.pmglab.gtb.GTBReaderOption;
import edu.sysu.pmglab.gtb.command.GenomicCoordinatesSelectionConverter;
import edu.sysu.pmglab.gtb.command.IndividualsSelectionConverter;
import edu.sysu.pmglab.gtb.genome.Variant;
import edu.sysu.pmglab.gtb.genome.coordinate.Chromosome;
import edu.sysu.pmglab.gtb.genome.coordinate.Coordinate;
import edu.sysu.pmglab.gtb.genome.genotype.BitwiseGenotypes;
import edu.sysu.pmglab.gtb.genome.genotype.IGenotypes;
import edu.sysu.pmglab.gtb.genome.genotype.container.ConstantGenotypes;
import edu.sysu.pmglab.gtb.linkagedisequilibrium.EntropyLD;
import edu.sysu.pmglab.gtb.linkagedisequilibrium.GenotypeLD;
import edu.sysu.pmglab.gtb.linkagedisequilibrium.HaplotypeLD;
import edu.sysu.pmglab.gtb.linkagedisequilibrium.ILDModel;
import edu.sysu.pmglab.gtb.linkagedisequilibrium.LDProperty;
import edu.sysu.pmglab.io.file.LiveFile;
import edu.sysu.pmglab.objectpool.GenericObjectPool;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.function.BiFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line entry point for the {@code ld} sub-command.
 *
 * <p>The program reads variants from a GTB input, optionally restricts them to a subset of
 * individuals and genomic positions, filters them by AC/AN/AF, and then evaluates one LD model
 * (selected by {@code --hap-ld}, {@code --geno-ld} or {@code --entropy-ld}) on pairs of variants
 * that lie on the same chromosome within {@code --window-bp} bases of each other. Pairs accepted
 * by the model are written to the CCF output file.
 */
@Parser(usage = "ld <input> [options]", usage_item = {@UsageItem(key = "About", value = {"Calculate pairwise the linkage disequilibrium or genotypic correlation.", "GBC-LDCalculator performs linkage disequilibrium calculations for biallelic variants, which is a common processing strategy. For multi-allelic variants, GBC-LDCalculator designates the highest frequency ALLELE as reference ALLELE, treating others as alternative ALLELE during computation. For multiple variants with the same coordinates, GBC selects the variant with the maximum MAF for calculation and discards the others."})}, rule = @Rule(counter = {@Counter(item = {"--hap-ld", "--geno-ld", "--entropy-ld"}, rule = Counter.Type.EQUAL, count = 1)}))
public class LDCalculatorProgram extends ICommandProgram {
    private static final Logger LOGGER = LoggerFactory.getLogger(LDCalculatorProgram.class);

    /** GTB input file (positional argument following {@code ld}). */
    @Option(names = {"ld"}, type = FieldType.livefile, required = true)
    LiveFile input;

    /** Destination of the CCF output. */
    @OptionUsage(defaultTo = "./archive.ld.ccf", description = {"Specify the output file path."})
    @Option(names = {"--output", "-o"}, type = FieldType.file, defaultTo = {"./archive.ld.ccf"})
    File output = new File(RuntimeProperty.WORKSPACE_PATH, "./archive.ld.ccf");

    /** When set, no progress listener is attached to the calculation. */
    @OptionUsage(description = {"Suppress terminal output logs."})
    @Option(names = {"--silent"}, type = FieldType.NULL)
    boolean silent = false;

    /** Thread count handed to {@code submit(...)}. */
    @OptionUsage(format = "--threads <int>", defaultTo = "4", description = {"Configure the number of concurrent threads."})
    @Option(names = {"--threads", "-t"}, type = FieldType.varInt32, defaultTo = {"4"})
    int threads = RuntimeProperty.INIT_THREADS;

    /** Selects {@link HaplotypeLD} as the LD model. */
    @OptionUsage(group = "LD Model Options", description = {"Calculate the linkage disequilibrium coefficient for paired variants using D' method."})
    @Option(names = {"--hap-ld"}, type = FieldType.NULL)
    boolean hapLd = false;

    /** Selects {@link GenotypeLD} as the LD model. */
    @OptionUsage(group = "LD Model Options", description = {"Calculate the linkage disequilibrium coefficient for paired variants using the Pearson correlation coefficient method applied to the mutation genotype counts."})
    @Option(names = {"--geno-ld"}, type = FieldType.NULL)
    boolean genoLd = false;

    /** Selects {@link EntropyLD} as the LD model. */
    @OptionUsage(group = "LD Model Options", description = {"Calculate the linkage disequilibrium coefficient for paired variants using mutual information (MI) method."})
    @Option(names = {"--entropy-ld"}, type = FieldType.NULL)
    boolean entropyLd = false;

    /** Maximum position difference (inclusive) between two variants of a pair. */
    @OptionUsage(group = "LD Model Options", defaultTo = "10000", format = "--window-bp <int>", description = {"Set the maximum number of physical bases between the variants being calculated for LD. "})
    @Option(names = {"--window-bp", "-bp"}, type = FieldType.varInt32, defaultTo = {"10000"})
    int windows = 10000;

    /** Threshold forwarded to the LD model's {@code apply} call. */
    @OptionUsage(group = "LD Model Options", defaultTo = "0.2", format = "--min-assoc <float>", description = {"Exclude pairs with R2/NMI values less than --min-assoc."})
    @Option(names = {"--min-assoc"}, type = FieldType.float32, validator = Float_0_1_RangeValidator.class, defaultTo = {"0.2"})
    float minAssoc = 0.2f;

    /** Individuals to keep; {@code null} means all individuals of the input. */
    @CustomOption(names = {"--individual"}, converter = IndividualsSelectionConverter.class)
    @OptionUsage(description = {"Select a subset of individuals. Individuals not found in the inputs will have their genotype filled with './.'."}, format = "--individual <string>,<string>,...", group = "Subset Selection Options")
    IndexableSet<String> individuals = null;

    /** Position selection per chromosome; converted to interval trees before use. */
    @CustomOption(names = {"--pos"}, converter = GenomicCoordinatesSelectionConverter.class, arity = {-1})
    @OptionUsage(description = {"Retrieve the variants by the specified coordinate expression of variant.", "The expression can follow one of three formats: '<chr>' for the entire chromosome, '<chr>:<pos>,<pos>,...' for specific positions, or '<chr>:<start>-<end>,<start>-<end>,...' for coordinate ranges."}, format = "--pos [expression] [expression] ...", group = "Subset Selection Options")
    Map<Chromosome, List<IntInterval>> poses = null;

    /** Accepted alternate allele count range; {@code null} disables the check. */
    @OptionUsage(group = "Subset Selection Options", defaultTo = "1~", format = "--seq-ac <minAc>~<maxAc>", description = {"Exclude variants with the alternate allele count (AC) per variant outside the range [minAc, maxAc].", "If a subset selection occurs, this filter applies to the genotype sequences after selection."})
    @Option(names = {"--seq-ac"}, type = FieldType.intInterval, defaultTo = {"1~"})
    IntInterval ac = new IntInterval(1, Integer.MAX_VALUE);

    /** Accepted non-missing allele number range; {@code null} disables the check. */
    @OptionUsage(group = "Subset Selection Options", defaultTo = "50~", format = "--seq-an <minAn>~<maxAn>", description = {"Exclude variants with the non-missing allele number (AN) per variant outside the range [minAn, maxAn].", "If a subset selection occurs, this filter applies to the genotype sequences after selection."})
    @Option(names = {"--seq-an"}, type = FieldType.intInterval, defaultTo = {"50~"})
    IntInterval an = new IntInterval(50, Integer.MAX_VALUE);

    /** Accepted alternate allele frequency range; {@code null} disables the check. */
    @OptionUsage(group = "Subset Selection Options", defaultTo = "0.05~0.95", format = "--seq-af <minAf>~<maxAf>", description = {"Exclude variants with the alternate allele frequency (AF) per variant outside the range [minAf, maxAf].", "If a subset selection occurs, this filter applies to the genotype sequences after selection."})
    @Option(names = {"--seq-af"}, type = FieldType.floatInterval, defaultTo = {"0.05~0.95"})
    FloatInterval af = new FloatInterval(0.05f, 0.95f);

    /**
     * Parses the command line and runs the pairwise LD calculation.
     *
     * @param args raw command-line arguments; a lone {@code "ld"} is treated as {@code --help}
     * @throws IOException propagated from the GTB/CCF layers
     * @throws IllegalStateException if none of the LD model flags is set after parsing
     */
    public static void main(String[] args) throws IOException {
        final LDCalculatorProgram program = new LDCalculatorProgram();

        // Invoking the sub-command without any argument shows the usage instead of failing.
        boolean bareCommand = args.length == 1 && args[0].equals("ld");
        CommandOptions options = program.parse(bareCommand ? new String[]{"--help"} : args);
        if (options.isHelp()) {
            LOGGER.info("\n{}", options.usage());
            return;
        }
        LOGGER.info("\n{}", options);

        GTBManager manager = new GTBManager(program.input);

        // Pick the LD model together with the output fields it produces.
        final ILDModel model;
        final IFieldCollection scoreFields;
        if (program.hapLd) {
            model = HaplotypeLD.INSTANCE;
            scoreFields = HaplotypeLD.SCORE;
        } else if (program.genoLd) {
            model = GenotypeLD.INSTANCE;
            scoreFields = GenotypeLD.SCORE;
        } else if (program.entropyLd) {
            model = EntropyLD.INSTANCE;
            scoreFields = EntropyLD.SCORE;
        } else {
            // Fail with a clear message instead of a later NullPointerException in worker code.
            throw new IllegalStateException("No LD model selected: specify one of --hap-ld, --geno-ld or --entropy-ld");
        }

        // Without --individual every individual of the input takes part.
        final IndexableSet<String> selectedIndividuals;
        if (program.individuals == null) {
            selectedIndividuals = new LinkedSet();
            selectedIndividuals.addAll(manager.getIndividuals());
        } else {
            selectedIndividuals = program.individuals;
        }

        final Map<Chromosome, IntIntervalTree<Void>> intervalTree = GenomicCoordinatesSelectionConverter.toIntervalTree(program.poses);
        final IntList individualIndices = manager.getIndividuals().findIndicesIn(selectedIndividuals);

        // LDProperty instances are borrowed in init(...) and returned in destroy(...) below.
        final GenericObjectPool propertyPool = new GenericObjectPool(() -> {
            return new LDProperty(selectedIndividuals.size());
        });

        CCFSlidingPairwiseCalculator.InputSetting variants = CCFSlidingPairwiseCalculator.setInput(new GTBReaderOption(manager, true, false), boxRecord -> {
            Coordinate coordinate = new Coordinate((Chromosome) boxRecord.get(null, "CHROM"), ((VarInt32Box) boxRecord.getBox(null, "POS")).intValue());

            // Position selection: a null tree means no restriction at all.
            if (intervalTree != null) {
                if (!intervalTree.containsKey(coordinate.getChromosome())) {
                    return null;
                }
                // A chromosome key mapped to a null tree is accepted without a position check.
                IntIntervalTree<Void> tree = intervalTree.get(coordinate.getChromosome());
                if (tree != null && !tree.contains(coordinate.getPosition())) {
                    return null;
                }
            }

            Variant variant = new Variant(coordinate);
            if (boxRecord.containsKey(null, "ALLELE", FieldType.stringIndexableSet)) {
                variant.addAlleles((IndexableSet) boxRecord.get("ALLELE"));
            }
            if (boxRecord.containsKey(null, "GT", FieldType.bytecode)) {
                variant.setGenotypes(IGenotypes.load((Bytes) boxRecord.get(null, "GT")).map(BitwiseGenotypes.BIALLELIC).subGenotypes(individualIndices));
            } else {
                // No GT field in the record: use constant genotypes sized to the selection.
                variant.setGenotypes(new ConstantGenotypes(selectedIndividuals.size()).map(BitwiseGenotypes.BIALLELIC));
            }

            return program.passesSequenceFilters(variant) ? List.singleton(variant) : null;
        });

        variants.calculate(new BiFunction<Variant, Variant, IRecord>() {
            // One record per thread, reused across apply(...) calls on that thread.
            final ThreadLocal<IRecord> records = ThreadLocal.withInitial(() -> {
                return new BoxRecord(scoreFields);
            });

            @Override
            public IRecord apply(Variant first, Variant second) {
                IRecord reusable = this.records.get();
                // The model fills the record and reports whether the pair should be emitted.
                return model.apply(first, second, program.minAssoc, reusable) ? reusable : null;
            }
        }).setOutput(new CCFOutputOption(program.output).addFields(scoreFields)).makeWindowsIf((left, right) -> {
            // NOTE(review): chromosomes are compared by reference, as in the original code;
            // this presumably relies on Chromosome instances being shared — confirm.
            return left.getChromosome() == right.getChromosome() && Math.abs(left.getPosition() - right.getPosition()) <= program.windows;
        }).init(entering -> {
            entering.setProperty(LDProperty.class.getName(), ((LDProperty) propertyPool.borrowObject()).reload(entering));
        }).destroy(leaving -> {
            propertyPool.returnObject(leaving.getProperty(LDProperty.class.getName()));
        }).setListener(program.silent ? null : new InputOutputListener("Input", "variants", "Calculated", "pairs")).submit(program.threads);
    }

    /**
     * Checks a variant against the {@code --seq-ac}, {@code --seq-an} and {@code --seq-af} ranges.
     *
     * @param variant variant whose genotypes have already been set
     * @return {@code true} if every configured (non-null) range contains the variant's value
     */
    private boolean passesSequenceFilters(Variant variant) {
        if (this.ac != null && !this.ac.contains(variant.getGenotypes().counter().getAC())) {
            return false;
        }
        if (this.an != null && !this.an.contains(variant.getGenotypes().counter().getAN())) {
            return false;
        }
        return this.af == null || this.af.contains(variant.getGenotypes().counter().getAF());
    }
}
