Again, following the workflow-based programming model, programmers can easily call the API functions to annotate sequence variants. The input variants should be stored in the GTB format. The following code annotates variant gene features and allele frequencies.

// This code snippet demonstrates how to configure and execute an annotation workflow using the KGGA
// platform. The snippet includes setting up the annotation resources, defining input data, configuring
// annotation databases, and exporting annotated variants to a TSV file format. The code also shows how
// to add tasks to a workflow and execute them sequentially.

import edu.sysu.pmglab.container.interval.FloatInterval;
import edu.sysu.pmglab.executor.Executor;
import edu.sysu.pmglab.gtb.GTBManager;
import edu.sysu.pmglab.gtb.genome.coordinate.RefGenomeVersion;
import edu.sysu.pmglab.kgga.command.executor.Utility;
import edu.sysu.pmglab.kgga.command.pipeline.GeneralIOOptions;
import edu.sysu.pmglab.kgga.command.pipeline.LDPruneOptions;
import edu.sysu.pmglab.kgga.command.pipeline.PreprocessingPipeline;
import edu.sysu.pmglab.kgga.command.pipeline.VCFQualityControlOptions;
import edu.sysu.pmglab.kgga.command.task.LDPruningTask;
import edu.sysu.pmglab.kgga.command.task.OutputVariants2TSVTask;
import edu.sysu.pmglab.kgga.command.validator.VariantFileMeta;
import edu.sysu.pmglab.kgga.io.InputPhenotypeFileSet;
import edu.sysu.pmglab.kgga.io.InputType;

import java.io.File;
import java.io.IOException;

/**
 * Minimal end-to-end example of driving a KGGA workflow from Java code.
 *
 * <p>The program runs two workflow rounds on the same {@link Executor}:
 * <ol>
 *   <li>build the annotation-base GTB from a remote VCF file plus a PED file, and</li>
 *   <li>run LD pruning followed by a TSV export against that GTB.</li>
 * </ol>
 * Everything is written below the {@code ./test1} workspace directory.
 */
public class AnnotationExample {
    /** Remote example genotypes (VCF, hg19 coordinates). */
    private static final String GENOTYPE_URL =
            "https://idc.biosino.org/pmglab/resource/kgg/kgga/example/assoc.hg19.vcf.gz";

    /** Remote example subject/phenotype file (PED). */
    private static final String PEDIGREE_URL =
            "https://idc.biosino.org/pmglab/resource/kgg/kgga/example/assoc.ped";

    /** Directory that receives the files produced by the workflow. */
    private static final String WORKSPACE_PATH = "./test1";

    /** Thread count handed to the tasks that accept one. */
    private static final int THREADS = 4;

    /**
     * Configures the inputs, builds the annotation-base GTB, then prunes by LD and exports to TSV.
     *
     * @param args command-line arguments (unused)
     * @throws RuntimeException wrapping any {@link IOException} raised while the workflow is set up or run
     */
    public static void main(String[] args) {
        final File outputDir = new File(WORKSPACE_PATH);

        // Option holders: general input/output settings and VCF quality-control settings
        // (the latter is left at its defaults).
        final GeneralIOOptions io = new GeneralIOOptions();
        final VCFQualityControlOptions qc = new VCFQualityControlOptions();

        try {
            // --- Inputs -------------------------------------------------------------------
            // Genotype input: a VCF file on the hg19 reference genome, fetched by URL.
            VariantFileMeta genotypes =
                    new VariantFileMeta(GENOTYPE_URL, InputType.VCF, RefGenomeVersion.hg19);
            io.inputGTYFiles.add(genotypes);

            // Subject information (PED file) that accompanies the genotypes.
            io.phenoFileSet = new InputPhenotypeFileSet(PEDIGREE_URL);

            // Local minor-allele-frequency interval: 0.05 to 0.5.
            io.localMaf = new FloatInterval(0.05f, 0.5f);

            // --- Round 1: build the annotation-base GTB -------------------------------------
            Executor pipeline = new Executor();
            Utility.addTrack(pipeline, outputDir);

            GTBManager baseGtb = PreprocessingPipeline.INSTANCE
                    .generateAnnotationBase(io, qc, pipeline, outputDir);
            pipeline.execute();

            // Start the second round from an empty task list.
            pipeline.clearTasks();

            // --- Round 2: LD pruning + TSV export -------------------------------------------
            // Publish the GTB produced above under the "AnnotationBaseVariantSet" parameter.
            String gtbPath = String.valueOf(baseGtb.getFile());
            pipeline.setParam("AnnotationBaseVariantSet", new File(gtbPath));

            // LD pruning configured with an r^2 threshold of 0.01.
            LDPruneOptions pruning = new LDPruneOptions();
            pruning.pruneR2 = 0.01f;

            // NOTE(review): the meaning of the boolean argument is defined by LDPruningTask and
            // is not visible from this example - confirm against the task's documentation.
            pipeline.addTask(new LDPruningTask(pruning, outputDir, true, THREADS));
            pipeline.addTask(new OutputVariants2TSVTask(outputDir, THREADS));

            // Runs both queued tasks: pruning first, then the TSV export.
            pipeline.execute();
        } catch (IOException e) {
            // Kept deliberately simple for an example: surface I/O failures unchecked,
            // preserving the original cause.
            throw new RuntimeException(e);
        }
    }
}
Copyright © MiaoXin Li. All rights reserved. Last modified time: 2025-05-16 02:00:16

results matching ""

    No results matching ""