comparative-genomics-project/workflow/main.nf

34 lines
883 B
Plaintext
Raw Normal View History

2024-10-28 10:46:05 +01:00
/**
/** Comparative Genomics workflow
/**
/** This workflow finds the duplicate genes in a proteome.
/** Then, it finds the Tandemly Arrayed Genes (TAGs).
/**/
nextflow.enable.dsl = 2;
include { GUNZIP } from "./modules/gunzip.nf"
2024-11-04 11:37:20 +01:00
include { BLAST_ALL_AGAINST_ALL } from "./modules/blast.nf"
2024-10-28 10:46:05 +01:00
include { FILTER_FASTA } from "./modules/filter_fasta.nf"
include { FILTER_BLASTP } from "./modules/filter_blastp.nf"
include { CLUSTERING } from "./modules/clustering.nf"
2024-11-04 11:37:20 +01:00
// Process declaration with one path input (`proteome`) and one declared
// output file, 'protein_gene.tsv'.
// Presumably meant to build a protein-to-gene mapping table (inferred from the
// process and file names only) — TODO confirm.
// NOTE(review): this definition has no script/shell/exec section, so nothing
// visible here actually produces 'protein_gene.tsv'. Nextflow normally requires
// a process to end with its script — confirm whether the body is still to be
// written. The entry workflow in this file does not invoke this process.
process PROTEIN_GENE_MAPPING {
input:
// Proteome file staged into the task work directory.
path proteome
output:
// File the task is expected to leave in its work directory.
path 'protein_gene.tsv'
}
2024-10-28 10:46:05 +01:00
/*
 * Entry workflow.
 *
 * Reads the path(s) given by `params.proteome` and chains the included
 * processes in this order:
 *   GUNZIP -> FILTER_FASTA -> BLAST_ALL_AGAINST_ALL -> FILTER_BLASTP -> CLUSTERING
 *
 * Parameters read:
 *   params.proteome      path (or glob) of the input proteome file
 *   params.min_coverage  first argument forwarded to FILTER_BLASTP
 *   params.min_identity  second argument forwarded to FILTER_BLASTP
 *
 * PROTEIN_GENE_MAPPING is declared in this file but is not invoked here.
 */
workflow {
    // checkIfExists makes a missing or mistyped proteome path abort the run
    // immediately, instead of yielding an empty channel and a pipeline that
    // completes without executing any task.
    proteome = Channel.fromPath(params.proteome, checkIfExists: true)

    GUNZIP(proteome)
    FILTER_FASTA(GUNZIP.out)

    // FILTER_FASTA exposes two named outputs that are consumed separately:
    // `proteome` feeds the all-against-all BLAST step, `lengths` is passed to
    // the BLAST hit filter.
    BLAST_ALL_AGAINST_ALL(FILTER_FASTA.out.proteome)
    FILTER_BLASTP(
        params.min_coverage,
        params.min_identity,
        BLAST_ALL_AGAINST_ALL.out,
        FILTER_FASTA.out.lengths
    )

    CLUSTERING(FILTER_BLASTP.out)
}