2024-10-28 10:46:05 +01:00
|
|
|
/**
|
|
|
|
/** Comparative Genomics workflow
|
|
|
|
/**
|
|
|
|
/** This workflow finds the duplicate genes from a proteome
|
|
|
|
/** Then, it finds the Tandemly Arrayed Genes (TAGs)
|
|
|
|
/**/
|
|
|
|
|
|
|
|
nextflow.enable.dsl = 2;
|
|
|
|
|
|
|
|
include { GUNZIP } from "./modules/gunzip.nf"
|
2024-11-04 11:37:20 +01:00
|
|
|
include { BLAST_ALL_AGAINST_ALL } from "./modules/blast.nf"
|
2024-10-28 10:46:05 +01:00
|
|
|
include { FILTER_FASTA } from "./modules/filter_fasta.nf"
|
|
|
|
include { FILTER_BLASTP } from "./modules/filter_blastp.nf"
|
|
|
|
include { CLUSTERING } from "./modules/clustering.nf"
|
|
|
|
|
2024-11-04 11:37:20 +01:00
|
|
|
/*
 * PROTEIN_GENE_MAPPING
 *
 * Declares a single proteome file as input and a file named
 * 'protein_gene.tsv' as output.
 *
 * NOTE(review): no script:/shell:/exec: section is defined in this block and
 * the entry workflow in this file does not invoke the process -- it looks
 * unfinished; confirm the intended command before wiring it in.
 */
process PROTEIN_GENE_MAPPING {

    input:
    path(proteome)

    output:
    path('protein_gene.tsv')
}
|
|
|
|
|
2024-10-28 10:46:05 +01:00
|
|
|
/*
 * Entry workflow: chains the included modules over the proteome file given
 * by `params.proteome`.
 *
 * Parameters read:
 *   params.proteome      path of the input proteome file
 *   params.min_coverage  first threshold passed to FILTER_BLASTP
 *   params.min_identity  second threshold passed to FILTER_BLASTP
 */
workflow {

    // checkIfExists makes a wrong or missing path fail immediately with a
    // clear error instead of surfacing later inside the first process.
    proteome_ch = Channel.fromPath(params.proteome, checkIfExists: true)

    GUNZIP(proteome_ch)

    FILTER_FASTA(GUNZIP.out)

    // The all-against-all BLAST consumes the `proteome` output of FILTER_FASTA.
    BLAST_ALL_AGAINST_ALL(FILTER_FASTA.out.proteome)

    // FILTER_BLASTP receives the two thresholds, the BLAST output and the
    // `lengths` output of FILTER_FASTA.
    FILTER_BLASTP(
        params.min_coverage,
        params.min_identity,
        BLAST_ALL_AGAINST_ALL.out,
        FILTER_FASTA.out.lengths
    )

    CLUSTERING(FILTER_BLASTP.out)
}
|