comparative-genomics-project/workflow/modules/clustering.nf

55 lines
722 B
Plaintext

// Convert a BLASTP tabular result into an MCL "ABC" edge list.
//
// Input : one whitespace-delimited BLASTP table.
// Output: 'graph.abc', three tab-separated columns taken from fields
//         14, 16 and 12 of every input row (in that order).
// NOTE(review): presumably these are source label, target label and edge
// weight in a custom BLAST outfmt layout -- confirm against the step that
// produces the table.
process BLASTP_TO_ABC {
    input:
    path blast_hits

    output:
    path 'graph.abc'

    script:
    // `\$` keeps Groovy from interpolating the awk field references.
    """
    awk 'BEGIN { OFS="\t" } { print \$14, \$16, \$12 }' "${blast_hits}" > 'graph.abc'
    """
}
// Cluster an ABC-format graph with MCL.
//
// Input : one label-format ("ABC") edge list.
// Output: 'clustering.mcl', the cluster file written by mcl.
process MCL {
    input:
    path abc

    output:
    path 'clustering.mcl'

    script:
    // The -o target must match the declared output name exactly; otherwise
    // Nextflow cannot find the output file and the task fails.
    """
    mcl "${abc}" --abc -o 'clustering.mcl'
    """
}
// Reformat an MCL cluster file into 'families.tsv' by running the
// project's scripts/mcl_to_tsv.awk program over it.
//
// Input : one MCL cluster file.
// Output: 'families.tsv' (layout is defined by the awk script, which is not
//         visible from this module).
// NOTE(review): `baseDir` is a deprecated alias of `projectDir`; consider
// switching once the minimum supported Nextflow version is confirmed.
process MCL_TO_TSV {
    input:
    path clusters

    output:
    path 'families.tsv'

    script:
    """
    awk -f "${baseDir}/scripts/mcl_to_tsv.awk" "${clusters}" > 'families.tsv'
    """
}
// Sub-workflow: BLASTP table -> ABC graph -> MCL clusters -> families TSV.
//
// take: blastp_tsv  input handed to BLASTP_TO_ABC
// emit: families    output of MCL_TO_TSV
workflow CLUSTERING {
    take:
    blastp_tsv

    main:
    // Each process call returns its output, which feeds the next step.
    abc_graph    = BLASTP_TO_ABC(blastp_tsv)
    mcl_clusters = MCL(abc_graph)
    family_table = MCL_TO_TSV(mcl_clusters)

    emit:
    families = family_table
}