comparative-genomics-project/workflow/KaKs/main.nf

40 lines
915 B
Plaintext

/**
 * Compute Ka and Ks values for each pair of duplicate genes
 * in a TAG and in a gene family.
 */
include { GUNZIP as GUNZIP_CDS } from "./modules/gunzip.nf"
include { KA_KS } from "./modules/ka_ks.nf"
/**
 * GENE_PAIRS: feed the families file to the project's `pairs` Rust binary
 * on standard input and capture its standard output in a file named 'pairs'.
 *
 * input:  families - file streamed to the `pairs` binary on stdin
 * output: 'pairs'  - whatever the binary writes to stdout
 *
 * The binary is expected to have been built already under
 * rust/pairs/target/release, two directories above this script.
 */
process GENE_PAIRS {
    input:
    path families

    output:
    path 'pairs'

    script:
    // - `projectDir` replaces the deprecated `baseDir` alias (same directory:
    //   the one holding the main script).
    // - The binary path is quoted so a project directory containing spaces
    //   does not split the command.
    // - stdin redirection replaces `cat ... |` so that a failure to read the
    //   input fails the task instead of being masked by the pipeline's last
    //   exit status.
    """
    "${projectDir}/../../rust/pairs/target/release/pairs" < "${families}" > 'pairs'
    """
}
/*
 * Entry workflow.
 *
 * Parameters read:
 *   params.families - file handed to GENE_PAIRS
 *   params.cds      - compressed file handed to GUNZIP_CDS
 *   params.proteome - file handed to KA_KS
 *
 * Steps: decompress the CDS file, derive gene pairs from the families file,
 * run KA_KS on every pair, then gather the results under a header line into
 * a single file whose path is printed.
 */
workflow {
    // Resolve the user-supplied locations to file objects up front:
    //  - plain strings are only accepted by `path` inputs when they are
    //    absolute paths or URIs, whereas file() also resolves relative paths;
    //  - checkIfExists makes a wrong path fail here, before any task starts.
    families = file(params.families, checkIfExists: true)
    cds_gz = file(params.cds, checkIfExists: true)
    proteome = file(params.proteome, checkIfExists: true)

    cds = GUNZIP_CDS(cds_gz)

    // Split the tab-separated output of GENE_PAIRS into rows and keep the
    // first two columns of each row as a (gene, gene) tuple.
    pairs = GENE_PAIRS(families)
        .splitCsv(sep: '\t', header: false)
        .map { row -> tuple(row[0], row[1]) }

    kaks = KA_KS(pairs, proteome, cds)

    // Save to a single file with collectFile: the header is emitted first,
    // and sort: false keeps the entries in emission order so it stays on top.
    // NOTE(review): assumes KA_KS emits one tab-separated record per pair
    // matching the header columns — confirm against ./modules/ka_ks.nf.
    header = Channel.value("gene_id_1\tgene_id_2\tKa\tKs")
    header.concat(kaks)
        .collectFile(name: 'test.txt', newLine: true, sort: false)
        .view()
}