Add family size histogram

This commit is contained in:
Samuel Ortion 2024-12-29 14:06:22 +01:00
parent a75a0cb305
commit 76852dfaf8
Signed by: sortion
GPG Key ID: 9B02406F8C4FB765
1 changed file with 24 additions and 0 deletions

24
src/family_size_hist.R Normal file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env Rscript
# Plot the distribution of gene family sizes as a bar chart.
#
# Usage: family_size_hist.R <input_file> <output_file> [title]
#   input_file   header-less table readable by read.table(); the first two
#                columns are named "geneid" and "familyid"
#   output_file  path handed to ggsave(), which picks the image format from
#                the file extension
#   title        optional text shown as the plot subtitle
library(dplyr)
library(ggplot2)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "Usage: family_size_hist.R <input_file> <output_file> [title]",
    call. = FALSE
  )
}
input_file <- args[1]
output_file <- args[2]
# The subtitle is optional; NULL makes labs() omit it instead of showing NA.
title <- if (length(args) >= 3) args[3] else NULL

family_df <- read.table(input_file, header = FALSE)
colnames(family_df) <- c("geneid", "familyid")

# Number of genes per family.
count_df <- family_df %>%
  count(familyid, name = "family_size")

# Number of families per family size. The output column is named explicitly
# rather than relying on dplyr's automatic "nn" fallback, which is chosen
# (with a message) when a column called "n" already exists.
distribution_df <- count_df %>%
  count(family_size, name = "n_families")

# Keep the plot in a variable so that it is not auto-printed to the default
# graphics device (which leaves a stray Rplots.pdf when run through Rscript)
# and so that ggsave() does not depend on last_plot().
family_size_plot <- ggplot(
  distribution_df,
  aes(x = family_size, y = n_families)
) +
  geom_col() +
  labs(
    title = "Duplicate Gene Family Sizes",
    subtitle = title
  ) +
  xlab("family size") +
  ylab("count") +
  theme_grey(base_size = 20)

ggsave(output_file, plot = family_size_plot)