#!/usr/bin/env Rscript
# Plot the distribution of gene family sizes as a bar chart.
#
# Provenance: added as src/family_size_hist.R by the patch
# "Add family size histogram" (commit 76852df, Samuel Ortion, 2024-12-29).
#
# Usage:
#   family_size_hist.R <input_file> <output_file> [title]
#
#   input_file   Header-less table read with read.table(); its first two
#                columns are named geneid and familyid.
#   output_file  Path handed to ggsave().
#   title        Optional text shown as the plot subtitle.
library(dplyr)
library(ggplot2)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  stop(
    "Usage: family_size_hist.R <input_file> <output_file> [title]",
    call. = FALSE
  )
}
input_file <- args[1]
output_file <- args[2]
# The subtitle is optional; NULL makes labs() leave it out instead of
# passing NA through to the plot.
title <- if (length(args) >= 3L) args[3] else NULL

family_df <- read.table(input_file, header = FALSE)
if (ncol(family_df) < 2L) {
  stop(
    "Expected at least two columns (geneid, familyid) in ", input_file,
    call. = FALSE
  )
}
# Only the first two columns are named; any extra columns are left untouched.
colnames(family_df)[1:2] <- c("geneid", "familyid")

# Number of rows (genes) per family.
family_sizes <- family_df %>%
  count(familyid, name = "family_size")

# Number of families observed at each family size. Explicit `name`s avoid
# relying on dplyr's automatic fallback column name when `n` already exists.
size_distribution <- family_sizes %>%
  count(family_size, name = "n_families")

size_plot <- ggplot(size_distribution, aes(x = family_size, y = n_families)) +
  geom_col() +
  labs(
    title = "Duplicate Gene Family Sizes",
    subtitle = title,
    x = "family size",
    y = "count"
  ) +
  # Applied to this plot only, rather than changing the session-wide theme.
  theme_grey(base_size = 20)

# Pass the plot explicitly instead of depending on ggplot2's last_plot().
ggsave(output_file, plot = size_plot)