Compare commits

...

2 Commits

Author SHA1 Message Date
Samuel Ortion 76852dfaf8
Add family size histogram 2024-12-29 14:06:22 +01:00
Samuel Ortion a75a0cb305
Update environment 2024-12-29 14:04:43 +01:00
6 changed files with 1363 additions and 0 deletions

7
README.org Normal file
View File

@ -0,0 +1,7 @@
#+title: Comparative Genomics Project
#+author: Samuel Ortion
#+date: 2024-2025
Analysis of Tandemly Arrayed Genes in the /Glycine max/ (soy) proteome.
Part of the "Comparative Genomics" teaching unit in M2 GENomics Informatics and Mathematics for Health and Environment (GENIOMHE) at Université d'Évry Paris-Saclay.

1
environment.yml Normal file
View File

@ -0,0 +1 @@

7
renv/.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
library/
local/
cellar/
lock/
python/
sandbox/
staging/

1305
renv/activate.R Normal file

File diff suppressed because it is too large Load Diff

19
renv/settings.json Normal file
View File

@ -0,0 +1,19 @@
{
"bioconductor.version": null,
"external.libraries": [],
"ignored.packages": [],
"package.dependency.fields": [
"Imports",
"Depends",
"LinkingTo"
],
"ppm.enabled": null,
"ppm.ignored.urls": [],
"r.version": null,
"snapshot.type": "implicit",
"use.cache": true,
"vcs.ignore.cellar": true,
"vcs.ignore.library": true,
"vcs.ignore.local": true,
"vcs.manage.ignores": true
}

24
src/family_size_hist.R Normal file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env Rscript
library(dplyr)
library(ggplot2)
# Plot the distribution of gene family sizes as a bar chart.
#
# Usage: family_size_hist.R <input_file> <output_file> [title]
#   input_file   table without header, read with read.table() defaults;
#                its columns are named "geneid" and "familyid" below
#   output_file  path handed to ggsave()
#   title        optional text used as the plot subtitle
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "Usage: family_size_hist.R <input_file> <output_file> [title]",
    call. = FALSE
  )
}
input_file <- args[1]
output_file <- args[2]
# Without a third argument args[3] would be NA; NULL omits the subtitle.
title <- if (length(args) >= 3) args[3] else NULL

family_df <- read.table(input_file, header = FALSE)
# NOTE(review): assumes exactly two columns (gene id, family id) - confirm
# against the tool that produces the input file.
colnames(family_df) <- c("geneid", "familyid")

# First count rows per family, then count how many families share each size.
# Explicit `name =` avoids relying on dplyr's automatic "nn" fallback, which
# is only chosen (with a message) because a column "n" already exists.
family_sizes <- family_df %>% count(familyid, name = "family_size")
size_counts <- family_sizes %>% count(family_size, name = "n_families")

# Keep the plot in a variable so it is not auto-printed to the default
# graphics device, and so ggsave() does not depend on last_plot().
size_plot <- ggplot(size_counts, aes(x = family_size, y = n_families)) +
  geom_col() +
  labs(title = "Duplicate Gene Family Sizes", subtitle = title) +
  xlab("family size") +
  ylab("count") +
  theme_grey(base_size = 20)

ggsave(output_file, plot = size_plot)