Compare commits

..

7 Commits

10 changed files with 236 additions and 218 deletions

View File

@ -2,7 +2,7 @@ sub createFolderStructure{
system("bash ./folder-structure.sh");
}
createFolderStructure();
# createFolderStructure();
$hash_calc_ignore_pattern{aux} =
'^\\\\gdef\\\\minted@oldcachelist\{,'

View File

@ -16,4 +16,6 @@ bib:
glossaries:
makeglossaries -d build $(SOURCE)
index:
makeindex -d build $(SOURCE)
.PHONY: build

BIN
figures/lallemand2020-fig1_copy.pdf (Stored with Git LFS)

Binary file not shown.

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 52 KiB

BIN
figures/tag-definition.pdf (Stored with Git LFS)

Binary file not shown.

View File

@ -5,8 +5,9 @@
\usepackage{tikz}
\begin{document}
\definecolor{lammegreen}{HTML}{017d79}
\definecolor{lammeblue}{HTML}{0050b4}
\usetikzlibrary{positioning}
\usetikzlibrary{decorations.pathreplacing}
@ -21,28 +22,28 @@
\draw[-,very thick] (-6,0) to (6,0);
% Gene blocks
\node[rectangle,fill=orange,minimum height=0.5cm, minimum width=1cm] (orange1) at (-5,0) {};
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green1) [right=of orange1] {};
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green1) [right=of orange1] {};
\node[rectangle,fill=red,minimum height=0.5cm, minimum width=1cm] (red1) [right=of green1] {};
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green2) [right=of red1] {};
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green3) [right=of green2] {};
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green4) [right=of green3] {};
\node[rectangle,fill=blue,minimum height=0.5cm, minimum width=1cm] (blue1) [right=of green4] {};
\node[rectangle,fill=blue,minimum height=0.5cm, minimum width=1cm] (blue2) [right=of blue1] {};
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green2) [right=of red1] {};
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green3) [right=of green2] {};
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green4) [right=of green3] {};
\node[rectangle,fill=lammeblue,minimum height=0.5cm, minimum width=1cm] (blue1) [right=of green4] {};
\node[rectangle,fill=lammeblue,minimum height=0.5cm, minimum width=1cm] (blue2) [right=of blue1] {};
\node[rectangle,fill=orange,minimum height=0.5cm, minimum width=1cm] (orange2) [right=of blue2] {};
% Homology links
\draw[-,orange, bend left=40] (orange1) to coordinate[dot] node[nod] {$\mathrm{TAG}_7$} (orange2);
\draw[-,green,bend right=40] (green1) to (green2)
\draw[-,lammegreen,bend right=40] (green1) to (green2)
(green2) to (green3)
(green3) to (green4);
\draw[-,green,bend left=40] (green1) to (green3)
\draw[-,lammegreen,bend left=40] (green1) to (green3)
(green1) to coordinate[dot] node[nod] {$\mathrm{TAG}_3$}(green4);
\draw[-,green,bend right=40] (green1) to (green2)
\draw[-,lammegreen,bend right=40] (green1) to (green2)
(green2) to (green4);
\draw[-,blue, bend left=40] (blue1) to coordinate[dot] node[nod] {$\mathrm{TAG}_0$} (blue2);
\draw[-,lammeblue, bend left=40] (blue1) to coordinate[dot] node[nod] {$\mathrm{TAG}_0$} (blue2);
\end{tikzpicture}
\end{document}

View File

@ -9,13 +9,8 @@
#+exclude_tags: noexport
#+options: H:7
#+options: toc:nil
#+MACRO: conditional-header (eval (concat "#+header: :results output " (print-to-string org-export-current-backend)))
# ref. conditional-header https://emacs.stackexchange.com/a/64340/41374
# ref. https://write.as/dani/writing-a-phd-thesis-with-org-mode
#+name: acronyms
| key | abbreviation | full form |
|------------+--------------+--------------------------------------------|
@ -53,9 +48,12 @@
#+end_center
#+begin_export latex
{
\hypersetup{linkcolor=black}
\tableofcontents
\listoffigures
\listoftables
}
#+end_export
[[printglossaries:]]
@ -154,19 +152,21 @@ In this step, the typical tool involved is =BLAST= (Basic Local Alignment Search
Several =BLAST= metrics can be used as a homology measure, such as bitscore, identity percentage, E-value or variations of these. The choice of metrics can affect the results of graph clustering in the following step, and we should therefore choose them carefully [cite:@gibbonsEvaluationBLASTbasedEdgeweighting2015].
**** Identification of gene families
Based on the homology links between each pair of genes, we construct a undirected weighted graph whose vertices correspond to genes and edges to homology links between them.
Based on the homology links between each pair of genes, we construct an undirected weighted graph whose vertices correspond to genes and edges to homology links between them.
We apply a graph clustering algorithm on the graph in order to infer the gene families corresponding to densely connected communities of vertices.
FTAG Finder proposes three clustering algorithm alternatives: single linkage, Markov Clustering [cite:@vandongenNewClusterAlgorithm1998] or Walktrap [cite:@ponsComputingCommunitiesLarge2005].
**** Detection of TAGs
The final step of FTAG Finder consists in the identification of gls:TAG from the gene families and the positions of genes.
For a given chromosome, the tool seeks genes belonging to the same family and located close to each other. The tool allows a maximal number of genes between the homologous genes, with a parameter set by the user. Ref:fig:tag-definitions is a schematic representation of some possible gls:TAG positioning on a genome associated with their definition in FTAG Finder /Find Tags/ step.
#+begin_export latex
\fladdfig{
\includegraphics[width=.9\linewidth]{./figures/tag-definition.pdf}
\caption[Schematic representation of TAG definitions]{\label{fig:tag-definitions} Schematic representation of TAG definitions. Several genes are represented on a linear chromosome. The red box represents a singleton gene. Orange boxes represent a TAG with two duplicate genes separated by 7 other genes ($\mathrm{TAG}_7$). Four green boxes constitute a TAG; the genes at the extremities are separated by three genes ($\mathrm{TAG}_3$). The two blue boxes represent a TAG with two genes next to each other ($\mathrm{TAG}_0$). The curved edges represent the homology links between each pair of genes of a TAG.}}
#+end_export
FTAG Finder proposes three clustering algorithm alternatives: single linkage, Markov Clustering [cite:@vandongenNewClusterAlgorithm1998] or Walktrap [cite:@ponsComputingCommunitiesLarge2005].
**** Detection of TAGs
The final step of FTAG Finder consists in the identification of gls:TAG from the gene families and the positions of genes.
For a given chromosome, the tool seeks genes belonging to the same family and located close to each other. The tool allows a maximal number of genes between the homologous genes, with a parameter set by the user. Cref:fig:tag-definitions is a schematic representation of some possible gls:TAG positioning on a genome associated with their definition in FTAG Finder /Find Tags/ step.
* Objectives for the internship
** Scientific questions
The underlying question of FTAG Finder is the study of the evolutionary fate of duplicate genes in Eukaryotes.
@ -180,22 +180,26 @@ Another objective of my internship will be to port FTAG Finder on a workflow man
We will have to make a choice for the tool we will use.
The two main options are Snakemake and Nextflow. Snakemake is a Python-powered workflow manager based on rules /à la/ GNU Make [cite:@kosterSnakemakeScalableBioinformatics2012]. Nextflow is a Groovy-powered workflow manager, which relies on the dataflow paradigm [cite:@ditommasoNextflowEnablesReproducible2017]. Both are widely used in the bioinformatics community, and their use has been on the rise since they came out in 2012 and 2013 respectively [cite:@djaffardjyDevelopingReusingBioinformatics2023].
#+begin_export html
<h3>Bibliography</h3>
#+end_export
#+print_bibliography:
#+begin_export latex
\flstop
#+end_export
* References
:PROPERTIES:
:UNNUMBERED: t
:END:
#+begin_export latex
\printbibliography[heading=none]
#+end_export
#+begin_export latex
\cleartoleftpage
\clearpairofpagestyles
#+end_export
** Summary
* Summary
:PROPERTIES:
:UNNUMBERED: t
:END:
@ -216,6 +220,10 @@ Principle: construct vertex communities based on where an agent would get stuck
# LocalWords: speciation Subfunctionalization Neofunctionalization
# LocalWords: Pseudogenization
# Local Variables:
# eval: (progn (org-babel-goto-named-src-block "startup") (org-babel-execute-src-block) (outline-hide-sublevels 1))
# End:
* Setup :noexport:
#+name: startup
@ -225,7 +233,3 @@ Principle: construct vertex communities based on where an agent would get stuck
#+RESULTS: startup
: Loaded ./setup.el
# Local Variables:
# eval: (progn (org-babel-goto-named-src-block "startup") (org-babel-execute-src-block) (outline-hide-sublevels 1))
# End:

BIN
report.pdf (Stored with Git LFS)

Binary file not shown.

View File

@ -40,7 +40,7 @@ See: [[https://emacs.stackexchange.com/a/41456/41374]]
\\makeindex
\\makeglossaries "
;("\\part{%s}" . "\\part*{%s}")
("\\chapter{%s}" . "\\chapter{%s}")
("\\chapter{%s}" . "\\chapter*{%s}")
("\\section{%s}" . "\\section*{%s}")
("\\subsection{%s}" . "\\subsection*{%s}")
("\\subsubsection{%s}" . "\\subsubsection*{%s}")

View File

@ -50,7 +50,9 @@
citestyle=authoryear-comp,
backend=biber,
natbib=true
]{biblatex}
]{biblatex}
\renewcommand\bibname{References}
\RequirePackage{doi}
\RequirePackage{xurl}
@ -63,7 +65,7 @@
]{doclicense}
\RequirePackage[
nameinlink,
%nameinlink,
noabbrev
]{cleveref}
@ -83,7 +85,8 @@
urlcolor=primaryLink,
linkcolor=primaryLink,
anchorcolor=primaryLink,
citecolor=primaryCite
citecolor=primaryCite,
%linktoc=page
}
\newcommand*{\glsplainhyperlink}[2]{%
\begingroup%
@ -98,7 +101,15 @@
\let\oldGls=\Gls
\renewcommand{\Gls}[1]{{\hypersetup{hidelinks}%
\oldGls{#1}}}%
\oldGls{#1}}}%
% Wrap \glspl so that whatever the original command typesets is produced
% with \hypersetup{hidelinks} in effect; the extra brace pair forms a group
% that keeps the \hypersetup change local to this one call.
% NOTE(review): the wrapper accepts only one mandatory argument, so any
% optional arguments of the original command are not forwarded -- confirm
% that no caller relies on them.
\let\oldglspl=\glspl
\renewcommand{\glspl}[1]{{\hypersetup{hidelinks}%
\oldglspl{#1}}}%
% Same wrapper for \Glspl.
\let\oldGlspl=\Glspl
\renewcommand{\Glspl}[1]{{\hypersetup{hidelinks}%
\oldGlspl{#1}}}%
%\renewcommand*{\glstextformat}[1]{\begingroup\hypersetup{hidelinks}#1\endgroup}