Compare commits
7 Commits
3f7dcc62b2
...
03f3efd668
Author | SHA1 | Date |
---|---|---|
Samuel Ortion | 03f3efd668 | |
Samuel Ortion | 366c732998 | |
Samuel Ortion | 2110e31754 | |
Samuel Ortion | 921b5821a2 | |
Samuel Ortion | 3264b79469 | |
Samuel Ortion | 25cf96e485 | |
Samuel Ortion | 5880678767 |
|
@ -2,7 +2,7 @@ sub createFolderStructure{
|
||||||
system("bash ./folder-structure.sh");
|
system("bash ./folder-structure.sh");
|
||||||
}
|
}
|
||||||
|
|
||||||
createFolderStructure();
|
# createFolderStructure();
|
||||||
|
|
||||||
$hash_calc_ignore_pattern{aux} =
|
$hash_calc_ignore_pattern{aux} =
|
||||||
'^\\\\gdef\\\\minted@oldcachelist\{,'
|
'^\\\\gdef\\\\minted@oldcachelist\{,'
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -16,4 +16,6 @@ bib:
|
||||||
glossaries:
|
glossaries:
|
||||||
makeglossaries -d build $(SOURCE)
|
makeglossaries -d build $(SOURCE)
|
||||||
|
|
||||||
|
index:
|
||||||
|
makeindex -d build $(SOURCE)
|
||||||
.PHONY: build
|
.PHONY: build
|
||||||
|
|
BIN
figures/lallemand2020-fig1_copy.pdf (Stored with Git LFS)
BIN
figures/lallemand2020-fig1_copy.pdf (Stored with Git LFS)
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Before Width: | Height: | Size: 50 KiB After Width: | Height: | Size: 52 KiB |
BIN
figures/tag-definition.pdf (Stored with Git LFS)
BIN
figures/tag-definition.pdf (Stored with Git LFS)
Binary file not shown.
|
@ -5,8 +5,9 @@
|
||||||
|
|
||||||
\usepackage{tikz}
|
\usepackage{tikz}
|
||||||
|
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
\definecolor{lammegreen}{HTML}{017d79}
|
||||||
|
\definecolor{lammeblue}{HTML}{0050b4}
|
||||||
\usetikzlibrary{positioning}
|
\usetikzlibrary{positioning}
|
||||||
\usetikzlibrary{decorations.pathreplacing}
|
\usetikzlibrary{decorations.pathreplacing}
|
||||||
|
|
||||||
|
@ -21,28 +22,28 @@
|
||||||
\draw[-,very thick] (-6,0) to (6,0);
|
\draw[-,very thick] (-6,0) to (6,0);
|
||||||
% Gene blocks
|
% Gene blocks
|
||||||
\node[rectangle,fill=orange,minimum height=0.5cm, minimum width=1cm] (orange1) at (-5,0) {};
|
\node[rectangle,fill=orange,minimum height=0.5cm, minimum width=1cm] (orange1) at (-5,0) {};
|
||||||
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green1) [right=of orange1] {};
|
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green1) [right=of orange1] {};
|
||||||
\node[rectangle,fill=red,minimum height=0.5cm, minimum width=1cm] (red1) [right=of green1] {};
|
\node[rectangle,fill=red,minimum height=0.5cm, minimum width=1cm] (red1) [right=of green1] {};
|
||||||
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green2) [right=of red1] {};
|
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green2) [right=of red1] {};
|
||||||
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green3) [right=of green2] {};
|
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green3) [right=of green2] {};
|
||||||
\node[rectangle,fill=green,minimum height=0.5cm, minimum width=1cm] (green4) [right=of green3] {};
|
\node[rectangle,fill=lammegreen,minimum height=0.5cm, minimum width=1cm] (green4) [right=of green3] {};
|
||||||
\node[rectangle,fill=blue,minimum height=0.5cm, minimum width=1cm] (blue1) [right=of green4] {};
|
\node[rectangle,fill=lammeblue,minimum height=0.5cm, minimum width=1cm] (blue1) [right=of green4] {};
|
||||||
\node[rectangle,fill=blue,minimum height=0.5cm, minimum width=1cm] (blue2) [right=of blue1] {};
|
\node[rectangle,fill=lammeblue,minimum height=0.5cm, minimum width=1cm] (blue2) [right=of blue1] {};
|
||||||
\node[rectangle,fill=orange,minimum height=0.5cm, minimum width=1cm] (orange2) [right=of blue2] {};
|
\node[rectangle,fill=orange,minimum height=0.5cm, minimum width=1cm] (orange2) [right=of blue2] {};
|
||||||
|
|
||||||
% Homology links
|
% Homology links
|
||||||
|
|
||||||
\draw[-,orange, bend left=40] (orange1) to coordinate[dot] node[nod] {$\mathrm{TAG}_7$} (orange2);
|
\draw[-,orange, bend left=40] (orange1) to coordinate[dot] node[nod] {$\mathrm{TAG}_7$} (orange2);
|
||||||
|
|
||||||
\draw[-,green,bend right=40] (green1) to (green2)
|
\draw[-,lammegreen,bend right=40] (green1) to (green2)
|
||||||
(green2) to (green3)
|
(green2) to (green3)
|
||||||
(green3) to (green4);
|
(green3) to (green4);
|
||||||
\draw[-,green,bend left=40] (green1) to (green3)
|
\draw[-,lammegreen,bend left=40] (green1) to (green3)
|
||||||
(green1) to coordinate[dot] node[nod] {$\mathrm{TAG}_3$}(green4);
|
(green1) to coordinate[dot] node[nod] {$\mathrm{TAG}_3$}(green4);
|
||||||
\draw[-,green,bend right=40] (green1) to (green2)
|
\draw[-,lammegreen,bend right=40] (green1) to (green2)
|
||||||
(green2) to (green4);
|
(green2) to (green4);
|
||||||
|
|
||||||
\draw[-,blue, bend left=40] (blue1) to coordinate[dot] node[nod] {$\mathrm{TAG}_0$} (blue2);
|
\draw[-,lammeblue, bend left=40] (blue1) to coordinate[dot] node[nod] {$\mathrm{TAG}_0$} (blue2);
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
46
report.org
46
report.org
|
@ -9,13 +9,8 @@
|
||||||
#+exclude_tags: noexport
|
#+exclude_tags: noexport
|
||||||
#+options: H:7
|
#+options: H:7
|
||||||
#+options: toc:nil
|
#+options: toc:nil
|
||||||
#+MACRO: conditional-header (eval (concat "#+header: :results output " (print-to-string org-export-current-backend)))
|
|
||||||
# ref. conditional-header https://emacs.stackexchange.com/a/64340/41374
|
|
||||||
|
|
||||||
|
|
||||||
# ref. https://write.as/dani/writing-a-phd-thesis-with-org-mode
|
# ref. https://write.as/dani/writing-a-phd-thesis-with-org-mode
|
||||||
|
|
||||||
|
|
||||||
#+name: acronyms
|
#+name: acronyms
|
||||||
| key | abbreviation | full form |
|
| key | abbreviation | full form |
|
||||||
|------------+--------------+--------------------------------------------|
|
|------------+--------------+--------------------------------------------|
|
||||||
|
@ -53,9 +48,12 @@
|
||||||
#+end_center
|
#+end_center
|
||||||
|
|
||||||
#+begin_export latex
|
#+begin_export latex
|
||||||
|
{
|
||||||
|
\hypersetup{linkcolor=black}
|
||||||
\tableofcontents
|
\tableofcontents
|
||||||
\listoffigures
|
\listoffigures
|
||||||
\listoftables
|
\listoftables
|
||||||
|
}
|
||||||
#+end_export
|
#+end_export
|
||||||
|
|
||||||
[[printglossaries:]]
|
[[printglossaries:]]
|
||||||
|
@ -154,19 +152,21 @@ In this step, the typical tool involved is =BLAST= (Basic Local Alignment Search
|
||||||
|
|
||||||
Several =BLAST= metrics can be used as a homology measure, such as bitscore, identity percentage, E-value or variations of these. The choice of metrics can affect the results of graph clustering in the following step, and we should therefore choose them carefully [cite:@gibbonsEvaluationBLASTbasedEdgeweighting2015].
|
Several =BLAST= metrics can be used as a homology measure, such as bitscore, identity percentage, E-value or variations of these. The choice of metrics can affect the results of graph clustering in the following step, and we should therefore choose them carefully [cite:@gibbonsEvaluationBLASTbasedEdgeweighting2015].
|
||||||
**** Identification of gene families
|
**** Identification of gene families
|
||||||
Based on the homology links between each pair of genes, we construct a undirected weighted graph whose vertices correspond to genes and edges to homology links between them.
|
Based on the homology links between each pair of genes, we construct an undirected weighted graph whose vertices correspond to genes and edges to homology links between them.
|
||||||
We apply a graph clustering algorithm on the graph in order to infer the gene families corresponding to densely connected communities of vertices.
|
We apply a graph clustering algorithm on the graph in order to infer the gene families corresponding to densely connected communities of vertices.
|
||||||
|
|
||||||
FTAG Finder proposes three clustering algorithm alternatives: single linkage, Markov Clustering [cite:@vandongenNewClusterAlgorithm1998] or Walktrap [cite:@ponsComputingCommunitiesLarge2005].
|
|
||||||
**** Detection of TAGs
|
|
||||||
The final step of FTAG Finder consists in the identification of gls:TAG from the gene families and the positions of genes.
|
|
||||||
For a given chromosome, the tool seeks genes belonging to the same family and located close to each other. The tool allows a maximal number of genes between the homologous genes, with a parameter set by the user. Ref:fig:tag-definitions is a schematic representation of some possible gls:TAG positioning on a genome associated with their definition in FTAG Finder /Find Tags/ step.
|
|
||||||
#+begin_export latex
|
#+begin_export latex
|
||||||
\fladdfig{
|
\fladdfig{
|
||||||
\includegraphics[width=.9\linewidth]{./figures/tag-definition.pdf}
|
\includegraphics[width=.9\linewidth]{./figures/tag-definition.pdf}
|
||||||
\caption[Schematic representation of TAG definitions]{\label{fig:tag-definitions} Schematic representation of TAG definitions. Several genes are represented on a linear chromosome. The red box represents a singleton gene. Orange boxes represent a TAG with two duplicate genes separated by 7 other genes ($\mathrm{TAG}_7$). Four green boxes constitute a TAG; the genes at the extremities are separated by three genes ($\mathrm{TAG}_3$). The two blue boxes represent a TAG with two genes next to each other ($\mathrm{TAG}_0$). The curved edges represent the homology links between each pair of genes of a TAG.}}
|
\caption[Schematic representation of TAG definitions]{\label{fig:tag-definitions} Schematic representation of TAG definitions. Several genes are represented on a linear chromosome. The red box represents a singleton gene. Orange boxes represent a TAG with two duplicate genes separated by 7 other genes ($\mathrm{TAG}_7$). Four green boxes constitute a TAG; the genes at the extremities are separated by three genes ($\mathrm{TAG}_3$). The two blue boxes represent a TAG with two genes next to each other ($\mathrm{TAG}_0$). The curved edges represent the homology links between each pair of genes of a TAG.}}
|
||||||
#+end_export
|
#+end_export
|
||||||
|
|
||||||
|
|
||||||
|
FTAG Finder proposes three clustering algorithm alternatives: single linkage, Markov Clustering [cite:@vandongenNewClusterAlgorithm1998] or Walktrap [cite:@ponsComputingCommunitiesLarge2005].
|
||||||
|
**** Detection of TAGs
|
||||||
|
The final step of FTAG Finder consists in the identification of gls:TAG from the gene families and the positions of genes.
|
||||||
|
For a given chromosome, the tool seeks genes belonging to the same family and located close to each other. The tool allows up to a maximal number of genes between the homologous genes, set by a user-defined parameter. Cref:fig:tag-definitions is a schematic representation of some possible gls:TAG positionings on a genome, along with their definition in the FTAG Finder /Find Tags/ step.
|
||||||
|
|
||||||
* Objectives for the internship
|
* Objectives for the internship
|
||||||
** Scientific questions
|
** Scientific questions
|
||||||
The underlying question of FTAG Finder is the study of the evolutionary fate of duplicate genes in Eukaryotes.
|
The underlying question of FTAG Finder is the study of the evolutionary fate of duplicate genes in Eukaryotes.
|
||||||
|
@ -180,22 +180,26 @@ Another objective of my internship will be to port FTAG Finder on a workflow man
|
||||||
We will have to make a choice for the tool we will use.
|
We will have to make a choice for the tool we will use.
|
||||||
The two main options are Snakemake and Nextflow. Snakemake is a Python-powered workflow manager based on rules /à la/ GNU Make [cite:@kosterSnakemakeScalableBioinformatics2012]. Nextflow is a Groovy-powered workflow manager, which relies on the dataflow paradigm [cite:@ditommasoNextflowEnablesReproducible2017]. Both are widely used in the bioinformatics community, and their use has been on the rise since they came out in 2012 and 2013 respectively [cite:@djaffardjyDevelopingReusingBioinformatics2023].
|
The two main options are Snakemake and Nextflow. Snakemake is a Python-powered workflow manager based on rules /à la/ GNU Make [cite:@kosterSnakemakeScalableBioinformatics2012]. Nextflow is a Groovy-powered workflow manager, which relies on the dataflow paradigm [cite:@ditommasoNextflowEnablesReproducible2017]. Both are widely used in the bioinformatics community, and their use has been on the rise since they came out in 2012 and 2013 respectively [cite:@djaffardjyDevelopingReusingBioinformatics2023].
|
||||||
|
|
||||||
#+begin_export html
|
|
||||||
<h3>Bibliography</h3>
|
|
||||||
#+end_export
|
|
||||||
|
|
||||||
#+print_bibliography:
|
|
||||||
|
|
||||||
#+begin_export latex
|
#+begin_export latex
|
||||||
\flstop
|
\flstop
|
||||||
#+end_export
|
#+end_export
|
||||||
|
|
||||||
|
* References
|
||||||
|
:PROPERTIES:
|
||||||
|
:UNNUMBERED: t
|
||||||
|
:END:
|
||||||
|
|
||||||
|
#+begin_export latex
|
||||||
|
\printbibliography[heading=none]
|
||||||
|
#+end_export
|
||||||
|
|
||||||
#+begin_export latex
|
#+begin_export latex
|
||||||
\cleartoleftpage
|
\cleartoleftpage
|
||||||
\clearpairofpagestyles
|
\clearpairofpagestyles
|
||||||
#+end_export
|
#+end_export
|
||||||
|
|
||||||
** Summary
|
|
||||||
|
* Summary
|
||||||
:PROPERTIES:
|
:PROPERTIES:
|
||||||
:UNNUMBERED: t
|
:UNNUMBERED: t
|
||||||
:END:
|
:END:
|
||||||
|
@ -216,6 +220,10 @@ Principle: construct vertex communities based on where an agent would get stuck
|
||||||
|
|
||||||
# LocalWords: speciation Subfunctionalization Neofunctionalization
|
# LocalWords: speciation Subfunctionalization Neofunctionalization
|
||||||
# LocalWords: Pseudogenization
|
# LocalWords: Pseudogenization
|
||||||
|
# Local Variables:
|
||||||
|
# eval: (progn (org-babel-goto-named-src-block "startup") (org-babel-execute-src-block) (outline-hide-sublevels 1))
|
||||||
|
# End:
|
||||||
|
|
||||||
* Setup :noexport:
|
* Setup :noexport:
|
||||||
|
|
||||||
#+name: startup
|
#+name: startup
|
||||||
|
@ -225,7 +233,3 @@ Principle: construct vertex communities based on where an agent would get stuck
|
||||||
|
|
||||||
#+RESULTS: startup
|
#+RESULTS: startup
|
||||||
: Loaded ./setup.el
|
: Loaded ./setup.el
|
||||||
|
|
||||||
# Local Variables:
|
|
||||||
# eval: (progn (org-babel-goto-named-src-block "startup") (org-babel-execute-src-block) (outline-hide-sublevels 1))
|
|
||||||
# End:
|
|
||||||
|
|
BIN
report.pdf (Stored with Git LFS)
BIN
report.pdf (Stored with Git LFS)
Binary file not shown.
|
@ -40,7 +40,7 @@ See: [[https://emacs.stackexchange.com/a/41456/41374]]
|
||||||
\\makeindex
|
\\makeindex
|
||||||
\\makeglossaries "
|
\\makeglossaries "
|
||||||
;("\\part{%s}" . "\\part*{%s}")
|
;("\\part{%s}" . "\\part*{%s}")
|
||||||
("\\chapter{%s}" . "\\chapter{%s}")
|
("\\chapter{%s}" . "\\chapter*{%s}")
|
||||||
("\\section{%s}" . "\\section*{%s}")
|
("\\section{%s}" . "\\section*{%s}")
|
||||||
("\\subsection{%s}" . "\\subsection*{%s}")
|
("\\subsection{%s}" . "\\subsection*{%s}")
|
||||||
("\\subsubsection{%s}" . "\\subsubsection*{%s}")
|
("\\subsubsection{%s}" . "\\subsubsection*{%s}")
|
||||||
|
|
|
@ -50,7 +50,9 @@
|
||||||
citestyle=authoryear-comp,
|
citestyle=authoryear-comp,
|
||||||
backend=biber,
|
backend=biber,
|
||||||
natbib=true
|
natbib=true
|
||||||
]{biblatex}
|
]{biblatex}
|
||||||
|
|
||||||
|
\renewcommand\bibname{References}
|
||||||
|
|
||||||
\RequirePackage{doi}
|
\RequirePackage{doi}
|
||||||
\RequirePackage{xurl}
|
\RequirePackage{xurl}
|
||||||
|
@ -63,7 +65,7 @@
|
||||||
]{doclicense}
|
]{doclicense}
|
||||||
|
|
||||||
\RequirePackage[
|
\RequirePackage[
|
||||||
nameinlink,
|
%nameinlink,
|
||||||
noabbrev
|
noabbrev
|
||||||
]{cleveref}
|
]{cleveref}
|
||||||
|
|
||||||
|
@ -83,7 +85,8 @@
|
||||||
urlcolor=primaryLink,
|
urlcolor=primaryLink,
|
||||||
linkcolor=primaryLink,
|
linkcolor=primaryLink,
|
||||||
anchorcolor=primaryLink,
|
anchorcolor=primaryLink,
|
||||||
citecolor=primaryCite
|
citecolor=primaryCite,
|
||||||
|
%linktoc=page
|
||||||
}
|
}
|
||||||
\newcommand*{\glsplainhyperlink}[2]{%
|
\newcommand*{\glsplainhyperlink}[2]{%
|
||||||
\begingroup%
|
\begingroup%
|
||||||
|
@ -98,7 +101,15 @@
|
||||||
|
|
||||||
\let\oldGls=\Gls
|
\let\oldGls=\Gls
|
||||||
\renewcommand{\Gls}[1]{{\hypersetup{hidelinks}%
|
\renewcommand{\Gls}[1]{{\hypersetup{hidelinks}%
|
||||||
\oldGls{#1}}}%
|
\oldGls{#1}}}%
|
||||||
|
|
||||||
|
\let\oldglspl=\glspl
|
||||||
|
\renewcommand{\glspl}[1]{{\hypersetup{hidelinks}%
|
||||||
|
\oldglspl{#1}}}%}
|
||||||
|
|
||||||
|
\let\oldGlspl=\Glspl
|
||||||
|
\renewcommand{\Glspl}[1]{{\hypersetup{hidelinks}%
|
||||||
|
\oldGlspl{#1}}}%}
|
||||||
|
|
||||||
|
|
||||||
%\renewcommand*{\glstextformat}[1]{\begingroup\hypersetup{hidelinks}#1\endgroup}
|
%\renewcommand*{\glstextformat}[1]{\begingroup\hypersetup{hidelinks}#1\endgroup}
|
||||||
|
|
Loading…
Reference in New Issue