Remove duplicate gls entry

Add tag definition figure in SVG format
feat: Add duplicate gene fate in HTML export
2024-04-19 22:09:59 +02:00 · 2024-04-19 21:46:30 +02:00 · 2024-04-19 21:25:39 +02:00 · 2024-04-19 18:02:40 +02:00 · 2024-04-19 17:56:27 +02:00 · 2024-04-19 05:15:33 +02:00
22 changed files with 6264 additions and 312 deletions
--- a/.latexmkrc
+++ b/.latexmkrc
@ -2,7 +2,7 @@ sub createFolderStructure{
   system("bash ./folder-structure.sh");
 }
-createFolderStructure();
+# createFolderStructure();
 $hash_calc_ignore_pattern{aux} =
 '^\\\\gdef\\\\minted@oldcachelist\{,'
--- a/4
+++ b/4
@ -1,4 +1,4 @@
-OPTIONS=-shell-escape -file-line-error -synctex=1 -interaction=batchmode
+OPTIONS=-shell-escape -file-line-error -synctex=1
 SOURCE=report
 all: latexmk
 debug: 
@ -16,4 +16,6 @@ bib:
 glossaries:
 	makeglossaries -d build $(SOURCE)
 index:
 	makeindex -d build $(SOURCE)
 .PHONY: build
--- a/figures/Evolution_fate_duplicate_genes.pdf
+++ b/figures/Evolution_fate_duplicate_genes.pdf
--- a/figures/Evolution_fate_duplicate_genes.svg
+++ b/figures/Evolution_fate_duplicate_genes.svg
--- a/figures/lallemand2020-fig1_copy.pdf
+++ b/figures/lallemand2020-fig1_copy.pdf
--- a/figures/lallemand2020-fig1_copy.svg
+++ b/figures/lallemand2020-fig1_copy.svg
--- a/figures/tag-definition-figure.tex
+++ b/figures/tag-definition-figure.tex
@ -0,0 +1,3 @@
 % Caption for the TAG definition figure.
 % The optional [...] argument is the short caption; the {...} argument is
 % the full caption text.
 \caption[Tandemly Arrayed Genes (TAG) definitions]{
  Tandemly Arrayed Genes (TAG) definitions.
 }
--- a/figures/tag-definition.pdf
+++ b/figures/tag-definition.pdf
--- a/figures/tag-definition.svg
+++ b/figures/tag-definition.svg
@ -0,0 +1,117 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="341.353pt" height="96.089pt" viewBox="0 0 341.353 96.089" version="1.1">
 <defs>
 <g>
 <symbol overflow="visible" id="glyph0-0">
 <path style="stroke:none;" d=""/>
 </symbol>
 <symbol overflow="visible" id="glyph0-1">
 <path style="stroke:none;" d="M 0.953125 -6.9375 L -2.640625 -3.921875 L -1.75 -2.578125 L -1.5625 -2.75 C -2.25 -3.734375 -2.1875 -3.96875 -1.375 -4.65625 C -1.265625 -4.734375 -1.109375 -4.875 -1.046875 -4.921875 C -0.921875 -5 -0.875 -4.9375 -0.765625 -4.8125 L 1.546875 -2.046875 C 1.703125 -1.875 1.765625 -1.796875 1.203125 -1.328125 L 0.984375 -1.140625 L 1.140625 -0.953125 C 1.390625 -1.203125 1.890625 -1.625 2.15625 -1.859375 C 2.4375 -2.09375 2.921875 -2.5 3.203125 -2.6875 L 3.046875 -2.875 L 2.828125 -2.6875 C 2.28125 -2.21875 2.21875 -2.296875 2.0625 -2.46875 L -0.25 -5.234375 C -0.359375 -5.359375 -0.40625 -5.421875 -0.3125 -5.546875 C -0.265625 -5.578125 -0.109375 -5.71875 0 -5.8125 C 0.296875 -6.0625 0.53125 -6.25 0.796875 -6.3125 C 1.15625 -6.359375 1.484375 -6.0625 1.515625 -6.03125 L 1.90625 -5.65625 L 2.09375 -5.8125 Z M 0.953125 -6.9375 "/>
 </symbol>
 <symbol overflow="visible" id="glyph0-2">
 <path style="stroke:none;" d="M -0.703125 -5.703125 C -0.78125 -5.75 -0.84375 -5.78125 -0.9375 -5.703125 C -1.046875 -5.609375 -1.03125 -5.546875 -1 -5.4375 L 0.390625 -1.28125 C 0.453125 -1.125 0.5625 -0.796875 0.078125 -0.390625 L 0.234375 -0.203125 C 0.40625 -0.359375 0.59375 -0.546875 0.78125 -0.6875 C 0.984375 -0.875 1.40625 -1.171875 1.4375 -1.203125 L 1.28125 -1.390625 C 1.046875 -1.1875 0.796875 -1.171875 0.671875 -1.3125 C 0.640625 -1.359375 0.625 -1.390625 0.609375 -1.453125 L 0.3125 -2.3125 L 1.828125 -3.59375 L 2.796875 -3.09375 C 2.8125 -3.0625 2.875 -3.046875 2.890625 -3.015625 C 3.03125 -2.859375 2.734375 -2.625 2.59375 -2.5 L 2.75 -2.3125 C 3 -2.546875 3.34375 -2.859375 3.53125 -3 C 3.78125 -3.21875 4.203125 -3.53125 4.25 -3.5625 L 4.09375 -3.75 L 4 -3.671875 C 3.640625 -3.375 3.5625 -3.40625 3.375 -3.5 Z M -0.546875 -4.890625 L 1.59375 -3.71875 L 0.234375 -2.578125 Z M -0.546875 -4.890625 "/>
 </symbol>
 <symbol overflow="visible" id="glyph0-3">
 <path style="stroke:none;" d="M 3.09375 -4.375 C 2.890625 -4.609375 2.953125 -4.671875 3.3125 -4.96875 L 3.15625 -5.15625 C 3.109375 -5.109375 2.75 -4.78125 2.484375 -4.5625 C 2.1875 -4.3125 1.796875 -3.984375 1.5 -3.765625 L 1.65625 -3.578125 L 1.859375 -3.734375 C 2.390625 -4.1875 2.453125 -4.109375 2.59375 -3.953125 L 2.890625 -3.59375 C 2.984375 -3.484375 3.15625 -3.28125 2.96875 -2.875 C 2.84375 -2.515625 2.546875 -2.265625 2.46875 -2.203125 C 1.859375 -1.6875 0.59375 -1.234375 -0.578125 -2.625 C -1.765625 -4.046875 -1.015625 -5.203125 -0.5 -5.65625 C 0.015625 -6.078125 0.96875 -6.265625 1.859375 -5.453125 C 1.921875 -5.40625 1.96875 -5.4375 2 -5.46875 C 2.09375 -5.546875 2.078125 -5.59375 1.984375 -5.703125 L 0.9375 -6.9375 C 0.859375 -7.03125 0.8125 -7.09375 0.75 -7.046875 C 0.71875 -7.015625 0.703125 -7 0.71875 -6.875 L 0.78125 -6.25 C 0.28125 -6.296875 -0.265625 -6.1875 -0.734375 -5.796875 C -1.859375 -4.859375 -2.0625 -3.203125 -1.15625 -2.140625 C -0.265625 -1.078125 1.375 -0.96875 2.53125 -1.921875 C 2.828125 -2.1875 3.359375 -2.734375 3.296875 -3.328125 C 3.5 -3.25 3.859375 -3.234375 3.90625 -3.28125 C 3.96875 -3.328125 3.921875 -3.390625 3.84375 -3.46875 Z M 3.09375 -4.375 "/>
 </symbol>
 <symbol overflow="visible" id="glyph1-0">
 <path style="stroke:none;" d=""/>
 </symbol>
 <symbol overflow="visible" id="glyph1-1">
 <path style="stroke:none;" d="M 0.453125 -4.28125 C 0.46875 -4.359375 0.46875 -4.359375 0.453125 -4.390625 L 0.375 -4.484375 L -0.5625 -3.703125 C -0.65625 -3.625 -1.234375 -3.1875 -1.3125 -3.140625 C -1.40625 -3.078125 -1.4375 -3.109375 -1.5 -3.140625 L -1.65625 -3.015625 L -1.078125 -2.03125 L -0.921875 -2.15625 C -0.96875 -2.234375 -1.125 -2.5 -1.09375 -2.625 C -1.078125 -2.640625 -0.71875 -2.953125 -0.65625 -3 L 0.25 -3.765625 C 0.234375 -3.4375 0.203125 -3.0625 0.203125 -2.75 C 0.234375 -1.796875 0.640625 -1.203125 0.84375 -0.953125 C 1.015625 -0.75 1.15625 -0.84375 1.234375 -0.90625 C 1.4375 -1.078125 1.28125 -1.265625 1.25 -1.3125 L 1.1875 -1.390625 C 0.609375 -2.0625 0.40625 -2.609375 0.421875 -3.046875 Z M 0.453125 -4.28125 "/>
 </symbol>
 <symbol overflow="visible" id="glyph1-2">
 <path style="stroke:none;" d="M 0.1875 -2.3125 C 0.546875 -2.625 0.96875 -2.59375 1.28125 -2.21875 C 1.609375 -1.828125 1.546875 -1.421875 1.203125 -1.140625 C 1.15625 -1.109375 0.703125 -0.71875 0.34375 -0.765625 C 0.484375 -0.90625 0.4375 -1.0625 0.359375 -1.15625 C 0.234375 -1.296875 0.046875 -1.296875 -0.09375 -1.1875 C -0.203125 -1.078125 -0.234375 -0.921875 -0.09375 -0.75 C 0.234375 -0.359375 0.8125 -0.546875 1.34375 -1 C 1.96875 -1.515625 2.03125 -2.21875 1.71875 -2.59375 C 1.453125 -2.90625 0.96875 -2.9375 0.453125 -2.671875 C 0.78125 -3.203125 0.65625 -3.59375 0.484375 -3.8125 C 0.21875 -4.125 -0.359375 -4.015625 -0.828125 -3.609375 C -1.3125 -3.21875 -1.53125 -2.71875 -1.234375 -2.375 C -1.09375 -2.1875 -0.890625 -2.25 -0.828125 -2.3125 C -0.71875 -2.40625 -0.6875 -2.59375 -0.796875 -2.734375 C -0.875 -2.8125 -1.015625 -2.875 -1.171875 -2.796875 C -1.15625 -3.09375 -0.8125 -3.390625 -0.734375 -3.46875 C -0.453125 -3.703125 -0.140625 -3.734375 0.078125 -3.46875 C 0.265625 -3.234375 0.421875 -2.765625 0.03125 -2.421875 C -0.078125 -2.328125 -0.078125 -2.3125 -0.1875 -2.21875 C -0.21875 -2.1875 -0.28125 -2.140625 -0.21875 -2.078125 C -0.171875 -2.015625 -0.125 -2.046875 -0.0625 -2.109375 Z M 0.1875 -2.3125 "/>
 </symbol>
 <symbol overflow="visible" id="glyph2-0">
 <path style="stroke:none;" d=""/>
 </symbol>
 <symbol overflow="visible" id="glyph2-1">
 <path style="stroke:none;" d="M 0.953125 -6.9375 L -2.640625 -3.921875 L -1.75 -2.578125 L -1.5625 -2.75 C -2.25 -3.734375 -2.1875 -3.96875 -1.375 -4.65625 C -1.265625 -4.734375 -1.109375 -4.875 -1.046875 -4.921875 C -0.921875 -5 -0.875 -4.9375 -0.765625 -4.8125 L 1.546875 -2.046875 C 1.703125 -1.875 1.765625 -1.796875 1.203125 -1.328125 L 0.984375 -1.140625 L 1.140625 -0.953125 C 1.390625 -1.203125 1.890625 -1.625 2.15625 -1.859375 C 2.4375 -2.09375 2.921875 -2.5 3.203125 -2.6875 L 3.046875 -2.875 L 2.828125 -2.6875 C 2.28125 -2.21875 2.21875 -2.296875 2.0625 -2.46875 L -0.25 -5.234375 C -0.359375 -5.359375 -0.40625 -5.421875 -0.3125 -5.546875 C -0.265625 -5.578125 -0.109375 -5.71875 0 -5.8125 C 0.296875 -6.0625 0.53125 -6.25 0.796875 -6.3125 C 1.15625 -6.359375 1.484375 -6.0625 1.515625 -6.03125 L 1.90625 -5.65625 L 2.09375 -5.8125 Z M 0.953125 -6.9375 "/>
 </symbol>
 <symbol overflow="visible" id="glyph2-2">
 <path style="stroke:none;" d="M -0.703125 -5.703125 C -0.78125 -5.75 -0.84375 -5.78125 -0.9375 -5.703125 C -1.046875 -5.609375 -1.03125 -5.546875 -1 -5.4375 L 0.390625 -1.28125 C 0.453125 -1.125 0.5625 -0.796875 0.078125 -0.390625 L 0.234375 -0.203125 C 0.40625 -0.359375 0.59375 -0.546875 0.78125 -0.6875 C 0.984375 -0.875 1.40625 -1.171875 1.4375 -1.203125 L 1.28125 -1.390625 C 1.046875 -1.1875 0.796875 -1.171875 0.671875 -1.3125 C 0.640625 -1.359375 0.625 -1.390625 0.609375 -1.453125 L 0.3125 -2.3125 L 1.828125 -3.59375 L 2.796875 -3.09375 C 2.8125 -3.0625 2.875 -3.046875 2.890625 -3.015625 C 3.03125 -2.859375 2.734375 -2.625 2.59375 -2.5 L 2.75 -2.3125 C 3 -2.546875 3.34375 -2.859375 3.53125 -3 C 3.78125 -3.21875 4.203125 -3.53125 4.25 -3.5625 L 4.09375 -3.75 L 4 -3.671875 C 3.640625 -3.375 3.5625 -3.40625 3.375 -3.5 Z M -0.546875 -4.890625 L 1.59375 -3.71875 L 0.234375 -2.578125 Z M -0.546875 -4.890625 "/>
 </symbol>
 <symbol overflow="visible" id="glyph2-3">
 <path style="stroke:none;" d="M 3.09375 -4.375 C 2.890625 -4.609375 2.953125 -4.671875 3.3125 -4.96875 L 3.15625 -5.15625 C 3.109375 -5.109375 2.75 -4.78125 2.484375 -4.5625 C 2.1875 -4.3125 1.796875 -3.984375 1.5 -3.765625 L 1.65625 -3.578125 L 1.859375 -3.734375 C 2.390625 -4.1875 2.453125 -4.109375 2.59375 -3.953125 L 2.890625 -3.59375 C 2.984375 -3.484375 3.15625 -3.28125 2.96875 -2.875 C 2.84375 -2.515625 2.546875 -2.265625 2.46875 -2.203125 C 1.859375 -1.6875 0.59375 -1.234375 -0.578125 -2.625 C -1.765625 -4.046875 -1.015625 -5.203125 -0.5 -5.65625 C 0.015625 -6.078125 0.96875 -6.265625 1.859375 -5.453125 C 1.921875 -5.40625 1.96875 -5.4375 2 -5.46875 C 2.09375 -5.546875 2.078125 -5.59375 1.984375 -5.703125 L 0.9375 -6.9375 C 0.859375 -7.03125 0.8125 -7.09375 0.75 -7.046875 C 0.71875 -7.015625 0.703125 -7 0.71875 -6.875 L 0.78125 -6.25 C 0.28125 -6.296875 -0.265625 -6.1875 -0.734375 -5.796875 C -1.859375 -4.859375 -2.0625 -3.203125 -1.15625 -2.140625 C -0.265625 -1.078125 1.375 -0.96875 2.53125 -1.921875 C 2.828125 -2.1875 3.359375 -2.734375 3.296875 -3.328125 C 3.5 -3.25 3.859375 -3.234375 3.90625 -3.28125 C 3.96875 -3.328125 3.921875 -3.390625 3.84375 -3.46875 Z M 3.09375 -4.375 "/>
 </symbol>
 <symbol overflow="visible" id="glyph3-0">
 <path style="stroke:none;" d=""/>
 </symbol>
 <symbol overflow="visible" id="glyph3-1">
 <path style="stroke:none;" d="M 1.265625 -3.140625 C 1.046875 -3.421875 0.171875 -4.453125 -0.828125 -3.609375 C -1.828125 -2.78125 -0.953125 -1.75 -0.734375 -1.46875 C -0.5 -1.203125 0.359375 -0.171875 1.359375 -1 C 2.359375 -1.84375 1.5 -2.875 1.265625 -3.140625 Z M 1.25 -1.125 C 1.109375 -1.015625 0.765625 -0.796875 0.328125 -1.109375 C 0.109375 -1.265625 -0.109375 -1.53125 -0.375 -1.84375 C -0.625 -2.140625 -0.84375 -2.390625 -0.96875 -2.65625 C -1.140625 -3.0625 -0.890625 -3.375 -0.734375 -3.5 C -0.453125 -3.75 -0.125 -3.78125 0.1875 -3.5625 C 0.40625 -3.421875 0.671875 -3.109375 0.859375 -2.875 C 1.078125 -2.609375 1.34375 -2.296875 1.46875 -2.03125 C 1.6875 -1.5625 1.40625 -1.265625 1.25 -1.125 Z M 1.25 -1.125 "/>
 </symbol>
 </g>
 <clipPath id="clip1">
  <path d="M 0 74 L 341.351562 74 L 341.351562 76 L 0 76 Z M 0 74 "/>
 </clipPath>
 <clipPath id="clip2">
  <path d="M 67 76 L 206 76 L 206 96.089844 L 67 96.089844 Z M 67 76 "/>
 </clipPath>
 <clipPath id="clip3">
  <path d="M 67 76 L 206 76 L 206 96.089844 L 67 96.089844 Z M 67 76 "/>
 </clipPath>
 </defs>
 <g id="surface1">
 <g clip-path="url(#clip1)" clip-rule="nonzero">
 <path style="fill:none;stroke-width:1.19553;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M -170.08225 -0.0001875 L 170.081813 -0.0001875 " transform="matrix(1,0,0,-1,170.676,74.992)"/>
 </g>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,50%,0%);fill-opacity:1;" d="M 14.769531 82.078125 L 43.117188 82.078125 L 43.117188 67.90625 L 14.769531 67.90625 Z M 14.769531 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;" d="M 50.601562 82.078125 L 78.949219 82.078125 L 78.949219 67.90625 L 50.601562 67.90625 Z M 50.601562 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,0%,0%);fill-opacity:1;" d="M 86.433594 82.078125 L 114.78125 82.078125 L 114.78125 67.90625 L 86.433594 67.90625 Z M 86.433594 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;" d="M 122.265625 82.078125 L 150.609375 82.078125 L 150.609375 67.90625 L 122.265625 67.90625 Z M 122.265625 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;" d="M 158.097656 82.078125 L 186.441406 82.078125 L 186.441406 67.90625 L 158.097656 67.90625 Z M 158.097656 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;" d="M 193.929688 82.078125 L 222.273438 82.078125 L 222.273438 67.90625 L 193.929688 67.90625 Z M 193.929688 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,31.373596%,70.588684%);fill-opacity:1;" d="M 229.761719 82.078125 L 258.105469 82.078125 L 258.105469 67.90625 L 229.761719 67.90625 Z M 229.761719 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,31.373596%,70.588684%);fill-opacity:1;" d="M 265.59375 82.078125 L 293.9375 82.078125 L 293.9375 67.90625 L 265.59375 67.90625 Z M 265.59375 82.078125 "/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,50%,0%);fill-opacity:1;" d="M 301.425781 82.078125 L 329.769531 82.078125 L 329.769531 67.90625 L 301.425781 67.90625 Z M 301.425781 82.078125 "/>
 <path style="fill:none;stroke-width:0.3985;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(100%,50%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M -133.058812 7.284969 C -52.609594 74.792781 55.796656 74.792781 136.245875 7.284969 " transform="matrix(1,0,0,-1,170.676,74.992)"/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 174.382812 17.074219 C 174.382812 15.910156 173.4375 14.960938 172.269531 14.960938 C 171.101562 14.960938 170.15625 15.910156 170.15625 17.074219 C 170.15625 18.242188 171.101562 19.1875 172.269531 19.1875 C 173.4375 19.1875 174.382812 18.242188 174.382812 17.074219 Z M 174.382812 17.074219 "/>
 <g style="fill:rgb(100%,50%,0%);fill-opacity:1;">
  <use xlink:href="#glyph0-1" x="33.278" y="65.453"/>
 </g>
 <g style="fill:rgb(100%,50%,0%);fill-opacity:1;">
  <use xlink:href="#glyph0-2" x="37.143016" y="62.209802"/>
 </g>
 <g style="fill:rgb(100%,50%,0%);fill-opacity:1;">
  <use xlink:href="#glyph0-3" x="41.48241" y="58.568544"/>
 </g>
 <g style="fill:rgb(100%,50%,0%);fill-opacity:1;">
  <use xlink:href="#glyph1-1" x="46.844673" y="55.370452"/>
 </g>
 <g clip-path="url(#clip2)" clip-rule="nonzero">
 <path style="fill:none;stroke-width:0.3985;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0.390625%,49.01886%,47.451782%);stroke-opacity:1;stroke-miterlimit:10;" d="M -97.222875 -7.285344 C -81.000219 -20.898625 -59.140844 -20.898625 -42.918187 -7.285344 M -25.558812 -7.285344 C -20.039281 -11.91425 -12.605687 -11.91425 -7.086156 -7.285344 M 10.273219 -7.285344 C 15.79275 -11.91425 23.226344 -11.91425 28.745875 -7.285344 " transform="matrix(1,0,0,-1,170.676,74.992)"/>
 </g>
 <path style="fill:none;stroke-width:0.3985;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0.390625%,49.01886%,47.451782%);stroke-opacity:1;stroke-miterlimit:10;" d="M -97.226781 7.284969 C -70.297094 29.882625 -34.011937 29.882625 -7.08225 7.284969 M -97.226781 7.284969 C -59.593969 38.867 -8.883031 38.867 28.749781 7.284969 " transform="matrix(1,0,0,-1,170.676,74.992)"/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 138.550781 44.023438 C 138.550781 42.855469 137.605469 41.910156 136.4375 41.910156 C 135.269531 41.910156 134.324219 42.855469 134.324219 44.023438 C 134.324219 45.1875 135.269531 46.136719 136.4375 46.136719 C 137.605469 46.136719 138.550781 45.1875 138.550781 44.023438 Z M 138.550781 44.023438 "/>
 <g style="fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;">
  <use xlink:href="#glyph0-1" x="69.109" y="65.453"/>
 </g>
 <g style="fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;">
  <use xlink:href="#glyph0-2" x="72.974016" y="62.209802"/>
 </g>
 <g style="fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;">
  <use xlink:href="#glyph0-3" x="77.31341" y="58.568544"/>
 </g>
 <g style="fill:rgb(0.390625%,49.01886%,47.451782%);fill-opacity:1;">
  <use xlink:href="#glyph1-2" x="82.675673" y="55.370452"/>
 </g>
 <g clip-path="url(#clip3)" clip-rule="nonzero">
 <path style="fill:none;stroke-width:0.3985;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0.390625%,49.01886%,47.451782%);stroke-opacity:1;stroke-miterlimit:10;" d="M -97.222875 -7.285344 C -81.000219 -20.898625 -59.140844 -20.898625 -42.918187 -7.285344 M -25.558812 -7.285344 C -9.336156 -20.898625 12.523219 -20.898625 28.745875 -7.285344 " transform="matrix(1,0,0,-1,170.676,74.992)"/>
 </g>
 <path style="fill:none;stroke-width:0.3985;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,31.373596%,70.588684%);stroke-opacity:1;stroke-miterlimit:10;" d="M 81.937281 7.284969 C 87.456813 11.917781 94.890406 11.917781 100.409938 7.284969 " transform="matrix(1,0,0,-1,170.676,74.992)"/>
 <path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 263.964844 64.234375 C 263.964844 63.066406 263.015625 62.121094 261.851562 62.121094 C 260.683594 62.121094 259.738281 63.066406 259.738281 64.234375 C 259.738281 65.402344 260.683594 66.347656 261.851562 66.347656 C 263.015625 66.347656 263.964844 65.402344 263.964844 64.234375 Z M 263.964844 64.234375 "/>
 <g style="fill:rgb(0%,31.373596%,70.588684%);fill-opacity:1;">
  <use xlink:href="#glyph2-1" x="248.273" y="65.453"/>
 </g>
 <g style="fill:rgb(0%,31.373596%,70.588684%);fill-opacity:1;">
  <use xlink:href="#glyph2-2" x="252.138116" y="62.209751"/>
 </g>
 <g style="fill:rgb(0%,31.373596%,70.588684%);fill-opacity:1;">
  <use xlink:href="#glyph2-3" x="256.477625" y="58.568437"/>
 </g>
 <g style="fill:rgb(0%,31.373596%,70.588684%);fill-opacity:1;">
  <use xlink:href="#glyph3-1" x="261.840021" y="55.370303"/>
 </g>
 </g>
 </svg>
--- a/figures/tag-definition.tex
+++ b/figures/tag-definition.tex
@ -0,0 +1,49 @@
 % TAG (Tandemly Arrayed Genes) definitions
 % Figure inspired by fig. 1 of the Charles 2023 internship report.
 %
 % Nine gene blocks are placed along a horizontal DNA line. Arcs ("homology
 % links") join blocks of the same colour; three of them carry a TAG_n label.
 \documentclass[tikz]{standalone}
 \usepackage{tikz}
 % Libraries, colours and styles are declared once, in the preamble.
 \usetikzlibrary{positioning}
 \usetikzlibrary{decorations.pathreplacing}
 \definecolor{lammegreen}{HTML}{017d79}
 \definecolor{lammeblue}{HTML}{0050b4}
 \tikzset{
   % Filled black dot placed on a labelled link.
   dot/.style={fill=black, circle, inner sep=1.5pt},
   % Small label following the slope of the link, set above its start.
   nod/.style={sloped, at start, xshift=3mm, font=\scriptsize, above},
   % Shape shared by every gene block; the fill colour is given per node.
   gene/.style={rectangle, minimum height=0.5cm, minimum width=1cm},
 }
 \begin{document}
 \begin{tikzpicture}[node distance=0.25]
  % DNA line
  \draw[-,very thick] (-6,0) to (6,0);
  % Gene blocks, chained left to right with the `positioning' library
  \node[gene,fill=orange] (orange1) at (-5,0) {};
  \node[gene,fill=lammegreen] (green1) [right=of orange1] {};
  \node[gene,fill=red] (red1) [right=of green1] {};
  \node[gene,fill=lammegreen] (green2) [right=of red1] {};
  \node[gene,fill=lammegreen] (green3) [right=of green2] {};
  \node[gene,fill=lammegreen] (green4) [right=of green3] {};
  \node[gene,fill=lammeblue] (blue1) [right=of green4] {};
  \node[gene,fill=lammeblue] (blue2) [right=of blue1] {};
  \node[gene,fill=orange] (orange2) [right=of blue2] {};
  % Homology links. In this drawing the subscript n of each TAG_n label
  % equals the number of gene blocks lying between the two linked genes.
  \draw[-,orange, bend left=40] (orange1) to coordinate[dot] node[nod] {$\mathrm{TAG}_7$} (orange2);
  % Arcs below the DNA line; each one is drawn exactly once.
  \draw[-,lammegreen,bend right=40] (green1) to (green2)
  (green2) to (green3)
  (green3) to (green4)
  (green2) to (green4);
  % Arcs above the DNA line.
  \draw[-,lammegreen,bend left=40] (green1) to (green3)
  (green1) to coordinate[dot] node[nod] {$\mathrm{TAG}_3$}(green4);
  \draw[-,lammeblue, bend left=40] (blue1) to  coordinate[dot] node[nod] {$\mathrm{TAG}_0$} (blue2);
 \end{tikzpicture}
 \end{document}
--- a/folder-structure.sh
+++ b/folder-structure.sh
@ -1,8 +0,0 @@
 #!/bin/sh
 # Recreate the directory tree found under ./content inside ./build
 # (e.g. ./content/a/b becomes build/content/a/b). Directories only; no
 # files are copied.
 mkdir -p build
 # Nothing to mirror when there is no content directory.
 [ -d ./content ] || exit 0
 # Hand the directory names straight from find to mkdir. No intermediate
 # list file and no xargs word-splitting, so names containing spaces or
 # quotes are handled correctly.
 find ./content -type d -exec sh -c '
   for dir do
     mkdir -p "build/$dir"
   done
 ' sh {} +
--- a/media/dummy.png
+++ b/media/dummy.png
--- a/references.bib
+++ b/references.bib
@ -33,6 +33,19 @@
  langid = {english}
 }
@article{assisModelsRetentionDuplicate2024,
  title = {Models for the Retention of Duplicate Genes and Their Biological Underpinnings},
  author = {Assis, Raquel and Conant, Gavin and Holland, Barbara and Liberles, David and O'Reilly, Małgorzata and Wilson, Amanda},
  date = {2024-02-12},
  journaltitle = {F1000Research},
  shortjournal = {F1000Research},
  volume = {12},
  pages = {1400},
  doi = {10.12688/f1000research.141786.2},
  abstract = {Gene content in genomes changes through several different processes, with gene duplication being an important contributor to such changes. Gene duplication occurs over a range of scales from individual genes to whole genomes, and the dynamics of this process can be context dependent. Still, there are rules by which genes are retained or lost from genomes after duplication, and probabilistic modeling has enabled characterization of these rules, including their context-dependence. Here, we describe the biology and corresponding mathematical models that are used to understand duplicate gene retention and its contribution to the set of biochemical functions encoded in a genome.},
  keywords = {2read}
 }
@article{beallIdentificationAnalysisHyperactive2002,
  title = {Identification and {{Analysis}} of a {{Hyperactive Mutant Form}} of {{Drosophila P-Element Transposase}}},
  author = {Beall, Eileen L and Mahoney, Matthew B and Rio, Donald C},
@ -49,6 +62,24 @@
  abstract = {Transposition in many organisms is regulated to control the frequency of DNA damage caused by the DNA breakage and joining reactions. However, genetic studies in prokaryotic systems have led to the isolation of mutant transposase proteins with higher or novel activities compared to those of the wild-type protein. In the course of our study of the effects of mutating potential ATM-family DNA damage checkpoint protein kinase sites in the Drosophila P-element transposase protein, we found one mutation, S129A, that resulted in an elevated level of transposase activity using in vivo recombination assays, including P-element-mediated germline transformation. In vitro assays for P-element transposase activity indicate that the S129A mutant exhibits elevated donor DNA cleavage activity when compared to the wild-type protein, whereas the strand-transfer activity is similar to that of wild type. This difference may reflect the nature of the in vitro assays and that normally in vivo the two reactions may proceed in concert. The P-element transposase protein contains 10 potential consensus phosphorylation sites for the ATM family of PI3-related protein kinases. Of these 10 sites, 8 affect transposase activity either positively or negatively when substituted individually with alanine and tested in vivo. A mutant transposase protein that contains all eight N-terminal serine and threonine residues substituted with alanine is inactive and can be restored to full activity by substitution of wild-type amino acids back at only 3 of the 8 positions. These data suggest that the activity of P-element transposase may be regulated by phosphorylation and demonstrate that one mutation, S129A, results in hyperactive transposition.}
 }
@article{berthelotRainbowTroutGenome2014,
  title = {The Rainbow Trout Genome Provides Novel Insights into Evolution after Whole-Genome Duplication in Vertebrates},
  author = {Berthelot, Camille and Brunet, Frédéric and Chalopin, Domitille and Juanchich, Amélie and Bernard, Maria and Noël, Benjamin and Bento, Pascal and Da Silva, Corinne and Labadie, Karine and Alberti, Adriana and Aury, Jean-Marc and Louis, Alexandra and Dehais, Patrice and Bardou, Philippe and Montfort, Jérôme and Klopp, Christophe and Cabau, Cédric and Gaspin, Christine and Thorgaard, Gary H. and Boussaha, Mekki and Quillet, Edwige and Guyomard, René and Galiana, Delphine and Bobe, Julien and Volff, Jean-Nicolas and Genêt, Carine and Wincker, Patrick and Jaillon, Olivier and Roest Crollius, Hugues and Guiguen, Yann},
  date = {2014-04-22},
  journaltitle = {Nature Communications},
  shortjournal = {Nat Commun},
  volume = {5},
  eprint = {24755649},
  eprinttype = {pmid},
  pages = {3657},
  issn = {2041-1723},
  doi = {10.1038/ncomms4657},
  abstract = {Vertebrate evolution has been shaped by several rounds of whole-genome duplications (WGDs) that are often suggested to be associated with adaptive radiations and evolutionary innovations. Due to an additional round of WGD, the rainbow trout genome offers a unique opportunity to investigate the early evolutionary fate of a duplicated vertebrate genome. Here we show that after 100 million years of evolution the two ancestral subgenomes have remained extremely collinear, despite the loss of half of the duplicated protein-coding genes, mostly through pseudogenization. In striking contrast is the fate of miRNA genes that have almost all been retained as duplicated copies. The slow and stepwise rediploidization process characterized here challenges the current hypothesis that WGD is followed by massive and rapid genomic reorganizations and gene deletions.},
  langid = {english},
  pmcid = {PMC4071752},
  keywords = {2read,Animals,Evolution Molecular,Gene Duplication,Oncorhynchus mykiss,Vertebrates}
 }
@article{blankenbergGalaxyWebbasedGenome2010,
  title = {Galaxy: A Web-Based Genome Analysis Tool for Experimentalists},
  shorttitle = {Galaxy},
@ -93,6 +124,49 @@
  keywords = {Computational biology and bioinformatics,Genome informatics,Genomic analysis,Sequencing,Software}
 }
@unpublished{caronCyberGalaxy2013,
  title = {Towards a Cyber {{Galaxy}} ?},
  author = {Caron, Christophe C. and Carré, Wilfrid and Cormier, Alexandre and Derozier, Sandra S. and Giacomoni, Franck and Inizan, Olivier and Le Corguillé, Gildas and Lermine, Alban and Maman Haddad, Sarah and Pericard, Pierre and Samson, Franck F.},
  date = {2013-07},
  series = {{{JOBIM TOULOUSE}} 2013 - {{RÉSUMÉS COURTS}} (Affiches)},
  pages = {246},
  url = {https://hal.inrae.fr/hal-02748994},
  urldate = {2024-04-09},
  abstract = {The success of the open web based platform “Galaxy” is growing among diverse scientific communities. The French Institute of Bioinformatics - IFB wish to initiate a collaborative work dedicated to scientific workflows and especially to the platform Galaxy. We report here the main items on which future collaborations could be build: (i) software and hardware architecture, (ii) tools integration and (iii) training.},
  keywords = {formation,galaxy,intégration d'outils,NGS,partage de données,workflow},
  annotation = {Published: JOBIM 2013}
 }
@unpublished{caronFrenchCyberGalaxy2013,
  title = {Toward a {{French}} Cyber {{Galaxy}} ?},
  author = {Caron, Christophe C. and Carré, Wilfrid and Cormier, Alexandre and Derozier, Sandra S. and Giacomoni, Franck and Inizan, Olivier and Le Corguillé, Gildas and Lermine, Alban and Maman Haddad, Sarah and Pericard, Pierre and Samson, Franck F.},
  date = {2013-06},
  series = {Galaxy {{Community Conference}} 2013 : {{Posters}} / {{Abstracts}}},
  pages = {online},
  url = {https://hal.inrae.fr/hal-02748274},
  urldate = {2024-04-09},
  abstract = {The success of the open web based platform “Galaxy” is growing among scientific communities. The French Institute of Bioinformatics (IFB) wishes to initiate a collaborative work dedicated to scientific workflows and especially to the Galaxy platform. We report here the main items on which future collaborations could be build: (i) software and hardware architecture, (ii) tools integration and (iii) training. High throughput technologies advent significantly alters analysis behaviour and strategy with mobilization of new infrastructure, new tools and new skills. IFB decided to conduct a cross action on "workflows" data analysis solutions, and especially on the Galaxy platform. The first item called "software and hardware architecture" addresses the operational issues in production environments, the potential for automating deployment tasks and the monitoring solutions for Galaxy servers. With the second one, "Tools integration" we aim to provide processes facilitating tool interfacing in a Galaxy instance. Priority will be the development of a good practice guide, as well as a technology watch around the methods proposed by the international community. We also want to promote the sharing of training activities at national level (such as the Aviesan Bioinformatics school, January 2013 - http://galaxy-ecole.sb-roscoff.fr/) and ensure a smooth transition to new uses, such as E-learning. A first working group is already effective. Previous items will be improved in the coming months thanks to a specific dedicated wiki and the first French Galaxy Workshop this autumn.},
  keywords = {data sharing,Galaxy,NGS,tools integration,training,workflow},
  annotation = {Published: Galaxy Community Conference}
 }
@article{casneufNonrandomDivergenceGene2006,
  title = {Nonrandom Divergence of Gene Expression Following Gene and Genome Duplications in the Flowering Plant {{Arabidopsis}} Thaliana},
  author = {Casneuf, Tineke and De Bodt, Stefanie and Raes, Jeroen and Maere, Steven and Van de Peer, Yves},
  date = {2006-02-20},
  journaltitle = {Genome Biology},
  shortjournal = {Genome Biology},
  volume = {7},
  number = {2},
  pages = {R13},
  issn = {1474-760X},
  doi = {10.1186/gb-2006-7-2-r13},
  url = {https://doi.org/10.1186/gb-2006-7-2-r13},
  urldate = {2024-04-13},
  abstract = {Genome analyses have revealed that gene duplication in plants is rampant. Furthermore, many of the duplicated genes seem to have been created through ancient genome-wide duplication events. Recently, we have shown that gene loss is strikingly different for large- and small-scale duplication events and highly biased towards the functional class to which a gene belongs. Here, we study the expression divergence of genes that were created during large- and small-scale gene duplication events by means of microarray data and investigate both the influence of the origin (mode of duplication) and the function of the duplicated genes on expression divergence.},
  keywords = {2read,Additional Data File,Anchor Point,Duplicate Gene,Duplication Event,Expression Divergence}
 }
@report{charlesFinalisationPipelineFTAG2023,
  type = {Internship Report},
  title = {Finalisation du pipeline FTAG (Families and TAG) Finder, un outil de détection des gènes dupliqués sous Galaxy},
@ -153,6 +227,30 @@
  langid = {english}
 }
@article{denoeudAnalyseGenomesRecherche,
  title = {Analyse des génomes à la recherche de répétitions en tandem polymorphes: outils d'épidémiologie bactérienne et locus hypermutables humains},
  author = {Denoeud, France},
  langid = {french}
 }
@article{desponsTandemGeneArrays2011,
  title = {Tandem Gene Arrays, Plastic Chromosomal Organizations},
  author = {Despons, Laurence and Uzunov, Zlatyo and Leh Louis, Véronique},
  date = {2011-08-01},
  journaltitle = {Comptes Rendus Biologies},
  shortjournal = {Comptes Rendus Biologies},
  series = {Ten Years of Genomic Exploration in Eukaryotes : Strategy and Progress of {{Genolevures}}},
  volume = {334},
  number = {8},
  pages = {639--646},
  issn = {1631-0691},
  doi = {10.1016/j.crvi.2011.05.012},
  url = {https://www.sciencedirect.com/science/article/pii/S1631069111001454},
  urldate = {2024-04-09},
  abstract = {This short article presents an overview of tandem gene arrays (TGAs) in hemiascomycete yeasts. In silico and in vivo analyses are combined to address structural, functional and evolutionary aspects of these particular chromosomal structures. Genomic instability of TGAs is discussed. We conclude that TGAs are generally dynamic regions of the genome in that they are the seats of chromosomal rearrangement events. In addition, they are often breeding grounds of new genes for a rapid adaptation of cells to demands of the environment. Résumé Ce court article présente une vue d’ensemble des tandems de gènes chez les levures hémiascomycètes. Des analyses in silico et in vivo ont été combinées pour aborder les aspects structuraux, fonctionnels et évolutifs de ces structures chromosomiques particulières. L’instabilité génomique des tandems de gènes est discutée. Nous concluons que les tandems de gènes sont généralement des régions dynamiques du génome car ils sont le siège d’événements de réarrangements chromosomiques. De surcroît, ils sont souvent des zones de reproduction de nouveaux gènes pour une adaptation rapide des cellules aux demandes de l’environnement.},
  keywords = {Chromosomal rearrangements,Duplication de gènes en tandem,Evolution,Évolution,Levure,Réarrangements chromosomiques,Tandem gene duplication,Yeast}
 }
@article{ditommasoNextflowEnablesReproducible2017,
  title = {Nextflow Enables Reproducible Computational Workflows},
  author = {Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W and Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric},
@ -204,14 +302,20 @@
  langid = {english}
 }
-@online{DupliquerPourAdapter2020,
+@article{duarteExpressionPatternShifts2006,
-  title = {Dupliquer pour s’adapter ou comment accélérer l’évolution des plantes ? | CNRS Biologie},
+  title = {Expression {{Pattern Shifts Following Duplication Indicative}} of {{Subfunctionalization}} and {{Neofunctionalization}} in {{Regulatory Genes}} of {{Arabidopsis}}},
-  shorttitle = {Dupliquer pour s’adapter ou comment accélérer l’évolution des plantes ?},
+  author = {Duarte, Jill M. and Cui, Liying and Wall, P. Kerr and Zhang, Qing and Zhang, Xiaohong and Leebens-Mack, Jim and Ma, Hong and Altman, Naomi and {dePamphilis}, Claude W.},
-  date = {2020-10-14},
+  date = {2006-02-01},
-  url = {https://www.insb.cnrs.fr/fr/cnrsinfo/dupliquer-pour-sadapter-ou-comment-accelerer-levolution-des-plantes},
+  journaltitle = {Molecular Biology and Evolution},
-  urldate = {2024-03-25},
+  shortjournal = {Molecular Biology and Evolution},
-  abstract = {Les duplications de portions de chromosomes permettant aux organismes de dupliquer des gènes existants et d’en créer de nouveaux sont bien},
+  volume = {23},
-  langid = {french}
+  number = {2},
  pages = {469--478},
  issn = {0737-4038},
  doi = {10.1093/molbev/msj051},
  url = {https://doi.org/10.1093/molbev/msj051},
  urldate = {2024-04-14},
  abstract = {Gene duplication plays an important role in the evolution of diversity and novel function and is especially prevalent in the nuclear genomes of flowering plants. Duplicate genes may be maintained through subfunctionalization and neofunctionalization at the level of expression or coding sequence. In order to test the hypothesis that duplicated regulatory genes will be differentially expressed in a specific manner indicative of regulatory subfunctionalization and/or neofunctionalization, we examined expression pattern shifts in duplicated regulatory genes in Arabidopsis. A two-way analysis of variance was performed on expression data for 280 phylogenetically identified paralogous pairs. Expression data were extracted from global expression profiles for wild-type root, stem, leaf, developing inflorescence, nearly mature flower buds, and seedpod. Gene, organ, and gene by organ interaction (G × O) effects were examined. Results indicate that 85\% of the paralogous pairs exhibited a significant G × O effect indicative of regulatory subfunctionalization and/or neofunctionalization. A significant G × O effect was associated with complementary expression patterns in 45\% of pairwise comparisons. No association was detected between a G × O effect and a relaxed evolutionary constraint as detected by the ratio of nonsynonymous to synonymous substitutions. Ancestral gene expression patterns inferred across a Type II MADS-box gene phylogeny suggest several cases of regulatory neofunctionalization and organ-specific nonfunctionalization. Complete linkage clustering of gene expression levels across organs suggests that regulatory modules for each organ are independent or ancestral genes had limited expression. We propose a new classification, regulatory hypofunctionalization, for an overall decrease in expression level in one member of a paralogous pair while still having a significant G × O effect. 
We conclude that expression divergence specifically indicative of subfunctionalization and/or neofunctionalization contributes to the maintenance of most if not all duplicated regulatory genes in Arabidopsis and hypothesize that this results in increasing expression diversity or specificity of regulatory genes after each round of duplication.}
 }
@article{emmsOrthoFinderPhylogeneticOrthology2019,
@ -250,6 +354,17 @@
  keywords = {Blast Score,Gene Length,Phylogenetic Distance,Sequence Similarity Score,Transcription Factor Gene Family}
 }
@video{evry-senartsciencesetinnovationCareneRizzonUEVE2014,
  entrysubtype = {video},
  title = {Carène {{Rizzon}} ({{UEVE}}) - {{Etude}} de l’évolution Des Gènes Dupliqués},
  editor = {{Evry-Sénart Sciences et Innovation}},
  editortype = {director},
  date = {2014},
  url = {https://www.youtube.com/watch?v=ubiOE7w3374},
  urldate = {2024-04-10},
  abstract = {Colloque ESI 2014 "Evry Bio \& Evry STIC" organisé par Evry Sciences et Innovation le 30 avril 2014 à Evry.  Intervention de Carène Rizzon de l'Université d’Évry: "Étude de l’évolution des gènes dupliqués chez Arabidopsis thaliana via les réseaux biologiques."}
 }
@article{gautRecombinationUnderappreciatedFactor2007,
  title = {Recombination: An Underappreciated Factor in the Evolution of Plant Genomes},
  shorttitle = {Recombination},
@ -322,12 +437,63 @@
  keywords = {Aegilops,molecular evolution,plasmon and B genome inheritance,Triticum,wheat}
 }
@article{grahamTandemGenesClustered1995,
  title = {Tandem Genes and Clustered Genes},
  author = {Graham, Geoffrey J.},
  date = {1995-07-07},
  journaltitle = {Journal of Theoretical Biology},
  shortjournal = {Journal of Theoretical Biology},
  volume = {175},
  number = {1},
  pages = {71--87},
  issn = {0022-5193},
  doi = {10.1006/jtbi.1995.0122},
  url = {https://www.sciencedirect.com/science/article/pii/S0022519385701221},
  urldate = {2024-04-09},
  abstract = {Two patterns of gene repetition are described: tandem arraying and clustering. Tandemly arrayed genes reside within segments of DNA that are repeated head-to-tail a number of times. Clustered genes are linked but irregularly spaced, are often mutually inverted in an unpredictable pattern and are connected by non-conserved DNA. Tandem arrays are homogenized by both unequal recombination and gene conversion, are necessary for the maintenance of large gene families, can expand and contract rapidly in response to changing demand, can keep functionally related genes equal in number, and do not engender increased genetic complexity. Gene clusters are homogenized by conversion only, seldom if ever contain more than 50 members, are stable in number, and often engender increased genetic complexity. The interrelationships among these properties are discussed. Tandem gene arrays can evolve into gene clusters. It is suggested that this occurs when some change in the array inhibits unequal recombination but not gene conversion. The most common such change is inversion of part of the tandem array with respect to the rest; however, arrays can evolve into clusters without inversion. Clustered genes are sometimes re-amplified into new tandem arrays. Clustered genes are probably more durable than tandemly arrayed genes during periods of relaxed selection, and in the case of fish antifreeze protein genes, seem to behave as a genetic memory.}
 }
@article{hanadaImportanceLineagespecificExpansion2008,
  title = {Importance of Lineage-Specific Expansion of Plant Tandem Duplicates in the Adaptive Response to Environmental Stimuli},
  author = {Hanada, Kousuke and Zou, Cheng and Lehti-Shiu, Melissa D. and Shinozaki, Kazuo and Shiu, Shin-Han},
  date = {2008-10},
  journaltitle = {Plant Physiology},
  shortjournal = {Plant Physiol},
  volume = {148},
  number = {2},
  eprint = {18715958},
  eprinttype = {pmid},
  pages = {993--1003},
  issn = {0032-0889},
  doi = {10.1104/pp.108.122457},
  abstract = {Plants have substantially higher gene duplication rates compared with most other eukaryotes. These plant gene duplicates are mostly derived from whole genome and/or tandem duplications. Earlier studies have shown that a large number of duplicate genes are retained over a long evolutionary time, and there is a clear functional bias in retention. However, the influence of duplication mechanism, particularly tandem duplication, on duplicate retention has not been thoroughly investigated. We have defined orthologous groups (OGs) between Arabidopsis (Arabidopsis thaliana) and three other land plants to examine the functional bias of retained duplicate genes during vascular plant evolution. Based on analysis of Gene Ontology categories, it is clear that genes in OGs that expanded via tandem duplication tend to be involved in responses to environmental stimuli, while those that expanded via nontandem mechanisms tend to have intracellular regulatory roles. Using Arabidopsis stress expression data, we further demonstrated that tandem duplicates in expanded OGs are significantly enriched in genes that are up-regulated by biotic stress conditions. In addition, tandem duplication of genes in an OG tends to be highly asymmetric. That is, expansion of OGs with tandem genes in one organismal lineage tends to be coupled with losses in the other. This is consistent with the notion that these tandem genes have experienced lineage-specific selection. In contrast, OGs with genes duplicated via nontandem mechanisms tend to experience convergent expansion, in which similar numbers of genes are gained in parallel. Our study demonstrates that the expansion of gene families and the retention of duplicates in plants exhibit substantial functional biases that are strongly influenced by the mechanism of duplication. 
In particular, genes involved in stress responses have an elevated probability of retention in a single-lineage fashion following tandem duplication, suggesting that these tandem duplicates are likely important for adaptive evolution to rapidly changing environments.},
  langid = {english},
  pmcid = {PMC2556807},
  keywords = {Adaptation Biological,Arabidopsis,Evolution Molecular,Gene Duplication,Genes Duplicate,Genes Plant,Genome Plant,Multigene Family,Oligonucleotide Array Sequence Analysis,Phylogeny}
 }
@online{HomeCromwell,
  title = {Home - {{Cromwell}}},
  url = {https://cromwell.readthedocs.io/en/stable/},
  urldate = {2024-03-27}
 }
@online{HttpsMicansOrg,
  title = {{{https://micans.org/mcl/lit/mimb.pdf}}},
  url = {https://micans.org/mcl/lit/mimb.pdf},
  urldate = {2024-04-11}
 }
@report{jasminStudyTandemlyArrayed2016,
  type = {Internship Report},
  title = {Study of Tandemly Arrayed Genes Expression for {{Arabidopsis}} thaliana},
  author = {Jasmin, Fabien},
  date = {2016-06},
  institution = {Laboratoire de Mathématiques et Modélisation d'Évry},
  abstract = {Tandemly arrayed genes, also called TAGs, are duplicated genes which come from tandem arrayed duplication. They can be separated or not by few genes called spacers. Although duplicated genes are commonly studied, TAGs features remain little known. In this study, I performed a statistical analysis of Arabidopsis thaliana TAGs using genomic and transcriptomic data of high quality providing from TAIR database and CATdb. After merging the different data and assessing it, I observed the distribution of the different size of TAG and the behaviour of TAGs depending on the number of spacers that I made vary from 0 to 10 in my survey. I equally defined different list of gene pairs to easily compare TAGs to other type of genes. In all 5 lists have been defined during my investigation. The defined lists are random genes pairs list, duplicated genes pairs list, successive genes pairs list, local genes pairs list and TAGs pairs list. After creating all lists previously defined, I made gene pairs lists comparisons between TAGs pair list and the other type of gene pairs list according to different features such as the effect of abiotic or biotic stress conditions, the genes orientation, or the correlation of the expression profiles.},
  langid = {english}
 }
@video{javiernovoDuplicationGenes2015,
  entrysubtype = {video},
  title = {Duplication of Genes},
@ -390,6 +556,22 @@
  pubstate = {preprint}
 }
@inproceedings{lajoieEvolutionTandemlyArrayed2007,
  title = {Evolution of {{Tandemly Arrayed Genes}} in {{Multiple Species}}},
  booktitle = {Comparative {{Genomics}}},
  author = {Lajoie, Mathieu and Bertrand, Denis and El-Mabrouk, Nadia},
  editor = {Tesler, Glenn and Durand, Dannie},
  date = {2007},
  pages = {96--109},
  publisher = {Springer},
  location = {Berlin, Heidelberg},
  doi = {10.1007/978-3-540-74960-8_8},
  abstract = {Tandemly arrayed genes (TAG) constitute a large fraction of most genomes and play important biological roles. They evolve through unequal recombination, which places duplicated genes next to the original ones (tandem duplications). Many algorithms have been proposed to infer a tandem duplication history for a TAG cluster in a single species. However, the presence of different transcriptional orientations in most TAG clusters highlight the fact that processes such as inversions also contribute to their evolution. This makes those algorithms unsuitable in many cases. To circumvent this limitation, we proposed in a previous work an extended evolutionary model which includes inversions and presented a branch-and-bound algorithm allowing to infer a most parsimonious scenario of evolution for a given TAG cluster. Here, we generalize this model to multiple species and present a general framework to infer ancestral gene orders that minimize the number of inversions in the whole evolutionary history. An application on a pair of human-rat TAG clusters is presented.},
  isbn = {978-3-540-74960-8},
  langid = {english},
  keywords = {Ancestral Genome,Gene Order,Gene Tree,Inversion Event,Tandem Duplication}
 }
@thesis{lallemandEvolutionGenesDupliques2022,
  type = {phdthesis},
  title = {Évolution des gènes dupliqués chez le pommier : Identification et caractérisation de la dominance du sous-génome dans le génome de la pomme},
@ -438,6 +620,13 @@
  langid = {english}
 }
@article{landes-devauchelleArtResumerPour,
  title = {De l’art de résumer pour tenter de comprendre en génomique évolutive},
  author = {Landès-Devauchelle, Claudine},
  url = {http://www.math-evry.cnrs.fr/_media/publications/devauchelle_hdr_2011.pdf},
  langid = {french}
 }
@article{lannesDoesPresenceTransposable2019,
  title = {Does the {{Presence}} of {{Transposable Elements Impact}} the {{Epigenetic Environment}} of {{Human Duplicated Genes}}?},
  author = {Lannes, Romain and Rizzon, Carène and Lerat, Emmanuelle},
@ -457,6 +646,16 @@
  pmcid = {PMC6470583}
 }
@report{le-hoangEtudeTranscriptomiqueGenes2017,
  type = {Internship Report},
  title = {Etude transcriptomique des gènes dupliqués en tandem (TAG) chez Arabidopsis thaliana},
  author = {Lê-Hoang, Julie},
  date = {2017},
  pages = {74},
  institution = {Laboratoire de Mathématiques et Modélisation d'Évry},
  langid = {french}
 }
@thesis{leducEtudeEvolutionGenes,
  title = {Étude de l’évolution des gènes dupliqués chez les Rosaceae},
  author = {Leduc, Martin},
@ -529,6 +728,22 @@
  pmcid = {PMC6347962}
 }
@online{moixPhylogeneticPlacementWhole2023,
  title = {Phylogenetic Placement of Whole Genome Duplications in Yeasts through Quantitative Analysis of Hierarchical Orthologous Groups},
  author = {Moix, Samuel and Glover, Natasha and Majidian, Sina},
  date = {2023-04-12},
  number = {12:382},
  eprint = {12:382},
  eprinttype = {F1000Research},
  doi = {10.12688/f1000research.128656.1},
  url = {https://f1000research.com/articles/12-382},
  urldate = {2024-04-17},
  abstract = {Background: Whole genome duplications (WGD) are genomic events leading to formation of polyploid organisms. Resulting duplicated genes play important roles in driving species evolution and diversification. After such events, the initial ploidy is usually restored, complicating their detection across evolution. With the advance of bioinformatics and the rising number of new well-assembled genomes, new detection methods are ongoingly being developed to overcome the weaknesses of different approaches. Results: Here we propose a novel method for detecting WGD in yeast lineages based on the quantitative and comparative analysis of hierarchical orthologous groups (HOGs) of duplicated genes for a given set of organisms. We reconstruct ancestral genomes to obtain evolutionary information for each phylogenetic branch. This reconstruction relies on the inference of HOGs from the selected species’ proteomes. To estimate WGD events, the number of HOGs of duplicated genes across all taxonomic ranges are adjusted according to the molecular clock hypothesis and by the average genome size. Branches with a significant increase in the adjusted number of duplicated gene families are kept as candidates for WGD placement. The developed method was tested on two real datasets and showed promising results in phylogenetic WGD placements on the yeast lineage.},
  langid = {english},
  pubstate = {preprint},
  keywords = {comparative genomics,orthologous groups,whole genome duplications,yeast}
 }
@online{molderSustainableDataAnalysis2021a,
  title = {Sustainable Data Analysis with {{Snakemake}}},
  author = {Mölder, Felix and Jablonski, Kim Philipp and Letcher, Brice and Hall, Michael B. and Tomkins-Tinch, Christopher H. and Sochat, Vanessa and Forster, Jan and Lee, Soohyun and Twardziok, Sven O. and Kanitz, Alexander and Wilm, Andreas and Holtgrewe, Manuel and Rahmann, Sven and Nahnsen, Sven and Köster, Johannes},
@ -573,6 +788,40 @@
  langid = {english}
 }
@article{ottoRecombinationSelectionEvolution2022,
  title = {Recombination, Selection, and the Evolution of Tandem Gene Arrays},
  author = {Otto, Moritz and Zheng, Yichen and Wiehe, Thomas},
  date = {2022-07-01},
  journaltitle = {Genetics},
  shortjournal = {Genetics},
  volume = {221},
  number = {3},
  pages = {iyac052},
  issn = {1943-2631},
  doi = {10.1093/genetics/iyac052},
  url = {https://doi.org/10.1093/genetics/iyac052},
  urldate = {2024-04-09},
  abstract = {Multigene families—immunity genes or sensory receptors, for instance—are often subject to diversifying selection. Allelic diversity may be favored not only through balancing or frequency-dependent selection at individual loci but also by associating different alleles in multicopy gene families. Using a combination of analytical calculations and simulations, we explored a population genetic model of epistatic selection and unequal recombination, where a trade-off exists between the benefit of allelic diversity and the cost of copy abundance. Starting from the neutral case, where we showed that gene copy number is Gamma distributed at equilibrium, we derived also the mean and shape of the limiting distribution under selection. Considering a more general model, which includes variable population size and population substructure, we explored by simulations mean fitness and some summary statistics of the copy number distribution. We determined the relative effects of selection, recombination, and demographic parameters in maintaining allelic diversity and shaping the mean fitness of a population. One way to control the variance of copy number is by lowering the rate of unequal recombination. Indeed, when encoding recombination by a rate modifier locus, we observe exactly this prediction. Finally, we analyzed the empirical copy number distribution of 3 genes in human and estimated recombination and selection parameters of our model.}
 }
@article{panTandemlyArrayedGenes2008,
  title = {Tandemly {{Arrayed Genes}} in {{Vertebrate Genomes}}},
  author = {Pan, Deng and Zhang, Liqing},
  date = {2008},
  journaltitle = {Comparative and Functional Genomics},
  shortjournal = {Comp Funct Genomics},
  volume = {2008},
  eprint = {18815629},
  eprinttype = {pmid},
  pages = {545269},
  issn = {1531-6912},
  doi = {10.1155/2008/545269},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2547482/},
  urldate = {2024-04-09},
  abstract = {Tandemly arrayed genes (TAGs) are duplicated genes that are linked as neighbors on a chromosome, many of which have important physiological and biochemical functions. Here we performed a survey of these genes in 11 available vertebrate genomes. TAGs account for an average of about 14\% of all genes in these vertebrate genomes, and about 25\% of all duplications. The majority of TAGs (72–94\%) have parallel transcription orientation (i.e., they are encoded on the same strand) in contrast to the genome, which has about 50\% of its genes in parallel transcription orientation. The majority of tandem arrays have only two members. In all species, the proportion of genes that belong to TAGs tends to be higher in large gene families than in small ones; together with our recent finding that tandem duplication played a more important role than retroposition in large families, this fact suggests that among all types of duplication mechanisms, tandem duplication is the predominant mechanism of duplication, especially in large families. Finally, several species have a higher proportion of large tandem arrays that are species-specific than random expectation.},
  pmcid = {PMC2547482}
 }
@online{PEPkitBioData,
  title = {{{PEPkit}}: The Bio Data Management Toolkit - {{PEPkit}}: The Bio Data Management Toolkit},
  url = {https://pep.databio.org/},
@ -596,6 +845,23 @@
  langid = {english}
 }
@article{picart-picoloLargeTandemDuplications2020a,
  title = {Large Tandem Duplications Affect Gene Expression, {{3D}} Organization, and Plant–Pathogen Response},
  author = {Picart-Picolo, Ariadna and Grob, Stefan and Picault, Nathalie and Franek, Michal and Llauro, Christel and Halter, Thierry and Maier, Tom R. and Jobet, Edouard and Descombin, Julie and Zhang, Panpan and Paramasivan, Vijayapalani and Baum, Thomas J. and Navarro, Lionel and Dvořáčková, Martina and Mirouze, Marie and Pontvianne, Frédéric},
  date = {2020-10-08},
  journaltitle = {Genome Research},
  shortjournal = {Genome Res.},
  eprint = {33033057},
  eprinttype = {pmid},
  publisher = {Cold Spring Harbor Lab},
  issn = {1088-9051, 1549-5469},
  doi = {10.1101/gr.261586.120},
  url = {https://genome.cshlp.org/content/early/2020/10/05/gr.261586.120},
  urldate = {2024-04-09},
  abstract = {Rapid plant genome evolution is crucial to adapt to environmental changes. Chromosomal rearrangements and gene copy number variation (CNV) are two important tools for genome evolution and sources for the creation of new genes. However, their emergence takes many generations. In this study, we show that in Arabidopsis thaliana, a significant loss of ribosomal RNA (rRNA) genes with a past history of a mutation for the chromatin assembly factor 1 (CAF1) complex causes rapid changes in the genome structure. Using long-read sequencing and microscopic approaches, we have identified up to 15 independent large tandem duplications in direct orientation (TDDOs) ranging from 60 kb to 1.44 Mb. Our data suggest that these TDDOs appeared within a few generations, leading to the duplication of hundreds of genes. By subsequently focusing on a line only containing 20\% of rRNA gene copies (20rDNA line), we investigated the impact of TDDOs on 3D genome organization, gene expression, and cytosine methylation. We found that duplicated genes often accumulate more transcripts. Among them, several are involved in plant–pathogen response, which could explain why the 20rDNA line is hyper-resistant to both bacterial and nematode infections. Finally, we show that the TDDOs create gene fusions and/or truncations and discuss their potential implications for the evolution of plant genomes.},
  langid = {english}
 }
@online{ponsComputingCommunitiesLarge2005,
  title = {Computing Communities in Large Networks Using Random Walks (Long Version)},
  author = {Pons, Pascal and Latapy, Matthieu},
@ -605,11 +871,22 @@
  doi = {10.48550/arXiv.physics/0512106},
  url = {http://arxiv.org/abs/physics/0512106},
  urldate = {2024-03-30},
-  abstract = {Dense subgraphs of sparse graphs (communities), which appear in most real-world complex networks, play an important role in many contexts. Computing them however is generally expensive. We propose here a measure of similarities between vertices based on random walks which has several important advantages: it captures well the community structure in a network, it can be computed efficiently, and it can be used in an agglomerative algorithm to compute efficiently the community structure of a network. We propose such an algorithm, called Walktrap, which runs in time O(mn\^{}2) and space O(n\^{}2) in the worst case, and in time O(n\^{}2log n) and space O(n\^{}2) in most real-world cases (n and m are respectively the number of vertices and edges in the input graph). Extensive comparison tests show that our algorithm surpasses previously proposed ones concerning the quality of the obtained community structures and that it stands among the best ones concerning the running time.},
+  abstract = {Dense subgraphs of sparse graphs (communities), which appear in most real-world complex networks, play an important role in many contexts. Computing them however is generally expensive. We propose here a measure of similarities between vertices based on random walks which has several important advantages: it captures well the community structure in a network, it can be computed efficiently, and it can be used in an agglomerative algorithm to compute efficiently the community structure of a network. We propose such an algorithm, called Walktrap, which runs in time O(mn\textasciicircum 2) and space O(n\textasciicircum 2) in the worst case, and in time O(n\textasciicircum 2log n) and space O(n\textasciicircum 2) in most real-world cases (n and m are respectively the number of vertices and edges in the input graph). Extensive comparison tests show that our algorithm surpasses previously proposed ones concerning the quality of the obtained community structures and that it stands among the best ones concerning the running time.},
  pubstate = {preprint},
  keywords = {Condensed Matter - Disordered Systems and Neural Networks,Condensed Matter - Statistical Mechanics,Physics - Physics and Society}
 }
@online{pontvianneDupliquerPourAdapter2020,
  title = {Dupliquer pour s’adapter ou comment accélérer l’évolution des plantes ? | CNRS Biologie},
  shorttitle = {Dupliquer pour s’adapter ou comment accélérer l’évolution des plantes ?},
  author = {Pontvianne, Frédéric},
  date = {2020-10-14},
  url = {https://www.insb.cnrs.fr/fr/cnrsinfo/dupliquer-pour-sadapter-ou-comment-accelerer-levolution-des-plantes},
  urldate = {2024-03-25},
  abstract = {Les duplications de portions de chromosomes permettant aux organismes de dupliquer des gènes existants et d’en créer de nouveaux sont bien},
  langid = {french}
 }
@article{reamsSelectionGeneClustering2004,
  title = {Selection for {{Gene Clustering}} by {{Tandem Duplication}}},
  author = {Reams, Andrew B. and Neidle, Ellen L.},
@ -640,6 +917,25 @@
  abstract = {We analyzed the distribution of 54 families of transposable elements (TEs; transposons, LTR retrotransposons, and non-LTR retrotransposons) in the chromosomes of Drosophila melanogaster, using data from the sequenced genome. The density of LTR and non-LTR retrotransposons (RNA-based elements) was high in regions with low recombination rates, but there was no clear tendency to parallel the recombination rate. However, the density of transposons (DNA-based elements) was significantly negatively correlated with recombination rate. The accumulation of TEs in regions of reduced recombination rate is compatible with selection acting against TEs, as selection is expected to be weaker in regions with lower recombination. The differences in the relationship between recombination rate and TE density that exist between chromosome arms suggest that TE distribution depends on specific characteristics of the chromosomes (chromatin structure, distribution of other sequences), the TEs themselves (transposition mechanism), and the species (reproductive system, effective population size, etc.), that have differing influences on the effect of natural selection acting against the TE insertions.}
 }
@article{rizzonStrikingSimilaritiesGenomic2006,
  title = {Striking {{Similarities}} in the {{Genomic Distribution}} of {{Tandemly Arrayed Genes}} in {{Arabidopsis}} and {{Rice}}},
  author = {Rizzon, Carene and Ponger, Loic and Gaut, Brandon S},
  date = {2006-09},
  journaltitle = {PLoS Computational Biology},
  shortjournal = {PLoS Comput Biol},
  volume = {2},
  number = {9},
  eprint = {16948529},
  eprinttype = {pmid},
  pages = {e115},
  issn = {1553-734X},
  doi = {10.1371/journal.pcbi.0020115},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1557586/},
  urldate = {2024-04-10},
  abstract = {In Arabidopsis, tandemly arrayed genes (TAGs) comprise {$>$}10\% of the genes in the genome. These duplicated genes represent a rich template for genetic innovation, but little is known of the evolutionary forces governing their generation and maintenance. Here we compare the organization and evolution of TAGs between Arabidopsis and rice, two plant genomes that diverged \textasciitilde 150 million years ago. TAGs from the two genomes are similar in a number of respects, including the proportion of genes that are tandemly arrayed, the number of genes within an array, the number of tandem arrays, and the dearth of TAGs relative to single copy genes in centromeric regions. Analysis of recombination rates along rice chromosomes confirms a positive correlation between the occurrence of TAGs and recombination rate, as found in Arabidopsis. TAGs are also biased functionally relative to duplicated, nontandemly arrayed genes. In both genomes, TAGs are enriched for genes that encode membrane proteins and function in “abiotic and biotic stress” but underrepresented for genes involved in transcription and DNA or RNA binding functions. We speculate that these observations reflect an evolutionary trend in which successful tandem duplication involves genes either at the end of biochemical pathways or in flexible steps in a pathway, for which fluctuation in copy number is unlikely to affect downstream genes. Despite differences in the age distribution of tandem arrays, the striking similarities between rice and Arabidopsis indicate similar mechanisms of TAG generation and maintenance., The nuclear genomes of higher plants vary tremendously in size and gene content. Much of this variation is attributable to gene duplication. To date, most studies of plant gene duplication have focused on whole genome duplication events, which duplicate all genes simultaneously. 
Another prominent process is single gene duplication, which often results in duplicated genes arranged in a tandem array. Here Rizzon, Ponger, and Gaut identify tandem arrays in rice and their genome organization between Arabidopsis and rice, two plant species that diverged \textasciitilde 150 million years ago. The two genomes contain a similar proportion of genes that are tandemly arrayed, with a similar number of genes within an array. Moreover, tandemly arrayed genes are most common in genomic regions of high recombination in both species. This organization appears to be a general feature of eukaryotic genomes, perhaps because duplication rates are higher in high recombination regions. Tandemly arrayed genes of rice and Arabidopsis also represent a biased gene set with regard to function. In contrast to genes duplicated through whole genome events, tandemly arrayed genes are enriched for genes that encode membrane proteins and genes that function in response to environmental stresses. Taken together, these observations suggest that tandemly arrayed genes represent a rich and relatively fluid source for plant adaptation.},
  pmcid = {PMC1557586}
 }
@article{rognesParAlignParallelSequence2001,
  title = {{{ParAlign}}: A Parallel Sequence Alignment Algorithm for Rapid and Sensitive Database Searches},
  shorttitle = {{{ParAlign}}},
@ -690,6 +986,24 @@
  issue = {suppl\_2}
 }
@article{shojaRoadmapTandemlyArrayed2006,
  title = {A Roadmap of Tandemly Arrayed Genes in the Genomes of Human, Mouse, and Rat},
  author = {Shoja, Valia and Zhang, Liqing},
  date = {2006-11},
  journaltitle = {Molecular Biology and Evolution},
  shortjournal = {Mol Biol Evol},
  volume = {23},
  number = {11},
  eprint = {16901985},
  eprinttype = {pmid},
  pages = {2134--2141},
  issn = {0737-4038},
  doi = {10.1093/molbev/msl085},
  abstract = {Tandemly arrayed genes (TAGs) play an important functional and physiological role in the genome. Most previous studies have focused on individual TAG families in a few species, yet a broad characterization of TAGs is not available. Here we identified all TAGs in the genomes of humans, mouse, and rat and performed a comprehensive analysis of TAG distribution, TAG sizes, TAG orientations and intergenic distances, and TAG functions. TAGs account for about 14-17\% of all genes in the genome and nearly one-third of all duplicated genes, highlighting the predominant role that tandem duplication plays in gene duplication. For all species, TAG distribution is highly heterogeneous along chromosomes and some chromosomes are enriched with TAG forests, whereas others are enriched with TAG deserts. The majority of TAGs are of size 2 for all genomes, similar to the previous findings in Caenorhabditis elegans, Arabidopsis thaliana, and Oryza sativa, suggesting that it is a rather general phenomenon in eukaryotes. The comparison with the genome patterns shows that TAG members have a significantly higher proportion of parallel gene orientation in all species, corroborating Graham's claim that parallel orientation is the preferred form of orientation in TAGs. Moreover, TAG members with parallel orientation tend to be closer to each other than all neighboring genes in the genome with parallel orientation. The analyses of Gene Ontology function indicate that genes with receptor or binding activities are significantly overrepresented by TAGs. Computer simulation reveals that random gene rearrangements have little effect on the statistics of TAGs for all genomes. Finally, the average proportion of TAGs shows a trend of increase with the increase of family sizes, although the correlation between TAG proportions in individual families and family sizes is not significant.},
  langid = {english},
  keywords = {Animals,Chromosome Mapping,Chromosomes,Gene Duplication,Genome,Humans,Mice,Rats,Recombination Genetic,Tandem Repeat Sequences}
 }
@article{smithIdentificationCommonMolecular1981,
  title = {Identification of Common Molecular Subsequences},
  author = {Smith, T. F. and Waterman, M. S.},
@ -759,6 +1073,41 @@
  keywords = {Algorithms,Base Sequence,Chromosome Mapping,Computer Graphics,DNA,DNA Mutational Analysis,Molecular Sequence Data,Sequence Alignment,Sequence Analysis DNA,Software,User-Computer Interface}
 }
@article{thibaud-nissenIdentificationCharacterizationPseudogenes2009,
  title = {Identification and Characterization of Pseudogenes in the Rice Gene Complement},
  author = {Thibaud-Nissen, Françoise and Ouyang, Shu and Buell, C. Robin},
  date = {2009-07-16},
  journaltitle = {BMC Genomics},
  shortjournal = {BMC Genomics},
  volume = {10},
  number = {1},
  pages = {317},
  issn = {1471-2164},
  doi = {10.1186/1471-2164-10-317},
  url = {https://doi.org/10.1186/1471-2164-10-317},
  urldate = {2024-04-09},
  abstract = {The Osa1 Genome Annotation of rice (Oryza sativa L. ssp. japonica cv. Nipponbare) is the product of a semi-automated pipeline that does not explicitly predict pseudogenes. As such, it is likely to mis-annotate pseudogenes as functional genes. A total of 22,033 gene models within the Osa1 Release 5 were investigated as potential pseudogenes as these genes exhibit at least one feature potentially indicative of pseudogenes: lack of transcript support, short coding region, long untranslated region, or, for genes residing within a segmentally duplicated region, lack of a paralog or significantly shorter corresponding paralog.},
  langid = {english},
  keywords = {Duplicate Region,GOSlim Term,Massively Parallel Signature Sequencing,Massively Parallel Signature Sequencing Data,Paralogous Family}
 }
@article{tremblaysavardEvolutionOrthologousTandemly2011,
  title = {Evolution of Orthologous Tandemly Arrayed Gene Clusters},
  author = {Tremblay Savard, Olivier and Bertrand, Denis and El-Mabrouk, Nadia},
  date = {2011-10-05},
  journaltitle = {BMC Bioinformatics},
  shortjournal = {BMC Bioinformatics},
  volume = {12},
  number = {9},
  pages = {S2},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-12-S9-S2},
  url = {https://doi.org/10.1186/1471-2105-12-S9-S2},
  urldate = {2024-04-09},
  abstract = {Tandemly Arrayed Gene (TAG) clusters are groups of paralogous genes that are found adjacent on a chromosome. TAGs represent an important repertoire of genes in eukaryotes. In addition to tandem duplication events, TAG clusters are affected during their evolution by other mechanisms, such as inversion and deletion events, that affect the order and orientation of genes. The DILTAG algorithm developed in [1] makes it possible to infer a set of optimal evolutionary histories explaining the evolution of a single TAG cluster, from an ancestral single gene, through tandem duplications (simple or multiple, direct or inverted), deletions and inversion events.},
  keywords = {Gene Order,Gene Tree,Internal Vertex,Inversion Event,Tandem Duplication}
 }
@article{vandongenGraphClusteringDiscrete2008a,
  title = {Graph {{Clustering Via}} a {{Discrete Uncoupling Process}}},
  author = {Van Dongen, Stijn},
@ -787,6 +1136,58 @@
  langid = {english}
 }
@incollection{vandongenUsingMCLExtract2012,
  title = {Using {{MCL}} to {{Extract Clusters}} from {{Networks}}},
  booktitle = {Bacterial {{Molecular Networks}}},
  author = {Van Dongen, Stijn and Abreu-Goodger, Cei},
  editor = {Van Helden, Jacques and Toussaint, Ariane and Thieffry, Denis},
  date = {2012},
  volume = {804},
  pages = {281--295},
  publisher = {Springer New York},
  location = {New York, NY},
  doi = {10.1007/978-1-61779-361-5_15},
  url = {http://link.springer.com/10.1007/978-1-61779-361-5_15},
  urldate = {2024-04-11},
  isbn = {978-1-61779-360-8 978-1-61779-361-5},
  langid = {english}
 }
@incollection{vandongenUsingMCLExtract2012a,
  title = {Using {{MCL}} to {{Extract Clusters}} from {{Networks}}},
  booktitle = {Bacterial {{Molecular Networks}}},
  author = {Van Dongen, Stijn and Abreu-Goodger, Cei},
  editor = {Van Helden, Jacques and Toussaint, Ariane and Thieffry, Denis},
  date = {2012},
  volume = {804},
  pages = {281--295},
  publisher = {Springer New York},
  location = {New York, NY},
  doi = {10.1007/978-1-61779-361-5_15},
  url = {http://link.springer.com/10.1007/978-1-61779-361-5_15},
  urldate = {2024-04-11},
  isbn = {978-1-61779-360-8 978-1-61779-361-5},
  langid = {english}
 }
@article{vizuetaBitacoraComprehensiveTool2020,
  title = {Bitacora: {{A}} Comprehensive Tool for the Identification and Annotation of Gene Families in Genome Assemblies},
  shorttitle = {Bitacora},
  author = {Vizueta, Joel and Sánchez-Gracia, Alejandro and Rozas, Julio},
  date = {2020},
  journaltitle = {Molecular Ecology Resources},
  volume = {20},
  number = {5},
  pages = {1445--1452},
  issn = {1755-0998},
  doi = {10.1111/1755-0998.13202},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/1755-0998.13202},
  urldate = {2024-04-09},
  abstract = {Gene annotation is a critical bottleneck in genomic research, especially for the comprehensive study of very large gene families in the genomes of nonmodel organisms. Despite the recent progress in automatic methods, state-of-the-art tools used for this task often produce inaccurate annotations, such as fused, chimeric, partial or even completely absent gene models for many family copies, errors that require considerable extra efforts to be corrected. Here we present bitacora, a bioinformatics solution that integrates popular sequence similarity-based search tools and Perl scripts to facilitate both the curation of these inaccurate annotations and the identification of previously undetected gene family copies directly in genomic DNA sequences. We tested the performance of bitacora in annotating the members of two chemosensory gene families with different repertoire size in seven available genome sequences, and compared its performance with that of augustus-ppx, a tool also designed to improve automatic annotations using a sequence similarity-based approach. Despite the relatively high fragmentation of some of these drafts, bitacora was able to improve the annotation of many members of these families and detected thousands of new chemoreceptors encoded in genome sequences. The program creates general feature format (GFF) files, with both curated and newly identified gene models, and FASTA files with the predicted proteins. These outputs can be easily integrated in genomic annotation editors, greatly facilitating subsequent manual annotation and downstream evolutionary analyses.},
  langid = {english},
  keywords = {bioinfomatics/phyloinfomatics,gene families,gene structure and function,genomics,molecular evolution,structural annotation,transcriptomics}
 }
@article{wolfeRobustnessItNot2000,
  title = {Robustness—It's Not Where You Think It Is},
  author = {Wolfe, Ken},
--- a/report.org
+++ b/report.org
@ -1,8 +1,8 @@
 #+title: Further development on FTAG Finder, a pipeline to identify Gene Families and Tandemly Arrayed Genes
 #+author: Samuel Ortion
 #+date: 2023-2024
-#+LATEX_CLASS: scientific-project
+#+LATEX_CLASS: lamme2024
-#+LATEX_CLASS_OPTIONS: [twoside=false]
+#+LATEX_CLASS_OPTIONS: [twoside=true]
 #+LATEX_HEADER: \usepackage{sty/lamme2024}
 #+bibliography: references.bib
@ -15,9 +15,27 @@
 | key        | abbreviation | full form                                  |
 |------------+--------------+--------------------------------------------|
 | TAG        | TAG          | Tandemly Arrayed Genes                     |
-| FTAGfinder | FTAG Finder  | Families and Tandemly Arrayed Genes Finder |
+| FTAGFinder | FTAG Finder  | Families and Tandemly Arrayed Genes Finder |
 | WGD        | WGD          | Whole Genome Duplication                   |
 | MCL        | MCL          | Markov Clustering                          |
 | BLAST      | BLAST        | Basic Local Alignment Search Tool          |
 |      GO    | GO           |                                         Gene Ontology |
 #+name: glossary
 | label                | name                 | description                                                                                                                   |
 |----------------------+----------------------+-------------------------------------------------------------------------------------------------------------------------------|
 | singleton            | singleton            | A gene with a single copy                                                                                                     |
 | polyploidisation     | polyploidisation     | Mechanism leading to the acquisition of at least three versions of the same original genome in a species                      |
 | pseudogene           | pseudogene           | A gene-like sequence that lost its capacity to transcribe                                                                     |
 | segment_duplication  | segment duplication  | Long stretches of DNA sequences with high identity score                                                                      |
 | retroduplication     | retroduplication     | Duplication of a gene through retro-transcription of its RNA transcript                                                       |
 | autopolyploidisation | autopolyploidisation | Polyploidisation within the same species                                                                                      |
 | allopolyploidisation | allopolyploidisation | Polyploidisation with genetic material coming from a diverged species                                                         |
 | polyspermy           | polyspermy           | Fertilization of an egg by more than one sperm                                                                                |
 | segment_duplication  | segment duplication  | DNA sequences present in multiple locations within a genome that share high level of sequence identity                        |
 | subfunctionalization | subfunctionalization | Fate of a duplicate gene which gets a part of the original gene function, the function being shared among multiple duplicates  |
 | orthologues          | orthologues          | Homologous genes whose divergence started at a speciation event                                                               |
 | neofunctionalization | neofunctionalization | Acquisition of a new function by the duplicate gene |
 #+begin_export latex
 \makeatletter
@ -33,9 +51,12 @@
 #+end_center
 #+begin_export latex
 {
 \hypersetup{linkcolor=black}
 \tableofcontents
 \listoffigures
-%\listoftables
+\listoftables
 }
 #+end_export
 [[printglossaries:]]
@ -45,117 +66,231 @@
 #+end_export
 * Scientific context
-It is estimated that between 46% and 65.5% of human genes could be considered as duplicate genes [cite:@correaTransposableElementEnvironment2021].
+[[latex:lettrine][D]]uplicate genes represent an important fraction of eukaryotic genes: it is estimated that between 46% and 65.5% of human genes could be considered as duplicate[fn:: The estimates vary strongly depending on the criteria in use, because ancient duplication events may be hard to detect.] [cite:@correaTransposableElementEnvironment2021].
 Duplicate genes offer a pool of genetic material available for further experimentation during species evolution.
 ** Gene duplication mechanisms
 #+begin_src emacs-lisp :exports results :results raw
 ;; Org markup (label, caption and SVG link) for the gene duplication
 ;; mechanisms figure, kept in a variable so it can be emitted conditionally.
 (setq fig:gene-duplication-mechanisms "#+label: fig:gene-duplication-mechanisms
 #+caption[Different types of duplication]: Different types of duplication. (A) Whole genome duplication. (B) An unequal crossing-over leads to a duplication of a fragment of a chromosome. (C) In tandem duplication, two (set of) genes are duplicated one after the other. (D) Retrotransposon enables retroduplication: a RNA transcript is reverse transcribed and inserted back without introns and with a polyA tail in the genome. (E) A DNA transposon can acquire a fragment of a gene. (F) Segmental duplication corresponds to long stretches of duplicated sequences with high identity. Adapted from [cite:@lallemandOverviewDuplicatedGene2020] (fig. 1)
 [[./figures/lallemand2020-fig1_copy.svg]]")
 ;; Yield the figure markup for the HTML backend only; any other backend
 ;; gets the empty string.
 (cond ((eq org-export-current-backend 'html)
        fig:gene-duplication-mechanisms)
       (t ""))
 #+end_src
 #+begin_export latex
 \fladdfig{
 	\includegraphics[width=.9\linewidth]{./figures/lallemand2020-fig1_copy.pdf}
-	\caption[Different types of duplication]{\label{fig:gene-duplication-mechanisms}Different types of duplication. (A) Whole genome duplication. (B) An unequal crossing-over leads to a duplication of a fragment of a chromosome. (C) In tandem duplication, two (set of) genes are duplicated one after the other. (D) Retrotransposon enables retroduplication: a RNA transcript is reverse transcribed and inserted back without introns and with a polyA tail in the genome. (E) A DNA transposon can acquire a fragment of a gene. (F) Segmental duplication corresponds to long stretches of duplicated sequences with high identity. Adapted from \autocite{lallemandOverviewDuplicatedGene2020} (fig. 1).}
+	\caption[Different types of duplication]{\label{fig:gene-duplication-mechanisms}Different types of duplication. (A) Whole genome duplication. (B) An unequal crossing-over leads to a duplication of a fragment of a chromosome. (C) In tandem duplication, two (set of) genes are duplicated one after the other. (D) Retrotransposon enables retroduplication: a RNA transcript is reverse transcribed and inserted back without introns and with a polyA tail in the genome. (E) A DNA transposon can acquire a fragment of a gene. (F) Segmental duplication corresponds to long stretches of duplicated sequences with high identity. (Adapted from \textcite{lallemandOverviewDuplicatedGene2020} (fig. 1)).}
 }
 #+end_export
 Multiple mechanisms may lead to a gene duplication. Their effect ranges from the duplication of the whole genome to the duplication of a fragment of a gene.
-Multiple mechanisms may lead to gene duplication. The following sections review them.
+*** Whole genome duplication and polyploidisation
-*** Polyploidisation and whole genome duplication
+During an event of gls:WGD, the entire set of genes present on the chromosomes is duplicated ([[cref:fig:gene-duplication-mechanisms]] (A)).
-In an event of gls:WGD, the entire set of genes present on the chromosomes is duplicated ([[cref:fig:gene-duplication-mechanisms]] (A)).
+gls:WGD can occur thanks to gls:polyspermy or in case of a non-reduced gamete.
-gls:WGD is more frequent in plants.
+Gls:polyploidisation is a mechanism leading to a species with at least three copies of an initial genome.
-A striking example is probably the /Triticum/ genus (wheat) in which some species (such as /T. aestivum/) are hexaploid, due to hybridisation events [cite:@golovninaMolecularPhylogenyGenus2007a].
+A striking example is probably /Triticum aestivum/ (wheat) which is hexaploid due to hybridisation events [cite:@golovninaMolecularPhylogenyGenus2007a].
-We distinguish two kinds of polyploidisation, based on the origin of the duplicate genome:
+We distinguish two kinds of glspl:polyploidisation, based on the origin of the duplicate genome: (i) Gls:allopolyploidisation occurs when the supplementary chromosomes come from a divergent species. This is the case for the /Triticum aestivum/ hybridisation, which consisted in the union of the chromosome set of a /Triticum/ species with that of an /Aegilops/ species. (ii) Gls:autopolyploidisation consists in the hybridisation or duplication of the whole genome within the same species.
 - Allopolyploidisation occurs when the supplementary chromosomes come from another species. This is the case for /Triticum aestivum/ hybridisation.
 - Autopolyploidisation consists in the duplication of the genome within the same species.
 gls:WGD can occur thanks to polyspermy or in case of a non-reduced gamete.
 *** Unequal crossing-over
-A crossing-over may occur during cell division. Two chromatids may exchange a fragment of chromosome. If the cleavage of the two chromatids occurs at different positions, the shared fragments may have different lengths. Homologous recombination of such uneven crossover results in the incorporation of a duplicate region, as represented in cref:fig:gene-duplication-mechanisms (B, C).
+Another source of gene duplication relies on unequal crossing-over. During cell division, a crossing-over occurs when two chromatids exchange fragments of chromosome. If the cleavage of the two chromatids occurs at different positions, the shared fragments may have different lengths. Homologous recombination of such uneven crossing-over leads to the incorporation of a duplicate region, as depicted in cref:fig:gene-duplication-mechanisms (B, C).
-This mechanism leads to the duplication of the whole set of genes present in the inserted fragment. These duplicate genes locate one set after the other, and are thus called gls:TAG.
+This mechanism leads to the duplication of the whole set of genes present in the fragment. These duplicate genes locate one set after the other: we call them gls:TAG. Gls:TAG are the kind of gene duplication we will be particularly interested in during this internship.
 *** Retroduplication
-Retrotransposons, or RNA transposons are one type of transposable elements. Retrotransposons share similar structure and mechanism with retroviruses.
+Transposable elements play a major role in genome plasticity, and enable gene duplication too.
-They may replicate in the genome through a mechanism known as "copy-and-paste".
+Retrotransposons, or RNA transposons are one type of transposable elements.
-These transposons typically contain a reverse transcriptase gene. This enzyme may proceed in the reverse transcription of an mRNA transcript into DNA sequence which can then be inserted elsewhere in the genome.
+They share similar structure and replication mechanisms with retroviruses.
-More generally, retroduplication refers to the duplication of a region of a chromosome through reverse transcription of a RNA transcript. In this case the duplicate gene lost its intronic sequences and brings a polyA tail with it ( cref:fig:gene-duplication-mechanisms (D)).
+Retrotransposons replicate in the genome through a mechanism known as "copy-and-paste".
 These transposons typically contain a reverse transcriptase gene. This enzyme proceeds in the reverse transcription of an mRNA transcript into its reverse complementary DNA sequence which can then insert elsewhere in the genome.
 More generally, gls:retroduplication refers to the duplication of a sequence through reverse transcription of an RNA transcript. Genes duplicated through retroduplication lose their intronic sequences and bring a polyA tail with them in their new locus (cref:fig:gene-duplication-mechanisms (D)).
 *** Transduplication
-DNA transposons are another type of transposable elements whose transposition mechanism can also lead to gene duplication.
+DNA transposons are another kind of transposable elements whose transposition mechanism can also lead to gene duplication.
-This type of transposable element moves in the genome through a mechanisms known as "cut-and-paste".
+This type of transposable element moves in the genome through a mechanism known as "cut-and-paste".
-A typical DNA transposon contains a transposase gene. This enzyme recognize two sites surrounding the donnor transposon sequence in the chromosome resulting in a DNA cleavage and excision of the transposon. The transposase can then insert the transposon in a new genome locus. A transposon can bring a fragment of a gene during its transposition in the other locus (cref:fig:gene-duplication-mechanisms (E)).
+A typical DNA transposon contains a transposase gene. This enzyme recognizes two sites surrounding the donor transposon sequence in the chromosome, resulting in a DNA cleavage and an excision of the transposon. The transposase can then insert the transposon at a new genome locus. A transposon may bring a fragment of a gene during its transposition in the new locus (cref:fig:gene-duplication-mechanisms (E)), leading to the duplication of this fragment.
 *** Segment duplication
-Segment duplications, also called low copy repeats are long stretches of DNA with high identity score ([[cref:fig:gene-duplication-mechanisms]] (F)). Their exact duplication mechanisms remains unclear [cite:@lallemandOverviewDuplicatedGene2020], they may results from an accidental replication, distinct from an uneven cross-over or a double stranded breakage.
+Finally, glspl:segment_duplication, also called /low copy repeats/ are long stretches of DNA with high identity score ([[cref:fig:gene-duplication-mechanisms]] (F)). Their exact duplication mechanism remains unclear [cite:@lallemandOverviewDuplicatedGene2020]. They may come from an accidental replication, distinct from an uneven cross-over or a double stranded breakage.
-Nevertheless, transposable elements may well be involved as a high enrichment of transposable elements has been found at segment extremities, in /Drosophila/ [cite:@lallemandOverviewDuplicatedGene2020].
+Transposable elements may well be involved in the mechanism, as a high enrichment of transposable elements is found next to duplicate segment extremities, in /Drosophila/ [cite:@lallemandOverviewDuplicatedGene2020].
 #+begin_src emacs-lisp :exports results :results raw
 ;; Org markup (label, caption and SVG link) for the "fate of duplicate genes"
 ;; figure. The label must be `fig:fate-duplicate-genes': that is the name
 ;; used by the LaTeX \label of this figure and by the Cref link in the text,
 ;; so any other label leaves the cross-reference unresolved in HTML export.
 (setq fig:duplicate-genes-fate "#+label: fig:fate-duplicate-genes
 ,#+caption[Fate of duplicate genes]: Fate of duplicate genes. An original gene with four functions is duplicated. Its two copies may both keep the original functions (functional redundancy). The original functions may split between the different copies (subfunctionalization). One of the copies may acquire a new function (neofunctionalization). It may also degenerate and lose its original functions (pseudogenization). Adapted from [[https://commons.wikimedia.org/wiki/File:Evolution_fate_duplicate_genes_-_vector.svg][Smedlib]], [[https://creativecommons.org/licenses/by-sa/4.0][CC BY-SA 4.0]] via Wikimedia Commons.
 [[./figures/Evolution_fate_duplicate_genes.svg]]")
 ;; Emit the figure markup only for the HTML backend; other backends get "".
 (if (eq org-export-current-backend 'html)
    fig:duplicate-genes-fate
  ""
  )
 #+end_src
 #+RESULTS:
 #+begin_export latex
 \fladdfig{
 	\includegraphics[width=.9\linewidth]{figures/Evolution_fate_duplicate_genes.pdf}
 	\caption[Fate of duplicate genes]{\label{fig:fate-duplicate-genes} Fate of duplicate genes. An original gene with four functions is duplicated. Its two copies may both keep the original functions (functional redundancy). The original functions may split between the different copies (subfunctionalization). One of the copies may acquire a new function (neofunctionalization). It may also degenerate and lose its original functions (pseudogenization). (Adapted from \href{https://commons.wikimedia.org/wiki/File:Evolution_fate_duplicate_genes_-_vector.svg}{Smedlib}, \href{https://creativecommons.org/licenses/by-sa/4.0}{CC BY-SA 4.0}, via Wikimedia Commons).}
 }
 #+end_export
 ** Fate of duplicate genes in genome evolution
-In his book /Evolution by Gene Duplication/, Susumu [[latex:textsc][Ohno]] proposed that gene duplication plays a major role in species evolution [cite:@ohnoEvolutionGeneDuplication1970], as it provides a new genetic material to build on new phenotypes while keeping a backup gene for the previous function.
+In his book /Evolution by Gene Duplication/, Susumu [[latex:textsc][Ohno]] proposed that gene duplication plays a major role in species evolution [cite:@ohnoEvolutionGeneDuplication1970], because it provides new genetic materials to build on new phenotypes while keeping a backup gene for the previous function.
 Indeed, duplicate genes evolve after duplication: they may be inactivated, and become glspl:pseudogene; they may be deleted or conserved, and if conserved, they may or may not acquire a new function.
 [[Cref:fig:fate-duplicate-genes]] depicts the different possible fates of a duplicate gene.
-Duplicate genes may be inactivated becoming pseudogenes, be deleted or conserved.
+# *** Pseudogenization
-*** Pseudogenisation
+As the genome evolves, duplicate genes may be inactivated and become pseudogenes. These pseudogenes keep a gene-like structure, which degrades as and when further genome modifications occur, but they are no longer expressed.
 Duplicate genes may be inactivated and become pseudogenes. These pseudogenes keep a gene-like structure, which degrades as and when further genome modifications occur, but are no longer expressed.
 *** Neofunctionalisation
 Duplicate genes may be conserved and gain a new function.
 For instance, in /Drosophila/, the set of olfactory receptor genes result from several duplication and deletion events [cite:@nozawaEvolutionaryDynamicsOlfactory2007], after which the duplicate may specialize in the detection of a particular chemical compound.
 *** Subfunctionalisation
 Two duplicate genes with the same original function may encounter a subfunctionalisation during which each gene conserves only one part of the function.
 *** Functional redundancy
 Two copies may keep the ancestral function: in this case the organism may increase the quantity of gene product.
-** Methods to identify duplicate genes
+# *** Neofunctionalization
-[[latex:textsc][Lallemand]] et al. review the different methods used to detect duplicate genes. These methods depend on the type of duplicate genes they target, and vary on computation burden [cite:@lallemandOverviewDuplicatedGene2020].
+After duplication, the new gene copy may gain a new function. We call this possible outcome gls:neofunctionalization.
 For instance, the current set of olfactory receptor genes results from several duplication and deletion events (for /Drosophila/, see: [cite/t:@nozawaEvolutionaryDynamicsOlfactory2007]), after which each duplicate olfactory gene specialized in the detection of a particular chemical compound.
-*** Paralog detection
+# *** Subfunctionalization
-Paralogs are homologous genes derived from a duplication event. They can be identified as homologous genes located in the same genome, or as homologous genes between different species once we filtered out orthologous genes (homologous genes derived from a speciation event).
+Two duplicate genes with the same original function may encounter a gls:subfunctionalization: each gene conserves only one part of the function.
-Two gene characteristics can be used to assess to assess homology between two genes: gene structure of sequence similarity.
+# *** Functional redundancy
-The sequence similarity can be tested with a sequence alignment tool, such as =BLAST= [cite:@altschulBasicLocalAlignment1990], =Psi-BLAST=, and =HMMER3= [cite:@johnsonHiddenMarkovModel2010], or =diamond= [cite:@buchfinkSensitiveProteinAlignments2021], which are heuristic algorithm, which means they may not provide the best results, but do so way faster than exact algorithms, such as the classical Smith and Waterman algorithm [cite:@smithIdentificationCommonMolecular1981] or its optimized versions =PARALIGN= or =SWIMM=.
+Another possibility is that the two gene copies keep the ancestral function, resulting in a functional redundancy. In this case the quantity of gene product may increase.
 *** FTAG Finder
 Developed in the LaMME laboratory, the gls:FTAGFinder pipeline targets the detection of gene Families and Tandemly Arrayed Genes from a given species' proteome [cite:@bouillonFTAGFinderOutil2016].
 The pipeline proceeds in three steps. First, it estimates the homology links between each pair of genes; then, it deduces the gene families; and finally, it detects gls:TAG.
 **** Estimation of homology links between genes
 This step consists in establishing a relation between each pair of genes in the proteome.
 In this step, the typical tool involved is =BLAST= (Basic Local Alignment Search Tool) [cite:@altschulBasicLocalAlignment1990] run "all against all" on the proteome.
 Several =BLAST= metrics can be used as homology measures, such as bitscore, identity percentage, E-value or variations of these. The choice of metrics can affect the results of graph clustering in the following step, and should therefore be chosen carefully [cite:@gibbonsEvaluationBLASTbasedEdgeweighting2015].
 **** Identification of gene families
 Based on the homology links between each pair of genes, we construct an undirected weighted graph whose vertices correspond to genes and edges to homology links between them.
 We apply a graph clustering algorithm on the graph in order to infer the gene families.
 FTAG Finder proposes three clustering algorithm alternatives: single linkage, Markov Clustering [cite:@vandongenNewClusterAlgorithm1998] or Walktrap [cite:@ponsComputingCommunitiesLarge2005].
 **** Detection of TAGs
 The final step of FTAG Finder consists in the determination of gls:TAG from the gene families and the chromosome sequence.
 For a given chromosome, the tool seeks genes belonging to the same family and located close to each other. The tool allows a maximal number of genes between the homologous genes, with a parameter set by the user.
 * Objectives for the internship
 ** Scientific questions
 The underlying question of FTAG Finder is the study of the evolutionary fate of duplicate genes in Eukaryotes.
 Duplicate genes are
 ** Extend the existing FTAG Finder Galaxy pipeline
-Galaxy is a web-based platform for running accessible data analysis pipelines, first designed for use in genomic data analysis [cite:@goecksGalaxyComprehensiveApproach2010].
+Galaxy is a web-based platform for running accessible data analysis pipelines, first designed for use in genomics data analysis [cite:@goecksGalaxyComprehensiveApproach2010].
 Last year, Séanna [[latex:textsc][Charles]] worked on the Galaxy version of the FTAG Finder pipeline during her M1 internship  [cite:@charlesFinalisationPipelineFTAG2023]. I will continue this work.
 FTAG Finder is currently deployed on the server of the /Laboratoire de Mathématiques et Modélisation d'Évry/[fn:: [[http://stat.genopole.cnrs.fr/galaxy]] ].
 ** Port FTAG Finder pipeline on a workflow manager
 Another objective of my internship will be to port FTAG Finder on a workflow manager better suited to larger and more reproducible analysis.
 We will have to make a choice for the tool we will use.
-The two main options are Snakemake and Nextflow. Snakemake is a python powered workflow manager based on rules /à la/ GNU Make [cite:@kosterSnakemakeScalableBioinformatics2012]. Nextflow, is a groovy powered workflow manager, which rely on data flows [cite:@ditommasoNextflowEnablesReproducible2017]. Both are widely used in the bioinformatics community, and their use have been on the rise since they came out in 2012 and 2013 respectively [cite:@djaffardjyDevelopingReusingBioinformatics2023].
+The two main options are Snakemake and Nextflow. Snakemake is a Python-powered workflow manager based on rules /à la/ GNU Make [cite:@kosterSnakemakeScalableBioinformatics2012]. Nextflow is a Groovy-powered workflow manager, which relies on the dataflow paradigm [cite:@ditommasoNextflowEnablesReproducible2017]. Both are widely used in the bioinformatics community. Their use has been on the rise since they came out in 2012 and 2013 respectively [cite:@djaffardjyDevelopingReusingBioinformatics2023].
 # #+begin_export latex
 # \fladdtab{
 #        \begin{tabular}{ccc}
 #        \toprule
 #        & List ref & List $L$ \\
 #        \midrule
 #        related to $go$ & $a$ & $b$ \\
 #        unrelated & $c$ & $d$ \\
 #        \bottomrule
 #        \end{tabular}
 #        \caption{\label{tab:fisher-test-contigency-table}Contingency table for a Fisher exact test on gene lists}
 # }
 # #+end_export
 * Methodological approaches
 ** Duplicate gene detection method used in FTAG Finder
 #+begin_src emacs-lisp :exports results :results raw
 (setq fig:tag-definitions "#+label: fig:tag-definitions
 #+caption[Schematic representation of TAG definitions]: Schematic representation of TAG definitions. Several genes are represented on a linear chromosome. The red box represents a singleton gene. Orange boxes represent a TAG with two duplicate genes separated by 7 other genes ($\\mathrm{TAG}_7$). Four green boxes constitute a TAG; the genes at the extremities are separated by three genes ($\\mathrm{TAG}_3$). The two blue boxes represent a TAG with two genes next to each other ($\\mathrm{TAG}_0$). The curved edges represent the homology links between each pair of genes within a TAG.
 [[./figures/tag-definition.svg]]")
 (if (eq org-export-current-backend 'html)
    fig:tag-definitions
  ""
  )
 #+end_src
 #+begin_export latex
-%\flstop
+\fladdfig{
 	\includegraphics[width=.9\linewidth]{./figures/tag-definition.pdf}
 	\caption[Schematic representation of TAG definitions]{\label{fig:tag-definitions} Schematic representation of TAG definitions. Several genes are represented on a linear chromosome. The red box represents a singleton gene. Orange boxes represent a TAG with two duplicate genes separated by 7 other genes ($\mathrm{TAG}_7$). Four green boxes constitute a TAG; the genes at the extremities are separated by three genes ($\mathrm{TAG}_3$). The two blue boxes represent a TAG with two genes next to each other ($\mathrm{TAG}_0$). The curved edges represent the homology links between each pair of genes within a TAG.}}
 #+end_export
-#+begin_export html
+Different methods exist to detect duplicate genes. These methods depend on the type of duplicate genes they target and vary in computational burden as well as in ease of use (for a review, see [cite/t:@lallemandOverviewDuplicatedGene2020]).
-<h3>Bibliography</h3>
+
 *** Paralog detection
 Paralogs are homologous genes derived from a duplication event. We can identify them as homologous genes coming from the same genome, or as homologous genes between different species once we filtered out gls:orthologues (homologous genes derived from a speciation event).
 We can use two gene characteristics to assess the homology between two genes: gene structure or sequence similarity.
 The sequence similarity can be tested with a sequence alignment tool, such as =BLAST= [cite:@altschulBasicLocalAlignment1990], =Psi-BLAST=, and =HMMER3= [cite:@johnsonHiddenMarkovModel2010], or =diamond= [cite:@buchfinkSensitiveProteinAlignments2021]. These tools are heuristic algorithms, which means they may not provide the best results, but do so way faster than exact algorithms, such as the classical Smith and Waterman algorithm [cite:@smithIdentificationCommonMolecular1981] or its optimized versions =PARALIGN= [cite:@rognesParAlignParallelSequence2001] or =SWIMM=.
 *** FTAG Finder
 Developed in the LaMME laboratory, the FTAG Finder (Families and Tandemly Arrayed Genes Finder) pipeline is a simple pipeline targeting the detection of gls:TAG based on the sequence of the proteome of a single species [cite:@bouillonFTAGFinderOutil2016].
 The pipeline proceeds in three steps. First, it estimates the homology links between each pair of genes. Then, it deduces the gene families. Finally, it searches for gls:TAG, relying on the position of genes belonging to the same family.
 **** Estimation of homology links between genes
 This step consists in establishing a homology relationship between each pair of genes in the proteome.
 In this step, FTAG Finder uses =BLAST= (Basic Local Alignment Search Tool) [cite:@altschulBasicLocalAlignment1990] with an "all against all" search on the proteome.
 Several =BLAST= metrics can be used as a homology measure, such as bitscore, identity percentage, E-value or a variation on these. The choice of metrics can affect the results of graph clustering in the following step, and we should therefore choose them carefully [cite:@gibbonsEvaluationBLASTbasedEdgeweighting2015].
 **** Identification of gene families
 Based on the homology links between each pair of genes, we construct an undirected weighted graph whose vertices correspond to genes and whose edges correspond to homology links between them.
 We apply a graph clustering algorithm on the homology gene graph in order to infer the gene families corresponding to densely connected communities of vertices.
 FTAG Finder proposes three graph clustering algorithm alternatives: single linkage, Markov Clustering [cite:@vandongenNewClusterAlgorithm1998] or Walktrap [cite:@ponsComputingCommunitiesLarge2005].
 **** Detection of TAG
 The final step of FTAG Finder consists in the identification of gls:TAG from the gene families and the positions of genes.
 For a given chromosome, the tool seeks genes belonging to the same family and located close to each other. The tool allows a maximal number of genes between the homologous genes, with a parameter set by the user. Cref:fig:tag-definitions is a schematic representation of some possible gls:TAG positioning on a genome associated with their definition in this FTAG Finder step.
 ** Analyses performed on TAG
 FTAG Finder output consists mostly of lists of genes, corresponding to TAG of various definitions. These lists can subsequently be used as the basis of more specific statistical analyses.
 *** Are there over-represented gene functions among TAG?
 The gls:GO describes biological concepts across three main classes: Cellular Component, Molecular Function and Biological Process. It describes a controlled vocabulary of concepts and the relationships between them. Genes with a functional annotation can be linked to particular GO terms. We can then perform a GO enrichment analysis to assess whether a particular GO term is over-represented in a particular gene list, compared to another. To do so, we can use a Fisher exact test, using the FDR (False Discovery Rate) control procedure of [[latex:textsc][Benjamini]] and [[latex:textsc][Hochberg]].
 # Let $go$ be a GO term. We construct a contingency matrix based on the count of genes associated with this GO term (or associated with one of its brother GO term) for the reference gene list and the list of interest (here, the list of genes in a TAG) (see cref:tab:fisher-test-contigency-table).
 *** Are TAG located preferentially on specific chromosome region?
 *** Are there chromosomes enriched or depleted in TAG?
 *** Do genes located next to each other in a TAG share the same orientation?
 The relative orientation of two genes of a TAG falls into one of three possible cases: either both genes are on the same strand (\(\rightarrow \rightarrow\)), or they have a divergent orientation (\(\leftarrow \rightarrow\)), or a convergent one (\(\rightarrow \leftarrow\)). Graham conjectured that genes of a TAG that are close to each other would be more likely to share the same orientation, and this indeed seems to be the case [cite:@shojaRoadmapTandemlyArrayed2006].
 # To test this, we can use a $\Chi^2$ test of goodness of fit or a Student $t$-test.
 *** What is the robustness and accuracy of the detection method?
 [cite/t:@le-hoangEtudeTranscriptomiqueGenes2017] started analyzing the impact of parameter choice on FTAG Finder results. A more detailed benchmark of FTAG Finder in comparison with other methods on some controlled test dataset might be of particular interest.
 This would pose the challenge of homogenization of the outputs of the different methods.
 #+begin_export latex
 \flstop
 #+end_export
 * References
 :PROPERTIES:
 :UNNUMBERED: t
 :END:
 #+print_bibliography:
 #+begin_export latex
 \cleartoleftpage
 \clearpairofpagestyles
 #+end_export
-** Summary
+
 * Summary
 :PROPERTIES:
 :UNNUMBERED: t
 :END:
 Duplicate genes are an important feature of Eukaryotic genomes. They contribute to the plasticity of the genome, hence to the capacity of species to evolve.
 Several mechanisms may lead to gene duplication. Among them, an unequal crossing-over leads to the formation of Tandemly Arrayed Genes (TAG) corresponding to homologous genes located one set after the other on the same chromosome.
 There are multiple methods for detecting duplicate genes from sequences. These methods vary in terms of the particular gene duplication mechanism they target, computational efficiency and ease of use.
 FTAG Finder is a simple Galaxy pipeline aiming at the detection of families of duplicate genes and the identification of TAG based on the proteome of a single species. FTAG Finder is developed in the /Laboratoire de Mathématiques et Modélisation d'Évry/, where I will do my internship.
 On the one hand, the aim of my internship is to extend the current Galaxy implementation of FTAG Finder with new export lists best suited to the analysis requirements of the laboratory. On the other hand, the objective of my internship will be to port the Galaxy pipeline on another scientific workflow manager better suited to reproducible analyses such as Snakemake and Nextflow.
 Then, the updated version of the FTAG Finder pipeline will be used to perform an analysis on the TAG of a model species, to assess its proper behavior. A benchmark of the pipeline will probably be run to compare FTAG Finder with alternative published methods targeting duplicate genes and TAG in particular.
 * Bean :noexport:
 ** MCL
 MCL uses two operations on a stochastic matrix representation $M$ of the graph first derived from the adjacency matrix, namely /expansion/ and /inflation/. Expansion consists in elevating the matrix to a power $r$, and subsequently scaling its columns so that they sum to 1 again. The image of the inflation operator $\Gamma_r$ is defined as
@ -164,10 +299,25 @@ MCL uses two operations on a stochastic matrix representation $M$ of the graph f
 \]
 where $m$ is number of rows in the matrix, and $M_{pq}$ is the value in the $p, q$ cell of the matrix $M$.
-This operator strengthens the edges with higher weights and tend to anihilate edges with lower flow.
+This operator strengthens the edges with higher weights and tends to annihilate edges with lower flow.
 The iterative application of both operators eventually ends up in a partition of the initial graph into clusters of closely connected nodes (corresponding, in our case, to gene families).
 ** Walktrap
 Principle: construct vertex communities based on where an agent would get stuck in a random walk.
-#  LocalWords:  speciation
+* Setup :noexport:
 #+name: startup
 #+begin_src emacs-lisp
 (org-babel-load-file "./setup.org")
 #+end_src
 #+RESULTS: startup
 : Loaded ./setup.el
 #  LocalWords:  speciation subfunctionalization neofunctionalization
 #  LocalWords:  pseudogenization bioinformatics
 # #  Local Variables:
 # #  eval: (progn (org-babel-goto-named-src-block "startup") (org-babel-execute-src-block) (outline-hide-sublevels 1))
 # #  End:
--- a/report.pdf
+++ b/report.pdf
--- a/setup.org
+++ b/setup.org
@ -0,0 +1,95 @@
 #+title: Standalone Emacs Config file for my report
 * I can set a variable at startup!
 Let's say I want this emacs-lisp code to be run at startup when I open my org file (say, for config purpose)
 #+begin_src emacs-lisp :tangle yes
 (setq lamme2024 "org powered")
 #+end_src
 #+RESULTS:
 : org powered
 To do so, add the following somewhere in your =report.org= file:
 #+begin_src org
 #+name: startup
 #+begin_src emacs-lisp
 (org-babel-load-file "./setup.org")
 #+end_src
 #+end_src
 And add the following at the end of your =report.org= file:
 #+begin_src org
 #  Local Variables:
 #  eval: (progn (org-babel-goto-named-src-block "startup") (org-babel-execute-src-block) (outline-hide-sublevels 1))
 #  End:
 #+end_src
 The next time you open your file, you will be asked twice whether org-babel should run the =startup= cell.
 See: [[https://emacs.stackexchange.com/a/41456/41374]]
 * Configure LaTeX template
 #+begin_src emacs-lisp :tangle yes
 (add-to-list 'org-latex-classes
          '("lamme2024"
             "\\documentclass{scrreprt}
             [NO-DEFAULT-PACKAGES]
             [EXTRA]
                \\makeindex
                \\makeglossaries          "
             ;("\\part{%s}" . "\\part*{%s}")
             ("\\chapter{%s}" . "\\chapter*{%s}")
             ("\\section{%s}" . "\\section*{%s}")
             ("\\subsection{%s}" . "\\subsection*{%s}")
             ("\\subsubsection{%s}" . "\\subsubsection*{%s}")
             ("\\paragraph{%s}" . "\\paragraph*{%s}")
             ("\\subparagraph{%s}" . "\\subparagraph*{%s}")))
 #+end_src
 #+RESULTS:
 | lamme2024 | \documentclass{scrreprt} |
 * Add latex: link for custom latex macro text wrapping
 #+begin_src emacs-lisp :tangle yes
 ;; Register a "latex:" link type, e.g. [[latex:textsc][Charles]].
 ;; The link path is used as the LaTeX macro name (LaTeX export) or as the
 ;; CSS class of a <span> (HTML export) wrapped around the link description.
 ;; Other export backends get nil from the handler.
 ;;
 ;; `org-link-set-parameters' replaces `org-add-link-type', which has been
 ;; obsolete since Org 9.0.  The link type is registered once only: the
 ;; previous second, identical registration was redundant.
 (org-link-set-parameters
  "latex"
  :follow nil
  :export
  ;; Org >= 9.4 calls export handlers with a fourth INFO argument; accept it
  ;; optionally so that both calling conventions work.
  (lambda (path desc format &optional _info)
    (cond
     ((eq format 'html)
      (format "<span class=\"%s\">%s</span>" path desc))
     ((eq format 'latex)
      (format "\\%s{%s}" path desc)))))
 #+end_src
 * Extend org reference system
 #+begin_src emacs-lisp :tangle yes
 (require 'org-ref)
 (require 'org-ref-refproc)
 (add-hook 'org-export-before-parsing-hook #'org-ref-acronyms-before-parsing)
 (add-hook 'org-export-before-parsing-hook #'org-ref-glossary-before-parsing)
 #+end_src
 #+RESULTS:
 | org-ref-glossary-before-parsing | org-ref-acronyms-before-parsing | org-attach-expand-links |
--- a/sty/cleanthesis-footer.sty
+++ b/sty/cleanthesis-footer.sty
@ -0,0 +1,97 @@
 \RequirePackage[manualmark]{scrlayer-scrpage}
 \iffalse
 \renewcommand*\chaptermark[1]{%
    \markboth{\Ifnumbered{chapter}{\chaptermarkformat}{}}{#1}% <- outdated macro replaced
 }
 \AfterTOCHead[toc]{\markboth{}{\contentsname}}
 \fi
 \clearpairofpagestyles
 \clubpenalty = 10000
 \widowpenalty = 10000
 \automark[section]{part}
 \setlength{\footheight}{120pt}      % avoids scrlayer-scrpage warning:
 % footheight to low warning
 \setlength{\footskip}{185pt}        % BAD HACK that moves the foot downwards
 \KOMAoption{footwidth}{foot:53pt}   % BAD HACK that moves the foot towards
 \setkomafont{pagefoot}{\normalfont\footnotesize}
 \setkomafont{pagenumber}{\normalfont \fontfamily{\sfdefault}\selectfont \normalsize  \bfseries\color{black}}
 \renewcommand{\partmark}[1]{%
    \markboth{%
        % use \@chapapp instead of \chaptername to avoid
        % 'Chapter A Appendix ...', thanks to @farbverlust (issue #47)
        \fontfamily{\sfdefault}\selectfont
        {\color{fgBlue}\textbf{\partname\ \thepart}}%
        \quad%
        \protect\begin{minipage}[t]{.65\textwidth}%
            #1%
            \protect\end{minipage}%
    }{}%
 }
 \newlength{\lensectionnumber}
 \renewcommand{\sectionmark}[1]{%
    \markright{%
        \normalsize\fontfamily{\sfdefault}\selectfont\bfseries
        \setlength{\lensectionnumber}{0em}
        \settowidth{\lensectionnumber}{\textbf{\thesection}\quad}
        \protect\begin{minipage}[t]{.72\textwidth}%
            {\ }% bad hack to prevent a wrong baseline for the minipage
            \protect\raggedleft%
            \hangindent=\lensectionnumber%
            {\color{black}\textbf{\fontfamily{\sfdefault}\selectfont\thesection}}%
            \quad%
            #1%
            \protect\end{minipage}%
    }%
 }
 \newcommand{\ctfooterline}{%
    \color{black}\rule[-90pt]{1.25pt}{100pt}%
 }
 % Page number for odd (right) pages
 \newcommand{\ctfooterrightpagenumber}{%
    \ctfooterline%
    \hspace*{10pt}%
    \begin{minipage}[b]{1.5cm}%
        \pagemark\ %
    \end{minipage}%
 }
 %% Page number for even (left) pages
 \newcommand{\ctfooterleftpagenumber}{%
    \begin{minipage}[b]{1.5cm}%
        \raggedleft\pagemark%
    \end{minipage}%
    \hspace*{10pt}%
    \ctfooterline%
 }
 %% Defines the content for header and footer
 \lehead{}
 \cehead{}
 \rehead{}
 \lohead{}
 \cohead{}
 \rohead{}
 \lefoot[%   > plain
    \ctfooterleftpagenumber%
 ]{%         > srcheadings
    \ctfooterleftpagenumber%
    \hspace*{0.75cm}%
    %\headmark%
 }
 \cefoot{}
 \refoot{}
 \lofoot{}
 \cofoot{}
 \rofoot[%   > plain
    \ctfooterrightpagenumber%
 ]{%         > srcheadings
    %\headmark%
    \hspace*{0.75cm}%
    \ctfooterrightpagenumber%
 }
--- a/sty/floatlefttextright.sty
+++ b/sty/floatlefttextright.sty
@ -15,16 +15,13 @@
 \newcommand{\flstop}{
 	\boolfalse{flcontinue}
 }
-\newcommand\@fladdfig{\relax}
+\newcommand\@fladdfloat{\relax}
-\newcommand\fladdfig[1]{\global\long\def\@fladdfig{#1}}
+\newcommand\fladdfloat[1]{\global\long\def\@fladdfloat{#1}}
-\newcommand\@flputfig{\@fladdfig\fladdfig{\relax}}
+\newcommand\@flputfloat{\@fladdfloat\fladdfloat{\relax}}
 \newcommand\flblankpage{%
 	\null
 	\vfill
-	\begin{figure}[H]
+	\@flputfloat%
 		\centering
 		\@flputfig%
 	\end{figure}
 	\vfill
 	%\thispagestyle{empty}%
 	\clearpage%
@ -34,19 +31,39 @@
 	}
 }
 \newcommand{\fladdfig}[1]{
  \fladdfloat{
 	\begin{figure}[H]
 		\centering
 		#1
 	\end{figure}
  }
 }
 \newcommand{\fladdtab}[1]{
  \fladdfloat{
 	\begin{table}[H]
 	\centering
 	#1
 	\end{table}
  }
 }
 \iffalse
 	% Example
-	% Add this at the beggining of the document (typically after the titlepage)
+	% Add this at the beginning of the document (typically after the titlepage)
 	\flstart
 	% Then for each new figure you want to add, add the following
-	\fladdfig{%
+	\fladdfloat{%
-		\includegraphics[scale=1]{Image1}
+		\begin{figure}[H]
-		\caption{Test}
+			\includegraphics[scale=1]{Image1}
-		\label{Ima1}
+			\caption{Test}
 			\label{Ima1}
 		\end{figure}
 	}
 	% At the end of the document, to avoid an unwanted, never-ending loop of empty page creation, add the following
-	flstop
+	\flstop
 \fi
--- a/sty/lamme2024.sty
+++ b/sty/lamme2024.sty
@ -1,4 +1,4 @@
-
+\RequirePackage{lettrine}
 % Font
 \usepackage{fontspec}
@ -12,9 +12,27 @@
 \RequirePackage{float}
 \RequirePackage{caption}
 \RequirePackage{subcaption}
 \RequirePackage{xcolor}
 \RequirePackage{csquotes}
 \RequirePackage[english]{babel}
 %% Caption
 \captionsetup{
    labelfont={color=black, bf},
    figurename=Figure,
    tablename=Table,
    font={sf},
    labelsep=period,
    justification=raggedright,
    singlelinecheck=false
 }
 % Colors
 \RequirePackage{xcolor}
 \definecolor{lamme2024-blue}{HTML}{008682}
 \colorlet{primary}{lamme2024-blue}
 \colorlet{primaryLink}{primary!60!blue}
 \colorlet{primaryCite}{red!70!primary}
 % Math
 \RequirePackage{amsmath}
 \RequirePackage{amsthm}
@ -27,30 +45,17 @@
 % References
 \usepackage[
 	maxcitenames=2,
 	maxbibnames=99, % show all authors in the cited part
 	style=authoryear-comp,
 	backend=biber,
 	citestyle=authoryear-comp,
 	backend=biber,
 	natbib=true
-]{biblatex}
+    ]{biblatex}
 \renewcommand\bibname{References}
 \RequirePackage{doi}
 \RequirePackage{xurl}
 % \AtEveryBibitem{\clearfield{number}}
 \DeclareSortingNamekeyScheme{
 	\keypart{
 		\namepart{given}
 	}
 	\keypart{
 		\namepart{prefix}
 	}
 	\keypart{
 		\namepart{family}
 	}
 	\keypart{
 		\namepart{suffix}
 	}
 }
 \RequirePackage{orcidlink}
 \RequirePackage[
@ -60,30 +65,65 @@
 ]{doclicense}
 \RequirePackage[
-	nameinlink,
+	%nameinlink,
 	noabbrev
-]{cleveref}
+	]{cleveref}
 \usepackage[
 	abbreviations,         % create "abbreviations" glossary
-	nomain,                % don't create "main" glossary
+	%nomain,                % don't create "main" glossary
 	stylemods=longbooktabs, % do the adjustments for the longbooktabs styles,
 	automake
 ]{glossaries-extra}
 \setabbreviationstyle[acronym]{long-short}
 \usepackage{hyperref}
 \hypersetup{
  colorlinks=true,
  urlcolor=primaryLink,
  linkcolor=primaryLink,
  anchorcolor=primaryLink,
  citecolor=primaryCite,
  %linktoc=page
 }
 \newcommand*{\glsplainhyperlink}[2]{%
    \begingroup%
      \hypersetup{hidelinks}%
      \hyperlink{#1}{#2}%
    \endgroup%
  }
 \let\oldgls=\gls
 \renewcommand{\gls}[1]{{\hypersetup{hidelinks}%
 	\oldgls{#1}}}%
 \let\oldGls=\Gls
 \renewcommand{\Gls}[1]{{\hypersetup{hidelinks}%
    \oldGls{#1}}}%
 \let\oldglspl=\glspl
 \renewcommand{\glspl}[1]{{\hypersetup{hidelinks}%
    \oldglspl{#1}}}%}
 \let\oldGlspl=\Glspl
 \renewcommand{\Glspl}[1]{{\hypersetup{hidelinks}%
    \oldGlspl{#1}}}%}
 %\renewcommand*{\glstextformat}[1]{\begingroup\hypersetup{hidelinks}#1\endgroup}
 %\renewcommand*{\glsdohyperlink}[2]{%
 % {\hypersetup{hidelinks=true}\hyperlink{#1}{#2}}}
 \renewcommand*{\glsentryfmt}{%
  \hypersetup{hidelinks}\glshyperlink[\glsgenentryfmt]{\glslabel}%
 }
 % Force text on right side, float on left side (does not work well)
 \usepackage{sty/floatlefttextright}
 \renewcommand\maketitle{\include{titlepage}}
 \hypersetup{
 	hidelinks
 }
 \renewcommand*{\mkbibnamefamily}[1]{\textsc{#1}}
 \renewcommand*{\mkbibnameprefix}[1]{\textsc{#1}}
@ -98,3 +138,6 @@
  }
 \usepackage{scrhack}
 \usepackage{sty/cleanthesis-footer}
 \usepackage{sty/scr-legrand-heading}
--- a/sty/scr-legrand-heading.sty
+++ b/sty/scr-legrand-heading.sty
@ -0,0 +1,7 @@
 \colorlet{headingcolor}{black}
 \renewcommand*{\sectionformat}{\llap{\textcolor{headingcolor}{\thesection}\hspace{1em}}}
 \renewcommand*{\chapterformat}{\llap{\textcolor{headingcolor}{\thechapter}\hspace{1em}}}
 \renewcommand*{\subsectionformat}{\llap{\textcolor{headingcolor}{\thesubsection}\hspace{1em}}}
--- a/summary.tex
+++ b/summary.tex
--- a/titlepage.tex
+++ b/titlepage.tex
@ -7,14 +7,10 @@
 	\begin{center}
 		\Huge Scientific Project
-		\Large Master GENIOMHE
+		\Large Master 1 GENIOMHE
 		\Large 2023--2024
 		\vspace{2cm}
 		\Large Samuel \textsc{Ortion} \orcidlink{0009-0001-0971-497X}
 		\vfill
 		\LARGE
@ -22,6 +18,13 @@
 		\@title
 		\makeatother
 		\vspace{2em}
 		\Large Samuel \textsc{Ortion} \orcidlink{0009-0001-0971-497X}
 		\vfill
 		\vfill
 		\normalsize
 		\begin{minipage}{12.5em}
Author	SHA1	Message	Date
Samuel Ortion	4043a8a72b	Remove duplicate gls entry	2024-04-19 22:09:59 +02:00
Samuel Ortion	6c0fdbf626	Add tag definition figure in SVG format	2024-04-19 21:46:30 +02:00
Samuel Ortion	a4bf5bd5ce	feat: Add duplicate gene fate in HTML export	2024-04-19 21:25:39 +02:00
Samuel Ortion	30807f2bd0	Add new references	2024-04-19 18:02:40 +02:00
Samuel Ortion	0860998ac0	Add summary	2024-04-19 17:56:27 +02:00
Samuel Ortion	447b03797c	feat: Support tables in floatlefttextright	2024-04-19 05:15:33 +02:00
Samuel Ortion	1d3803d6ea	Add a figure from Wikimedia that depicts duplicate genes fate	2024-04-18 18:22:59 +02:00
Samuel Ortion	b441d29ba8	feat: Add llap for heading number	2024-04-18 16:33:38 +02:00
Samuel Ortion	03f3efd668	Update	2024-04-18 13:17:51 +02:00
Samuel Ortion	366c732998	fix: unnumbered chapter is chapter*	2024-04-18 13:17:04 +02:00
Samuel Ortion	2110e31754	rephrase and switch back to link all toc entries, but uncolored	2024-04-18 12:53:14 +02:00
Samuel Ortion	921b5821a2	Link in toc, only on page numbers	2024-04-18 12:49:53 +02:00
Samuel Ortion	3264b79469	Update build utils	2024-04-18 12:48:49 +02:00
Samuel Ortion	25cf96e485	Change color of figures to match color palette	2024-04-18 12:42:10 +02:00
Samuel Ortion	5880678767	Change eye hurting green for LaMME blue	2024-04-18 12:09:58 +02:00
Samuel Ortion	3f7dcc62b2	feat: Update titlepage	2024-04-18 12:01:51 +02:00
Samuel Ortion	d96cb1542d	clear page style on last coverpage (summary)	2024-04-17 10:57:48 +02:00
Samuel Ortion	3d262aca30	rm: summary.tex	2024-04-17 10:50:43 +02:00
Samuel Ortion	e28102591d	Use cleanthesis pretty page numbering	2024-04-17 10:50:29 +02:00
Samuel Ortion	1bcb211e40	feat: Add some colors and title formatting - Hide links in glossary entries	2024-04-17 08:07:39 +02:00
Samuel Ortion	69b607afc6	feat: Add auto-loading bespoke org config fix: Typo review from v0.2	2024-04-16 18:44:26 +02:00
Samuel Ortion	bb191d4cf7	feat: Add a figure from Charles 2023 that represents the TAG definitions	2024-04-16 15:15:28 +02:00
Samuel Ortion	d1e898af47	fix: Some more typo	2024-04-16 13:28:30 +02:00
Samuel Ortion	b7e3d67ed9	Fix typos, amend glossary entries, improve phrasing	2024-04-16 10:44:29 +02:00