From 18203b1e4998940c569f3f90116ed7a19e607f73 Mon Sep 17 00:00:00 2001
From: Samuel Ortion
Date: Tue, 19 Mar 2024 13:11:18 +0100
Subject: [PATCH] Longest common subsequence

Try to use includeonly to limit compilation time
---
 .latexmkrc                   |  41 +++++++
 Makefile                     |   6 -
 content/chapters/include.tex |   5 +-
 content/chapters/part1/2.tex | 224 +++++++++++++++++------------------
 content/chapters/part1/3.tex |   2 +-
 content/chapters/part1/4.tex | 113 ++++++++++++++++++
 content/chapters/part2/0.tex |  24 ++++
 figures/part2/*              | 176 +++++++++++++++++++++++++++
 figures/part2/lcsq.lua       | 181 ++++++++++++++++++++++++++++
 figures/part2/lcsq.pdf       |   3 +
 figures/part2/lcsq.tex       |  18 +++
 folder-structure.sh          |   8 ++
 main.pdf                     |   4 +-
 main.tex                     |  15 ++-
 tmp.pdf                      |   4 +-
 tmp.tex                      |  50 +++++++-
 16 files changed, 739 insertions(+), 135 deletions(-)
 create mode 100644 .latexmkrc
 create mode 100644 content/chapters/part1/4.tex
 create mode 100644 content/chapters/part2/0.tex
 create mode 100644 figures/part2/*
 create mode 100644 figures/part2/lcsq.lua
 create mode 100644 figures/part2/lcsq.pdf
 create mode 100644 figures/part2/lcsq.tex
 create mode 100644 folder-structure.sh

diff --git a/.latexmkrc b/.latexmkrc
new file mode 100644
index 0000000..9263c76
--- /dev/null
+++ b/.latexmkrc
@@ -0,0 +1,41 @@
+sub createFolderStructure {    # run the helper script that this patch adds as folder-structure.sh
+    system( "bash", "./folder-structure.sh" ) == 0 or warn "folder-structure.sh failed (status $?)\n";
+}
+
+createFolderStructure();
+
+$hash_calc_ignore_pattern{aux} =
+'^\\\\gdef\\\\minted@oldcachelist\{,'
+  . '|^\s*default\.pygstyle,'
+  . '|^\s*[[:xdigit:]]+\.pygtex';
+
+$pdflatex =
+'lualatex -shell-escape -file-line-error -interaction=nonstopmode -synctex=1 -output-directory=build %O '
+  . '\'\PassOptionsToPackage{outputdir=build}{minted}\input{%S}\'';
+$aux_dir = 'build';
+$bibtex_use = 2;
+
+# Amend cleaned extensions
+$clean_ext .= " fdb_latexmk run.xml synctex.gz";
+
+# Make latexmk quiet
+$latexmk_silent = 1;    # NOTE(review): makeglossaries below tests $silent, not this variable - confirm which one latexmk reads
+
+# Makeglossaries
+add_cus_dep( 'acn', 'acr', 0, 'makeglossaries' );
+add_cus_dep( 'glo', 'gls', 0, 'makeglossaries' );
+$clean_ext .= " acr acn alg glo gls glg";
+
+# Custom-dependency handler: run makeglossaries in the directory of the file passed as $_[0].
+sub makeglossaries {
+    my ( $base_name, $path ) = fileparse( $_[0] );
+    my @args = ( "-d", $path, $base_name );
+    if ($silent) { unshift @args, "-q"; }    # quiet only when $silent is set
+    return system "makeglossaries", @args;   # pass the assembled list, so the -q above takes effect
+}
+# Run biber on $_[0] with its output directory set to the file's own directory. NOTE(review): no add_cus_dep in this file registers it.
+sub biber {
+    my ( $base_name, $path ) = fileparse( $_[0] );
+    my @args = ( "--output-directory", $path, $base_name );
+    return system "biber", @args;
+}
+
diff --git a/Makefile b/Makefile
index 2b85e32..e69de29 100755
--- a/Makefile
+++ b/Makefile
@@ -1,6 +0,0 @@
-options=-shell-escape -interaction=nonstopmode -file-line-error
-
-all: main.pdf
-
-%.pdf: %.tex
-	lualatex $(options) $<
\ No newline at end of file
diff --git a/content/chapters/include.tex b/content/chapters/include.tex
index b59250b..d247ea5 100755
--- a/content/chapters/include.tex
+++ b/content/chapters/include.tex
@@ -6,13 +6,12 @@
     \foreach \i in {0, ..., #2} {%
         \edef\FileName{content/chapters/#1/\i}%
         \IfFileExists{\FileName}{%
-            \input{\FileName}%
+            \include{\FileName}%
         }
     }
 }
 
 \includechapters{part1}{3}
-
-% \includechapters{part2}{2}
+\includechapters{part2}{2} % NOTE(review): part1/4.tex is added by this patch but {part1}{3} only loops over 0..3 - confirm whether it should be 4
 
 % \includechapters{part3}{1}
diff --git a/content/chapters/part1/2.tex b/content/chapters/part1/2.tex
index f4f558d..c5ae351 100644
--- a/content/chapters/part1/2.tex
+++ b/content/chapters/part1/2.tex
@@ -28,7 +28,7 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
\begin{table} - \includegraphics{figures/part1/comparison_matrix_repetitions.pdf} + \includegraphics{./figures/part1/comparison_matrix_repetitions.pdf} \caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"} \end{table} @@ -40,13 +40,13 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. \Function{ComparisonMatrix}{$S$: Array($n$)} \State $M \gets $ Array($n$, $n$) \For{($i = 0$; $i < n$; $i++$)} - \For{$j = 0$; $j < n$; $j++$} - \If {$S[i] = S[j]$} - \State $M[i][j] = 1$ - \Else - \State $M[i][j] = 0$ - \EndIf - \EndFor + \For{$j = 0$; $j < n$; $j++$} + \If {$S[i] = S[j]$} + \State $M[i][j] = 1$ + \Else + \State $M[i][j] = 0$ + \EndIf + \EndFor \EndFor \State \Return $M$ \EndFunction @@ -59,13 +59,13 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. \Function{ComparisonMatrix}{$S$: Array($n$)} \State $M \gets$ Array($n$,$n$) \For{($i = 0$; $i < n$; $i++$)} - \For{j=i; j < n; j++} - \If {S[i] = S[j]} - \State M[i][j] = 1 - \Else - \State M[i][j] = 0 - \EndIf - \EndFor + \For{j=i; j < n; j++} + \If {S[i] = S[j]} + \State M[i][j] = 1 + \Else + \State M[i][j] = 0 + \EndIf + \EndFor \EndFor \State \Return M \EndFunction @@ -83,18 +83,18 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. 
\State $pos = \{\}$ \State $visited = \{\}$ \For {($i_{start} = 0$; $i_{start} < n$; $i_{start}++$)} - \For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)} - \If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$} - \State $i = i_{start}$ - \State $j = j_{start}$ - \While {$M[i][j] = 1$} - \State $i++$ - \State $j++$ - \State $visited = visited \cup \{(i, j)\}$ - \EndWhile - \State $pos = pos \cup \{(i_{start}, i), (j_{start},j)\}$ - \EndIf - \EndFor + \For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)} + \If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$} + \State $i = i_{start}$ + \State $j = j_{start}$ + \While {$M[i][j] = 1$} + \State $i++$ + \State $j++$ + \State $visited = visited \cup \{(i, j)\}$ + \EndWhile + \State $pos = pos \cup \{(i_{start}, i), (j_{start},j)\}$ + \EndIf + \EndFor \EndFor \EndFunction \end{algorithmic} @@ -109,22 +109,22 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. \State $M$ = \Call{ComparisonMatrix}{S} \State $pos = \{\}$ \For {($diag = 1$; $diag < n$; $diag++$)} - \State $j = diag$ - \State $i = 0$ - \While {$i < n$ and $j < n$} - \If {$M[i][j] = 1$} - \State $i_{start} = i$ - \State $j_{start} = j$ - \While {$i < n$ and $j < n$ and $M[i][j] = 1$} - \State i++ - \State j++ - \EndWhile - \State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$ - \EndIf - \State $i++$ - \State $j++$ - \State - \EndWhile + \State $j = diag$ + \State $i = 0$ + \While {$i < n$ and $j < n$} + \If {$M[i][j] = 1$} + \State $i_{start} = i$ + \State $j_{start} = j$ + \While {$i < n$ and $j < n$ and $M[i][j] = 1$} + \State i++ + \State j++ + \EndWhile + \State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$ + \EndIf + \State $i++$ + \State $j++$ + \State + \EndWhile \EndFor \EndFunction \end{algorithmic} @@ -138,24 +138,24 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. 
\State $M$ = \Call{ComparisonMatrix}{S} \State $pos = \{\}$ \For {($diag = 1$; $diag < n$; $diag++$)} - \State $j = diag$ - \State $i = 0$ - \State $l = 0$ - \While {$i < n$ and $j < n$} - \If {$M[i][j] = 1$} - \State $l++$ - \Else - \If {$l > 0$} - \State $pos = pos \cup \{(i-l,j-l,l)\}$ - \State $l = 0$ - \EndIf - \EndIf - \State $i++$ - \State $j++$ - \EndWhile - \If {$l > 0$} - \State $pos = pos \cup \{((i-l,j-l,l))\}$ - \EndIf + \State $j = diag$ + \State $i = 0$ + \State $l = 0$ + \While {$i < n$ and $j < n$} + \If {$M[i][j] = 1$} + \State $l++$ + \Else + \If {$l > 0$} + \State $pos = pos \cup \{(i-l,j-l,l)\}$ + \State $l = 0$ + \EndIf + \EndIf + \State $i++$ + \State $j++$ + \EndWhile + \If {$l > 0$} + \State $pos = pos \cup \{((i-l,j-l,l))\}$ + \EndIf \EndFor \State \Return $pos$ \EndFunction @@ -171,18 +171,18 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. \State $M$ = \Call{ComparisonMatrix}{S} \State $pos = \{\}$ \For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$} - \For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$} - \If{$M[i_{start}][j_{start}] = 1$} - \State $i = i_{start}$ - \State $j = j_{start}$ - \While {$M[i][j] = 1$} - \State $M[i][j] = 0$ \Comment{Ensure that the segment is not explored again} - \State $i++$ - \State $j++$ - \EndWhile - \State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$ - \EndIf - \EndFor + \For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$} + \If{$M[i_{start}][j_{start}] = 1$} + \State $i = i_{start}$ + \State $j = j_{start}$ + \While {$M[i][j] = 1$} + \State $M[i][j] = 0$ \Comment{Ensure that the segment is not explored again} + \State $i++$ + \State $j++$ + \EndWhile + \State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$ + \EndIf + \EndFor \EndFor \EndFunction \end{algorithmic} @@ -215,23 +215,23 @@ An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$ \caption{Check wether a word belong to a language for which we have an automaton} 
\begin{algorithmic}[1] \Function{WordInLanguage}{$W$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} - \Returns{A Boolean valued to \True{} if the word is recognized by the language automaton} - \State $s \gets s_{0}$ - \State $i \gets 0$ - \While {$i < n$} - \State $a \gets W[i]$ - \If {$\exists f(s, a)$} - \State $s \gets f(s, a)$ - \Else - \State \Return \False - \EndIf - \State i++ - \EndWhile - \If {$s \in T$} - \State \Return \True - \Else - \State \Return \False - \EndIf + \Returns{A Boolean valued to \True{} if the word is recognized by the language automaton} + \State $s \gets s_{0}$ + \State $i \gets 0$ + \While {$i < n$} + \State $a \gets W[i]$ + \If {$\exists f(s, a)$} + \State $s \gets f(s, a)$ + \Else + \State \Return \False + \EndIf + \State i++ + \EndWhile + \If {$s \in T$} + \State \Return \True + \Else + \State \Return \False + \EndIf \EndFunction \end{algorithmic} \end{algorithm} @@ -248,34 +248,34 @@ The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$. 
\begin{figure} \centering - \includegraphics{figures/part1/minimal_suffix_automaton_exercise.pdf} + \includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf} \caption{Suffix automaton for $S = $ AACTACT} \end{figure} \begin{figure} \centering - \includegraphics{figures/part1/minimal_suffix_automaton_exercise_bis.pdf} + \includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf} \caption{Suffix automaton for $S = $ TCATCATT} \end{figure} - \begin{algorithm} - \caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton} - \begin{algorithmic}[1] - \Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} - \Returns{Boolean valued to \True{} if the motif is in the sequence} - \State $s \gets s_{0}$ - \State $i \gets 0$ - \While {$i < m$ and $\exists f(s, W[i])$} - \State $s \gets f(s, W[i])$ - \State $i++$ - \EndWhile - \If {$i=n$} - \State \Return \True - \Else - \State \Return \False - \EndIf - \EndFunction - \end{algorithmic} - \end{algorithm} - The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$ +\begin{algorithm} + \caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton} + \begin{algorithmic}[1] + \Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} + \Returns{Boolean valued to \True{} if the motif is in the sequence} + \State $s \gets s_{0}$ + \State $i \gets 0$ + \While {$i < m$ and $\exists f(s, W[i])$} + \State $s \gets f(s, W[i])$ + \State $i++$ + \EndWhile + \If {$i=n$} + \State \Return \True + \Else + \State \Return \False + \EndIf + \EndFunction + \end{algorithmic} +\end{algorithm} +The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$ diff --git a/content/chapters/part1/3.tex 
b/content/chapters/part1/3.tex
index 1bd7525..bddbe5a 100644
--- a/content/chapters/part1/3.tex
+++ b/content/chapters/part1/3.tex
@@ -4,7 +4,7 @@ Let $M$ be a motif $M = $ ACAT.
 
 \begin{figure}
     \centering
-    \includegraphics{figures/part1/motif_search_automaton.pdf}
+    \includegraphics{./figures/part1/motif_search_automaton.pdf}
     \caption{Motif search automaton for $M = $ ACAT}
 \end{figure}
 
diff --git a/content/chapters/part1/4.tex b/content/chapters/part1/4.tex
new file mode 100644
index 0000000..77b4fd2
--- /dev/null
+++ b/content/chapters/part1/4.tex
@@ -0,0 +1,113 @@
+\chapter{Longest common subsequence}
+
+Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
+In this case a longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$.
+\begin{algorithm}
+    \caption{Construct a longest common subsequence matrix}
+    \begin{algorithmic}[1]
+        \Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+            \State $M \gets $ Array($n+1$, $m+1$) % rows follow $S_{1}$, columns follow $S_{2}$
+            \For{($i = 0$; $i < n+1$; $i++$)}
+                \For{$j = 0$; $j < m+1$; $j++$}
+                    \If {$i = 0$ or $j = 0$}
+                        \State $M[i][j] = 0$
+                    \Else
+                        \If {$S_{1}[i-1] = S_{2}[j-1]$} % cell $(i, j)$ compares the $i$-th and $j$-th letters of 0-indexed arrays
+                            \State $match = M[i-1][j-1] + 1$
+                        \Else
+                            \State $match = M[i-1][j-1]$
+                        \EndIf
+                        \State $gap_{1} = M[i-1][j]$
+                        \State $gap_{2} = M[i][j-1]$
+                        \State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
+                    \EndIf
+                \EndFor
+            \EndFor
+            \State \Return $M$
+        \EndFunction
+    \end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+    \caption{Construct a longest common subsequence matrix keeping the path in memory}
+    \begin{algorithmic}[1]
+        \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+            \State $M \gets $ Array($n+1$, $m+1$)
+            \State $P \gets $ Array($n+1$, $m+1$)
+            \For {($i = 0$; $i < n+1$; $i++$)}
+                \State $M[i][0] \gets 0$
+            \EndFor
+            \For {($j = 0$; $j < m+1$; $j++$)}
+                \State $M[0][j] \gets 0$
+            \EndFor
+            \For{($i = 1$; $i < n+1$; $i++$)}
+                \For{($j = 1$; $j < m+1$; $j++$)}
+                    % Row 0 and column 0 are initialised by the two loops above,
+                    % so every cell visited here has a left, an upper and a
+                    % diagonal neighbour: no border test is needed.
+                    \If {$S_{1}[i-1] = S_{2}[j-1]$}
+                        \State $M[i][j] \gets M[i-1][j-1] + 1$
+                        \State $P[i][j] \gets '\nwarrow'$
+                    \ElsIf {$M[i][j-1] \geq M[i-1][j]$}
+                        \State $M[i][j] \gets M[i][j-1]$
+                        \State $P[i][j] \gets '\leftarrow'$
+                    \Else
+                        \State $M[i][j] \gets M[i-1][j]$
+                        \State $P[i][j] \gets '\uparrow'$
+                    \EndIf
+                \EndFor
+            \EndFor
+            \State \Return $M, P$
+        \EndFunction
+    \end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+    \caption{Backtrack the longest common subsequence}
+    \begin{algorithmic}[1]
+        \Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+            \State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
+            \State $L \gets Array(M[n][m])$
+            \State $k \gets M[n][m] - 1$ % the walk meets the letters last to first, so fill $L$ from its end
+            \State $i \gets n$
+            \State $j \gets m$
+            \While{$i > 0$ and $j > 0$}
+                \If {$P[i][j] = '\nwarrow' $}
+                    \State $L[k] \gets S_{1}[i-1]$
+                    \State $i--$
+                    \State $j--$
+                    \State $k--$
+                \ElsIf {$P[i][j] = '\leftarrow'$}
+                    \State $j--$
+                \Else
+                    \State $i--$
+                \EndIf
+            \EndWhile
+            \State \Return $L$
+        \EndFunction
+    \end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+    \caption{Recursive reconstruction of the longest common subsequence}
+    \begin{algorithmic}[1]
+        \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+            \State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
+            \State \Call{Aux}{$P$, $S_{1}$, $n$, $m$}
+        \EndProcedure
+
+        \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
+            \If {$i = 0$ or $j = 0$}
+                \State \Return
+            \ElsIf {$P[i][j] = '\nwarrow' $}
+                \State $l \gets S_{1}[i-1]$
+                \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
+                \State \texttt{print}($l$)
+            \ElsIf {$P[i][j] = '\leftarrow'$}
+                \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
+            \Else
+                \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
+            \EndIf
+        \EndProcedure
+    \end{algorithmic}
+\end{algorithm}
diff --git a/content/chapters/part2/0.tex b/content/chapters/part2/0.tex
new file mode 100644
index 0000000..abdcf13
--- /dev/null
+++ b/content/chapters/part2/0.tex
@@ -0,0 +1,24 @@
+\part{Sequence alignment}
+
+\section{Similarity between sequences}
+
+A function $d$ is a distance between two sequences $x$ and $y$ over an alphabet $\Sigma$ if
+\begin{itemize}
+    \item $\forall x \in \Sigma^{*}$ $d(x, x) = 0$
+    \item $\forall x, y \in \Sigma^{*}$ $d(x,y) = d(y,x)$
+    \item $\forall x, y, z \in \Sigma^{*}$ $d(x, z) \leq d(x, y) + d(y, z)$
+\end{itemize}
+
+Here we are interested in a distance that represents the transformation of $x$ into $y$ using three types of basic operations:
+\begin{itemize}
+    \item Substitution
+    \item Insertion
+    \item Deletion
+\end{itemize}
+
+Example:
+\begin{itemize}
+\item $sub(a, b) = \begin{cases} 0 & \text{if } a = b \\ 1 &\text{otherwise} \end{cases}$.
+\item $del(a) = 1$
+\item $ins(a) = 1$
+\end{itemize}
diff --git a/figures/part2/* b/figures/part2/*
new file mode 100644
index 0000000..07994eb
--- /dev/null
+++ b/figures/part2/*
@@ -0,0 +1,176 @@
+function lcsq_matrix(seq1, seq2)
+    local gap_penalty = 0
+    local match_score = 1
+    local n1 = string.len(seq1)
+    local n2 = string.len(seq2)
+    -- Create a n1 x n2 matrix
+    local matrix = {}
+    for i=0,n1 do
+        matrix[i] = {}
+        for j=0,n2 do
+            matrix[i][j] = 0
+        end
+    end
+    -- Fill the rest of the matrix
+    local match, delete, insert
+    for i=1,n1 do
+        for j=1,n2 do
+            if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
+                match = matrix[i-1][j-1] + match_score
+            else
+                match = matrix[i-1][j-1]
+            end
+            gap1 = matrix[i-1][j] + gap_penalty
+            gap2 = matrix[i][j-1] + gap_penalty
+            matrix[i][j] = math.max(match, gap1, gap2)
+        end
+    end
+    return matrix
+end
+
+local function has_value (tab, val)
+    for index, value in ipairs(tab) do
+        if value == val then
+            return true
+        end
+    end
+
+    return false
+end
+
+function repr_matrix(matrix)
+    repr = ""
+    for i=1,#matrix do
+        for j=1,#matrix do
+            repr = repr .. matrix[i][j] .. " "
+        end
+        repr = repr .. "\n"
+    end
+    return repr
+end
+
+
+function draw_lcsq_matrix_graph(seq1, seq2)
+    local matrix = lcsq_matrix(seq1, seq2)
+    local tikz_code = ""
+    function coordinate(i, j)
+        return i .. "_" .. j
+    end
+    local steps = {
+        {-1, 0},
+        {-1, -1},
+        {0, -1}
+    }
+
+    local n1 = string.len(seq1)
+    local n2 = string.len(seq2)
+    local path = {}
+    local i = n1
+    local j = n2
+    while i >= 0 and j >= 0 do
+        path[#path+1] = coordinate(i, j)
+        local min = matrix[i][j]
+        local min_step = steps[1]
+        for index, step in ipairs(steps) do
+            local k = i + step[1]
+            local l = j + step[2]
+            if k >= 0 and l >= 0 and matrix[k][l] <= min then
+                min_step = step
+                min = matrix[k][l]
+            end
+        end
+        i = i + min_step[1]
+        j = j + min_step[2]
+        print(i, j)
+    end
+    -- Draw the matrix as tikz node with matrix value
+    for i=0,n1 do
+        for j=0,n2 do
+            local options = ""
+            if has_value(path, coordinate(i, j)) then
+
+                options = "[fill=gray, draw, minimum size=1]"
+            end
+            tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
+        end
+    end
+    -- Add nucleotide labels
+    for i=1,n1 do
+        local nt = string.sub(seq1, i, i)
+        tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
+    end
+    for i=1,n2 do
+        local nt = string.sub(seq2, i, i)
+        tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
+    end
+    -- For seq2
+    for i=0,n1 do
+        for j=0,n2 do
+            local min = math.huge
+            for index, step in ipairs(steps) do
+                local k = i + step[1]
+                local l = j + step[2]
+                if k >= 0 and l >= 0 and matrix[k][l] < min then
+                    min = matrix[k][l]
+                end
+            end
+            local highlighted = false
+            for index, step in ipairs(steps) do
+                local k = i + step[1]
+                local l = j + step[2]
+                if k >= 0 and l >= 0 and matrix[k][l] == min then
+                    tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
+                end
+            end
+        end
+    end
+    return tikz_code
+end
+
+function draw_lcsq_matrix(seq1, seq2)
+    -- print(string.format(" Path: %s -> %s", seq1, seq2))
+    local matrix = lcsq_matrix(seq1, seq2)
+    local n1 = string.len(seq1)
+    local n2 = string.len(seq2)
+    -- Draw the matrix as tikz nodes
+    for i=0,n1-1 do
+        for j=0,n2-1 do
+            print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j]))
+        end
+    end
+    -- Draw the sequence labels
+    for i=1,n1 do
+        print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
+    end
+    for i=1,n2 do
+        print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
+    end
+    -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
+    local i, j, value, previous_value
+    i = n1-1
+    j = n2-1
+    print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
+    while i > 0 and j > 0 do
+        value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1]) -- read the score matrix, not the stdlib `table`
+        if value == matrix[i-1][j-1] then
+            i = i - 1
+            j = j - 1
+        elseif value == matrix[i-1][j] then
+            i = i - 1
+        else
+            j = j - 1
+        end
+        print(string.format(" (%d, -%d) -- ", i, j))
+    end
+    print(string.format("(0, 0) -- (-1, 1);", i, j))
+end
+
+function main()
+    local seq1 = "ATCTGAT"
+    local seq2 = "TGCATA"
+
+    local matrix = lcsq_matrix(seq1, seq2)
+    print(repr_matrix(matrix))
+end
+
+main()
diff --git a/figures/part2/lcsq.lua b/figures/part2/lcsq.lua
new file mode 100644
index 0000000..fc42a03
--- /dev/null
+++ b/figures/part2/lcsq.lua
@@ -0,0 +1,181 @@
+function lcsq_matrix(seq1, seq2) -- score matrix of the longest common subsequence of seq1 (rows) and seq2 (columns)
+    local gap_penalty = 0
+    local match_score = 1
+    local n1 = string.len(seq1)
+    local n2 = string.len(seq2)
+    -- Create a (n1+1) x (n2+1) matrix indexed from 0, filled with zeros
+    local matrix = {}
+    for i=0,n1 do
+        matrix[i] = {}
+        for j=0,n2 do
+            matrix[i][j] = 0
+        end
+    end
+    -- Fill the rest of the matrix
+    local match, gap1, gap2 -- declared local so the module does not leak globals
+    for i=1,n1 do
+        for j=1,n2 do
+            if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
+                match = matrix[i-1][j-1] + match_score
+            else
+                match = matrix[i-1][j-1]
+            end
+            gap1 = matrix[i-1][j] + gap_penalty
+            gap2 = matrix[i][j-1] + gap_penalty
+            matrix[i][j] = math.max(match, gap1, gap2)
+        end
+    end
+    return matrix
+end
+-- Return true when val is one of the array values of tab.
+local function has_value (tab, val)
+    for index, value in ipairs(tab) do
+        if value == val then
+            return true
+        end
+    end
+
+    return false
+end
+-- Render the matrix as text: one row per line, cells separated by a space.
+function repr_matrix(matrix)
+    local repr = ""
+    for i=0,#matrix do
+        for j=0,#matrix[i] do
+            repr = repr .. matrix[i][j] .. " "
+        end
+        repr = repr .. "\n"
+    end
+    return repr
+end
+-- Build the TikZ code for the score matrix: one node per cell (cells on the
+-- traceback path are filled), sequence letters as labels, arrows between cells.
+function draw_lcsq_matrix_graph(seq1, seq2, matrix)
+    local tikz_code = ""
+    local function coordinate(i, j)
+        return i .. "_" .. j
+    end
+    local steps = {
+        {-1, -1},
+        {0, -1},
+        {-1, 0},
+    }
+
+    local n1 = string.len(seq1)
+    local n2 = string.len(seq2)
+    local path = {}
+    local i = n1
+    local j = n2
+    while i >= 0 and j >= 0 do
+        path[#path+1] = coordinate(i, j)
+        local max = -math.huge -- start below every score, otherwise no neighbour is ever selected
+        local max_step = steps[1]
+        for index, step in ipairs(steps) do
+            local k = i + step[1]
+            local l = j + step[2]
+            if k >= 0 and l >= 0 and matrix[k][l] > max then
+                max_step = step
+                max = matrix[k][l]
+            end
+        end
+        i = i + max_step[1]
+        j = j + max_step[2]
+    end
+    -- Draw the matrix as tikz node with matrix value
+    for i=0,n1 do
+        for j=0,n2 do
+            local options = ""
+            if has_value(path, coordinate(i, j)) then
+
+                options = "[fill=gray, draw, minimum size=1]"
+            end
+            tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
+        end
+    end
+    -- Add nucleotide labels
+    for i=1,n1 do
+        local nt = string.sub(seq1, i, i)
+        tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
+    end
+    for i=1,n2 do
+        local nt = string.sub(seq2, i, i)
+        tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
+    end
+    -- Draw an arrow from every cell to each neighbour holding the largest neighbouring score
+    for i=0,n1 do
+        for j=0,n2 do
+            local max = 0
+            for index, step in ipairs(steps) do
+                local k = i + step[1]
+                local l = j + step[2]
+                if k >= 0 and l >= 0 and matrix[k][l] > max then
+                    max = matrix[k][l]
+                end
+            end
+            -- several neighbours may tie: one arrow is drawn per tied neighbour
+            for index, step in ipairs(steps) do
+                local k = i + step[1]
+                local l = j + step[2]
+                if k >= 0 and l >= 0 and matrix[k][l] == max then
+                    tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
+                end
+            end
+        end
+    end
+    return tikz_code
+end
+-- Return TikZ code for an empty grid, the sequence labels and a gray path through the grid.
+function draw_lcsq_matrix(seq1, seq2)
+    -- print(string.format(" Path: %s -> %s", seq1, seq2))
+    local matrix = lcsq_matrix(seq1, seq2)
+    local n1 = string.len(seq1)
+    local n2 = string.len(seq2)
+    local repr = ""
+    -- Draw the matrix as tikz nodes
+    for i=0,n1-1 do
+        for j=0,n2-1 do
+            repr = repr .. " " .. string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j])
+        end
+    end
+    -- Draw the sequence labels
+    for i=1,n1 do
+        repr = repr .. " " .. string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i))
+    end
+    for i=1,n2 do
+        repr = repr .. " " .. string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i))
+    end
+    -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
+    local i, j, value
+    i = n1-1
+    j = n2-1
+    repr = repr .. " " .. string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j) -- without the second `..` the \draw prefix was computed and thrown away
+    while i > 0 and j > 0 do
+        value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1])
+        if value == matrix[i-1][j-1] then
+            i = i - 1
+            j = j - 1
+        elseif value == matrix[i-1][j] then
+            i = i - 1
+        else
+            j = j - 1
+        end
+        repr = repr .. " " .. string.format(" (%d, -%d) -- ", i, j)
+    end
+    repr = repr .. " " .. string.format("(0, 0) -- (-1, 1);", i, j)
+    return repr
+end
+
+function main()
+    local seq1 = "ATCTGAT"
+    local seq2 = "TGCATA"
+    local matrix = lcsq_matrix(seq1, seq2)
+    print(draw_lcsq_matrix_graph(seq1, seq2, matrix))
+end
+
+-- main()
+
+return {
+    lcsq_matrix=lcsq_matrix,
+    draw_lcsq_matrix_graph=draw_lcsq_matrix_graph,
+    draw_lcsq_matrix=draw_lcsq_matrix
+}
diff --git a/figures/part2/lcsq.pdf b/figures/part2/lcsq.pdf
new file mode 100644
index 0000000..c035058
--- /dev/null
+++ b/figures/part2/lcsq.pdf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67daf12bd2ddbdcfe28189e46d7e2338a73b897c98bf7097350d627375e423a3
+size 14515
diff --git a/figures/part2/lcsq.tex b/figures/part2/lcsq.tex
new file mode 100644
index 0000000..2c0ec70
--- /dev/null
+++ b/figures/part2/lcsq.tex
@@ -0,0 +1,18 @@
+\documentclass[tikz]{standalone}
+
+\usepackage{tikz}
+\usepackage{luatextra}
+\begin{document}
+
+\begin{tikzpicture}
+    \begin{luacode}
+        lcsq = require('lcsq')
+        seq2 = "ATCTGAT"
+        seq1 = "TGCATA"
+        matrix = lcsq.lcsq_matrix(seq1, seq2)
+        tikz_code = lcsq.draw_lcsq_matrix_graph(seq1, seq2, matrix)
+        tex.print(tikz_code)
+    \end{luacode}
+\end{tikzpicture}
+
+\end{document}
diff --git a/folder-structure.sh b/folder-structure.sh
new file mode 100644
index 0000000..a7b6dc5
--- /dev/null
+++ b/folder-structure.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Recreate the directory tree found under ./content inside ./build.
+find ./content -type d > folder_list.txt || exit 1
+
+mkdir -p build
+cd build || exit 1
+xargs mkdir -p < ../folder_list.txt
+rm ../folder_list.txt
\ No newline at end of file
diff --git a/main.pdf b/main.pdf
index 37a5a01..e2afaaf 100644
--- a/main.pdf
+++ b/main.pdf
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7d399a14f4887c141303c27212572571690acfe832b36202fb3758bf5f76992
-size 279254
+oid sha256:73b2634cfd937c1997c40889d185922ba2608c8cc2c8d4bb59afecf1ed588692
+size 295634
diff --git a/main.tex b/main.tex
index 974f978..65f6530 100755
--- a/main.tex
+++ b/main.tex
@@ -12,8 +12,9 @@ fontsize=10pt,
fleqn, oneside -]{scrbook} + ]{scrbook} + \usepackage{mus} \RequirePackage{algorithm} @@ -39,16 +40,16 @@ \hypersetup{ pdftitle={ - Course - Sequence algorithms + Course - Sequence algorithms }, pdfauthor={ - Samuel Ortion - }, + Samuel Ortion + }, pdfsubject={}, pdfkeywords={}, pdfcreator={LaTeX} } - + \addbibresource{references} \usepackage[ @@ -64,6 +65,7 @@ \definecolor{clementine}{HTML}{dfa000} \colorlet{primary}{clementine} +% \includeonly{content/chapters/part1/1} \makeindex% \makeglossary% \begin{document} @@ -77,10 +79,7 @@ \newpage % \input{content ./introduction} - \input{content/chapters/include} - - % \input{content/conclusion} \end{document} diff --git a/tmp.pdf b/tmp.pdf index 1f58660..5abef09 100644 --- a/tmp.pdf +++ b/tmp.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a4ca0430df97538f18dd19ee77273b26e99f7e1ef07cc5b353cba53d88994dff -size 63306 +oid sha256:7529f889933c1c9d295cab8160ff7aabdcef3a55f4a4e351822f1ce4dd86621c +size 61328 diff --git a/tmp.tex b/tmp.tex index 47a449a..c619949 100644 --- a/tmp.tex +++ b/tmp.tex @@ -11,8 +11,56 @@ \input{definitions.tex} \begin{document} + \begin{algorithm} + \caption{Backtrack the longest common subsequence} + \begin{algorithmic}[1] + \Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} + \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} + \State $L \gets Array(M[n][m])$ + \State $k \gets 0$ + \State $i \gets n$ + \State $j \gets m$ + \While{$i > 0$ and $j > 0$} + \If {$P[i][j] = '\nwarrow' $} + \State $L[k] \gets S_{1}[i]$ + \State $i--$ + \State $j--$ + \State $k++$ + \ElsIf {$P[i][j] = '\leftarrow'$} + \State $j--$ + \Else + \State $i--$ + \EndIf + \EndWhile + \State \Return $L$ + \EndFunction + \end{algorithmic} + \end{algorithm} + \begin{algorithm} + \caption{Recursive reconstruction of the longest common subsequence} + \begin{algorithmic}[1] + \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} + \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} 
+ \State $i \gets n$ + \State $j \gets m$ + \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$} + \EndProcedure + + \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$} + \If {$P[i][j] = '\nwarrow' $} + \State $l \gets S_{1}[i]$ + \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$} + \State \texttt{print}($l$) + \ElsIf {$P[i][j] = '\leftarrow'$} + \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$} + \Else + \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$} + \EndIf + \EndProcedure + \end{algorithmic} + \end{algorithm} \end{document} -\end{document} \ No newline at end of file +\end{document}