Compare commits

...

4 Commits

Author SHA1 Message Date
Samuel Ortion 18203b1e49 Longest common subsequence
Try to use includeonly to limit compilation time
2024-03-19 13:11:18 +01:00
Samuel Ortion fc0331e054 feat: Add more algorithm on automata 2024-03-15 11:40:26 +01:00
Samuel Ortion 5fb762e73f Add motif search algorithm 2024-03-15 09:34:02 +01:00
Samuel Ortion b830fded27 Add motif search automata 2024-03-15 09:16:50 +01:00
21 changed files with 921 additions and 136 deletions

41
.latexmkrc Normal file
View File

@ -0,0 +1,41 @@
# Run the project's helper shell script that prepares the output folders
# before latexmk starts compiling.
#
# The script was historically called createFolderStructure.sh; that name is
# tried first so existing checkouts keep working, then folder-structure.sh.
# A missing script or a non-zero exit status is reported with a warning
# instead of being silently ignored.
#
# Returns the status reported by system(), or -1 when no script was found.
sub createFolderStructure {
    my ($script) = grep { -e $_ }
        ( './createFolderStructure.sh', './folder-structure.sh' );

    if ( !defined $script ) {
        warn "createFolderStructure: no folder structure script found\n";
        return -1;
    }

    # List form: the script path is never re-parsed by a shell.
    my $status = system( 'bash', $script );
    warn "createFolderStructure: '$script' failed (status $status)\n"
        if $status != 0;

    return $status;
}
createFolderStructure();
# Pattern of .aux lines that latexmk should ignore when it hashes the file to
# decide whether another run is needed (see $hash_calc_ignore_pattern in the
# latexmk manual). The three alternatives match minted's cache list, the
# default .pygstyle entry and hex-named .pygtex entries.
$hash_calc_ignore_pattern{aux} =
'^\\\\gdef\\\\minted@oldcachelist\{,'
. '|^\s*default\.pygstyle,'
. '|^\s*[[:xdigit:]]+\.pygtex';
# Command used for the "pdflatex" step: it actually runs lualatex with shell
# escape enabled, writes into build/, and tells minted about that output
# directory before \input-ing the source. %O and %S are latexmk placeholders
# (options and source file).
$pdflatex =
'lualatex -shell-escape -file-line-error -interaction=nonstopmode -synctex=1 -output-directory=build %O '
. '\'\PassOptionsToPackage{outputdir=build}{minted}\input{%S}\'';
# Keep auxiliary files in the same build/ directory as the output.
$aux_dir = 'build';
# Bibliography handling mode; see $bibtex_use in the latexmk manual for the
# exact meaning of the value 2.
$bibtex_use = 2;
# Amend cleaned extensions
$clean_ext .= " fdb_latexmk run.xml synctex.gz";
# Make latexmk quiet
# NOTE(review): the makeglossaries handler below reads $silent, not
# $latexmk_silent -- confirm which variable latexmk actually honours.
$latexmk_silent = 1;
# Makeglossaries
# Custom dependencies: regenerate .acr from .acn and .gls from .glo by calling
# the sub named 'makeglossaries'. The third argument (0) is latexmk's "must"
# flag.
add_cus_dep( 'acn', 'acr', 0, 'makeglossaries' );
add_cus_dep( 'glo', 'gls', 0, 'makeglossaries' );
# Also remove the glossary by-products on cleanup.
$clean_ext .= " acr acn alg glo gls glg";
# Handler for the glossary custom dependencies: run the external
# `makeglossaries` program on the file latexmk passes in.
#
# $_[0] is the file name handed over by latexmk; it is split into its
# directory (passed with -d) and its base name.
# When latexmk runs silently ($silent is true) the -q flag is added exactly
# once so makeglossaries stays quiet as well.
#
# Returns the exit status of system().
sub makeglossaries {
    my ( $base_name, $path ) = fileparse( $_[0] );

    my @args = ( '-d', $path, $base_name );
    unshift @args, '-q' if $silent;

    # List form of system: no shell is involved, so paths with spaces are safe.
    return system 'makeglossaries', @args;
}
# Run the external `biber` program on the file given as first argument,
# directing its output to the directory that file lives in.
# Returns the exit status of system().
sub biber {
    my ($target) = @_;
    my ( $name, $dir ) = fileparse($target);
    return system( 'biber', '--output-directory', $dir, $name );
}

View File

@ -1,6 +0,0 @@
# Flags passed to every lualatex invocation.
options=-shell-escape -interaction=nonstopmode -file-line-error
# Default goal (first target in the file): build main.pdf.
all: main.pdf
# Pattern rule: NAME.pdf is produced from NAME.tex by one lualatex run;
# $< expands to the .tex prerequisite.
%.pdf: %.tex
lualatex $(options) $<

View File

@ -6,13 +6,12 @@
\foreach \i in {0, ..., #2} {% \foreach \i in {0, ..., #2} {%
\edef\FileName{content/chapters/#1/\i}% \edef\FileName{content/chapters/#1/\i}%
\IfFileExists{\FileName}{% \IfFileExists{\FileName}{%
\input{\FileName}% \include{\FileName}%
} }
} }
} }
\includechapters{part1}{2} \includechapters{part1}{3}
\includechapters{part2}{2}
% \includechapters{part2}{2}
% \includechapters{part3}{1} % \includechapters{part3}{1}

View File

@ -28,7 +28,7 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
\begin{table} \begin{table}
\includegraphics{figures/part1/comparison_matrix_repetitions.pdf} \includegraphics{./figures/part1/comparison_matrix_repetitions.pdf}
\caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"} \caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"}
\end{table} \end{table}
@ -51,8 +51,8 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
\State \Return $M$ \State \Return $M$
\EndFunction \EndFunction
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\begin{algorithm} \begin{algorithm}
\caption{Construct the top half of a comparison matrix} \caption{Construct the top half of a comparison matrix}
\begin{algorithmic}[1] \begin{algorithmic}[1]
@ -248,18 +248,18 @@ The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
\begin{figure} \begin{figure}
\centering \centering
\includegraphics{figures/part1/minimal_suffix_automaton_exercise.pdf} \includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf}
\caption{Suffix automaton for $S = $ AACTACT} \caption{Suffix automaton for $S = $ AACTACT}
\end{figure} \end{figure}
\begin{figure} \begin{figure}
\centering \centering
\includegraphics{figures/part1/minimal_suffix_automaton_exercise_bis.pdf} \includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
\caption{Suffix automaton for $S = $ TCATCATT} \caption{Suffix automaton for $S = $ TCATCATT}
\end{figure} \end{figure}
\begin{algorithm} \begin{algorithm}
\caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$} \caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton}
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} \Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
\Returns{Boolean valued to \True{} if the motif is in the sequence} \Returns{Boolean valued to \True{} if the motif is in the sequence}
@ -276,6 +276,6 @@ The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
\EndIf \EndIf
\EndFunction \EndFunction
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$ The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$

View File

@ -0,0 +1,84 @@
\chapter{Automata for motif search}
Let $M$ be a motif $M = $ ACAT.
\begin{figure}
\centering
\includegraphics{./figures/part1/motif_search_automaton.pdf}
\caption{Motif search automaton for $M = $ ACAT}
\end{figure}
The alphabet of motif is the same as the alphabet of the sequence.
The search automaton is complete.
If there exists a letter $c$ in the sequence that is not
in the motif alphabet, we can make a virtual transition from
each state to the initial state whenever we encounter an unknown letter.
\begin{algorithm}
\caption{Search a motif in a sequence with an automaton}
\begin{algorithmic}[1]
\Function{SearchMotif}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
\Returns{A set of positions where the motif has been found}
\State $s \gets s_0$
\State $i \gets 0$
\State $pos \gets \{\}$
\While {$i < n$} % $\exists f(s, S[i])$ We assume $S$ and $P$ are formed on the same alphabet, so we could remove the second check, as $A$ is complete
\If {$s \in T$}
\State $pos \gets pos \cup \{ i - m \}$
\EndIf
\State $s \gets f(s, S[i])$
\State $i++$
\EndWhile
\State \Return $pos$
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$}
\begin{algorithmic}[1]
\Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
\Returns{Boolean valued to \True{} if the prefix of size $m-1$ of the motif has been found in the sequence}
\State $s \gets s_0$
\State $i \gets 0$
\State $T_{new} \gets \{\}$
\For {$s \in S$}
\For {$a \in \Sigma$}
\For {$t \in T$}
\If {$\exists f(s, a)$ and $f(s, a) = t$}
\State $T_{new} \gets T_{new} \cup \{s\}$
\EndIf
\EndFor
\EndFor
\EndFor
\While {$i < n$}
\If {$s \in T_{new}$}
\State \Return \True
\EndIf
\State $s \gets f(s, S[i])$
\State $i++$
\EndWhile
\State \Return \False
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$, knowing the sequence of the motif}
\begin{algorithmic}[1]
\Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
\Returns{Boolean valued to \True{} if the prefix of size $m-1$ of the motif has been found in the sequence}
\State $s \gets s_0$
\State $i \gets 0$
\While {$i < n$ and $f(s, P[m-1]) \notin T$}
\State $s \gets f(s, S[i])$
\State $i++$
\EndWhile
\If{$f(s, P[m-1]) \in T$}
\State \Return \True
\Else
\State \Return \False
\EndIf
\EndFunction
\end{algorithmic}
\end{algorithm}

View File

@ -0,0 +1,113 @@
\chapter{Longest common subsequence}
Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
\begin{algorithm}
\caption{Construct a longest common subsequence matrix}
\begin{algorithmic}[1]
\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
\State $M \gets $ Array($n+1$, $m+1$)
\For{($i = 0$; $i < n+1$; $i++$)}
\For{$j = 0$; $j < m+1$; $j++$}
\If {$i = 0$ or $j = 0$}
\State $M[i][j] = 0$
\Else
\If {$S_{1}[i-1] = S_{2}[j-1]$}
\State $match = M[i-1][j-1] + 1$
\Else
\State $match = M[i-1][j-1]$
\EndIf
\State $gap_{1} = M[i-1][j]$
\State $gap_{2} = M[i][j-1]$
\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
\EndIf
\EndFor
\EndFor
\State \Return $M$
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Construct a longest common subsequence matrix keeping the path in memory}
\begin{algorithmic}[1]
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
\State $M \gets $ Array($n+1$, $m+1$)
\State $P \gets $ Array($n+1$, $m+1$)
\For {($i = 0$; $i < n+1$; $i++$)}
\State $M[i][0] \gets 0$
\EndFor
\For {($j = 0$; $j < m+1$; $j++$)}
\State $M[0][j] \gets 0$
\EndFor
\For{($i = 1$; $i < n+1$; $i++$)}
\For{($j = 1$; $j < m+1$; $j++$)}
\If {$i = 0$ or $j = 0$}
\State $M[i][j] = 0$
\Else
\If {$S_{1}[i-1] = S_{2}[j-1]$}
\State $M[i][j] \gets M[i-1][j-1] + 1$
\State $P[i][j] \gets '\nwarrow'$
\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
\State $M[i][j] \gets M[i][j-1]$
\State $P[i][j] \gets '\leftarrow'$
\Else
\State $M[i][j] \gets M[i-1][j]$
\State $P[i][j] \gets '\downarrow'$
\EndIf
\EndIf
\EndFor
\EndFor
\State \Return $M, P$
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Backtrack the longest common subsequence}
\begin{algorithmic}[1]
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
\State $L \gets Array(M[n][m])$
\State $k \gets 0$
\State $i \gets n$
\State $j \gets m$
\While{$i > 0$ and $j > 0$}
\If {$P[i][j] = '\nwarrow' $}
\State $L[k] \gets S_{1}[i-1]$
\State $i--$
\State $j--$
\State $k++$
\ElsIf {$P[i][j] = '\leftarrow'$}
\State $j--$
\Else
\State $i--$
\EndIf
\EndWhile
\State \Return $L$
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Recursive reconstruction of the longest common subsequence}
\begin{algorithmic}[1]
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
\State $i \gets n$
\State $j \gets m$
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
\EndProcedure
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
\If {$P[i][j] = '\nwarrow' $}
\State $l \gets S_{1}[i-1]$
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
\State \texttt{print}($l$)
\ElsIf {$P[i][j] = '\leftarrow'$}
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
\Else
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
\EndIf
\EndProcedure
\end{algorithmic}
\end{algorithm}

View File

@ -0,0 +1,24 @@
\part{Sequence alignment}
\section{Similarity between sequences}
A function $d$ is a distance between two sequences $x$ and $y$ in an alphabet $\Sigma$ if
\begin{itemize}
\item $\forall x \in \Sigma^{*}$ $d(x, x) = 0$
\item $\forall x, y \in \Sigma^{*}$ $d(x,y) = d(y,x)$
\item $\forall x, y, z \in \Sigma^{*}$ $d(x, z) \leq d(x, y) + d(y, z)$
\end{itemize}
Here we are interested in a distance that represents the transformation of $x$ into $y$ using three types of basic operations:
\begin{itemize}
\item Substitution
\item Insertion
\item Deletion
\end{itemize}
Example:
\begin{itemize}
\item $sub(a, b) = \begin{cases} 0 & \text{if } a = b \\ 1 & \text{otherwise} \end{cases}$.
\item $del(a) = 1$
\item $ins(a) = 1$
\end{itemize}

BIN
figures/part1/motif_search_automaton.pdf (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,42 @@
% Standalone TikZ figure: complete search automaton for the motif ACACT
% over the alphabet {A, C, T}. State q_k means "the last k letters read are
% the first k letters of the motif"; q_5 is the accepting state.
\documentclass[tikz]{standalone}
\usepackage{tikz}
\begin{document}
\usetikzlibrary{automata,positioning}
\iffalse
Let $M = $ ACACT be a motif.
$\Sigma = \{A, C, T\}$
\fi
\begin{tikzpicture}[shorten >=1pt,node distance=2cm,on grid, auto]
  % TODO
  \node[state, initial] (q_0) {$q_{0}$};
  \node[state,right=of q_0] (q_1) {$q_{1}$};
  \node[state,right=of q_1] (q_2) {$q_{2}$};
  \node[state,right=of q_2] (q_3) {$q_{3}$};
  \node[state,right=of q_3] (q_4) {$q_{4}$};
  \node[state,right=of q_4,accepting] (q_5) {$q_{5}$};
  % M itself
  \path[->] (q_0) edge node {A} (q_1)
            (q_1) edge node {C} (q_2)
            (q_2) edge node {A} (q_3)
            (q_3) edge node {C} (q_4)
            (q_4) edge node {T} (q_5);
  % Make it complete so that it recognizes every occurrence of the motif in
  % the sequence: each missing letter falls back to the state of the longest
  % motif prefix that is still a suffix of what has been read.
  % Self-loop on q_0: the edge must start and end on q_0.
  \path[->] (q_0) edge[loop above] node {C, T} (q_0);
  \path[->] (q_1) edge[loop above] node {A} (q_1);
  \path[->] (q_1) edge[bend left=30] node {T} (q_0);
  \path[->] (q_2) edge[bend left=30] node {C, T} (q_0);
  \path[->] (q_3) edge[bend right=30] node[above] {A} (q_1);
  \path[->] (q_3) edge[bend left=40] node {T} (q_0);
  \path[->] (q_4) edge[bend right=50] node[above] {A} (q_3);
  \path[->] (q_4) edge[bend left=40] node {C} (q_0);
  \path[->] (q_5) edge[bend right=40] node[above] {A} (q_1);
  \path[->] (q_5) edge[bend left=50] node {C,T} (q_0);
\end{tikzpicture}
\end{document}

BIN
figures/part1/motif_search_automaton_bis.pdf (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,47 @@
% Standalone TikZ figure: complete search automaton for the motif CCATCAT
% over the alphabet {A, C, T}. State q_k means "the last k letters read are
% the first k letters of the motif"; q_7 is the accepting state.
\documentclass[tikz]{standalone}
\usepackage{tikz}
\begin{document}
\usetikzlibrary{automata,positioning}
\iffalse
Let $M = $ CCATCAT be a motif.
$\Sigma = \{A,C,T\}$
\fi
\begin{tikzpicture}[shorten >=1pt,node distance=2cm,on grid, auto]
  \node[state, initial] (q_0) {$q_{0}$};
  \node[state,right=of q_0] (q_1) {$q_{1}$};
  \node[state,right=of q_1] (q_2) {$q_{2}$};
  \node[state,right=of q_2] (q_3) {$q_{3}$};
  \node[state,right=of q_3] (q_4) {$q_{4}$};
  \node[state,right=of q_4] (q_5) {$q_{5}$};
  \node[state,right=of q_5] (q_6) {$q_{6}$};
  \node[state,right=of q_6,accepting] (q_7) {$q_{7}$};
  % M itself
  \path[->] (q_0) edge node {C} (q_1)
            (q_1) edge node {C} (q_2)
            (q_2) edge node {A} (q_3)
            (q_3) edge node {T} (q_4)
            (q_4) edge node {C} (q_5)
            (q_5) edge node {A} (q_6)
            (q_6) edge node {T} (q_7)
            ;
  % Fallback transitions: on a mismatch, go to the state of the longest motif
  % prefix that is a suffix of the text read so far.
  \path[->] (q_0) edge [loop above] node {A,T} (q_0);
  \path[->] (q_1) edge [bend right=40] node {A, T} (q_0);
  % After "CC", another C still ends in "CC": stay in q_2.
  \path[->] (q_2) edge [loop above] node {C} (q_2);
  \path[->] (q_2) edge [bend right=40] node {T} (q_0);
  \path[->] (q_3) edge [bend right=40] node {A} (q_0);
  \path[->] (q_3) edge [bend right=40] node {C} (q_1);
  \path[->] (q_4) edge [bend right=40] node {A, T} (q_0);
  \path[->] (q_5) edge [bend left=40] node[above] {C} (q_2);
  \path[->] (q_5) edge [bend left=40] node[above] {T} (q_0);
  \path[->] (q_6) edge [bend left=40] node[above] {A} (q_0);
  \path[->] (q_6) edge [bend right=40] node {C} (q_1);
  % After a full match, a C starts a new candidate occurrence (prefix "C").
  \path[->] (q_7) edge [bend left=50] node[above] {C} (q_1);
  \path[->] (q_7) edge [bend right=40] node {A, T} (q_0);
\end{tikzpicture}
\end{document}

176
figures/part2/* Normal file
View File

@ -0,0 +1,176 @@
-- Build the dynamic-programming table of longest-common-subsequence lengths.
--
-- seq1, seq2: strings, compared one character at a time.
-- Returns a table of tables indexed from 0: matrix[i][j] is the score of the
-- first i characters of seq1 against the first j characters of seq2, so the
-- table has (#seq1 + 1) rows and (#seq2 + 1) columns.
function lcsq_matrix(seq1, seq2)
    local gap_penalty = 0
    local match_score = 1
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    -- Create the (n1 + 1) x (n2 + 1) matrix; row 0 and column 0 stay at 0.
    local matrix = {}
    for i = 0, n1 do
        matrix[i] = {}
        for j = 0, n2 do
            matrix[i][j] = 0
        end
    end
    -- Fill the rest of the matrix
    for i = 1, n1 do
        for j = 1, n2 do
            -- All intermediate scores are locals so that nothing leaks into
            -- the global environment of whoever requires this file.
            local match = matrix[i-1][j-1]
            if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
                match = match + match_score
            end
            local gap1 = matrix[i-1][j] + gap_penalty
            local gap2 = matrix[i][j-1] + gap_penalty
            matrix[i][j] = math.max(match, gap1, gap2)
        end
    end
    return matrix
end
-- Tell whether `val` is one of the values visited by ipairs(tab).
-- Returns true on the first equal entry, false when none is found.
local function has_value (tab, val)
    local found = false
    for _, candidate in ipairs(tab) do
        if candidate == val then
            found = true
            break
        end
    end
    return found
end
-- Render a matrix (table of row tables) as text: every cell is followed by a
-- space and every row by a newline.
--
-- Rows and columns start at index 0 when that index is present (as in the
-- tables built by lcsq_matrix) and at 1 otherwise; the column count is taken
-- from each row, so non-square matrices are handled.
function repr_matrix(matrix)
    local first_row = (matrix[0] ~= nil) and 0 or 1
    local lines = {}
    for i = first_row, #matrix do
        local row = matrix[i]
        local first_col = (row[0] ~= nil) and 0 or 1
        local cells = {}
        for j = first_col, #row do
            cells[#cells + 1] = row[j] .. " "
        end
        lines[#lines + 1] = table.concat(cells) .. "\n"
    end
    return table.concat(lines)
end
-- Produce TikZ code for the score matrix of seq1 against seq2: one node per
-- cell, the cells on the traced path filled in gray, nucleotide labels along
-- the borders, and arrows from each cell to its lowest-valued predecessors.
--
-- seq1, seq2: strings. Returns the TikZ code as a single string.
-- NOTE(review): the path follows the *smallest* neighbouring score; confirm
-- this is intended, since the table itself is filled with math.max.
function draw_lcsq_matrix_graph(seq1, seq2)
    local matrix = lcsq_matrix(seq1, seq2)
    local tikz_code = ""
    -- Node name of cell (i, j); kept local so it does not become a global.
    local function coordinate(i, j)
        return i .. "_" .. j
    end
    local steps = {
        {-1, 0},
        {-1, -1},
        {0, -1}
    }
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    -- Walk from the bottom-right cell towards the origin. When no step is
    -- valid (cell 0,0) the default step leaves the matrix and ends the loop.
    local path = {}
    local i = n1
    local j = n2
    while i >= 0 and j >= 0 do
        path[#path+1] = coordinate(i, j)
        local min = matrix[i][j]
        local min_step = steps[1]
        for _, step in ipairs(steps) do
            local k = i + step[1]
            local l = j + step[2]
            if k >= 0 and l >= 0 and matrix[k][l] <= min then
                min_step = step
                min = matrix[k][l]
            end
        end
        i = i + min_step[1]
        j = j + min_step[2]
    end
    -- Draw the matrix as tikz node with matrix value
    for i = 0, n1 do
        for j = 0, n2 do
            local options = ""
            if has_value(path, coordinate(i, j)) then
                options = "[fill=gray, draw, minimum size=1]"
            end
            tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
        end
    end
    -- Add nucleotide labels
    for i = 1, n1 do
        local nt = string.sub(seq1, i, i)
        tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
    end
    for i = 1, n2 do
        local nt = string.sub(seq2, i, i)
        tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
    end
    -- Arrows: from every cell to each in-range predecessor holding the
    -- minimum predecessor value.
    for i = 0, n1 do
        for j = 0, n2 do
            local min = math.huge
            for _, step in ipairs(steps) do
                local k = i + step[1]
                local l = j + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] < min then
                    min = matrix[k][l]
                end
            end
            for _, step in ipairs(steps) do
                local k = i + step[1]
                local l = j + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] == min then
                    tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
                end
            end
        end
    end
    return tikz_code
end
-- Print TikZ code for an empty n1 x n2 grid labelled with the two sequences,
-- plus a gray path from the bottom-right cell back to the top-left corner.
--
-- seq1, seq2: strings. Output goes to standard output; nothing is returned.
-- NOTE(review): the path follows the minimum of the three neighbouring
-- scores, as the original comment states -- confirm that this is intended.
function draw_lcsq_matrix(seq1, seq2)
    -- print(string.format(" Path: %s -> %s", seq1, seq2))
    local matrix = lcsq_matrix(seq1, seq2)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    -- Draw the matrix as tikz nodes
    for i = 0, n1-1 do
        for j = 0, n2-1 do
            print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j))
        end
    end
    -- Draw the sequence labels
    for i = 1, n1 do
        print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
    end
    for i = 1, n2 do
        print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
    end
    -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
    local i = n1-1
    local j = n2-1
    print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
    while i > 0 and j > 0 do
        -- All three candidates come from the score matrix; indexing the
        -- standard `table` library here would raise an error.
        local value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1])
        if value == matrix[i-1][j-1] then
            i = i - 1
            j = j - 1
        elseif value == matrix[i-1][j] then
            i = i - 1
        else
            j = j - 1
        end
        print(string.format(" (%d, -%d) -- ", i, j))
    end
    print("(0, 0) -- (-1, 1);")
end
-- Demo entry point: print the text rendering of the score matrix for the
-- two example sequences.
function main()
    local first, second = "ATCTGAT", "TGCATA"
    print(repr_matrix(lcsq_matrix(first, second)))
end
main()

181
figures/part2/lcsq.lua Normal file
View File

@ -0,0 +1,181 @@
-- Build the dynamic-programming table of longest-common-subsequence lengths.
--
-- seq1, seq2: strings, compared one character at a time.
-- Returns a table of tables indexed from 0: matrix[i][j] is the score of the
-- first i characters of seq1 against the first j characters of seq2, so the
-- table has (#seq1 + 1) rows and (#seq2 + 1) columns.
function lcsq_matrix(seq1, seq2)
    local gap_penalty = 0
    local match_score = 1
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    -- Create the (n1 + 1) x (n2 + 1) matrix; row 0 and column 0 stay at 0.
    local matrix = {}
    for i = 0, n1 do
        matrix[i] = {}
        for j = 0, n2 do
            matrix[i][j] = 0
        end
    end
    -- Fill the rest of the matrix
    for i = 1, n1 do
        for j = 1, n2 do
            -- All intermediate scores are locals so that nothing leaks into
            -- the global environment of whoever requires this module.
            local match = matrix[i-1][j-1]
            if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
                match = match + match_score
            end
            local gap1 = matrix[i-1][j] + gap_penalty
            local gap2 = matrix[i][j-1] + gap_penalty
            matrix[i][j] = math.max(match, gap1, gap2)
        end
    end
    return matrix
end
-- Tell whether `val` is one of the values visited by ipairs(tab).
-- Returns true on the first equal entry, false when none is found.
local function has_value (tab, val)
    local found = false
    for _, candidate in ipairs(tab) do
        if candidate == val then
            found = true
            break
        end
    end
    return found
end
-- Render a 0-indexed matrix (table of row tables) as text: every cell is
-- followed by a space and every row by a newline.
function repr_matrix(matrix)
    -- Local accumulator: a global `repr` would leak into the environment of
    -- whoever requires this module.
    local repr = ""
    for i = 0, #matrix do
        for j = 0, #matrix[i] do
            repr = repr .. matrix[i][j] .. " "
        end
        repr = repr .. "\n"
    end
    return repr
end
-- Produce TikZ code for a longest-common-subsequence score matrix: one node
-- per cell, the cells of one traceback path filled in gray, nucleotide labels
-- along the borders, and arrows from each cell to its best predecessors.
--
-- seq1, seq2: the compared strings.
-- matrix:     0-indexed score table with (#seq1 + 1) x (#seq2 + 1) cells,
--             as returned by lcsq_matrix(seq1, seq2).
-- Returns the TikZ code as a single string.
function draw_lcsq_matrix_graph(seq1, seq2, matrix)
    -- Node name of cell (i, j); kept local so it does not become a global.
    local function coordinate(i, j)
        return i .. "_" .. j
    end
    -- Candidate moves towards the origin. The diagonal is listed first so
    -- that it wins ties.
    local steps = {
        {-1, -1},
        {0, -1},
        {-1, 0},
    }
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- Trace back from the bottom-right cell to (0, 0), always moving to the
    -- in-range predecessor with the highest score. Scores never increase
    -- towards the origin, so the predecessors must be compared with each
    -- other, not with the current cell.
    local on_path = {}
    local i, j = n1, n2
    while true do
        on_path[coordinate(i, j)] = true
        local best_step, best_value
        for _, step in ipairs(steps) do
            local k = i + step[1]
            local l = j + step[2]
            if k >= 0 and l >= 0
                and (best_value == nil or matrix[k][l] > best_value) then
                best_step = step
                best_value = matrix[k][l]
            end
        end
        if best_step == nil then
            break -- cell (0, 0): no predecessor left
        end
        i = i + best_step[1]
        j = j + best_step[2]
    end

    local parts = {}
    -- Draw the matrix as tikz node with matrix value
    for i = 0, n1 do
        for j = 0, n2 do
            local options = ""
            if on_path[coordinate(i, j)] then
                options = "[fill=gray, draw, minimum size=1]"
            end
            parts[#parts + 1] = "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
        end
    end
    -- Add nucleotide labels
    for i = 1, n1 do
        local nt = string.sub(seq1, i, i)
        parts[#parts + 1] = "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
    end
    for i = 1, n2 do
        local nt = string.sub(seq2, i, i)
        parts[#parts + 1] = "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
    end
    -- Arrows: from every cell to each in-range predecessor holding the
    -- highest predecessor score.
    for i = 0, n1 do
        for j = 0, n2 do
            local max = 0
            for _, step in ipairs(steps) do
                local k = i + step[1]
                local l = j + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] > max then
                    max = matrix[k][l]
                end
            end
            for _, step in ipairs(steps) do
                local k = i + step[1]
                local l = j + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] == max then
                    parts[#parts + 1] = "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
                end
            end
        end
    end
    return table.concat(parts)
end
-- Build TikZ code for an empty n1 x n2 grid labelled with the two sequences,
-- plus a gray path from the bottom-right cell back to the top-left corner.
--
-- seq1, seq2: strings. Returns the TikZ code as a single string.
-- NOTE(review): the path follows the minimum of the three neighbouring
-- scores, as the original comment states -- confirm that this is intended.
function draw_lcsq_matrix(seq1, seq2)
    -- print(string.format(" Path: %s -> %s", seq1, seq2))
    local matrix = lcsq_matrix(seq1, seq2)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    local repr = ""
    -- Draw the matrix as tikz nodes
    for i = 0, n1-1 do
        for j = 0, n2-1 do
            repr = repr .. " " .. string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j)
        end
    end
    -- Draw the sequence labels
    for i = 1, n1 do
        repr = repr .. " " .. string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i))
    end
    for i = 1, n2 do
        repr = repr .. " " .. string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i))
    end
    -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
    local i = n1-1
    local j = n2-1
    -- The opening of the \draw command must be concatenated; without the
    -- `..` operator Lua evaluates string.format as a separate statement and
    -- throws its result away.
    repr = repr .. " " .. string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j)
    while i > 0 and j > 0 do
        local value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1])
        if value == matrix[i-1][j-1] then
            i = i - 1
            j = j - 1
        elseif value == matrix[i-1][j] then
            i = i - 1
        else
            j = j - 1
        end
        repr = repr .. " " .. string.format(" (%d, -%d) -- ", i, j)
    end
    repr = repr .. " " .. "(0, 0) -- (-1, 1);"
    return repr
end
-- Demo entry point: print the TikZ code of the matrix graph for the two
-- example sequences.
function main()
    local first, second = "ATCTGAT", "TGCATA"
    local scores = lcsq_matrix(first, second)
    print(draw_lcsq_matrix_graph(first, second, scores))
end
-- main()
-- Module table handed back to require(): exposes the matrix builder and the
-- two TikZ drawing helpers under their own names.
return {
lcsq_matrix=lcsq_matrix,
draw_lcsq_matrix_graph=draw_lcsq_matrix_graph,
draw_lcsq_matrix=draw_lcsq_matrix
}

BIN
figures/part2/lcsq.pdf (Stored with Git LFS) Normal file

Binary file not shown.

18
figures/part2/lcsq.tex Normal file
View File

@ -0,0 +1,18 @@
% Standalone figure whose TikZ code is generated at compile time by the Lua
% module `lcsq`. The luacode environment needs a LuaTeX-based engine.
\documentclass[tikz]{standalone}
\usepackage{tikz}
\usepackage{luatextra}
\begin{document}
\begin{tikzpicture}
% The Lua chunk below loads the module, builds the score matrix for the two
% sequences and prints the generated TikZ code into this picture.
% Note that seq2 is assigned before seq1.
% NOTE(review): require('lcsq') presumably resolves to lcsq.lua next to this
% file -- confirm the Lua search path used by the build.
\begin{luacode}
lcsq = require('lcsq')
seq2 = "ATCTGAT"
seq1 = "TGCATA"
matrix = lcsq.lcsq_matrix(seq1, seq2)
tikz_code = lcsq.draw_lcsq_matrix_graph(seq1, seq2, matrix)
tex.print(tikz_code)
\end{luacode}
\end{tikzpicture}
\end{document}

8
folder-structure.sh Normal file
View File

@ -0,0 +1,8 @@
#!/bin/sh
# Recreate the directory tree found under ./content inside ./build
# (build/./content/...), creating build itself when it is missing.
mkdir -p build
# Hand the directories straight from find to mkdir: no temporary list file in
# the working tree and no word-splitting by xargs, so directory names that
# contain spaces are recreated correctly.
find ./content -type d -exec sh -c '
    for dir in "$@"; do
        mkdir -p "build/$dir"
    done
' sh {} +

BIN
main.pdf (Stored with Git LFS)

Binary file not shown.

View File

@ -12,7 +12,8 @@
fontsize=10pt, fontsize=10pt,
fleqn, fleqn,
oneside oneside
]{scrbook} ]{scrbook}
\usepackage{mus} \usepackage{mus}
@ -64,6 +65,7 @@
\definecolor{clementine}{HTML}{dfa000} \definecolor{clementine}{HTML}{dfa000}
\colorlet{primary}{clementine} \colorlet{primary}{clementine}
% \includeonly{content/chapters/part1/1}
\makeindex% \makeindex%
\makeglossary% \makeglossary%
\begin{document} \begin{document}
@ -77,10 +79,7 @@
\newpage \newpage
% \input{content ./introduction} % \input{content ./introduction}
\input{content/chapters/include} \input{content/chapters/include}
% \input{content/conclusion} % \input{content/conclusion}
\end{document} \end{document}

BIN
tmp.pdf (Stored with Git LFS)

Binary file not shown.

50
tmp.tex
View File

@ -11,6 +11,56 @@
\input{definitions.tex} \input{definitions.tex}
\begin{document} \begin{document}
\begin{algorithm}
\caption{Backtrack the longest common subsequence}
\begin{algorithmic}[1]
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
\State $L \gets Array(M[n][m])$
\State $k \gets 0$
\State $i \gets n$
\State $j \gets m$
\While{$i > 0$ and $j > 0$}
\If {$P[i][j] = '\nwarrow' $}
\State $L[k] \gets S_{1}[i]$
\State $i--$
\State $j--$
\State $k++$
\ElsIf {$P[i][j] = '\leftarrow'$}
\State $j--$
\Else
\State $i--$
\EndIf
\EndWhile
\State \Return $L$
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Recursive reconstruction of the longest common subsequence}
\begin{algorithmic}[1]
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
\State $i \gets n$
\State $j \gets m$
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
\EndProcedure
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
\If {$P[i][j] = '\nwarrow' $}
\State $l \gets S_{1}[i]$
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
\State \texttt{print}($l$)
\ElsIf {$P[i][j] = '\leftarrow'$}
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
\Else
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
\EndIf
\EndProcedure
\end{algorithmic}
\end{algorithm}
\end{document}
\end{document} \end{document}