Longest common subsequence
Try to use includeonly to limit compilation time
This commit is contained in:
parent
fc0331e054
commit
18203b1e49
|
@ -0,0 +1,41 @@
|
||||||
|
# Run the helper script ./createFolderStructure.sh through bash.
# Presumably this recreates the ./content directory tree under the build
# directory before compilation -- confirm against the script itself.
#
# Returns the raw status reported by system() (0 on success). A failure is
# reported with a warning instead of being silently ignored, but it does not
# abort the rc file.
sub createFolderStructure {
    # List form: the arguments are passed to bash directly, with no shell parsing.
    my @command = ( 'bash', './createFolderStructure.sh' );
    my $status  = system(@command);
    if ( $status != 0 ) {
        warn "createFolderStructure: '@command' failed (status $status)\n";
    }
    return $status;
}
|
||||||
|
|
||||||
|
createFolderStructure();
|
||||||
|
|
||||||
|
$hash_calc_ignore_pattern{aux} =
|
||||||
|
'^\\\\gdef\\\\minted@oldcachelist\{,'
|
||||||
|
. '|^\s*default\.pygstyle,'
|
||||||
|
. '|^\s*[[:xdigit:]]+\.pygtex';
|
||||||
|
|
||||||
|
$pdflatex =
|
||||||
|
'lualatex -shell-escape -file-line-error -interaction=nonstopmode -synctex=1 -output-directory=build %O '
|
||||||
|
. '\'\PassOptionsToPackage{outputdir=build}{minted}\input{%S}\'';
|
||||||
|
$aux_dir = 'build';
|
||||||
|
$bibtex_use = 2;
|
||||||
|
|
||||||
|
# Amend cleaned extensions
|
||||||
|
$clean_ext .= " fdb_latexmk run.xml synctex.gz";
|
||||||
|
|
||||||
|
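# Make latexmk quiet (NOTE(review): latexmk's documented setting is $silent, which makeglossaries() below also tests; confirm that $latexmk_silent has any effect)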
|
||||||
|
$latexmk_silent = 1;
|
||||||
|
|
||||||
|
# Makeglossaries
|
||||||
|
add_cus_dep( 'acn', 'acr', 0, 'makeglossaries' );
|
||||||
|
add_cus_dep( 'glo', 'gls', 0, 'makeglossaries' );
|
||||||
|
$clean_ext .= " acr acn alg glo gls glg";
|
||||||
|
|
||||||
|
# Custom-dependency handler registered above for acn->acr and glo->gls.
#
# $_[0] is the file name handed over by latexmk; it is split into its
# directory and base name so that makeglossaries can be pointed at that
# directory with -d.
#
# Returns the status of system() (0 on success), which latexmk uses to decide
# whether the rule succeeded.
sub makeglossaries {
    my ( $base_name, $path ) = fileparse( $_[0] );
    my @args = ( "-d", $path, $base_name );

    # Only silence makeglossaries when latexmk itself runs in silent mode.
    unshift @args, "-q" if $silent;

    # Pass the argument list that was actually built (the previous version
    # assembled @args and then ignored it, so -q was never forwarded).
    return system "makeglossaries", @args;
}
|
||||||
|
|
||||||
|
# Run biber on the file given as first argument, writing its output into the
# directory that contains that file.
# Returns the status of system() (0 on success).
sub biber {
    my ($target) = @_;
    my ( $name, $directory ) = fileparse($target);
    return system( "biber", "--output-directory", $directory, $name );
}
|
||||||
|
|
6
Makefile
6
Makefile
|
@ -1,6 +0,0 @@
|
||||||
options=-shell-escape -interaction=nonstopmode -file-line-error
|
|
||||||
|
|
||||||
all: main.pdf
|
|
||||||
|
|
||||||
%.pdf: %.tex
|
|
||||||
lualatex $(options) $<
|
|
|
@ -6,13 +6,12 @@
|
||||||
\foreach \i in {0, ..., #2} {%
|
\foreach \i in {0, ..., #2} {%
|
||||||
\edef\FileName{content/chapters/#1/\i}%
|
\edef\FileName{content/chapters/#1/\i}%
|
||||||
\IfFileExists{\FileName}{%
|
\IfFileExists{\FileName}{%
|
||||||
\input{\FileName}%
|
\include{\FileName}%
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
\includechapters{part1}{3}
|
\includechapters{part1}{3}
|
||||||
|
\includechapters{part2}{2}
|
||||||
% \includechapters{part2}{2}
|
|
||||||
|
|
||||||
% \includechapters{part3}{1}
|
% \includechapters{part3}{1}
|
||||||
|
|
|
@ -28,7 +28,7 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
|
||||||
|
|
||||||
|
|
||||||
\begin{table}
|
\begin{table}
|
||||||
\includegraphics{figures/part1/comparison_matrix_repetitions.pdf}
|
\includegraphics{./figures/part1/comparison_matrix_repetitions.pdf}
|
||||||
\caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"}
|
\caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"}
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
|
@ -248,17 +248,17 @@ The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
|
||||||
|
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includegraphics{figures/part1/minimal_suffix_automaton_exercise.pdf}
|
\includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf}
|
||||||
\caption{Suffix automaton for $S = $ AACTACT}
|
\caption{Suffix automaton for $S = $ AACTACT}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includegraphics{figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
|
\includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
|
||||||
\caption{Suffix automaton for $S = $ TCATCATT}
|
\caption{Suffix automaton for $S = $ TCATCATT}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton}
|
\caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
\Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||||
|
@ -276,6 +276,6 @@ The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
|
||||||
\EndIf
|
\EndIf
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$
|
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@ Let $M$ be a motif $M = $ ACAT.
|
||||||
|
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includegraphics{figures/part1/motif_search_automaton.pdf}
|
\includegraphics{./figures/part1/motif_search_automaton.pdf}
|
||||||
\caption{Motif search automaton for $M = $ ACAT}
|
\caption{Motif search automaton for $M = $ ACAT}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,113 @@
|
||||||
|
\chapter{Longest common subsequence}
|
||||||
|
|
||||||
|
Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
|
||||||
|
In this case, a longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$.
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Construct a longest common subsequence matrix}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
|
\State $M \gets $ Array($n+1$, $m+1$)
|
||||||
|
\For{($i = 0$; $i < n+1$; $i++$)}
|
||||||
|
\For{($j = 0$; $j < m+1$; $j++$)}
|
||||||
|
\If {$i = 0$ or $j = 0$}
|
||||||
|
\State $M[i][j] = 0$
|
||||||
|
\Else
|
||||||
|
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
||||||
|
\State $match = M[i-1][j-1] + 1$
|
||||||
|
\Else
|
||||||
|
\State $match = M[i-1][j-1]$
|
||||||
|
\EndIf
|
||||||
|
\State $gap_{1} = M[i-1][j]$
|
||||||
|
\State $gap_{2} = M[i][j-1]$
|
||||||
|
\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\State \Return $M$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Construct a longest common subsequence matrix keeping the path in memory}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
|
\State $M \gets $ Array($n+1$, $m+1$)
|
||||||
|
\State $P \gets $ Array($n+1$, $m+1$)
|
||||||
|
\For {($i = 0$; $i < n+1$; $i++$)}
|
||||||
|
\State $M[i][0] \gets 0$
|
||||||
|
\EndFor
|
||||||
|
\For {($j = 0$; $j < m+1$; $j++$)}
|
||||||
|
\State $M[0][j] \gets 0$
|
||||||
|
\EndFor
|
||||||
|
\For{($i = 1$; $i < n+1$; $i++$)}
|
||||||
|
\For{($j = 1$; $j < m+1$; $j++$)}
|
||||||
|
\If {$i = 0$ or $j = 0$}
|
||||||
|
\State $M[i][j] = 0$
|
||||||
|
\Else
|
||||||
|
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
||||||
|
\State $M[i][j] \gets M[i-1][j-1] + 1$
|
||||||
|
\State $P[i][j] \gets '\nwarrow'$
|
||||||
|
\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
|
||||||
|
\State $M[i][j] \gets M[i][j-1]$
|
||||||
|
\State $P[i][j] \gets '\leftarrow'$
|
||||||
|
\Else
|
||||||
|
\State $M[i][j] \gets M[i-1][j]$
|
||||||
|
\State $P[i][j] \gets '\downarrow'$
|
||||||
|
\EndIf
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\State \Return $M, P$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Backtrack the longest common subsequence}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
|
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
||||||
|
\State $L \gets Array(M[n][m])$
|
||||||
|
\State $k \gets 0$
|
||||||
|
\State $i \gets n$
|
||||||
|
\State $j \gets m$
|
||||||
|
\While{$i > 0$ and $j > 0$}
|
||||||
|
\If {$P[i][j] = '\nwarrow' $}
|
||||||
|
\State $L[k] \gets S_{1}[i-1]$
|
||||||
|
\State $i--$
|
||||||
|
\State $j--$
|
||||||
|
\State $k++$
|
||||||
|
\ElsIf {$P[i][j] = '\leftarrow'$}
|
||||||
|
\State $j--$
|
||||||
|
\Else
|
||||||
|
\State $i--$
|
||||||
|
\EndIf
|
||||||
|
\EndWhile
|
||||||
|
\State \Return $L$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Recursive reconstruction of the longest common subsequence}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
|
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
||||||
|
\State $i \gets n$
|
||||||
|
\State $j \gets m$
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
|
||||||
|
\EndProcedure
|
||||||
|
|
||||||
|
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
|
||||||
|
\If {$P[i][j] = '\nwarrow' $}
|
||||||
|
\State $l \gets S_{1}[i-1]$
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
|
||||||
|
\State \texttt{print}($l$)
|
||||||
|
\ElsIf {$P[i][j] = '\leftarrow'$}
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
|
||||||
|
\Else
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
|
||||||
|
\EndIf
|
||||||
|
\EndProcedure
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
|
@ -0,0 +1,24 @@
|
||||||
|
\part{Sequence alignment}
|
||||||
|
|
||||||
|
\section{Similarity between sequences}
|
||||||
|
|
||||||
|
A function $d$ is a distance between two sequences $x$ and $y$ in an alphabet $\Sigma$ if
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\forall x \in \Sigma^{*}$ $d(x, x) = 0$
|
||||||
|
\item $\forall x, y \in \Sigma^{*}$ $d(x,y) = d(y,x)$
|
||||||
|
\item $\forall x, y, z \in \Sigma^{*}$ $d(x, z) \leq d(x, y) + d(y, z)$
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Here we are interested in the distance that represents the transformation of $x$ into $y$ using three types of basic operations:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Substitution
|
||||||
|
\item Insertion
|
||||||
|
\item Deletion
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
Example:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $sub(a, b) = \begin{cases} 0 & \text{if } a = b \\ 1 & \text{otherwise} \end{cases}$.
|
||||||
|
\item $del(a) = 1$
|
||||||
|
\item $ins(a) = 1$
|
||||||
|
\end{itemize}
|
|
@ -0,0 +1,176 @@
|
||||||
|
--- Build the longest-common-subsequence score matrix of two strings.
-- @param seq1 first sequence (string of length n1)
-- @param seq2 second sequence (string of length n2)
-- @return a table `matrix` indexed matrix[i][j] with 0 <= i <= n1 and
--         0 <= j <= n2; matrix[i][j] is the LCS length of the first i
--         characters of seq1 and the first j characters of seq2.
function lcsq_matrix(seq1, seq2)
    local gap_penalty = 0
    local match_score = 1
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- Create a (n1+1) x (n2+1) matrix filled with zeros; row 0 and column 0
    -- stand for the empty prefix.
    local matrix = {}
    for i = 0, n1 do
        matrix[i] = {}
        for j = 0, n2 do
            matrix[i][j] = 0
        end
    end

    -- Fill the rest of the matrix. All intermediate values are locals so the
    -- function does not leak `gap1`/`gap2` into the global environment.
    for i = 1, n1 do
        for j = 1, n2 do
            local match = matrix[i-1][j-1]
            if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
                match = match + match_score
            end
            local gap1 = matrix[i-1][j] + gap_penalty
            local gap2 = matrix[i][j-1] + gap_penalty
            matrix[i][j] = math.max(match, gap1, gap2)
        end
    end
    return matrix
end
|
||||||
|
|
||||||
|
--- Tell whether `val` occurs in the array part of `tab`.
-- @param tab array-like table, scanned with ipairs
-- @param val value to look for (compared with ==)
-- @return true if some element equals val, false otherwise
local function has_value (tab, val)
    local found = false
    for _, item in ipairs(tab) do
        if item == val then
            found = true
            break
        end
    end
    return found
end
|
||||||
|
|
||||||
|
--- Render a 0-indexed matrix as text.
-- Each row becomes one line; every value is followed by a single space and
-- every row is terminated by "\n".
-- @param matrix table indexed matrix[i][j] starting at i = 0 and j = 0
--        (the layout produced by lcsq_matrix)
-- @return the textual representation as one string
function repr_matrix(matrix)
    local lines = {}
    -- Start at index 0 so the border row/column is included, and bound the
    -- inner loop by the row's own length so non-square matrices work.
    for i = 0, #matrix do
        local cells = {}
        for j = 0, #matrix[i] do
            cells[#cells + 1] = matrix[i][j] .. " "
        end
        lines[#lines + 1] = table.concat(cells) .. "\n"
    end
    return table.concat(lines)
end
|
||||||
|
|
||||||
|
|
||||||
|
--- Build TikZ code picturing the score matrix of seq1 and seq2.
-- The matrix is computed with lcsq_matrix. The returned string contains:
--   * one \node per cell, named "i_j" and placed at (i, -j), showing the
--     cell value; cells on the traced path are filled in gray;
--   * one label node per character of seq1 (at y = 1) and of seq2 (at x = -1);
--   * a \draw[->] arrow from every cell to each in-bounds neighbour among
--     (i-1, j), (i-1, j-1), (i, j-1) that holds the smallest neighbouring value.
-- The traced path starts at (n1, n2) and repeatedly moves to a neighbour whose
-- value is <= the current one (on ties the last candidate in `steps` wins).
-- NOTE(review): following minima is not the usual LCS traceback -- confirm
-- this is the intended picture.
-- @param seq1 first sequence (string)
-- @param seq2 second sequence (string)
-- @return the TikZ code as a single string
function draw_lcsq_matrix_graph(seq1, seq2)
    local matrix = lcsq_matrix(seq1, seq2)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- Node name of cell (i, j); kept local so it does not leak as a global.
    local function coordinate(i, j)
        return i .. "_" .. j
    end

    local steps = {
        {-1, 0},
        {-1, -1},
        {0, -1}
    }

    -- Trace the path; membership is stored as a set for constant-time lookup.
    local on_path = {}
    local i, j = n1, n2
    while i >= 0 and j >= 0 do
        on_path[coordinate(i, j)] = true
        local min = matrix[i][j]
        local min_step = steps[1]
        for _, step in ipairs(steps) do
            local k = i + step[1]
            local l = j + step[2]
            if k >= 0 and l >= 0 and matrix[k][l] <= min then
                min_step = step
                min = matrix[k][l]
            end
        end
        -- At (0, 0) no neighbour is in bounds, so steps[1] moves outside the
        -- matrix and ends the loop.
        i = i + min_step[1]
        j = j + min_step[2]
    end

    local parts = {}

    -- Draw the matrix as tikz nodes carrying the matrix values
    for row = 0, n1 do
        for col = 0, n2 do
            local options = ""
            if on_path[coordinate(row, col)] then
                options = "[fill=gray, draw, minimum size=1]"
            end
            parts[#parts + 1] = "\\node" .. options .. " (" .. coordinate(row, col) .. ") at (" .. row .. ", " .. -col .. ")" .. " {" .. matrix[row][col] .. "};"
        end
    end

    -- Add nucleotide labels for seq1
    for pos = 1, n1 do
        local nt = string.sub(seq1, pos, pos)
        parts[#parts + 1] = "\\node at (" .. pos .. "," .. 1 .. ")" .. "{$" .. nt .. "$};"
    end
    -- Add nucleotide labels for seq2
    for pos = 1, n2 do
        local nt = string.sub(seq2, pos, pos)
        parts[#parts + 1] = "\\node at (" .. -1 .. ", " .. -pos .. ")" .. "{$ " .. nt .. "$};"
    end

    -- Arrows from each cell to its smallest-valued neighbour(s)
    for row = 0, n1 do
        for col = 0, n2 do
            local min = math.huge
            for _, step in ipairs(steps) do
                local k = row + step[1]
                local l = col + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] < min then
                    min = matrix[k][l]
                end
            end
            for _, step in ipairs(steps) do
                local k = row + step[1]
                local l = col + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] == min then
                    parts[#parts + 1] = "\\draw[->] (" .. coordinate(row, col) .. ")" .. " -- " .. "(" .. coordinate(k, l) .. ");"
                end
            end
        end
    end

    return table.concat(parts)
end
|
||||||
|
|
||||||
|
--- Print TikZ code for an empty n1 x n2 grid with sequence labels and a path.
-- The output goes to standard output via print, one TikZ fragment per call:
-- the grid cells, the characters of seq1 and seq2 as labels, then a gray
-- \draw path starting at (n1-1, n2-1).
-- The path moves, at each step, towards the smallest of the three
-- neighbouring matrix values (diagonal first, then i-1, then j-1).
-- @param seq1 first sequence (string)
-- @param seq2 second sequence (string)
function draw_lcsq_matrix(seq1, seq2)
    local matrix = lcsq_matrix(seq1, seq2)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- Draw the matrix as tikz nodes
    for i = 0, n1 - 1 do
        for j = 0, n2 - 1 do
            print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j))
        end
    end

    -- Draw the sequence labels
    for i = 1, n1 do
        print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
    end
    for i = 1, n2 do
        print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
    end

    -- Add a path from the bottom right corner towards the top left corner,
    -- following the minimum of the three possible moves at each step
    local i = n1 - 1
    local j = n2 - 1
    print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
    while i > 0 and j > 0 do
        -- All three candidates come from `matrix`; the previous code indexed
        -- the standard `table` library here, which raises an error.
        local value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1])
        if value == matrix[i-1][j-1] then
            i = i - 1
            j = j - 1
        elseif value == matrix[i-1][j] then
            i = i - 1
        else
            j = j - 1
        end
        print(string.format(" (%d, -%d) -- ", i, j))
    end
    print("(0, 0) -- (-1, 1);")
end
|
||||||
|
|
||||||
|
--- Script entry point: print the LCS matrix of two sample sequences.
function main()
    local first, second = "ATCTGAT", "TGCATA"
    local scores = lcsq_matrix(first, second)
    local text = repr_matrix(scores)
    print(text)
end
|
||||||
|
|
||||||
|
main()
|
|
@ -0,0 +1,181 @@
|
||||||
|
--- Compute the longest-common-subsequence score matrix of two strings.
-- @param seq1 first sequence (string of length n1)
-- @param seq2 second sequence (string of length n2)
-- @return table indexed matrix[i][j], 0 <= i <= n1 and 0 <= j <= n2, where
--         matrix[i][j] is the LCS length of seq1[1..i] and seq2[1..j]
function lcsq_matrix(seq1, seq2)
    local gap_penalty = 0
    local match_score = 1
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- (n1+1) x (n2+1) zero matrix; index 0 represents the empty prefix.
    local matrix = {}
    for i = 0, n1 do
        local row = {}
        for j = 0, n2 do
            row[j] = 0
        end
        matrix[i] = row
    end

    -- Fill the inner cells. Every temporary is declared local: the former
    -- `gap1`/`gap2` assignments created globals visible to the whole
    -- Lua state of the document.
    for i = 1, n1 do
        local char1 = string.sub(seq1, i, i)
        for j = 1, n2 do
            local match = matrix[i-1][j-1]
            if char1 == string.sub(seq2, j, j) then
                match = match + match_score
            end
            local gap1 = matrix[i-1][j] + gap_penalty
            local gap2 = matrix[i][j-1] + gap_penalty
            matrix[i][j] = math.max(match, gap1, gap2)
        end
    end
    return matrix
end
|
||||||
|
|
||||||
|
--- Membership test on the array part of a table.
-- @param tab array-like table, traversed with ipairs
-- @param val value searched for (== comparison)
-- @return true when val is found, false otherwise
local function has_value (tab, val)
    for _, candidate in ipairs(tab) do
        if candidate == val then return true end
    end
    return false
end
|
||||||
|
|
||||||
|
--- Render a 0-indexed matrix as text, one row per line.
-- Every value is followed by one space; every row ends with "\n".
-- @param matrix table indexed matrix[i][j] from i = 0 and j = 0
-- @return the textual representation as one string
function repr_matrix(matrix)
    -- Accumulate in a local buffer: the former `repr = ""` assigned a global.
    local buffer = {}
    for i = 0, #matrix do
        local row = matrix[i]
        for j = 0, #row do
            buffer[#buffer + 1] = row[j] .. " "
        end
        buffer[#buffer + 1] = "\n"
    end
    return table.concat(buffer)
end
|
||||||
|
|
||||||
|
|
||||||
|
--- Build TikZ code picturing an LCS score matrix and one optimal traceback.
-- The returned string contains:
--   * one \node per cell, named "i_j" and placed at (i, -j), showing
--     matrix[i][j]; cells on the traceback are filled in gray;
--   * one label node per character of seq1 (at y = 1) and of seq2 (at x = -1);
--   * a \draw[->] arrow from every cell to each in-bounds neighbour among
--     (i-1, j-1), (i, j-1), (i-1, j) that holds the largest neighbouring value.
-- The traceback starts at (n1, n2): on equal characters it moves diagonally,
-- otherwise towards the larger of matrix[i][j-1] and matrix[i-1][j] (j-1 on
-- ties), and it follows the border once i or j reaches 0, ending at (0, 0).
-- The previous version looked for a neighbour strictly greater than the
-- current cell, which never exists in an LCS matrix, so it always highlighted
-- the plain diagonal.
-- @param seq1 first sequence (string), indexes the first matrix dimension
-- @param seq2 second sequence (string), indexes the second matrix dimension
-- @param matrix score matrix as returned by lcsq_matrix(seq1, seq2)
-- @return the TikZ code as a single string
function draw_lcsq_matrix_graph(seq1, seq2, matrix)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- Node name of cell (i, j); local so it no longer leaks as a global.
    local function coordinate(i, j)
        return i .. "_" .. j
    end

    local steps = {
        {-1, -1},
        {0, -1},
        {-1, 0},
    }

    -- Traceback; membership is stored as a set for constant-time lookup.
    local on_path = {}
    local i, j = n1, n2
    while true do
        on_path[coordinate(i, j)] = true
        if i == 0 and j == 0 then
            break
        elseif i == 0 then
            j = j - 1
        elseif j == 0 then
            i = i - 1
        elseif string.sub(seq1, i, i) == string.sub(seq2, j, j) then
            i = i - 1
            j = j - 1
        elseif matrix[i][j-1] >= matrix[i-1][j] then
            j = j - 1
        else
            i = i - 1
        end
    end

    local parts = {}

    -- Draw the matrix as tikz nodes carrying the matrix values
    for row = 0, n1 do
        for col = 0, n2 do
            local options = ""
            if on_path[coordinate(row, col)] then
                options = "[fill=gray, draw, minimum size=1]"
            end
            parts[#parts + 1] = "\\node" .. options .. " (" .. coordinate(row, col) .. ") at (" .. row .. ", " .. -col .. ")" .. " {" .. matrix[row][col] .. "};"
        end
    end

    -- Add nucleotide labels for seq1
    for pos = 1, n1 do
        local nt = string.sub(seq1, pos, pos)
        parts[#parts + 1] = "\\node at (" .. pos .. "," .. 1 .. ")" .. "{$" .. nt .. "$};"
    end
    -- Add nucleotide labels for seq2
    for pos = 1, n2 do
        local nt = string.sub(seq2, pos, pos)
        parts[#parts + 1] = "\\node at (" .. -1 .. ", " .. -pos .. ")" .. "{$ " .. nt .. "$};"
    end

    -- Arrows from each cell to its largest-valued neighbour(s)
    for row = 0, n1 do
        for col = 0, n2 do
            local max = 0
            for _, step in ipairs(steps) do
                local k = row + step[1]
                local l = col + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] > max then
                    max = matrix[k][l]
                end
            end
            for _, step in ipairs(steps) do
                local k = row + step[1]
                local l = col + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] == max then
                    parts[#parts + 1] = "\\draw[->] (" .. coordinate(row, col) .. ")" .. " -- " .. "(" .. coordinate(k, l) .. ");"
                end
            end
        end
    end

    return table.concat(parts)
end
|
||||||
|
|
||||||
|
--- Build TikZ code for an empty n1 x n2 grid with sequence labels and a path.
-- The returned string is a sequence of fragments, each preceded by one
-- space: the grid cells, the characters of seq1 and seq2 as labels, then a
-- gray \draw path starting at (n1-1, n2-1).
-- The path moves, at each step, towards the smallest of the three
-- neighbouring matrix values (diagonal first, then i-1, then j-1).
-- @param seq1 first sequence (string)
-- @param seq2 second sequence (string)
-- @return the TikZ code as a single string
function draw_lcsq_matrix(seq1, seq2)
    local matrix = lcsq_matrix(seq1, seq2)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    local parts = {}
    -- Append one fragment, keeping the leading-space separator of the output.
    local function emit(fragment)
        parts[#parts + 1] = " " .. fragment
    end

    -- Draw the matrix as tikz nodes
    for i = 0, n1 - 1 do
        for j = 0, n2 - 1 do
            emit(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j))
        end
    end

    -- Draw the sequence labels
    for i = 1, n1 do
        emit(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
    end
    for i = 1, n2 do
        emit(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
    end

    -- Add a path from the bottom right corner towards the top left corner,
    -- following the minimum of the three possible moves at each step
    local i = n1 - 1
    local j = n2 - 1
    -- The \draw prefix must be appended like every other fragment: the
    -- previous code lacked the `..` operator here, so Lua parsed the
    -- string.format call as a separate statement and discarded its result.
    emit(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
    while i > 0 and j > 0 do
        local value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1])
        if value == matrix[i-1][j-1] then
            i = i - 1
            j = j - 1
        elseif value == matrix[i-1][j] then
            i = i - 1
        else
            j = j - 1
        end
        emit(string.format(" (%d, -%d) -- ", i, j))
    end
    emit("(0, 0) -- (-1, 1);")

    return table.concat(parts)
end
|
||||||
|
|
||||||
|
--- Manual entry point: print the TikZ graph for two sample sequences.
function main()
    local first, second = "ATCTGAT", "TGCATA"
    local scores = lcsq_matrix(first, second)
    local tikz = draw_lcsq_matrix_graph(first, second, scores)
    print(tikz)
end
|
||||||
|
|
||||||
|
-- main()
|
||||||
|
|
||||||
|
-- Module table handed to require(): exposes the matrix builder and the two
-- TikZ generators under their own names.
return {
    lcsq_matrix=lcsq_matrix,
    draw_lcsq_matrix_graph=draw_lcsq_matrix_graph,
    draw_lcsq_matrix=draw_lcsq_matrix
}
|
Binary file not shown.
|
@ -0,0 +1,18 @@
|
||||||
|
\documentclass[tikz]{standalone}
|
||||||
|
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage{luatextra}
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{luacode}
|
||||||
|
lcsq = require('lcsq')
|
||||||
|
seq2 = "ATCTGAT"
|
||||||
|
seq1 = "TGCATA"
|
||||||
|
matrix = lcsq.lcsq_matrix(seq1, seq2)
|
||||||
|
tikz_code = lcsq.draw_lcsq_matrix_graph(seq1, seq2, matrix)
|
||||||
|
tex.print(tikz_code)
|
||||||
|
\end{luacode}
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
|
@ -0,0 +1,8 @@
|
||||||
|
#!/bin/sh

# Recreate the directory tree found under ./content inside ./build
# (e.g. ./content/chapters/part1 -> build/./content/chapters/part1).
#
# The directories are handed straight from find to mkdir, so no temporary
# list file is written to the working directory and paths containing
# spaces are handled correctly.

mkdir -p build || exit 1

find ./content -type d -exec sh -c '
    for dir in "$@"; do
        mkdir -p "build/$dir" || exit 1
    done
' sh {} +
|
7
main.tex
7
main.tex
|
@ -12,7 +12,8 @@
|
||||||
fontsize=10pt,
|
fontsize=10pt,
|
||||||
fleqn,
|
fleqn,
|
||||||
oneside
|
oneside
|
||||||
]{scrbook}
|
]{scrbook}
|
||||||
|
|
||||||
|
|
||||||
\usepackage{mus}
|
\usepackage{mus}
|
||||||
|
|
||||||
|
@ -64,6 +65,7 @@
|
||||||
\definecolor{clementine}{HTML}{dfa000}
|
\definecolor{clementine}{HTML}{dfa000}
|
||||||
\colorlet{primary}{clementine}
|
\colorlet{primary}{clementine}
|
||||||
|
|
||||||
|
% \includeonly{content/chapters/part1/1}
|
||||||
\makeindex%
|
\makeindex%
|
||||||
\makeglossary%
|
\makeglossary%
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
@ -77,10 +79,7 @@
|
||||||
\newpage
|
\newpage
|
||||||
|
|
||||||
% \input{content ./introduction}
|
% \input{content ./introduction}
|
||||||
|
|
||||||
\input{content/chapters/include}
|
\input{content/chapters/include}
|
||||||
|
|
||||||
|
|
||||||
% \input{content/conclusion}
|
% \input{content/conclusion}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
48
tmp.tex
48
tmp.tex
|
@ -11,7 +11,55 @@
|
||||||
\input{definitions.tex}
|
\input{definitions.tex}
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Backtrack the longest common subsequence}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
|
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
||||||
|
\State $L \gets Array(M[n][m])$
|
||||||
|
\State $k \gets 0$
|
||||||
|
\State $i \gets n$
|
||||||
|
\State $j \gets m$
|
||||||
|
\While{$i > 0$ and $j > 0$}
|
||||||
|
\If {$P[i][j] = '\nwarrow' $}
|
||||||
|
\State $L[k] \gets S_{1}[i-1]$
|
||||||
|
\State $i--$
|
||||||
|
\State $j--$
|
||||||
|
\State $k++$
|
||||||
|
\ElsIf {$P[i][j] = '\leftarrow'$}
|
||||||
|
\State $j--$
|
||||||
|
\Else
|
||||||
|
\State $i--$
|
||||||
|
\EndIf
|
||||||
|
\EndWhile
|
||||||
|
\State \Return $L$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Recursive reconstruction of the longest common subsequence}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
|
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
||||||
|
\State $i \gets n$
|
||||||
|
\State $j \gets m$
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
|
||||||
|
\EndProcedure
|
||||||
|
|
||||||
|
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
|
||||||
|
\If {$P[i][j] = '\nwarrow' $}
|
||||||
|
\State $l \gets S_{1}[i-1]$
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
|
||||||
|
\State \texttt{print}($l$)
|
||||||
|
\ElsIf {$P[i][j] = '\leftarrow'$}
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
|
||||||
|
\Else
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
|
||||||
|
\EndIf
|
||||||
|
\EndProcedure
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue