10 changed files with 284 additions and 422 deletions
--- a/.latexmkrc
+++ b/.latexmkrc
@ -1,5 +1,5 @@
 sub createFolderStructure{
-   system("bash ./folder-structure.sh");
+   system("bash ./createFolderStructure.sh");
 }
 createFolderStructure();
--- a/content/chapters/include.tex
+++ b/content/chapters/include.tex
@ -11,7 +11,7 @@
 		}
 }
-\includechapters{part1}{4}
+\includechapters{part1}{3}
-\includechapters{part2}{4}
+\includechapters{part2}{2}
 % \includechapters{part3}{1}
--- a/content/chapters/part1/4.bak0
+++ b/content/chapters/part1/4.bak0
@ -1,116 +0,0 @@
 \chapter{Longest common subsequence}
 Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
 In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
 \begin{algorithm}
 	\caption{Construct a longest common subsequence matrix}
 	\begin{algorithmic}[1]
 		\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
 		\State $M \gets $ Array($m+1$, $n+1$)
 		\For{($i = 0$; $i < n+1$; $i++$)}
 		\For{$j = 0$; $j < m+1$; $j++$}
 		\If {$i = 0$ or $j = 0$}
 		\State $M[i][j] = 0$
 		\Else
 		\If {$S_{1}[i] = S_{2}[j]$}
 		\State $match = M[i-1][j-1] + 1$
 		\Else
 		\State $match = M[i-1][j-1]$
 		\EndIf
 		\State $gap_{1} = M[i-1][j]$
 		\State $gap_{2} = M[i][j-1]$
 		\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
 		\EndIf
 		\EndFor
 		\EndFor
 		\State \Return $M$
 		\EndFunction
 	\end{algorithmic}
      \end{algorithm}
 \begin{algorithm}
 	\caption{Construct a longest common subsequence matrix keeping the path in memory}
 	\begin{algorithmic}[1]
 		\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
 		\State $M \gets $ Array($m+1$, $n+1$)
 		\State $P \gets $ Array($m+1$, $n+1$)
 		\For {($i = 0$; $i < n+1$; $i++$)}
 		\State $M[i][0] \gets 0$
 		\EndFor
 		\For {($j = 0$; $j < m+1$; $j++$)}
 		\State $M[0][j] \gets 0$
 		\EndFor
 		\For{($i = 1$; $i < n+1$; $i++$)}
 		\For{($j = 1$; $j < m+1$; $j++$)}
 		\If {$i = 1$ or $j = 0$}
 		\State $M[i][j] = 0$
 		\Else
 		\If {$S_{1}[i-1] = S_{2}[j-1]$}
 		\State $M[i][j] \gets M[i-1][j-1] + 1$
 		\State $P[i][j] \gets '\nwarrow'$
 		\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
 		\State $M[i][j] \gets M[i][j-1]$
 		\State $P[i][j] \gets '\leftarrow'$
 		\Else
 		\State $M[i][j] \gets M[i-1][j]$
 		\State $P[i][j] \gets '\downarrow'$
 		\EndIf
 		\EndFor
 		\EndFor
 		\State \Return $M, P$
 		\EndFunction
 	\end{algorithmic}
      \end{algorithm}
 \begin{algorithm}
 	\caption{Backtrack the longest common subsequence}
 	\begin{algorithmic}[1]
 		\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
 		\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
 		\State $L \gets Array(M[n][m])$
 		\State $k \gets 0$
 		\State $i \gets n$
 		\State $j \gets m$
 		\While{$i > 0$ and $j > 0$}
 		\If {$P[i][j] = '\nwarrow' $}
 		\State $L[k] \gets S_{1}[i]$
 		\State $i--$
 		\State $j--$
 		\State $k++$
 		\ElsIf {$P[i][j] = '\leftarrow'$}
 		\State $j--$
 		\Else
 		\State $i--$
 		\EndIf
 		\EndWhile
 		\State \Return $L$
 		\EndFunction
 	\end{algorithmic}
 \end{algorithm}
 \iffalse
 \begin{algorithm}
 	\caption{Recursive reconstruction of the longest common subsequence}
 	\begin{algorithmic}[1]
 		\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
 		\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
 		\State $i \gets n$
 		\State $j \gets m$
 		\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
 		\EndProcedure
 		\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
 		\If {$P[i][j] = '\nwarrow' $}
 		\State $l \gets S_{1}[i]$
 		\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
 		\State \texttt{print}($l$)
 		\ElsIf {$P[i][j] = '\leftarrow'$}
 		\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
 		\Else
 		\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
 		\EndIf
 		\EndProcedure
 	\end{algorithmic}
 \end{algorithm}
 \fi
--- a/content/chapters/part1/4.tex
+++ b/content/chapters/part1/4.tex
@ -55,7 +55,6 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
 		\State $M[i][j] \gets M[i-1][j]$
 		\State $P[i][j] \gets '\downarrow'$
 		\EndIf
 	\EndIf
 		\EndFor
 		\EndFor
 		\State \Return $M, P$
@ -89,7 +88,6 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
 	\end{algorithmic}
 \end{algorithm}
 \iffalse
 \begin{algorithm}
 	\caption{Recursive reconstruction of the longest common subsequence}
 	\begin{algorithmic}[1]
@ -113,4 +111,3 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
 		\EndProcedure
 	\end{algorithmic}
 \end{algorithm}
 \fi
--- a/content/chapters/part2/0.tex
+++ b/content/chapters/part2/0.tex
@ -22,16 +22,3 @@ Example:
 \item $del(a) = 1$
 \item $ins(a) = 1$
 \end{itemize}
 Let $X = x_{0} x_{1} \ldots x_{m-1}$, $Y = y_{0} y_{1} \ldots y_{n-1} $
 An alignment of size $p$ is denoted $z = \begin{pmatrix} \bar{x}_{0} \\ \bar{y}_{0} \end{pmatrix} \ldots \begin{pmatrix} \bar{x}_{p-1} \\ \bar{y}_{p-1} \end{pmatrix}$, where $\max(m, n) \leq p \leq n + m$.
 $\bar{x}_{i} = x_{j}$ or $\bar{x}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq m - 1$
 $\bar{y}_{i} = y_{j}$ or $\bar{y}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq n - 1$
 $X' = \bar{x}_{0} \bar{x}_{1} \ldots \bar{x}_{i} \ldots \bar{x}_{p-1}$
 $Y' = \bar{y}_{0} \bar{y}_{1} \ldots \bar{y}_{i} \ldots \bar{y}_{p-1}$
 There is no $i$ with $0 \leq i \leq p-1$ such that $\bar{x}_{i} = \bar{y}_{i} = \varepsilon$.
--- a/content/chapters/part2/1.tex
+++ b/content/chapters/part2/1.tex
@ -1,145 +0,0 @@
 \chapter{Sequence alignment}
 \section{Needleman - Wunsch algorithm}
 \begin{algorithm}
 	\caption{Needleman-Wunsch Algorithm}
 	\begin{algorithmic}[1]
 		\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\Comment{$sub(a, b)$ is the substitution score; $del(a)$ and $ins(a)$ are the deletion and insertion penalties with respect to the reference sequence $S_{1}$}
 		\State $M = $ Array($m+1$, $n+1$)
 		\Comment{Initialize the matrix first column and first row}
 		\State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell upon.}
 		\State $M[0][0] = 0$
 		\For {($i = 1$; $i < m+1$; $i++$)}
 		\State $M[i][0] = M[i-1][0] + gap\_penalty$
 		\EndFor
 		\For {($j = 1$; $j < n+1$; $j++$)}
 		\State $M[0][j] = M[0][j-1] + gap\_penalty$
 		\EndFor
 		\Comment{Fill the remaining matrix}
 		\For {($i = 1$; $i < m+1$; $i++$)}
 		\For {($j = 1$; $j < n+1$; $j++$)}
 		\State $delete = M[i-1][j] + gap\_penalty$
 		\State $insert = M[i][j-1] + gap\_penalty$
 		\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
 		\State $choice = \min \{delete, insert, substitute\}$
 		\If {$substitute = choice$}
 		\State $P[i-1][j-1] = '\nwarrow'$
 		\ElsIf {$delete = choice$}
 		\State $P[i-1][j-1] = '\uparrow'$
 		\Else
 		\State $P[i-1][j-1] = '\leftarrow'$
 		\EndIf
 		\State $M[i][j] = choice$
 		\EndFor
 		\EndFor
 		\EndProcedure
 	\end{algorithmic}
 \end{algorithm}
 \begin{algorithm}
 	\caption{Needleman-Wunsch Algorithm (Backtrack)}
 	\begin{algorithmic}[1]
 		\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\State $extend_{1} = ''$
 		\State $extend_{2} = ''$
 		\State $i = m$
 		\State $j = n$
 		\While{$i > 0$ and $j > 0$}
 		\If {$P[i-1][j-1] = '\nwarrow'$}
 		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
 		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
 		\State $i--$
 		\State $j--$
 		\ElsIf {$P[i-1][j-1] = '\uparrow'$}
 		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
 		\State $extend_{2} = '-' \circ extend_{2}$
 		\State $i--$
 		\Else
 		\State $extend_{1} = '-' \circ extend_{1}$
 		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
 		\State $j--$
 		\EndIf
 		\EndWhile
 		\While{$i > 0$}
 		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
 		\State $extend_{2} = '-' \circ extend_{2}$
 		\State $i--$
 		\State \Call{Insert}{0, $alignment$,$tuple$}
 		\EndWhile
 		\While{$j > 0$}
 		\State $extend_{1} = '-' \circ extend_{1}$
 		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
 		\State $j--$
 		\EndWhile
 		\State \Call{print}{$extend_{1}$}
 		\State \Call{print}{$extend_{2}$}
 		\EndProcedure
 		\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
 		\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
 	\end{algorithmic}
 \end{algorithm}
 \begin{algorithm}
 	\caption{Needleman-Wunsch Algorithm (Backtrack) }
 	\begin{algorithmic}[1]
 		\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\State $M = $ Array($m+1$, $n+1$)
 		\State $P = $ Array($m$, $n$)
 		\Comment{Store the direction of the cell we chose to build the next cell upon.}
 		\State $M[0][0] = 0$
 		\For {($i = 1$; $i < m+1$; $i++$)}
 		\State $M[i][0] = M[i-1][0] + gap\_penalty$
 		\EndFor
 		\For {($j = 1$; $j < n+1$; $j++$)}
 		\State $M[0][j] = M[0][j-1] + gap\_penalty$
 		\EndFor
 		\For {($i = 1$; $i < m+1$; $i++$)}
 		\For {($j = 1$; $j < n+1$; $j++$)}
 		\State $delete = M[i-1][j] + gap\_penalty$
 		\State $insert = M[i][j-1] + gap\_penalty$
 		\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
 		\State $M[i][j] = \min \{substitute, insert, delete\}$
 		\EndFor
 		\EndFor
 		\EndProcedure
 	\end{algorithmic}
 \end{algorithm}
 \begin{algorithm}
 	\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
 	\begin{algorithmic}[1]
 		\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\State $alignment = LinkedList$
 		\State $i = m$
 		\State $j = n$
 		\While{$i > 0$ and $j > 0$}
 		\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
 		\State $tuple = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
 		\State $i--$
 		\State $j--$
 		\ElsIf {$M[i-1][j] = M[i][j] - gap\_penalty$}
 		\State $tuple = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
 		\State $i--$
 		\Else
 		\State $tuple = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
 		\State $j--$
 		\EndIf
 		\State \Call{Insert}{0, $alignment$,$tuple$}
 		\EndWhile
 		\While{$i > 0$}
 		\State $tuple = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
 		\State $i--$
 		\State \Call{Insert}{0, $alignment$,$tuple$}
 		\EndWhile
 		\While{$j > 0$}
 		\State $tuple = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
 		\State $j--$
 		\State \Call{Insert}{0, $alignment$,$tuple$}
 		\EndWhile
 		\EndProcedure
 		\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
 		\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
 	\end{algorithmic}
 \end{algorithm}
--- a/figures/part2/*
+++ b/figures/part2/*
@ -0,0 +1,176 @@
 --- Build the longest-common-subsequence (LCS) score matrix of two strings.
 -- The result is a 0-indexed table with (#seq1 + 1) rows and (#seq2 + 1)
 -- columns. Row 0 and column 0 are the all-zero border; cell [i][j] is the
 -- best score for the first i characters of seq1 against the first j
 -- characters of seq2 (with the scores below, that is the LCS length).
 -- @tparam string seq1 first sequence (indexes the rows)
 -- @tparam string seq2 second sequence (indexes the columns)
 -- @treturn table matrix, addressed as matrix[i][j] with i in 0..#seq1, j in 0..#seq2
 function lcsq_matrix(seq1, seq2)
    local gap_penalty = 0
    local match_score = 1
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    -- Create a (n1 + 1) x (n2 + 1) matrix filled with zeros (0-indexed)
    local matrix = {}
    for i = 0, n1 do
        matrix[i] = {}
        for j = 0, n2 do
            matrix[i][j] = 0
        end
    end
    -- Fill the rest of the matrix, row by row
    for i = 1, n1 do
        local row, above = matrix[i], matrix[i-1]
        local c1 = string.sub(seq1, i, i)
        for j = 1, n2 do
            -- Diagonal move: only rewarded when the two characters agree
            local match = above[j-1]
            if c1 == string.sub(seq2, j, j) then
                match = match + match_score
            end
            -- Kept local: these used to leak into the global environment
            local gap1 = above[j] + gap_penalty
            local gap2 = row[j-1] + gap_penalty
            row[j] = math.max(match, gap1, gap2)
        end
    end
    return matrix
 end
 --- Tell whether a sequence contains a given value.
 -- Walks `tab` with ipairs (so it stops at the first nil) and compares each
 -- element to `val` with ==.
 -- @tparam table tab sequence to search
 -- @param val value to look for
 -- @treturn boolean true when some element equals val, false otherwise
 local function has_value (tab, val)
    local found = false
    for _, item in ipairs(tab) do
        if item == val then
            found = true
            break
        end
    end
    return found
 end
 --- Format a matrix as text, one row per line.
 -- Every cell is followed by a single space and every row by a newline.
 -- Both 1-based tables and the 0-based tables built by lcsq_matrix are
 -- supported: a row/column 0 is included when present, and each row is
 -- walked over its own length, so non-square matrices work too.
 -- @tparam table matrix table of rows, each row a table of cells
 -- @treturn string textual representation ("" for an empty matrix)
 function repr_matrix(matrix)
   local out = {}
   -- `#` never counts index 0, so detect a 0-based layout explicitly
   local first_row = (matrix[0] ~= nil) and 0 or 1
   for i = first_row, #matrix do
     local row = matrix[i]
     local first_col = (row[0] ~= nil) and 0 or 1
     for j = first_col, #row do
       out[#out + 1] = tostring(row[j])
       out[#out + 1] = " "
     end
     out[#out + 1] = "\n"
   end
   return table.concat(out)
 end
 --- Render the LCS matrix of two sequences as a string of TikZ commands.
 -- Emits, in this order: one \node per matrix cell (cells on the traced path
 -- are filled gray), one label node per character of seq1 and of seq2, and
 -- \draw arrows from every cell to each of its up / diagonal / left
 -- neighbours holding the smallest value among those neighbours.
 -- Cell (i, j) is placed at TikZ position (i, -j), so seq1 runs along x.
 -- @tparam string seq1 sequence indexing the first matrix dimension
 -- @tparam string seq2 sequence indexing the second matrix dimension
 -- @treturn string concatenated TikZ code (no separator between commands)
 function draw_lcsq_matrix_graph(seq1, seq2)
    local matrix = lcsq_matrix(seq1, seq2)
    -- Node name of cell (i, j); local so that calling this function no
    -- longer defines a global `coordinate`.
    local function coordinate(i, j)
        return i .. "_" .. j
    end
    local steps = {
        {-1, 0},
        {-1, -1},
        {0, -1}
    }
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)

    -- Trace a path from the bottom-right cell, always moving to the last
    -- in-bounds neighbour whose value is <= the current one. When no
    -- neighbour is in bounds (cell 0,0) the default step leaves the matrix
    -- and ends the loop.
    local on_path = {}
    local i, j = n1, n2
    while i >= 0 and j >= 0 do
        on_path[coordinate(i, j)] = true
        local min = matrix[i][j]
        local min_step = steps[1]
        for _, step in ipairs(steps) do
            local k = i + step[1]
            local l = j + step[2]
            if k >= 0 and l >= 0 and matrix[k][l] <= min then
                min_step = step
                min = matrix[k][l]
            end
        end
        i = i + min_step[1]
        j = j + min_step[2]
    end

    -- Collect the pieces and join once at the end
    local buf = {}
    local function emit(piece)
        buf[#buf + 1] = piece
    end

    -- Draw the matrix as tikz nodes holding the matrix values
    for i = 0, n1 do
        for j = 0, n2 do
            local options = ""
            if on_path[coordinate(i, j)] then
                options = "[fill=gray, draw, minimum size=1]"
            end
            emit("\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};")
        end
    end
    -- Add nucleotide labels: seq1 above the matrix, seq2 to its left
    for i = 1, n1 do
        local nt = string.sub(seq1, i, i)
        emit("\\node at (" .. i .. "," .. 1 .. ")" .. "{$" .. nt .. "$};")
    end
    for i = 1, n2 do
        local nt = string.sub(seq2, i, i)
        emit("\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ " .. nt .. "$};")
    end
    -- Draw an arrow from each cell to every neighbour tied for the minimum
    for i = 0, n1 do
        for j = 0, n2 do
            local min = math.huge
            for _, step in ipairs(steps) do
                local k = i + step[1]
                local l = j + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] < min then
                    min = matrix[k][l]
                end
            end
            for _, step in ipairs(steps) do
                local k = i + step[1]
                local l = j + step[2]
                if k >= 0 and l >= 0 and matrix[k][l] == min then
                    emit("\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate(k, l) .. ");")
                end
            end
        end
    end
    return table.concat(buf)
 end
 --- Print TikZ commands drawing an empty LCS grid with a traced path.
 -- Output goes to standard output via print, one command (or path segment)
 -- per call: an empty boxed node for each cell (i, j) with i in 0..#seq1-1
 -- and j in 0..#seq2-1 at position (i, -j), the characters of seq1 on the
 -- row y = 1, the characters of seq2 on the column x = -1, and finally a gray
 -- path from the bottom-right cell towards the top-left corner.
 -- @tparam string seq1 sequence labelling the columns
 -- @tparam string seq2 sequence labelling the rows
 function draw_lcsq_matrix(seq1, seq2)
    local matrix = lcsq_matrix(seq1, seq2)
    local n1 = string.len(seq1)
    local n2 = string.len(seq2)
    -- Draw the matrix as tikz nodes
    for i = 0, n1 - 1 do
        for j = 0, n2 - 1 do
            print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j))
        end
    end
    -- Draw the sequence labels. The seq1 labels sit at y = 1; this used to be
    -- produced by formatting -1 through "-%d", which emitted "--1".
    for i = 1, n1 do
        print(string.format("\\node at (%d, 1) {%s};", i - 1, string.sub(seq1, i, i)))
    end
    for i = 1, n2 do
        print(string.format("\\node at (%d, -%d) {%s};", -1, i - 1, string.sub(seq2, i, i)))
    end
    -- Add a path from the bottom right corner to the top left corner,
    -- following the minimum of the three possible moves at each step
    local i = n1 - 1
    local j = n2 - 1
    print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
    while i > 0 and j > 0 do
        -- All three candidates come from `matrix`; two of them used to index
        -- the standard `table` library, which raised an error.
        local value = math.min(matrix[i-1][j-1], matrix[i-1][j], matrix[i][j-1])
        if value == matrix[i-1][j-1] then
            i = i - 1
            j = j - 1
        elseif value == matrix[i-1][j] then
            i = i - 1
        else
            j = j - 1
        end
        print(string.format(" (%d, -%d) -- ", i, j))
    end
    print("(0, 0) -- (-1, 1);")
 end
 --- Script entry point: print the LCS matrix of two fixed example sequences.
 function main()
   local first, second = "ATCTGAT", "TGCATA"
   local scores = lcsq_matrix(first, second)
   local text = repr_matrix(scores)
   print(text)
 end
 -- Run immediately when the file is executed
 main()
--- a/main.pdf
+++ b/main.pdf
--- a/tmp.pdf
+++ b/tmp.pdf
--- a/tmp.tex
+++ b/tmp.tex
@ -11,43 +11,6 @@
 \input{definitions.tex}
 \begin{document}
 \begin{algorithm}
    \caption{Construct a longest common subsequence matrix keeping the path in memory}
    \begin{algorithmic}[1]
        \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
        \State $M \gets $ Array($m+1$, $n+1$)
        \State $P \gets $ Array($m+1$, $n+1$)
        \For {($i = 0$; $i < n+1$; $i++$)}
            \State $M[i][0] \gets 0$
        \EndFor
        \For {($j = 0$; $j < m+1$; $j++$)}
            \State $M[0][j] \gets 0$
        \EndFor
        \For{($i = 1$; $i < n+1$; $i++$)}
        \For{($j = 1$; $j < m+1$; $j++$)}
        \If {$i = 1$ or $j = 0$}
        \State $M[i][j] = 0$
        \Else
        \If {$S_{1}[i-1] = S_{2}[j-1]$}
            \State $M[i][j] \gets M[i-1][j-1] + 1$
            \State $P[i][j] \gets '\nwarrow'$
        \ElsIf {$M[i][j-1] \geq M[i-1][j]$}
            \State $M[i][j] \gets M[i][j-1]$
            \State $P[i][j] \gets '\leftarrow'$
            \Else
            \State $M[i][j] \gets M[i-1][j]$
            \State $P[i][j] \gets '\downarrow'$
        \EndIf
 \EndIf
        \EndFor
        \EndFor
        \State \Return $M, P$
        \EndFunction
    \end{algorithmic}
 \end{algorithm}
 \iffalse
 	\begin{algorithm}
 		\caption{Backtrack the longest common subsequence}
 		\begin{algorithmic}[1]
@ -97,7 +60,7 @@
 			\EndProcedure
 		\end{algorithmic}
 	\end{algorithm}
-\fi
+
 \end{document}
 \end{document}