diff --git a/.latexmkrc b/.latexmkrc index 9263c76..6c6740f 100644 --- a/.latexmkrc +++ b/.latexmkrc @@ -1,5 +1,5 @@ sub createFolderStructure{ - system("bash ./createFolderStructure.sh"); + system("bash ./folder-structure.sh"); } createFolderStructure(); diff --git a/content/chapters/part2/0.tex b/content/chapters/part2/0.tex index abdcf13..7a7255c 100644 --- a/content/chapters/part2/0.tex +++ b/content/chapters/part2/0.tex @@ -18,7 +18,20 @@ Here we are interested by the distance that is able to represent the transformat Example: \begin{itemize} -\item $sub(a, b) = \begin{cases} 0 & \text{if} a = b \\ 1 &\text{otherwise} \end{cases}$. +\item $sub(a, b) = \begin{cases} 0 & \text{if} a = b \\ 1 &\text{otherwise} \end{cases}$. \item $del(a) = 1$ \item $ins(a) = 1$ \end{itemize} + +Let $X = x_{0} x_{1} \ldots x_{m-1}$, $Y = y_{0} y_{1} \ldots y_{n-1} $ + +An alignment is noted as $z = \begin{pmatrix} \bar{x}_{0} \\ \bar{y}_{0} \end{pmatrix} \ldots \begin{pmatrix} \bar{x}_{p-1} \\ \bar{y}_{p-1} \end{pmatrix}$ of size $p$. $n \leq p \leq n + m$ + + +$\bar{x}_{i} = x_{j}$ or $\bar{x}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq m - 1$ + +$\bar{y}_{i} = y_{j}$ or $\bar{y}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq n - 1$ + +$X' = \bar{x}_{0} \bar{x}_{1} \ldots \bar{x}_{i} \ldots \bar{x}_{p-1}$ +$Y' = \bar{y}_{0} \bar{y}_{1} \ldots \bar{y}_{i} \ldots \bar{y}_{p-1}$ +for $0 \leq i \leq p-1$, $\nexists i$, such that $\bar{x}_{i} = \bar{y}_{i} = \varepsilon$ diff --git a/content/chapters/part2/1.tex b/content/chapters/part2/1.tex new file mode 100644 index 0000000..8dc578d --- /dev/null +++ b/content/chapters/part2/1.tex @@ -0,0 +1,64 @@ +\chapter{Section alignment} + +\section{Needleman - Wunsch algorithm} + +\begin{algorithm} + \caption{Needleman-Wunsch Algorithm} + \begin{algorithmic}[1] + \Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} + \Comment{$sub(a, b)$ is the substitution score, $del(a)$ and $ins(a)$ are the deletion and insertion penalty, in regard with the reference $S_{1}$ sequence} + \State $M = $ Array($m+1$, $n+1$) + \Comment{Initialize the matrix first column and first row} + \State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.} + \For {($i = 0$; $i < m+1$; $i++$)} + \State $M[i][0] = i * del(S_{1}[i])$ + \EndFor + \For {($j = 0$; $j < n+1$; $j++$)} + \State $M[0][j] = j * ins(S_{2}[j])$ + \EndFor + \Comment{Fill the remaining matrix} + \For {($i = 1$; $i < m+1$; $i++$)} + \For {($j = 1$; $j < n+1$; $j++$)} + \State $delete = M[i-1][j] + del(S_{1}[i-1])$ + \State $insert = M[i][j-1] + ins(S_{2}[j-1])$ + \State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$ + \State $choice = \max \{delete, insert, substitute\}$ + \If {$substitute = choice$} + \State $P[i-1][j-1] = '\nwarrow'$ + \ElsIf {$insertion = choice$} + \State $P[i-1][j-1] = '\uparrow'$ + \Else + \State $P[i-1][j-1] = '\leftarrow'$ + \EndIf + \State $M[i][j] = choice$ + \EndFor + \EndFor + \EndProcedure + \Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} + \State $extend_{1} = ''$ + \State $extend_{2} = ''$ + \State $i = m$ + \State $j = n$ + \While{$i > 0$ and $j > 0$} + \If {$P[i-1][j-1] = '\nwarrow'$} + \State $extend_{1} = S_{1}[i-1] \circ extend_{1}$ + \State $extend_{2} = S_{2}[j-1] \circ extend_{2}$ + \State $i--$ + \State $j--$ + \ElsIf {$P[i-1][j-1] = '\uparrow'$} + \State $extend_{1} = S_{1}[i-1] \circ extend_{1}$ + \State $extend_{2} =\quad '-' \circ extend_{2}$ + \State $i--$ + \Else + \State $extend_{1} =\quad '-' \circ extend_{1}$ + \State $extend_{2} = S_{2}[j-1] \circ extend_{2}$ + \State $j--$ + \EndIf + \EndWhile + \State \Call{print}{$extend_{1}$} + \State \Call{print}{$extend_{2}$} + \EndProcedure + \State \Call{FillMatrix}{$S_{1}$, $S_{2}$} + \State \Call ShowAlignment($S_{1}$, $S_{2}$) + \end{algorithmic} +\end{algorithm} diff --git a/figures/part2/* b/figures/part2/* deleted file mode 100644 index 07994eb..0000000 --- a/figures/part2/* +++ /dev/null @@ -1,176 +0,0 @@ -function lcsq_matrix(seq1, seq2) - local gap_penalty = 0 - local match_score = 1 - local n1 = string.len(seq1) - local n2 = string.len(seq2) - -- Create a n1 x n2 matrix - local matrix = {} - for i=0,n1 do - matrix[i] = {} - for j=0,n2 do - matrix[i][j] = 0 - end - end - -- Fill the rest of the matrix - local match, delete, insert - for i=1,n1 do - for j=1,n2 do - if string.sub(seq1, i, i) == string.sub(seq2, j, j) then - match = matrix[i-1][j-1] + match_score - else - match = matrix[i-1][j-1] - end - gap1 = matrix[i-1][j] + gap_penalty - gap2 = matrix[i][j-1] + gap_penalty - matrix[i][j] = math.max(match, gap1, gap2) - end - end - return matrix -end - -local function has_value (tab, val) - for index, value in ipairs(tab) do - if value == val then - return true - end - end - - return false -end - -function repr_matrix(matrix) - repr = "" - for i=1,#matrix do - for j=1,#matrix do - repr = repr .. matrix[i][j] .. " " - end - repr = repr .. "\n" - end - return repr -end - - -function draw_lcsq_matrix_graph(seq1, seq2) - local matrix = lcsq_matrix(seq1, seq2) - local tikz_code = "" - function coordinate(i, j) - return i .. "_" .. j - end - local steps = { - {-1, 0}, - {-1, -1}, - {0, -1} - } - - local n1 = string.len(seq1) - local n2 = string.len(seq2) - local path = {} - local i = n1 - local j = n2 - while i >= 0 and j >= 0 do - path[#path+1] = coordinate(i, j) - local min = matrix[i][j] - local min_step = steps[1] - for index, step in ipairs(steps) do - local k = i + step[1] - local l = j + step[2] - if k >= 0 and l >= 0 and matrix[k][l] <= min then - min_step = step - min = matrix[k][l] - end - end - i = i + min_step[1] - j = j + min_step[2] - print(i, j) - end - -- Draw the matrix as tikz node with matrix value - for i=0,n1 do - for j=0,n2 do - local options = "" - if has_value(path, coordinate(i, j)) then - - options = "[fill=gray, draw, minimum size=1]" - end - tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};" - end - end - -- Add nucleotide labels - for i=1,n1 do - local nt = string.sub(seq1, i, i) - tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};" - end - for i=1,n2 do - local nt = string.sub(seq2, i, i) - tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};" - end - -- For seq2 - for i=0,n1 do - for j=0,n2 do - local min = math.huge - for index, step in ipairs(steps) do - local k = i + step[1] - local l = j + step[2] - if k >= 0 and l >= 0 and matrix[k][l] < min then - min = matrix[k][l] - end - end - local highlighted = false - for index, step in ipairs(steps) do - local k = i + step[1] - local l = j + step[2] - if k >= 0 and l >= 0 and matrix[k][l] == min then - tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");" - end - end - end - end - return tikz_code -end - -function draw_lcsq_matrix(seq1, seq2) - -- print(string.format(" Path: %s -> %s", seq1, seq2)) - local matrix = lcsq_matrix(seq1, seq2) - local n1 = string.len(seq1) - local n2 = string.len(seq2) - -- Draw the matrix as tikz nodes - for i=0,n1-1 do - for j=0,n2-1 do - print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j])) - end - end - -- Draw the sequence labels - for i=1,n1 do - print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i))) - end - for i=1,n2 do - print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i))) - end - -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step - local i, j, value, previous_value - i = n1-1 - j = n2-1 - print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j)) - while i > 0 and j > 0 do - value = math.min(matrix[i-1][j-1], table[i-1][j], table[i][j-1]) - if value == matrix[i-1][j-1] then - i = i - 1 - j = j - 1 - elseif value == matrix[i-1][j] then - i = i - 1 - else - j = j - 1 - end - print(string.format(" (%d, -%d) -- ", i, j)) - end - print(string.format("(0, 0) -- (-1, 1);", i, j)) -end - -function main() - local seq1 = "ATCTGAT" - local seq2 = "TGCATA" - - local matrix = lcsq_matrix(seq1, seq2) - print(repr_matrix(matrix)) -end - -main() diff --git a/main.pdf b/main.pdf index e2afaaf..ded9dcd 100644 --- a/main.pdf +++ b/main.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73b2634cfd937c1997c40889d185922ba2608c8cc2c8d4bb59afecf1ed588692 -size 295634 +oid sha256:1f067fe2e3ed44151fc9477d9697bcf01e9d6c8fce05cc3a18c585382f4e54ae +size 313915