Add base needleman

2024-03-25 10:38:20 +01:00 · 2024-03-25 10:38:20 +01:00 · e945027027
parent 18203b1e49
commit e945027027
5 changed files with 81 additions and 180 deletions
--- a/.latexmkrc
+++ b/.latexmkrc
@ -1,5 +1,5 @@
 sub createFolderStructure{
-   system("bash ./createFolderStructure.sh");
+   system("bash ./folder-structure.sh");
 }

 createFolderStructure();
--- a/content/chapters/part2/0.tex
+++ b/content/chapters/part2/0.tex
@ -22,3 +22,16 @@ Example:
 \item $del(a) = 1$
 \item $ins(a) = 1$
 \end{itemize}
+
+Let $X = x_{0} x_{1} \ldots x_{m-1}$, $Y = y_{0} y_{1} \ldots y_{n-1} $
+
+An alignment is noted as $z = \begin{pmatrix} \bar{x}_{0} \\ \bar{y}_{0} \end{pmatrix} \ldots \begin{pmatrix} \bar{x}_{p-1} \\ \bar{y}_{p-1} \end{pmatrix}$ of size $p$. $n \leq p \leq n + m$
+
+
+$\bar{x}_{i} = x_{j}$ or $\bar{x}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq m - 1$
+
+$\bar{y}_{i} = y_{j}$ or $\bar{y}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq n - 1$
+
+$X' = \bar{x}_{0} \bar{x}_{1} \ldots \bar{x}_{i} \ldots \bar{x}_{p-1}$
+$Y' = \bar{y}_{0} \bar{y}_{1} \ldots \bar{y}_{i} \ldots \bar{y}_{p-1}$
+for $0 \leq i \leq p-1$, $\nexists i$, such that $\bar{x}_{i} = \bar{y}_{i} = \varepsilon$
--- a/content/chapters/part2/1.tex
+++ b/content/chapters/part2/1.tex
@ -0,0 +1,64 @@
+\chapter{Section alignment}
+
+\section{Needleman - Wunsch algorithm}
+
+\begin{algorithm}
+	\caption{Needleman-Wunsch Algorithm}
+	\begin{algorithmic}[1]
+		\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
+		\Comment{$sub(a, b)$ is the substitution score, $del(a)$ and $ins(a)$ are the deletion and insertion penalty, in regard with the reference $S_{1}$ sequence}
+		\State $M = $ Array($m+1$, $n+1$)
+		\Comment{Initialize the matrix first column and first row}
+		\State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.}
+		\For {($i = 0$; $i < m+1$; $i++$)}
+		\State $M[i][0] = i * del(S_{1}[i])$
+		\EndFor
+		\For {($j = 0$; $j < n+1$; $j++$)}
+		\State $M[0][j] = j * ins(S_{2}[j])$
+		\EndFor
+		\Comment{Fill the remaining matrix}
+		\For {($i = 1$; $i < m+1$; $i++$)}
+		\For {($j = 1$; $j < n+1$; $j++$)}
+		\State $delete = M[i-1][j] + del(S_{1}[i-1])$
+		\State $insert = M[i][j-1] + ins(S_{2}[j-1])$
+		\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
+		\State $choice = \max \{delete, insert, substitute\}$
+		\If {$substitute = choice$}
+		\State $P[i-1][j-1] = '\nwarrow'$
+		\ElsIf {$insertion = choice$}
+		\State $P[i-1][j-1] = '\uparrow'$
+		\Else
+		\State $P[i-1][j-1] = '\leftarrow'$
+		\EndIf
+		\State $M[i][j] = choice$
+		\EndFor
+		\EndFor
+		\EndProcedure
+		\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
+		\State $extend_{1} = ''$
+		\State $extend_{2} = ''$
+		\State $i = m$
+		\State $j = n$
+		\While{$i > 0$ and $j > 0$}
+		\If {$P[i-1][j-1] = '\nwarrow'$}
+		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
+		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
+		\State $i--$
+		\State $j--$
+		\ElsIf {$P[i-1][j-1] = '\uparrow'$}
+		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
+		\State $extend_{2} =\quad '-' \circ extend_{2}$
+		\State $i--$
+		\Else
+		\State $extend_{1} =\quad '-' \circ extend_{1}$
+		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
+		\State $j--$
+		\EndIf
+		\EndWhile
+		\State \Call{print}{$extend_{1}$}
+		\State \Call{print}{$extend_{2}$}
+		\EndProcedure
+		\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
+		\State \Call ShowAlignment($S_{1}$, $S_{2}$)
+	\end{algorithmic}
+\end{algorithm}
--- a/figures/part2/*
+++ b/figures/part2/*
@ -1,176 +0,0 @@
-function lcsq_matrix(seq1, seq2)
-    local gap_penalty = 0
-    local match_score = 1
-    local n1 = string.len(seq1)
-    local n2 = string.len(seq2)
-    -- Create a n1 x n2 matrix
-    local matrix = {}
-    for i=0,n1 do
-        matrix[i] = {}
-        for j=0,n2 do
-            matrix[i][j] = 0
-        end
-    end
-    -- Fill the rest of the matrix
-    local match, delete, insert
-    for i=1,n1 do
-        for j=1,n2 do
-            if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
-                match = matrix[i-1][j-1] + match_score
-            else
-                match = matrix[i-1][j-1]
-            end
-            gap1 = matrix[i-1][j] + gap_penalty
-            gap2 = matrix[i][j-1] + gap_penalty
-            matrix[i][j] = math.max(match, gap1, gap2)
-        end
-    end
-    return matrix
-end
-
-local function has_value (tab, val)
-    for index, value in ipairs(tab) do
-        if value == val then
-            return true
-        end
-    end
-
-    return false
-end
-
-function repr_matrix(matrix)
-  repr = ""
-  for i=1,#matrix do
-    for j=1,#matrix do
-      repr = repr .. matrix[i][j] .. " "
-    end
-    repr = repr .. "\n"
-  end
-  return repr
-end
-
-
-function draw_lcsq_matrix_graph(seq1, seq2)
-    local matrix = lcsq_matrix(seq1, seq2)
-    local tikz_code = ""
-    function coordinate(i, j)
-        return i .. "_" .. j
-    end
-    local steps = {
-        {-1, 0},
-        {-1, -1},
-        {0, -1}
-    }
-
-    local n1 = string.len(seq1)
-    local n2 = string.len(seq2)
-    local path = {}
-    local i = n1
-    local j = n2
-    while i >= 0 and j >= 0 do
-        path[#path+1] = coordinate(i, j)
-        local min = matrix[i][j]
-        local min_step = steps[1]
-        for index, step in ipairs(steps) do
-            local k = i + step[1]
-            local l = j + step[2]
-            if k >= 0 and l >= 0 and matrix[k][l] <= min then
-                min_step = step
-                min = matrix[k][l]
-            end
-        end
-        i = i + min_step[1]
-        j = j + min_step[2]
-        print(i, j)
-    end
-    -- Draw the matrix as tikz node with matrix value
-    for i=0,n1 do
-        for j=0,n2 do
-            local options = ""
-            if has_value(path, coordinate(i, j)) then
-
-                options = "[fill=gray, draw, minimum size=1]"
-            end
-            tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
-        end
-    end
-    -- Add nucleotide labels
-    for i=1,n1 do
-        local nt = string.sub(seq1, i, i)
-        tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
-    end
-    for i=1,n2 do
-        local nt = string.sub(seq2, i, i)
-        tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
-    end
-    -- For seq2
-       for i=0,n1 do
-        for j=0,n2 do
-            local min = math.huge
-            for index, step in ipairs(steps) do
-                local k = i + step[1]
-                local l = j + step[2]
-                if k >= 0 and l >= 0 and matrix[k][l] < min then
-                    min = matrix[k][l]
-                end
-            end
-            local highlighted = false
-            for index, step in ipairs(steps) do
-                local k = i + step[1]
-                local l = j + step[2]
-                if k >= 0 and l >= 0 and matrix[k][l] == min then
-                    tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
-                end
-            end
-        end
-    end
-    return tikz_code
-end
-
-function draw_lcsq_matrix(seq1, seq2)
-    -- print(string.format(" Path: %s -> %s", seq1, seq2))
-    local matrix = lcsq_matrix(seq1, seq2)
-    local n1 = string.len(seq1)
-    local n2 = string.len(seq2)
-    -- Draw the matrix as tikz nodes
-    for i=0,n1-1 do
-        for j=0,n2-1 do
-            print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j]))
-        end
-    end
-    -- Draw the sequence labels
-    for i=1,n1 do
-        print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
-    end
-    for i=1,n2 do
-        print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
-    end
-    -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
-    local i, j, value, previous_value
-    i = n1-1
-    j = n2-1
-    print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
-    while i > 0 and j > 0 do
-        value = math.min(matrix[i-1][j-1], table[i-1][j], table[i][j-1])
-        if value == matrix[i-1][j-1] then
-            i = i - 1
-            j = j - 1
-        elseif value == matrix[i-1][j] then
-            i = i - 1
-        else
-            j = j - 1
-        end
-        print(string.format(" (%d, -%d) -- ", i, j))
-    end
-    print(string.format("(0, 0) -- (-1, 1);", i, j))
-end
-
-function main()
-  local seq1 = "ATCTGAT"
-  local seq2 = "TGCATA"
-
-  local matrix = lcsq_matrix(seq1, seq2)
-  print(repr_matrix(matrix))
-end
-
-main()
--- a/main.pdf
+++ b/main.pdf