Add base needleman
This commit is contained in:
parent
18203b1e49
commit
e945027027
|
@ -1,5 +1,5 @@
|
||||||
sub createFolderStructure{
|
sub createFolderStructure{
|
||||||
system("bash ./createFolderStructure.sh");
|
system("bash ./folder-structure.sh");
|
||||||
}
|
}
|
||||||
|
|
||||||
createFolderStructure();
|
createFolderStructure();
|
||||||
|
|
|
@ -18,7 +18,20 @@ Here we are interested by the distance that is able to represent the transformat
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item $sub(a, b) = \begin{cases} 0 & \text{if} a = b \\ 1 &\text{otherwise} \end{cases}$.
|
\item $sub(a, b) = \begin{cases} 0 & \text{if} a = b \\ 1 &\text{otherwise} \end{cases}$.
|
||||||
\item $del(a) = 1$
|
\item $del(a) = 1$
|
||||||
\item $ins(a) = 1$
|
\item $ins(a) = 1$
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
|
Let $X = x_{0} x_{1} \ldots x_{m-1}$, $Y = y_{0} y_{1} \ldots y_{n-1} $
|
||||||
|
|
||||||
|
An alignment is noted as $z = \begin{pmatrix} \bar{x}_{0} \\ \bar{y}_{0} \end{pmatrix} \ldots \begin{pmatrix} \bar{x}_{p-1} \\ \bar{y}_{p-1} \end{pmatrix}$ of size $p$. $n \leq p \leq n + m$
|
||||||
|
|
||||||
|
|
||||||
|
$\bar{x}_{i} = x_{j}$ or $\bar{x}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq m - 1$
|
||||||
|
|
||||||
|
$\bar{y}_{i} = y_{j}$ or $\bar{y}_{i} = \varepsilon$ for $0 \leq i \leq p-1$ and $0 \leq j \leq n - 1$
|
||||||
|
|
||||||
|
$X' = \bar{x}_{0} \bar{x}_{1} \ldots \bar{x}_{i} \ldots \bar{x}_{p-1}$
|
||||||
|
$Y' = \bar{y}_{0} \bar{y}_{1} \ldots \bar{y}_{i} \ldots \bar{y}_{p-1}$
|
||||||
|
for $0 \leq i \leq p-1$, $\nexists i$, such that $\bar{x}_{i} = \bar{y}_{i} = \varepsilon$
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
\chapter{Section alignment}
|
||||||
|
|
||||||
|
\section{Needleman - Wunsch algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Needleman-Wunsch Algorithm}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||||
|
\Comment{$sub(a, b)$ is the substitution score, $del(a)$ and $ins(a)$ are the deletion and insertion penalty, in regard with the reference $S_{1}$ sequence}
|
||||||
|
\State $M = $ Array($m+1$, $n+1$)
|
||||||
|
\Comment{Initialize the matrix first column and first row}
|
||||||
|
\State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.}
|
||||||
|
\For {($i = 0$; $i < m+1$; $i++$)}
|
||||||
|
\State $M[i][0] = i * del(S_{1}[i])$
|
||||||
|
\EndFor
|
||||||
|
\For {($j = 0$; $j < n+1$; $j++$)}
|
||||||
|
\State $M[0][j] = j * ins(S_{2}[j])$
|
||||||
|
\EndFor
|
||||||
|
\Comment{Fill the remaining matrix}
|
||||||
|
\For {($i = 1$; $i < m+1$; $i++$)}
|
||||||
|
\For {($j = 1$; $j < n+1$; $j++$)}
|
||||||
|
\State $delete = M[i-1][j] + del(S_{1}[i-1])$
|
||||||
|
\State $insert = M[i][j-1] + ins(S_{2}[j-1])$
|
||||||
|
\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
|
||||||
|
\State $choice = \max \{delete, insert, substitute\}$
|
||||||
|
\If {$substitute = choice$}
|
||||||
|
\State $P[i-1][j-1] = '\nwarrow'$
|
||||||
|
\ElsIf {$insertion = choice$}
|
||||||
|
\State $P[i-1][j-1] = '\uparrow'$
|
||||||
|
\Else
|
||||||
|
\State $P[i-1][j-1] = '\leftarrow'$
|
||||||
|
\EndIf
|
||||||
|
\State $M[i][j] = choice$
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\EndProcedure
|
||||||
|
\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||||
|
\State $extend_{1} = ''$
|
||||||
|
\State $extend_{2} = ''$
|
||||||
|
\State $i = m$
|
||||||
|
\State $j = n$
|
||||||
|
\While{$i > 0$ and $j > 0$}
|
||||||
|
\If {$P[i-1][j-1] = '\nwarrow'$}
|
||||||
|
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
|
||||||
|
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
|
||||||
|
\State $i--$
|
||||||
|
\State $j--$
|
||||||
|
\ElsIf {$P[i-1][j-1] = '\uparrow'$}
|
||||||
|
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
|
||||||
|
\State $extend_{2} =\quad '-' \circ extend_{2}$
|
||||||
|
\State $i--$
|
||||||
|
\Else
|
||||||
|
\State $extend_{1} =\quad '-' \circ extend_{1}$
|
||||||
|
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
|
||||||
|
\State $j--$
|
||||||
|
\EndIf
|
||||||
|
\EndWhile
|
||||||
|
\State \Call{print}{$extend_{1}$}
|
||||||
|
\State \Call{print}{$extend_{2}$}
|
||||||
|
\EndProcedure
|
||||||
|
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
|
||||||
|
\State \Call ShowAlignment($S_{1}$, $S_{2}$)
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
176
figures/part2/*
176
figures/part2/*
|
@ -1,176 +0,0 @@
|
||||||
function lcsq_matrix(seq1, seq2)
|
|
||||||
local gap_penalty = 0
|
|
||||||
local match_score = 1
|
|
||||||
local n1 = string.len(seq1)
|
|
||||||
local n2 = string.len(seq2)
|
|
||||||
-- Create a n1 x n2 matrix
|
|
||||||
local matrix = {}
|
|
||||||
for i=0,n1 do
|
|
||||||
matrix[i] = {}
|
|
||||||
for j=0,n2 do
|
|
||||||
matrix[i][j] = 0
|
|
||||||
end
|
|
||||||
end
|
|
||||||
-- Fill the rest of the matrix
|
|
||||||
local match, delete, insert
|
|
||||||
for i=1,n1 do
|
|
||||||
for j=1,n2 do
|
|
||||||
if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
|
|
||||||
match = matrix[i-1][j-1] + match_score
|
|
||||||
else
|
|
||||||
match = matrix[i-1][j-1]
|
|
||||||
end
|
|
||||||
gap1 = matrix[i-1][j] + gap_penalty
|
|
||||||
gap2 = matrix[i][j-1] + gap_penalty
|
|
||||||
matrix[i][j] = math.max(match, gap1, gap2)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
return matrix
|
|
||||||
end
|
|
||||||
|
|
||||||
local function has_value (tab, val)
|
|
||||||
for index, value in ipairs(tab) do
|
|
||||||
if value == val then
|
|
||||||
return true
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
|
|
||||||
function repr_matrix(matrix)
|
|
||||||
repr = ""
|
|
||||||
for i=1,#matrix do
|
|
||||||
for j=1,#matrix do
|
|
||||||
repr = repr .. matrix[i][j] .. " "
|
|
||||||
end
|
|
||||||
repr = repr .. "\n"
|
|
||||||
end
|
|
||||||
return repr
|
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
function draw_lcsq_matrix_graph(seq1, seq2)
|
|
||||||
local matrix = lcsq_matrix(seq1, seq2)
|
|
||||||
local tikz_code = ""
|
|
||||||
function coordinate(i, j)
|
|
||||||
return i .. "_" .. j
|
|
||||||
end
|
|
||||||
local steps = {
|
|
||||||
{-1, 0},
|
|
||||||
{-1, -1},
|
|
||||||
{0, -1}
|
|
||||||
}
|
|
||||||
|
|
||||||
local n1 = string.len(seq1)
|
|
||||||
local n2 = string.len(seq2)
|
|
||||||
local path = {}
|
|
||||||
local i = n1
|
|
||||||
local j = n2
|
|
||||||
while i >= 0 and j >= 0 do
|
|
||||||
path[#path+1] = coordinate(i, j)
|
|
||||||
local min = matrix[i][j]
|
|
||||||
local min_step = steps[1]
|
|
||||||
for index, step in ipairs(steps) do
|
|
||||||
local k = i + step[1]
|
|
||||||
local l = j + step[2]
|
|
||||||
if k >= 0 and l >= 0 and matrix[k][l] <= min then
|
|
||||||
min_step = step
|
|
||||||
min = matrix[k][l]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
i = i + min_step[1]
|
|
||||||
j = j + min_step[2]
|
|
||||||
print(i, j)
|
|
||||||
end
|
|
||||||
-- Draw the matrix as tikz node with matrix value
|
|
||||||
for i=0,n1 do
|
|
||||||
for j=0,n2 do
|
|
||||||
local options = ""
|
|
||||||
if has_value(path, coordinate(i, j)) then
|
|
||||||
|
|
||||||
options = "[fill=gray, draw, minimum size=1]"
|
|
||||||
end
|
|
||||||
tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
-- Add nucleotide labels
|
|
||||||
for i=1,n1 do
|
|
||||||
local nt = string.sub(seq1, i, i)
|
|
||||||
tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
|
|
||||||
end
|
|
||||||
for i=1,n2 do
|
|
||||||
local nt = string.sub(seq2, i, i)
|
|
||||||
tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
|
|
||||||
end
|
|
||||||
-- For seq2
|
|
||||||
for i=0,n1 do
|
|
||||||
for j=0,n2 do
|
|
||||||
local min = math.huge
|
|
||||||
for index, step in ipairs(steps) do
|
|
||||||
local k = i + step[1]
|
|
||||||
local l = j + step[2]
|
|
||||||
if k >= 0 and l >= 0 and matrix[k][l] < min then
|
|
||||||
min = matrix[k][l]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
local highlighted = false
|
|
||||||
for index, step in ipairs(steps) do
|
|
||||||
local k = i + step[1]
|
|
||||||
local l = j + step[2]
|
|
||||||
if k >= 0 and l >= 0 and matrix[k][l] == min then
|
|
||||||
tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
return tikz_code
|
|
||||||
end
|
|
||||||
|
|
||||||
function draw_lcsq_matrix(seq1, seq2)
|
|
||||||
-- print(string.format(" Path: %s -> %s", seq1, seq2))
|
|
||||||
local matrix = lcsq_matrix(seq1, seq2)
|
|
||||||
local n1 = string.len(seq1)
|
|
||||||
local n2 = string.len(seq2)
|
|
||||||
-- Draw the matrix as tikz nodes
|
|
||||||
for i=0,n1-1 do
|
|
||||||
for j=0,n2-1 do
|
|
||||||
print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j]))
|
|
||||||
end
|
|
||||||
end
|
|
||||||
-- Draw the sequence labels
|
|
||||||
for i=1,n1 do
|
|
||||||
print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
|
|
||||||
end
|
|
||||||
for i=1,n2 do
|
|
||||||
print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
|
|
||||||
end
|
|
||||||
-- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
|
|
||||||
local i, j, value, previous_value
|
|
||||||
i = n1-1
|
|
||||||
j = n2-1
|
|
||||||
print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
|
|
||||||
while i > 0 and j > 0 do
|
|
||||||
value = math.min(matrix[i-1][j-1], table[i-1][j], table[i][j-1])
|
|
||||||
if value == matrix[i-1][j-1] then
|
|
||||||
i = i - 1
|
|
||||||
j = j - 1
|
|
||||||
elseif value == matrix[i-1][j] then
|
|
||||||
i = i - 1
|
|
||||||
else
|
|
||||||
j = j - 1
|
|
||||||
end
|
|
||||||
print(string.format(" (%d, -%d) -- ", i, j))
|
|
||||||
end
|
|
||||||
print(string.format("(0, 0) -- (-1, 1);", i, j))
|
|
||||||
end
|
|
||||||
|
|
||||||
function main()
|
|
||||||
local seq1 = "ATCTGAT"
|
|
||||||
local seq2 = "TGCATA"
|
|
||||||
|
|
||||||
local matrix = lcsq_matrix(seq1, seq2)
|
|
||||||
print(repr_matrix(matrix))
|
|
||||||
end
|
|
||||||
|
|
||||||
main()
|
|
Loading…
Reference in New Issue