fix: Needleman-Wunsch backtrack was faulty
This commit is contained in:
parent
1953baa198
commit
80f4669d23
|
@ -1,5 +1,5 @@
|
||||||
build/
|
build/
|
||||||
**/.bak*
|
**/*.bak*
|
||||||
.auctex-auto
|
.auctex-auto
|
||||||
|
|
||||||
## Core latex/pdflatex auxiliary files:
|
## Core latex/pdflatex auxiliary files:
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
\chapter{Sequence alignment}
|
\chapter{Sequence alignment}
|
||||||
|
|
||||||
|
\iffalse
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Needleman-Wunsch Algorithm}
|
\caption{Needleman-Wunsch Algorithm}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
|
@ -77,10 +78,12 @@
|
||||||
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
|
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
|
||||||
\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
|
\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
\fi
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Needleman-Wunsch Algorithm, using proper notation }
|
\caption{Needleman-Wunsch Algorithm, Build the matrix}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||||
\State $M = $ Array($m+1$, $n+1$)
|
\State $M = $ Array($m+1$, $n+1$)
|
||||||
|
@ -106,7 +109,7 @@
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
|
\caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||||
\State $alignment = LinkedList$
|
\State $alignment = LinkedList$
|
||||||
|
@ -149,14 +152,10 @@
|
||||||
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
|
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
|
||||||
\Function{BacktrackRecurse}{$i$, $j$}
|
\Function{BacktrackRecurse}{$i$, $j$}
|
||||||
\If {$i > 0$ and $j > 0$}
|
\If {$i > 0$ and $j > 0$}
|
||||||
\State $substitute = M[i-1][j-1]$
|
\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
|
||||||
\State $delete = M[i-1][j]$
|
|
||||||
\State $insert = M[i][j-1]$
|
|
||||||
\State $min = \min \{ substitute, delete, insert \}$
|
|
||||||
\If {$substitute = min$}
|
|
||||||
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
|
||||||
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
\ElsIf {$delete = min$}
|
\ElsIf {$M[i-1][j] + gap\_penalty = M[i][j]$}
|
||||||
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
||||||
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
||||||
\Else
|
\Else
|
||||||
|
@ -172,7 +171,9 @@
|
||||||
\Else
|
\Else
|
||||||
\State \Return []
|
\State \Return []
|
||||||
\EndIf
|
\EndIf
|
||||||
|
\Else
|
||||||
\State \Return $z$
|
\State \Return $z$
|
||||||
|
\EndIf
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||||
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
|
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
|
||||||
|
@ -185,21 +186,17 @@
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
|
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
|
||||||
\If {$i > 0$ and $j > 0$}
|
\If {$i > 0$ and $j > 0$}
|
||||||
\State $substitute = M[i-1][j-1]$
|
\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
|
||||||
\State $delete = M[i-1][j]$
|
|
||||||
\State $insert = M[i][j-1]$
|
|
||||||
\State $min = \min \{ substitute, delete, insert \}$
|
|
||||||
\If {$substitute = min$}
|
|
||||||
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
\State $z' = value \circ z$
|
\State $z' = value \circ z$
|
||||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
|
||||||
\EndIf
|
\EndIf
|
||||||
\If {$delete = min$}
|
\If {$M[i-1][j] + gap\_penalty = M[i][j]$}
|
||||||
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
||||||
\State $z' = value \circ z$
|
\State $z' = value \circ z$
|
||||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
||||||
\EndIf
|
\EndIf
|
||||||
\If {$insert = min$}
|
\If {$M[i][j-1] + gap\_penalty = M[i][j]$}
|
||||||
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
\State $z' = value \circ z$
|
\State $z' = value \circ z$
|
||||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||||
|
@ -212,11 +209,20 @@
|
||||||
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
\State $z' = value \circ z$
|
\State $z' = value \circ z$
|
||||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||||
\EndIf
|
\Else
|
||||||
\State \Call{print}{$z$}
|
\State \Call{print}{$z$}
|
||||||
|
\EndIf
|
||||||
\EndProcedure
|
\EndProcedure
|
||||||
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||||
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
|
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
|
||||||
\EndProcedure
|
\EndProcedure
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics{figures/part2/needle.pdf}
|
||||||
|
\caption{Needleman-Wunsch global alignment matrix with an example of an optimal path.}
|
||||||
|
\end{figure}
|
||||||
|
|
|
@ -1,22 +1,105 @@
|
||||||
needle = require("./needle")
|
needle = require("./needle")
|
||||||
|
|
||||||
|
--- Build a new table containing every key/value pair of `t`.
-- Only the top level is duplicated: values are assigned as-is, so nested
-- tables are shared between the original and the copy.
-- @param t table to duplicate
-- @return a fresh table with the same keys and values as `t`
function table.shallow_copy(t)
    local duplicate = {}
    for key, value in pairs(t) do
        duplicate[key] = value
    end
    return duplicate
end
|
||||||
|
|
||||||
--- Record every state visited while backtracking through a Needleman-Wunsch
-- score matrix, following all moves that are consistent with the scores.
--
-- The traversal is an explicit depth-first search driven by a LIFO stack of
-- states `{i, j, alignment}`. It starts at `matrix[#seq1][#seq2]` with an
-- empty alignment and stops expanding a state once it reaches `(0, 0)`.
-- Each popped state is appended to the returned trace, in visiting order.
--
-- @param matrix score matrix read as `matrix[i][j]` for `i` in `0..#seq1`
--               and `j` in `0..#seq2`
-- @param seq1   first sequence (string), aligned on the top row
-- @param seq2   second sequence (string), aligned on the bottom row
-- @return list of visited states `{i, j, alignment}`, where `alignment` is a
--         list of `{symbol1, symbol2}` columns and `'-'` marks a gap
function multiple_path_backtrack_trace(matrix, seq1, seq2)
    local trace = {}
    local stack = { { string.len(seq1), string.len(seq2), {} } }

    -- Push a successor state whose alignment is `alignment` with `column`
    -- prepended. The parent's alignment is copied, never mutated, because
    -- several successors may branch from the same parent.
    local function push(i, j, alignment, column)
        local extended = table.shallow_copy(alignment)
        table.insert(extended, 1, column)
        stack[#stack + 1] = { i, j, extended }
    end

    while #stack ~= 0 do
        -- Pushing and popping at the tail keeps LIFO order without shifting
        -- the whole array on every operation.
        local state = table.remove(stack)
        trace[#trace + 1] = state
        local i, j, alignment = state[1], state[2], state[3]

        if i > 0 and j > 0 then
            -- Cell (i, j) covers the first i / j characters, and Lua strings
            -- are 1-based, so the symbols entering this cell are at i and j.
            local nt1 = string.sub(seq1, i, i)
            local nt2 = string.sub(seq2, j, j)
            local score = matrix[i][j]
            if score == matrix[i - 1][j - 1] + needle.sub(nt1, nt2) then
                push(i - 1, j - 1, alignment, { nt1, nt2 })
            end
            if score == matrix[i - 1][j] + needle.gap_penalty then
                push(i - 1, j, alignment, { nt1, '-' })
            end
            if score == matrix[i][j - 1] + needle.gap_penalty then
                push(i, j - 1, alignment, { '-', nt2 })
            end
        elseif i > 0 then
            -- Left border (j == 0): only gaps in seq2 remain possible.
            push(i - 1, j, alignment, { string.sub(seq1, i, i), '-' })
        elseif j > 0 then
            -- Top border (i == 0): only gaps in seq1 remain possible.
            push(i, j - 1, alignment, { '-', string.sub(seq2, j, j) })
        end
    end

    return trace
end
|
||||||
|
|
||||||
|
--- Render an alignment as a two-row LaTeX `pmatrix`.
-- The first row holds the first symbol of every column, the second row the
-- second symbol; cells are separated by " & " and the rows by `\\`.
-- @param alignment list of `{symbol1, symbol2}` columns
-- @return LaTeX source string for the matrix
function repr_alignment(alignment)
    local top, bottom = {}, {}
    for index, column in ipairs(alignment) do
        top[index] = column[1]
        bottom[index] = column[2]
    end
    -- table.concat joins each row in one pass instead of growing a string
    -- cell by cell.
    return [[\begin{pmatrix}]]
        .. table.concat(top, " & ")
        .. [[\\]] .. " \n"
        .. table.concat(bottom, " & ")
        .. [[\end{pmatrix}]]
end
|
||||||
|
|
||||||
|
--- Render a backtracking trace as a one-column LaTeX `tabular`.
-- Every entry of `trace` becomes one row of the form
-- `$\langle i, j, <alignment matrix>\rangle$`, followed by `\hline`.
-- @param trace list of states `{i, j, alignment}`
-- @return LaTeX source string for the table
function trace_repr(trace)
    local pieces = { [[\begin{tabular}{|c|} \\ \hline ]] }
    for _, call in ipairs(trace) do
        local i, j, alignment = call[1], call[2], call[3]
        pieces[#pieces + 1] = [[ $\langle ]] .. i .. ", " .. j .. ", "
            .. repr_alignment(alignment) .. [[\rangle$ ]]
        pieces[#pieces + 1] = [[\\ \hline]]
    end
    pieces[#pieces + 1] = [[\end{tabular}]]
    return table.concat(pieces)
end
|
||||||
|
|
||||||
--- Script entry point: builds the score matrix for two fixed sequences,
-- backtracks through it, then prints the number of visited states followed
-- by the LaTeX rendering of the trace.
function main()
    local first, second = "ATCTGAT", "TGCATA"
    local scores = needle.needle_matrix(first, second)
    local visited = multiple_path_backtrack_trace(scores, first, second)
    print(#visited)
    print(trace_repr(visited))
end
|
||||||
|
|
||||||
|
main()
|
||||||
|
|
|
@ -191,5 +191,7 @@ return {
|
||||||
draw=draw_needle_matrix_graph,
|
draw=draw_needle_matrix_graph,
|
||||||
gap_penalty=gap_penalty,
|
gap_penalty=gap_penalty,
|
||||||
mismatch_penalty=mismatch_penalty,
|
mismatch_penalty=mismatch_penalty,
|
||||||
match_penalty=match_penalty
|
match_penalty=match_penalty,
|
||||||
|
needle_matrix=needle_matrix,
|
||||||
|
sub=sub
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue