fix: Needleman-Wunsch backtrack was faulty

This commit is contained in:
Samuel Ortion 2024-04-02 14:51:44 +02:00
parent 1953baa198
commit 80f4669d23
5 changed files with 113 additions and 22 deletions

2
.gitignore vendored
View File

@ -1,5 +1,5 @@
build/ build/
**/.bak* **/*.bak*
.auctex-auto .auctex-auto
## Core latex/pdflatex auxiliary files: ## Core latex/pdflatex auxiliary files:

View File

@ -1,5 +1,6 @@
\chapter{Sequence alignment} \chapter{Sequence alignment}
\iffalse
\begin{algorithm} \begin{algorithm}
\caption{Needleman-Wunsch Algorithm} \caption{Needleman-Wunsch Algorithm}
\begin{algorithmic}[1] \begin{algorithmic}[1]
@ -79,8 +80,10 @@
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\fi
\begin{algorithm} \begin{algorithm}
\caption{Needleman-Wunsch Algorithm, using proper notation } \caption{Needleman-Wunsch Algorithm, Build the matrix}
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} \Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $M = $ Array($m+1$, $n+1$) \State $M = $ Array($m+1$, $n+1$)
@ -106,7 +109,7 @@
\end{algorithm} \end{algorithm}
\begin{algorithm} \begin{algorithm}
\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)} \caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} \Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $alignment = LinkedList$ \State $alignment = LinkedList$
@ -149,14 +152,10 @@
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), \State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
\Function{BacktrackRecurse}{$i$, $j$} \Function{BacktrackRecurse}{$i$, $j$}
\If {$i > 0$ and $j > 0$} \If {$i > 0$ and $j > 0$}
\State $substitute = M[i-1][j-1]$ \If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
\State $delete = M[i-1][j]$
\State $insert = M[i][j-1]$
\State $min = \min \{ substitute, delete, insert \}$
\If {$substitute = min$}
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$} \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$ \State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
\ElsIf {$delete = min$} \ElsIf {$M[i-1][j] + gap\_penalty = M[i][j]$}
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$} \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$ \State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
\Else \Else
@ -172,7 +171,9 @@
\Else \Else
\State \Return [] \State \Return []
\EndIf \EndIf
\Else
\State \Return $z$ \State \Return $z$
\EndIf
\EndFunction \EndFunction
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} \Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$} \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
@ -185,21 +186,17 @@
\begin{algorithmic}[1] \begin{algorithmic}[1]
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$} \Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
\If {$i > 0$ and $j > 0$} \If {$i > 0$ and $j > 0$}
\State $substitute = M[i-1][j-1]$ \If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
\State $delete = M[i-1][j]$
\State $insert = M[i][j-1]$
\State $min = \min \{ substitute, delete, insert \}$
\If {$substitute = min$}
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$ \State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
\State $z' = value \circ z$ \State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$} \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
\EndIf \EndIf
\If {$delete = min$} \If {$M[i-1][j] + gap\_penalty = M[i][j]$}
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$ \State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\State $z' = value \circ z$ \State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$} \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
\EndIf \EndIf
\If {$insert = min$} \If {$M[i][j-1] + gap\_penalty = M[i][j]$}
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $z' = value \circ z$ \State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
@ -212,11 +209,20 @@
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $z' = value \circ z$ \State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
\EndIf \Else
\State \Call{print}{$z$} \State \Call{print}{$z$}
\EndIf
\EndProcedure \EndProcedure
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} \Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []} \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
\EndProcedure \EndProcedure
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\begin{figure}
\centering
\includegraphics{figures/part2/needle.pdf}
\caption{Needleman-Wunsch global alignment matrix with an example of an optimal path.}
\end{figure}

View File

@ -1,22 +1,105 @@
needle = require("./needle") needle = require("./needle")
--- Build a one-level copy of a table.
-- Every key/value pair reported by `pairs(t)` is assigned into a fresh table;
-- values that are themselves tables are shared with the source, not cloned.
-- @param t table to copy
-- @return a new table with the same top-level entries
function table.shallow_copy(t)
  local duplicate = {}
  for key, value in pairs(t) do
    duplicate[key] = value
  end
  return duplicate
end
--- Record every state visited while backtracking through a Needleman-Wunsch
-- score matrix along all score-consistent paths.
--
-- A state is a triple `{i, j, alignment}`: `i` and `j` are the lengths of the
-- prefixes of `seq1` / `seq2` still to be aligned, and `alignment` is the list
-- of `{nt1, nt2}` columns built so far ('-' marks a gap). States are explored
-- depth-first with an explicit stack; each visited state is appended to the
-- returned trace in visiting order.
--
-- Relies on the globals `needle` (fields `sub` and `gap_penalty`) and
-- `table.shallow_copy`.
--
-- @param matrix score matrix read as matrix[i][j] with i in 0..#seq1 and
--        j in 0..#seq2 (row/column 0 is read when i or j equals 1)
-- @param seq1 first sequence, as a string
-- @param seq2 second sequence, as a string
-- @return list of visited states `{i, j, alignment}`
function multiple_path_backtrack_trace(matrix, seq1, seq2)
  local m = string.len(seq1)
  local n = string.len(seq2)
  -- The stack is used LIFO from its tail, which avoids shifting every element
  -- on each push/pop while keeping the same exploration order.
  local stack = { { m, n, {} } }
  local trace = {}

  -- Schedule the state (i, j) whose alignment is `alignment` with `column`
  -- prepended; the parent alignment is copied so sibling paths stay independent.
  local function push(i, j, alignment, column)
    local extended = table.shallow_copy(alignment)
    table.insert(extended, 1, column)
    stack[#stack + 1] = { i, j, extended }
  end

  while #stack ~= 0 do
    local state = table.remove(stack)
    trace[#trace + 1] = state
    local i, j, alignment = state[1], state[2], state[3]
    if i > 0 and j > 0 then
      -- Lua strings are 1-based: the character consumed when moving from
      -- row i-1 to row i is the i-th one.
      local nt1 = string.sub(seq1, i, i)
      local nt2 = string.sub(seq2, j, j)
      local score = matrix[i][j]
      -- Follow every predecessor whose score explains the current cell.
      if score == matrix[i - 1][j - 1] + needle.sub(nt1, nt2) then
        push(i - 1, j - 1, alignment, { nt1, nt2 })
      end
      if score == matrix[i - 1][j] + needle.gap_penalty then
        push(i - 1, j, alignment, { nt1, '-' })
      end
      if score == matrix[i][j - 1] + needle.gap_penalty then
        push(i, j - 1, alignment, { '-', nt2 })
      end
    elseif i > 0 then
      -- Only seq1 is left: the remaining characters face gaps.
      push(i - 1, j, alignment, { string.sub(seq1, i, i), '-' })
    elseif j > 0 then
      -- Only seq2 is left: the remaining characters face gaps.
      push(i, j - 1, alignment, { '-', string.sub(seq2, j, j) })
    end
    -- i == 0 and j == 0: a complete alignment, nothing more to schedule.
  end
  return trace
end
--- Render an alignment as a two-row LaTeX `pmatrix`.
-- The first row lists the first entry of every column, the second row the
-- second entry; cells are separated by " & ".
-- @param alignment list of `{top, bottom}` pairs
-- @return LaTeX source string
function repr_alignment(alignment)
  local top_row, bottom_row = {}, {}
  for idx, column in ipairs(alignment) do
    -- `"" ..` keeps the string coercion (and the error on a missing entry)
    -- that plain concatenation performs.
    top_row[idx] = "" .. column[1]
    bottom_row[idx] = "" .. column[2]
  end
  return [[\begin{pmatrix}]]
    .. table.concat(top_row, " & ")
    .. [[\\]] .. " \n"
    .. table.concat(bottom_row, " & ")
    .. [[\end{pmatrix}]]
end
--- Render a backtrack trace as a one-column LaTeX `tabular`.
-- Each entry of `trace` is a state `{i, j, alignment}`; it becomes one row of
-- the form `$\langle i, j, <alignment>\rangle$`, where the alignment is
-- rendered by the global `repr_alignment`.
-- @param trace list of states `{i, j, alignment}`
-- @return LaTeX source string
function trace_repr(trace)
  local parts = { [[\begin{tabular}{|c|} \\ \hline ]] }
  for _, call in ipairs(trace) do
    local i = call[1]
    local j = call[2]
    -- The state's own alignment must be rendered; a misspelled local here
    -- previously made the call read an undefined global instead.
    local alignment = call[3]
    parts[#parts + 1] = [[ $\langle ]] .. i .. ", " .. j .. ", "
      .. repr_alignment(alignment) .. [[\rangle$ ]]
    parts[#parts + 1] = [[\\ \hline]]
  end
  parts[#parts + 1] = [[\end{tabular}]]
  return table.concat(parts)
end
--- Script entry point: build the score matrix for two hard-coded sequences,
-- backtrack through it, then print the number of visited states followed by
-- their LaTeX rendering.
function main()
  local first, second = "ATCTGAT", "TGCATA"
  local scores = needle.needle_matrix(first, second)
  local visited = multiple_path_backtrack_trace(scores, first, second)
  print(#visited)
  print(trace_repr(visited))
end
main()

View File

@ -191,5 +191,7 @@ return {
draw=draw_needle_matrix_graph, draw=draw_needle_matrix_graph,
gap_penalty=gap_penalty, gap_penalty=gap_penalty,
mismatch_penalty=mismatch_penalty, mismatch_penalty=mismatch_penalty,
match_penalty=match_penalty match_penalty=match_penalty,
needle_matrix=needle_matrix,
sub=sub
} }

BIN
main.pdf (Stored with Git LFS)

Binary file not shown.