From 80f4669d230fe177865f520cedaf666658e14039 Mon Sep 17 00:00:00 2001 From: Samuel Ortion Date: Tue, 2 Apr 2024 14:51:44 +0200 Subject: [PATCH] fix: Needleman-Wunsch backtrack was faulty --- .gitignore | 2 +- content/chapters/part2/1.tex | 40 ++++++----- figures/part2/needle-backtrack-stack.lua | 85 +++++++++++++++++++++++- figures/part2/needle.lua | 4 +- main.pdf | 4 +- 5 files changed, 113 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index b7d2d7a..fa368f5 100755 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ build/ -**/.bak* +**/*.bak* .auctex-auto ## Core latex/pdflatex auxiliary files: diff --git a/content/chapters/part2/1.tex b/content/chapters/part2/1.tex index 0986eae..8230131 100644 --- a/content/chapters/part2/1.tex +++ b/content/chapters/part2/1.tex @@ -1,5 +1,6 @@ \chapter{Sequence alignment} +\iffalse \begin{algorithm} \caption{Needleman-Wunsch Algorithm} \begin{algorithmic}[1] @@ -77,10 +78,12 @@ \State \Call{FillMatrix}{$S_{1}$, $S_{2}$} \State \Call{ShowAlignment}{$S_{1}$, $S_{2}$} \end{algorithmic} -\end{algorithm} + \end{algorithm} + + \fi \begin{algorithm} - \caption{Needleman-Wunsch Algorithm, using proper notation } + \caption{Needleman-Wunsch Algorithm, Build the matrix} \begin{algorithmic}[1] \Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} \State $M = $ Array($m+1$, $n+1$) @@ -106,7 +109,7 @@ \end{algorithm} \begin{algorithm} - \caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)} + \caption{Needleman-Wunsch Algorithm, reconstruct the alignment} \begin{algorithmic}[1] \Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} \State $alignment = LinkedList$ @@ -149,14 +152,10 @@ \State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), \Function{BacktrackRecurse}{$i$, $j$} \If {$i > 0$ and $j > 0$} - \State $substitute = M[i-1][j-1]$ - \State $delete = M[i-1][j]$ - \State $insert = M[i][j-1]$ - \State $min = \min \{ substitute, delete, insert \}$ - \If {$substitute = min$} + \If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$} \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$} \State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$ - \ElsIf {$delete = min$} + \ElsIf {$M[i-1][j] + gap\_penalty = M[i][j]$} \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$} \State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$ \Else @@ -172,7 +171,9 @@ \Else \State \Return [] \EndIf + \Else \State \Return $z$ + \EndIf \EndFunction \Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$} @@ -185,21 +186,17 @@ \begin{algorithmic}[1] \Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$} \If {$i > 0$ and $j > 0$} - \State $substitute = M[i-1][j-1]$ - \State $delete = M[i-1][j]$ - \State $insert = M[i][j-1]$ - \State $min = \min \{ substitute, delete, insert \}$ - \If {$substitute = min$} + \If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$} \State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$ \State $z' = value \circ z$ \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$} \EndIf - \If {$delete = min$} + \If {$M[i-1][j] + gap\_penalty = M[i][j]$} \State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$ \State $z' = value \circ z$ \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$} \EndIf - \If {$insert = min$} + \If {$M[i][j-1] + gap\_penalty = M[i][j]$} \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ \State $z' = value \circ z$ \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} @@ -212,11 +209,20 @@ \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ \State $z' = value \circ z$ \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} - \EndIf + \Else \State \Call{print}{$z$} + \EndIf \EndProcedure \Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []} \EndProcedure \end{algorithmic} \end{algorithm} + + + +\begin{figure} + \centering + \includegraphics{figures/part2/needle.pdf} + \caption{Needleman-Wunsch global alignment matrix with an example of optimal path.} + \end{figure} diff --git a/figures/part2/needle-backtrack-stack.lua b/figures/part2/needle-backtrack-stack.lua index e23e2fd..183f785 100644 --- a/figures/part2/needle-backtrack-stack.lua +++ b/figures/part2/needle-backtrack-stack.lua @@ -1,22 +1,105 @@ needle = require("./needle") +function table.shallow_copy(t) + local t2 = {} + for k,v in pairs(t) do + t2[k] = v + end + return t2 +end + function multiple_path_backtrack_trace(matrix, seq1, seq2) local stack = {} local m=string.len(seq1) local n=string.len(seq2) local i=m local j=n - table.insert(1, {i, j, nil}) + table.insert(stack, 1, {i, j, {}}) + local trace = {} while #stack ~= 0 do local state = table.remove(stack, 1) + table.insert(trace, #trace+1, state) local i=state[1] local j=state[2] local alignment = state[3] + if (i > 0 and j > 0) then + local nt1 = string.sub(seq1, i-1, i-1) + local nt2 = string.sub(seq2, j-1, j-1) + if (matrix[i][j] == matrix[i-1][j-1] + needle.sub(nt1, nt2)) then + local new_alignment = table.shallow_copy(alignment) + table.insert(new_alignment, 1, {nt1, nt2}) + table.insert(stack, 1, {i - 1, j - 1, new_alignment}) + end + if (matrix[i][j] == matrix[i-1][j] + needle.gap_penalty) then + local new_alignment = table.shallow_copy(alignment) + table.insert(new_alignment, 1, {nt1, '-'}) + table.insert(stack, 1, {i-1, j, new_alignment}) + end + if (matrix[i][j] == matrix[i][j-1] + needle.gap_penalty) then + local new_alignment = table.shallow_copy(alignment) + table.insert(new_alignment, 1, {'-', nt2}) + table.insert(stack, 1, {i, j-1, new_alignment}) + end + end + if (i > 0) then + local nt1 = string.sub(seq1, i-1, i-1) + local new_alignment = table.shallow_copy(alignment) + table.insert(new_alignment, 1, {nt1, '-'}) + table.insert(stack, 1, {i-1, j, new_alignment}) + end + if (j > 0) then + local nt2 = string.sub(seq2, j-1, j-1) + local new_alignment = table.shallow_copy(alignment) + table.insert(new_alignment, 1, {'-', nt2}) + table.insert(stack, 1, {i, j-1, new_alignment}) + end end + return trace +end + +function repr_alignment(alignment) + local repr = [[\begin{pmatrix}]] + for i, vector in ipairs(alignment) do + repr = repr .. vector[1] + if i < #alignment then + repr = repr .. " & " + end + end + repr = repr .. [[\\]] .. " \n" + for i, vector in ipairs(alignment) do + repr = repr .. vector[2] + if i < #alignment then + repr = repr .. " & " + end + end + repr = repr .. [[\end{pmatrix}]] + return repr +end + +function trace_repr(trace) + local repr = "" +-- for stack_index, stack in ipairs(trace) do +-- repr = repr .. "iteration " .. stack_index .. " :" .. [[\\]] + repr = repr .. [[\begin{tabular}{|c|} \\ \hline ]] + for call_index, call in ipairs(trace) do + local i = call[1] + local j = call[2] + local aligment = call[3] + repr = repr .. [[ $\langle ]] .. i ..", " .. j .. ", " .. repr_alignment(alignment).. [[\rangle$ ]] + repr = repr .. [[\\ \hline]] + end + repr = repr .. [[\end{tabular}]] +-- end + return repr end function main() local seq1 = "ATCTGAT" local seq2 = "TGCATA" local matrix = needle.needle_matrix(seq1, seq2) + local trace = multiple_path_backtrack_trace(matrix, seq1, seq2) + print(#trace) + print(trace_repr(trace)) end + +main() diff --git a/figures/part2/needle.lua b/figures/part2/needle.lua index c819bce..f8ef2e4 100755 --- a/figures/part2/needle.lua +++ b/figures/part2/needle.lua @@ -191,5 +191,7 @@ return { draw=draw_needle_matrix_graph, gap_penalty=gap_penalty, mismatch_penalty=mismatch_penalty, - match_penalty=match_penalty + match_penalty=match_penalty, + needle_matrix=needle_matrix, + sub=sub } diff --git a/main.pdf b/main.pdf index 2f81c3d..4149104 100644 --- a/main.pdf +++ b/main.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbbf82cae2b489561f68b9ae243686124a41f634e527717a82979966e728a612 -size 335755 +oid sha256:453a13b505a4ab0d71682620eb6f28fb8bfb499c7111498ed214d3e92c9e11db +size 323717