From 80f4669d230fe177865f520cedaf666658e14039 Mon Sep 17 00:00:00 2001
From: Samuel Ortion <samuel+git@ortion.fr>
Date: Tue, 2 Apr 2024 14:51:44 +0200
Subject: [PATCH] fix: Needleman-Wunsch backtrack was faulty

---
 .gitignore                               |  2 +-
 content/chapters/part2/1.tex             | 40 ++++++-----
 figures/part2/needle-backtrack-stack.lua | 85 +++++++++++++++++++++++-
 figures/part2/needle.lua                 |  4 +-
 main.pdf                                 |  4 +-
 5 files changed, 113 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index b7d2d7a..fa368f5 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 build/
-**/.bak*
+**/*.bak*
 .auctex-auto
 
 ## Core latex/pdflatex auxiliary files:
diff --git a/content/chapters/part2/1.tex b/content/chapters/part2/1.tex
index 0986eae..8230131 100644
--- a/content/chapters/part2/1.tex
+++ b/content/chapters/part2/1.tex
@@ -1,5 +1,6 @@
 \chapter{Sequence alignment}
 
+\iffalse
 \begin{algorithm}
 	\caption{Needleman-Wunsch Algorithm}
 	\begin{algorithmic}[1]
@@ -77,10 +78,12 @@
 		\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
 		\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
 	\end{algorithmic}
-\end{algorithm}
+      \end{algorithm}
+
+      \fi
 
 \begin{algorithm}
-	\caption{Needleman-Wunsch Algorithm, using proper notation }
+	\caption{Needleman-Wunsch Algorithm, Build the matrix}
 	\begin{algorithmic}[1]
 		\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\State $M = $ Array($m+1$, $n+1$)
@@ -106,7 +109,7 @@
 \end{algorithm}
 
 \begin{algorithm}
-	\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
+	\caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
 	\begin{algorithmic}[1]
 		\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\State $alignment = LinkedList$
@@ -149,14 +152,10 @@
 		\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
 		\Function{BacktrackRecurse}{$i$, $j$}
 		\If {$i > 0$ and $j > 0$}
-		\State $substitute = M[i-1][j-1]$
-		\State $delete = M[i-1][j]$
-		\State $insert = M[i][j-1]$
-		\State $min = \min \{ substitute, delete, insert \}$
-		\If {$substitute = min$}
+		\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
 		\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
 		\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
-		\ElsIf {$delete = min$}
+		\ElsIf {$M[i-1][j] + gap\_penalty = M[i][j]$}
 		\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
 		\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
 		\Else
@@ -172,7 +171,9 @@
 		\Else
 		\State \Return []
 		\EndIf
+		\Else
 		\State \Return $z$
+		\EndIf
 		\EndFunction
 		\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
 		\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
@@ -185,21 +186,17 @@
 	\begin{algorithmic}[1]
 		\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
 		\If {$i > 0$ and $j > 0$}
-		\State $substitute = M[i-1][j-1]$
-		\State $delete = M[i-1][j]$
-		\State $insert = M[i][j-1]$
-		\State $min = \min \{ substitute, delete, insert \}$
-		\If {$substitute = min$}
+		\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
 		\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
 		\State $z' = value \circ z$
 		\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
 		\EndIf
-		\If {$delete = min$}
+		\If {$M[i-1][j] + gap\_penalty = M[i][j]$}
 		\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
 		\State $z' = value \circ z$
 		\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
 		\EndIf
-		\If {$insert = min$}
+		\If {$M[i][j-1] + gap\_penalty = M[i][j]$}
 		\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
 		\State $z' = value \circ z$
 		\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
@@ -212,11 +209,20 @@
 		\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
 		\State $z' = value \circ z$
 		\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
-		\EndIf
+		\Else
 		\State \Call{print}{$z$}
+		\EndIf
 		\EndProcedure
 		\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
 		\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
 		\EndProcedure
 	\end{algorithmic}
 \end{algorithm}
+
+
+
+\begin{figure}
+  \centering
+  \includegraphics{figures/part2/needle.pdf}
+  \caption{Needleman-Wunsch global alignment matrix with an example of optimal path.}
+ \end{figure}
diff --git a/figures/part2/needle-backtrack-stack.lua b/figures/part2/needle-backtrack-stack.lua
index e23e2fd..183f785 100644
--- a/figures/part2/needle-backtrack-stack.lua
+++ b/figures/part2/needle-backtrack-stack.lua
@@ -1,22 +1,105 @@
 needle = require("./needle")
 
+function table.shallow_copy(t)
+  local t2 = {}
+  for k,v in pairs(t) do
+    t2[k] = v
+  end
+  return t2
+end
+
 function multiple_path_backtrack_trace(matrix, seq1, seq2)
     local stack = {}
     local m=string.len(seq1)
     local n=string.len(seq2)
     local i=m
     local j=n
-    table.insert(1, {i, j, nil})
+    table.insert(stack, 1, {i, j, {}})
+    local trace = {}
     while #stack ~= 0 do
         local state = table.remove(stack, 1)
+        table.insert(trace, #trace+1, state)
         local i=state[1]
         local j=state[2]
         local alignment = state[3]
+        if (i > 0 and j > 0) then
+            local nt1 = string.sub(seq1, i-1, i-1)
+            local nt2 = string.sub(seq2, j-1, j-1)
+            if (matrix[i][j] == matrix[i-1][j-1] + needle.sub(nt1, nt2)) then
+                local new_alignment = table.shallow_copy(alignment)
+                table.insert(new_alignment, 1, {nt1, nt2})
+                table.insert(stack, 1, {i - 1, j - 1, new_alignment})
+            end
+            if (matrix[i][j] == matrix[i-1][j] + needle.gap_penalty) then
+                local new_alignment = table.shallow_copy(alignment)
+                table.insert(new_alignment, 1, {nt1, '-'})
+                table.insert(stack, 1, {i-1, j, new_alignment})
+            end
+            if (matrix[i][j] == matrix[i][j-1] + needle.gap_penalty) then
+                local new_alignment = table.shallow_copy(alignment)
+                table.insert(new_alignment, 1, {'-', nt2})
+                table.insert(stack, 1, {i, j-1, new_alignment})
+            end
+        end
+        if (i > 0) then
+            local nt1 = string.sub(seq1, i-1, i-1)
+            local new_alignment = table.shallow_copy(alignment)
+            table.insert(new_alignment, 1, {nt1, '-'})
+            table.insert(stack, 1, {i-1, j, new_alignment})
+        end
+        if (j > 0) then
+            local nt2 = string.sub(seq2, j-1, j-1)
+            local new_alignment = table.shallow_copy(alignment)
+            table.insert(new_alignment, 1, {'-', nt2})
+            table.insert(stack, 1, {i, j-1, new_alignment})
+        end
     end
+    return trace
+end
+
+function repr_alignment(alignment)
+    local repr = [[\begin{pmatrix}]]
+    for i, vector in ipairs(alignment) do
+        repr = repr .. vector[1]
+        if i < #alignment then
+            repr = repr .. " & "
+        end
+    end
+    repr = repr .. [[\\]] .. " \n"
+    for i, vector in ipairs(alignment) do
+        repr = repr .. vector[2]
+        if i < #alignment then
+            repr = repr .. " & "
+        end
+    end
+    repr = repr .. [[\end{pmatrix}]]
+    return repr
+end
+
+function trace_repr(trace)
+    local repr = ""
+--     for stack_index, stack in ipairs(trace) do
+--         repr = repr .. "iteration " .. stack_index .. " :" .. [[\\]]
+        repr = repr .. [[\begin{tabular}{|c|} \\ \hline ]]
+        for call_index, call in ipairs(trace) do
+            local i = call[1]
+            local j = call[2]
+            local aligment = call[3]
+            repr = repr .. [[ $\langle ]] .. i ..", " .. j .. ", " .. repr_alignment(alignment).. [[\rangle$ ]]
+            repr = repr .. [[\\ \hline]]
+        end
+        repr = repr .. [[\end{tabular}]]
+--     end
+    return repr
 end
 
 function main()
     local seq1 = "ATCTGAT"
     local seq2 = "TGCATA"
     local matrix = needle.needle_matrix(seq1, seq2)
+    local trace = multiple_path_backtrack_trace(matrix, seq1, seq2)
+    print(#trace)
+    print(trace_repr(trace))
 end
+
+main()
diff --git a/figures/part2/needle.lua b/figures/part2/needle.lua
index c819bce..f8ef2e4 100755
--- a/figures/part2/needle.lua
+++ b/figures/part2/needle.lua
@@ -191,5 +191,7 @@ return {
   draw=draw_needle_matrix_graph,
   gap_penalty=gap_penalty,
   mismatch_penalty=mismatch_penalty,
-  match_penalty=match_penalty
+  match_penalty=match_penalty,
+  needle_matrix=needle_matrix,
+  sub=sub
 }
diff --git a/main.pdf b/main.pdf
index 2f81c3d..4149104 100644
--- a/main.pdf
+++ b/main.pdf
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbbf82cae2b489561f68b9ae243686124a41f634e527717a82979966e728a612
-size 335755
+oid sha256:453a13b505a4ab0d71682620eb6f28fb8bfb499c7111498ed214d3e92c9e11db
+size 323717