Add variation on Needleman - Wunsch algorithm

2024-03-25 12:04:13 +01:00 · 2024-03-25 12:04:13 +01:00 · e64a1d711a
parent e945027027
commit e64a1d711a
7 changed files with 356 additions and 119 deletions
--- a/content/chapters/include.tex
+++ b/content/chapters/include.tex
@ -11,7 +11,7 @@
 		}
 }

-\includechapters{part1}{3}
-\includechapters{part2}{2}
+\includechapters{part1}{4}
+\includechapters{part2}{4}

 % \includechapters{part3}{1}
--- a/content/chapters/part1/4.bak0
+++ b/content/chapters/part1/4.bak0
@ -0,0 +1,116 @@
+\chapter{Longest common subsequence}
+
+Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
+In this case, a longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$.
+\begin{algorithm}
+	\caption{Construct a longest common subsequence matrix}
+	\begin{algorithmic}[1]
+		\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+		\State $M \gets $ Array($n+1$, $m+1$)
+		\For{($i = 0$; $i < n+1$; $i++$)}
+		\For{($j = 0$; $j < m+1$; $j++$)}
+		\If {$i = 0$ or $j = 0$}
+		\State $M[i][j] = 0$
+		\Else
+		\If {$S_{1}[i-1] = S_{2}[j-1]$}
+		\State $match = M[i-1][j-1] + 1$
+		\Else
+		\State $match = M[i-1][j-1]$
+		\EndIf
+		\State $gap_{1} = M[i-1][j]$
+		\State $gap_{2} = M[i][j-1]$
+		\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
+		\EndIf
+		\EndFor
+		\EndFor
+		\State \Return $M$
+		\EndFunction
+	\end{algorithmic}
+      \end{algorithm}
+
+\begin{algorithm}
+	\caption{Construct a longest common subsequence matrix keeping the path in memory}
+	\begin{algorithmic}[1]
+		\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+		\State $M \gets $ Array($n+1$, $m+1$)
+		\State $P \gets $ Array($n+1$, $m+1$)
+		\For {($i = 0$; $i < n+1$; $i++$)}
+		\State $M[i][0] \gets 0$
+		\EndFor
+		\For {($j = 0$; $j < m+1$; $j++$)}
+		\State $M[0][j] \gets 0$
+		\EndFor
+		\For{($i = 1$; $i < n+1$; $i++$)}
+		\For{($j = 1$; $j < m+1$; $j++$)}
+		\If {$i = 0$ or $j = 0$}
+		\State $M[i][j] = 0$
+		\Else
+		\If {$S_{1}[i-1] = S_{2}[j-1]$}
+		\State $M[i][j] \gets M[i-1][j-1] + 1$
+		\State $P[i][j] \gets '\nwarrow'$
+		\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
+		\State $M[i][j] \gets M[i][j-1]$
+		\State $P[i][j] \gets '\leftarrow'$
+		\Else
+		\State $M[i][j] \gets M[i-1][j]$
+		\State $P[i][j] \gets '\downarrow'$
+		\EndIf
+		\EndFor
+		\EndFor
+		\State \Return $M, P$
+		\EndFunction
+	\end{algorithmic}
+      \end{algorithm}
+
+\begin{algorithm}
+	\caption{Backtrack the longest common subsequence}
+	\begin{algorithmic}[1]
+		\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+		\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
+		\State $L \gets Array(M[n][m])$
+		\State $k \gets M[n][m] - 1$
+		\State $i \gets n$
+		\State $j \gets m$
+		\While{$i > 0$ and $j > 0$}
+		\If {$P[i][j] = '\nwarrow' $}
+		\State $L[k] \gets S_{1}[i-1]$
+		\State $i--$
+		\State $j--$
+		\State $k--$
+		\ElsIf {$P[i][j] = '\leftarrow'$}
+		\State $j--$
+		\Else
+		\State $i--$
+		\EndIf
+		\EndWhile
+		\State \Return $L$
+		\EndFunction
+	\end{algorithmic}
+\end{algorithm}
+
+\iffalse
+
+\begin{algorithm}
+	\caption{Recursive reconstruction of the longest common subsequence}
+	\begin{algorithmic}[1]
+		\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+		\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
+		\State $i \gets n$
+		\State $j \gets m$
+		\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
+		\EndProcedure
+
+		\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
+		\If {$P[i][j] = '\nwarrow' $}
+		\State $l \gets S_{1}[i-1]$
+		\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
+		\State \texttt{print}($l$)
+		\ElsIf {$P[i][j] = '\leftarrow'$}
+		\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
+		\Else
+		\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
+		\EndIf
+		\EndProcedure
+	\end{algorithmic}
+\end{algorithm}
+\fi
--- a/content/chapters/part1/4.tex
+++ b/content/chapters/part1/4.tex
@ -55,6 +55,7 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
            \State $M[i][j] \gets M[i-1][j]$
            \State $P[i][j] \gets '\downarrow'$
        \EndIf
+	\EndIf
        \EndFor
        \EndFor
        \State \Return $M, P$
@ -88,7 +89,8 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
    \end{algorithmic}
 \end{algorithm}

-\begin{algorithm}
+\iffalse
+    \begin{algorithm}
        \caption{Recursive reconstruction of the longest common subsequence}
        \begin{algorithmic}[1]
            \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
@ -110,4 +112,5 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
            \EndIf
            \EndProcedure
        \end{algorithmic}
-\end{algorithm}
+    \end{algorithm}
+\fi
--- a/content/chapters/part2/1.tex
+++ b/content/chapters/part2/1.tex
@ -10,30 +10,37 @@
 		\State $M = $ Array($m+1$, $n+1$)
 		\Comment{Initialize the matrix first column and first row}
 		\State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.}
-		\For {($i = 0$; $i < m+1$; $i++$)}
-		\State $M[i][0] = i * del(S_{1}[i])$
+		\State $M[0][0] = 0$
+		\For {($i = 1$; $i < m+1$; $i++$)}
+		\State $M[i][0] = M[i-1][0] + gap\_penalty$
 		\EndFor
-		\For {($j = 0$; $j < n+1$; $j++$)}
-		\State $M[0][j] = j * ins(S_{2}[j])$
+		\For {($j = 1$; $j < n+1$; $j++$)}
+		\State $M[0][j] = M[0][j-1] + gap\_penalty$
 		\EndFor
 		\Comment{Fill the remaining matrix}
 		\For {($i = 1$; $i < m+1$; $i++$)}
 		\For {($j = 1$; $j < n+1$; $j++$)}
-		\State $delete = M[i-1][j] + del(S_{1}[i-1])$
-		\State $insert = M[i][j-1] + ins(S_{2}[j-1])$
+		\State $delete = M[i-1][j] + gap\_penalty$
+		\State $insert = M[i][j-1] + gap\_penalty$
 		\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
-		\State $choice = \max \{delete, insert, substitute\}$
+		\State $choice = \min \{delete, insert, substitute\}$
 		\If {$substitute = choice$}
 		\State $P[i-1][j-1] = '\nwarrow'$
-		\ElsIf {$insertion = choice$}
-		\State $P[i-1][j-1] = '\uparrow'$
-		\Else
+		\ElsIf {$insert = choice$}
 		\State $P[i-1][j-1] = '\leftarrow'$
+		\Else
+		\State $P[i-1][j-1] = '\uparrow'$
 		\EndIf
 		\State $M[i][j] = choice$
 		\EndFor
 		\EndFor
 		\EndProcedure
+	\end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+	\caption{Needleman-Wunsch Algorithm (Backtrack)}
+	\begin{algorithmic}[1]
 		\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
 		\State $extend_{1} = ''$
 		\State $extend_{2} = ''$
@ -47,18 +54,92 @@
 		\State $j--$
 		\ElsIf {$P[i-1][j-1] = '\uparrow'$}
 		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
-		\State $extend_{2} =\quad '-' \circ extend_{2}$
+		\State $extend_{2} = '-' \circ extend_{2}$
 		\State $i--$
 		\Else
-		\State $extend_{1} =\quad '-' \circ extend_{1}$
+		\State $extend_{1} = '-' \circ extend_{1}$
 		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
 		\State $j--$
 		\EndIf
 		\EndWhile
+		\While{$i > 0$}
+		\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
+		\State $extend_{2} = '-' \circ extend_{2}$
+		\State $i--$
+		\Comment{$S_{2}$ is exhausted: pad it with a gap}
+		\EndWhile
+		\While{$j > 0$}
+		\State $extend_{1} = '-' \circ extend_{1}$
+		\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
+		\State $j--$
+		\EndWhile
 		\State \Call{print}{$extend_{1}$}
 		\State \Call{print}{$extend_{2}$}
 		\EndProcedure
 		\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
-		\State \Call ShowAlignment($S_{1}$, $S_{2}$)
+		\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
+	\end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+	\caption{Needleman-Wunsch Algorithm, using proper notation (Fill the matrix)}
+	\begin{algorithmic}[1]
+		\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
+		\State $M = $ Array($m+1$, $n+1$)
+		\State $P = $ Array($m$, $n$)
+		\Comment{Store the direction of the cell we chose to build the next cell up on.}
+		\State $M[0][0] = 0$
+		\For {($i = 1$; $i < m+1$; $i++$)}
+		\State $M[i][0] = M[i-1][0] + gap\_penalty$
+		\EndFor
+		\For {($j = 1$; $j < n+1$; $j++$)}
+		\State $M[0][j] = M[0][j-1] + gap\_penalty$
+		\EndFor
+		\For {($i = 1$; $i < m+1$; $i++$)}
+		\For {($j = 1$; $j < n+1$; $j++$)}
+		\State $delete = M[i-1][j] + gap\_penalty$
+		\State $insert = M[i][j-1] + gap\_penalty$
+		\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
+		\State $M[i][j] = \min \{substitute, insert, delete\}$
+		\EndFor
+		\EndFor
+		\EndProcedure
+	\end{algorithmic}
+\end{algorithm}
+
+\begin{algorithm}
+	\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
+	\begin{algorithmic}[1]
+		\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
+		\State $alignment = LinkedList$
+		\State $i = m$
+		\State $j = n$
+		\While{$i > 0$ and $j > 0$}
+		\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
+		\State $tuple = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
+		\State $i--$
+		\State $j--$
+		\ElsIf {$M[i-1][j] = M[i][j] - gap\_penalty$}
+		\State $tuple = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
+		\State $i--$
+		\Else
+		\State $tuple = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
+		\State $j--$
+		\EndIf
+		\State \Call{Insert}{0, $alignment$,$tuple$}
+		\EndWhile
+		\While{$i > 0$}
+		\State $tuple = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
+		\State $i--$
+		\State \Call{Insert}{0, $alignment$,$tuple$}
+		\EndWhile
+		\While{$j > 0$}
+		\State $tuple = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
+		\State $j--$
+		\State \Call{Insert}{0, $alignment$,$tuple$}
+		\EndWhile
+		\EndProcedure
+		\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
+		\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
 	\end{algorithmic}
 \end{algorithm}
--- a/main.pdf
+++ b/main.pdf
--- a/tmp.pdf
+++ b/tmp.pdf
--- a/tmp.tex
+++ b/tmp.tex
@ -11,6 +11,43 @@
 \input{definitions.tex}

 \begin{document}
+
+\begin{algorithm}
+    \caption{Construct a longest common subsequence matrix keeping the path in memory}
+    \begin{algorithmic}[1]
+        \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
+        \State $M \gets $ Array($n+1$, $m+1$)
+        \State $P \gets $ Array($n+1$, $m+1$)
+        \For {($i = 0$; $i < n+1$; $i++$)}
+            \State $M[i][0] \gets 0$
+        \EndFor
+        \For {($j = 0$; $j < m+1$; $j++$)}
+            \State $M[0][j] \gets 0$
+        \EndFor
+        \For{($i = 1$; $i < n+1$; $i++$)}
+        \For{($j = 1$; $j < m+1$; $j++$)}
+        \If {$i = 0$ or $j = 0$}
+        \State $M[i][j] = 0$
+        \Else
+        \If {$S_{1}[i-1] = S_{2}[j-1]$}
+            \State $M[i][j] \gets M[i-1][j-1] + 1$
+            \State $P[i][j] \gets '\nwarrow'$
+        \ElsIf {$M[i][j-1] \geq M[i-1][j]$}
+            \State $M[i][j] \gets M[i][j-1]$
+            \State $P[i][j] \gets '\leftarrow'$
+            \Else
+            \State $M[i][j] \gets M[i-1][j]$
+            \State $P[i][j] \gets '\downarrow'$
+        \EndIf
+\EndIf
+        \EndFor
+        \EndFor
+        \State \Return $M, P$
+        \EndFunction
+    \end{algorithmic}
+\end{algorithm}
+
+\iffalse
 	\begin{algorithm}
 		\caption{Backtrack the longest common subsequence}
 		\begin{algorithmic}[1]
@ -60,7 +97,7 @@
 			\EndProcedure
 		\end{algorithmic}
 	\end{algorithm}
-
+\fi
 \end{document}

 \end{document}