Add variation on Needleman - Wunsch algorithm

This commit is contained in:
Samuel Ortion 2024-03-25 12:04:13 +01:00
parent e945027027
commit e64a1d711a
7 changed files with 356 additions and 119 deletions

View File

@ -11,7 +11,7 @@
} }
} }
\includechapters{part1}{3} \includechapters{part1}{4}
\includechapters{part2}{2} \includechapters{part2}{4}
% \includechapters{part3}{1} % \includechapters{part3}{1}

View File

@ -0,0 +1,116 @@
\chapter{Longest common subsequence}
Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
In this case, a longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$ (it is not unique: $\text{TCAT}$ and $\text{TGAT}$ have the same length).
% Fills the (n+1) x (m+1) table M, where M[i][j] is the length of a longest
% common subsequence of the first i symbols of S_1 and the first j symbols of
% S_2. Rows are indexed by S_1 and columns by S_2, so M is (n+1) x (m+1).
% S_1 and S_2 are 0-indexed, hence the i-1 / j-1 offsets when comparing symbols.
\begin{algorithm}
\caption{Construct a longest common subsequence matrix}
\begin{algorithmic}[1]
\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
  \State $M \gets $ Array($n+1$, $m+1$)
  \For{($i = 0$; $i < n+1$; $i++$)}
    \For{($j = 0$; $j < m+1$; $j++$)}
      \If {$i = 0$ or $j = 0$}
        \State $M[i][j] \gets 0$ \Comment{An empty prefix has an empty common subsequence}
      \Else
        \If {$S_{1}[i-1] = S_{2}[j-1]$}
          \State $\mathit{match} \gets M[i-1][j-1] + 1$
        \Else
          \State $\mathit{match} \gets M[i-1][j-1]$
        \EndIf
        \State $\mathit{gap}_{1} \gets M[i-1][j]$
        \State $\mathit{gap}_{2} \gets M[i][j-1]$
        \State $M[i][j] \gets \max \{ \mathit{match}, \mathit{gap}_{1}, \mathit{gap}_{2}\}$
      \EndIf
    \EndFor
  \EndFor
  \State \Return $M$
\EndFunction
\end{algorithmic}
\end{algorithm}
% Builds the longest-common-subsequence length table M together with a
% direction table P. P[i][j] records which neighbour M[i][j] was derived from:
% the diagonal (symbols match), the left cell M[i][j-1], or the cell M[i-1][j].
% Row 0 and column 0 of M are initialised to 0 by the two leading loops, so the
% main double loop starts at 1 and needs no further border test.
\begin{algorithm}
\caption{Construct a longest common subsequence matrix keeping the path in memory}
\begin{algorithmic}[1]
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
  \State $M \gets $ Array($n+1$, $m+1$)
  \State $P \gets $ Array($n+1$, $m+1$)
  \For {($i = 0$; $i < n+1$; $i++$)}
    \State $M[i][0] \gets 0$
  \EndFor
  \For {($j = 0$; $j < m+1$; $j++$)}
    \State $M[0][j] \gets 0$
  \EndFor
  \For{($i = 1$; $i < n+1$; $i++$)}
    \For{($j = 1$; $j < m+1$; $j++$)}
      \If {$S_{1}[i-1] = S_{2}[j-1]$}
        \State $M[i][j] \gets M[i-1][j-1] + 1$
        \State $P[i][j] \gets '\nwarrow'$
      \ElsIf {$M[i][j-1] \geq M[i-1][j]$}
        \State $M[i][j] \gets M[i][j-1]$
        \State $P[i][j] \gets '\leftarrow'$
      \Else
        \State $M[i][j] \gets M[i-1][j]$
        \State $P[i][j] \gets '\downarrow'$
      \EndIf
    \EndFor
  \EndFor
  \State \Return $M, P$
\EndFunction
\end{algorithmic}
\end{algorithm}
% Recovers one longest common subsequence by walking the direction table P
% from cell (n, m) back towards row 0 / column 0.
% The walk meets the matched symbols last-to-first, so L is filled from its
% last cell down to index 0; the returned array is then in reading order.
% M and P come from LCSQ_Matrix_Path (LCSQ_Matrix returns M only).
\begin{algorithm}
\caption{Backtrack the longest common subsequence}
\begin{algorithmic}[1]
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
  \State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
  \State $L \gets $ Array($M[n][m]$) \Comment{$M[n][m]$ is the length of the subsequence}
  \State $k \gets M[n][m] - 1$
  \State $i \gets n$
  \State $j \gets m$
  \While{$i > 0$ and $j > 0$}
    \If {$P[i][j] = '\nwarrow' $}
      \State $L[k] \gets S_{1}[i-1]$ \Comment{Row $i$ of $P$ corresponds to symbol $S_{1}[i-1]$}
      \State $i--$
      \State $j--$
      \State $k--$
    \ElsIf {$P[i][j] = '\leftarrow'$}
      \State $j--$
    \Else
      \State $i--$
    \EndIf
  \EndWhile
  \State \Return $L$
\EndFunction
\end{algorithmic}
\end{algorithm}
% Recursive alternative to the iterative backtrack. The whole environment is
% kept inside the iffalse/fi guard, so it is not typeset.
% Aux recurses first and prints afterwards, so symbols are printed in reading
% order. It stops as soon as it reaches row 0 or column 0 of P.
\iffalse
\begin{algorithm}
\caption{Recursive reconstruction of the longest common subsequence}
\begin{algorithmic}[1]
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
  \State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
  \State $i \gets n$
  \State $j \gets m$
  \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
\EndProcedure
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
  \If {$i = 0$ or $j = 0$}
    \State \Return \Comment{Border reached: nothing left to reconstruct}
  \ElsIf {$P[i][j] = '\nwarrow' $}
    \State $l \gets S_{1}[i-1]$ \Comment{Row $i$ of $P$ corresponds to symbol $S_{1}[i-1]$}
    \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
    \State \texttt{print}($l$)
  \ElsIf {$P[i][j] = '\leftarrow'$}
    \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
  \Else
    \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
  \EndIf
\EndProcedure
\end{algorithmic}
\end{algorithm}
\fi

View File

@ -55,6 +55,7 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
\State $M[i][j] \gets M[i-1][j]$ \State $M[i][j] \gets M[i-1][j]$
\State $P[i][j] \gets '\downarrow'$ \State $P[i][j] \gets '\downarrow'$
\EndIf \EndIf
\EndIf
\EndFor \EndFor
\EndFor \EndFor
\State \Return $M, P$ \State \Return $M, P$
@ -88,6 +89,7 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\iffalse
\begin{algorithm} \begin{algorithm}
\caption{Recursive reconstruction of the longest common subsequence} \caption{Recursive reconstruction of the longest common subsequence}
\begin{algorithmic}[1] \begin{algorithmic}[1]
@ -111,3 +113,4 @@ In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$.
\EndProcedure \EndProcedure
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\fi

View File

@ -10,30 +10,37 @@
\State $M = $ Array($m+1$, $n+1$) \State $M = $ Array($m+1$, $n+1$)
\Comment{Initialize the matrix first column and first row} \Comment{Initialize the matrix first column and first row}
\State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.} \State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.}
\For {($i = 0$; $i < m+1$; $i++$)} \State $M[0][0] = 0$
\State $M[i][0] = i * del(S_{1}[i])$ \For {($i = 1$; $i < m+1$; $i++$)}
\State $M[i][0] = M[i-1][0] + gap\_penalty$
\EndFor \EndFor
\For {($j = 0$; $j < n+1$; $j++$)} \For {($j = 1$; $j < n+1$; $j++$)}
\State $M[0][j] = j * ins(S_{2}[j])$ \State $M[0][j] = M[0][j-1] + gap\_penalty$
\EndFor \EndFor
\Comment{Fill the remaining matrix} \Comment{Fill the remaining matrix}
\For {($i = 1$; $i < m+1$; $i++$)} \For {($i = 1$; $i < m+1$; $i++$)}
\For {($j = 1$; $j < n+1$; $j++$)} \For {($j = 1$; $j < n+1$; $j++$)}
\State $delete = M[i-1][j] + del(S_{1}[i-1])$ \State $delete = M[i-1][j] + gap\_penalty$
\State $insert = M[i][j-1] + ins(S_{2}[j-1])$ \State $insert = M[i][j-1] + gap\_penalty$
\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$ \State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
\State $choice = \max \{delete, insert, substitute\}$ \State $choice = \min \{delete, insert, substitute\}$
\If {$substitute = choice$} \If {$substitute = choice$}
\State $P[i-1][j-1] = '\nwarrow'$ \State $P[i-1][j-1] = '\nwarrow'$
\ElsIf {$insertion = choice$} \ElsIf {$delete = choice$}
\State $P[i-1][j-1] = '\uparrow'$
\Else
\State $P[i-1][j-1] = '\leftarrow'$ \State $P[i-1][j-1] = '\leftarrow'$
\Else
\State $P[i-1][j-1] = '\uparrow'$
\EndIf \EndIf
\State $M[i][j] = choice$ \State $M[i][j] = choice$
\EndFor \EndFor
\EndFor \EndFor
\EndProcedure \EndProcedure
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm (Backtrack)}
\begin{algorithmic}[1]
\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} \Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $extend_{1} = ''$ \State $extend_{1} = ''$
\State $extend_{2} = ''$ \State $extend_{2} = ''$
@ -47,18 +54,92 @@
\State $j--$ \State $j--$
\ElsIf {$P[i-1][j-1] = '\uparrow'$} \ElsIf {$P[i-1][j-1] = '\uparrow'$}
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$ \State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
\State $extend_{2} =\quad '-' \circ extend_{2}$ \State $extend_{2} = '-' \circ extend_{2}$
\State $i--$ \State $i--$
\Else \Else
\State $extend_{1} =\quad '-' \circ extend_{1}$ \State $extend_{1} = '-' \circ extend_{1}$
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$ \State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
\State $j--$ \State $j--$
\EndIf \EndIf
\EndWhile \EndWhile
\While{$i > 0$}
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
\State $extend_{2} = '-' \circ extend_{2}$
\State $i--$
\EndWhile
\While{$j > 0$}
\State $extend_{1} = '-' \circ extend_{1}$
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
\State $j--$
\EndWhile
\State \Call{print}{$extend_{1}$} \State \Call{print}{$extend_{1}$}
\State \Call{print}{$extend_{2}$} \State \Call{print}{$extend_{2}$}
\EndProcedure \EndProcedure
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$} \State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
\State \Call ShowAlignment($S_{1}$, $S_{2}$) \State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
\end{algorithmic}
\end{algorithm}
% Variant of the matrix fill that stores only the score table M.
% M[0][0] is 0; the first column and first row accumulate gap_penalty once per
% step; every other cell takes the minimum of the three candidate moves
% (delete from above, insert from the left, substitute from the diagonal).
% NOTE(review): P is allocated below but never written in this procedure;
% BacktrackAlignment appears to recover the path from M alone -- confirm
% whether P (and its comment) is still needed in this variant.
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm (Backtrack) }
\begin{algorithmic}[1]
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $M = $ Array($m+1$, $n+1$)
\State $P = $ Array($m$, $n$)
\Comment{Store the direction of the cell we chose to build the next cell up on.}
\State $M[0][0] = 0$
% First column: each step down adds one gap.
\For {($i = 1$; $i < m+1$; $i++$)}
\State $M[i][0] = M[i-1][0] + gap\_penalty$
\EndFor
% First row: each step right adds one gap.
\For {($j = 1$; $j < n+1$; $j++$)}
\State $M[0][j] = M[0][j-1] + gap\_penalty$
\EndFor
% Inner cells: S_1 and S_2 are read at i-1 / j-1 because M has one extra
% leading row and column.
\For {($i = 1$; $i < m+1$; $i++$)}
\For {($j = 1$; $j < n+1$; $j++$)}
\State $delete = M[i-1][j] + gap\_penalty$
\State $insert = M[i][j-1] + gap\_penalty$
\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
\State $M[i][j] = \min \{substitute, insert, delete\}$
\EndFor
\EndFor
\EndProcedure
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
\begin{algorithmic}[1]
% Rebuilds an alignment from the score table M alone, starting at cell (m, n).
% At each step it checks which predecessor can explain M[i][j]:
%   diagonal: M[i-1][j-1] + sub(...)      -> both symbols are aligned,
%   above:    M[i-1][j]   + gap_penalty   -> S_1[i-1] faces a gap,
%   otherwise the left cell               -> S_2[j-1] faces a gap.
% Each column is inserted at position 0 of the list, so the alignment ends up
% in reading order. The two trailing loops consume whatever prefix is left
% once one of the sequences is exhausted.
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
  \State $alignment = LinkedList$
  \State $i = m$
  \State $j = n$
  \While{$i > 0$ and $j > 0$}
    \If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
      \State $tuple = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
      \State $i--$
      \State $j--$
    \ElsIf {$M[i-1][j] = M[i][j] - gap\_penalty$}
      \State $tuple = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
      \State $i--$
    \Else
      \State $tuple = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
      \State $j--$
    \EndIf
    \State \Call{Insert}{0, $alignment$,$tuple$}
  \EndWhile
  \While{$i > 0$}
    \State $tuple = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
    \State $i--$
    \State \Call{Insert}{0, $alignment$,$tuple$}
  \EndWhile
  \While{$j > 0$}
    \State $tuple = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
    \State $j--$
    \State \Call{Insert}{0, $alignment$,$tuple$}
  \EndWhile
\EndProcedure
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}

BIN
main.pdf (Stored with Git LFS)

Binary file not shown.

BIN
tmp.pdf (Stored with Git LFS)

Binary file not shown.

39
tmp.tex
View File

@ -11,6 +11,43 @@
\input{definitions.tex} \input{definitions.tex}
\begin{document} \begin{document}
% Builds the longest-common-subsequence length table M together with a
% direction table P. P[i][j] records which neighbour M[i][j] was derived from:
% the diagonal (symbols match), the left cell M[i][j-1], or the cell M[i-1][j].
% Row 0 and column 0 of M are initialised to 0 by the two leading loops, so the
% main double loop starts at 1 and needs no further border test.
\begin{algorithm}
\caption{Construct a longest common subsequence matrix keeping the path in memory}
\begin{algorithmic}[1]
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
  \State $M \gets $ Array($n+1$, $m+1$)
  \State $P \gets $ Array($n+1$, $m+1$)
  \For {($i = 0$; $i < n+1$; $i++$)}
    \State $M[i][0] \gets 0$
  \EndFor
  \For {($j = 0$; $j < m+1$; $j++$)}
    \State $M[0][j] \gets 0$
  \EndFor
  \For{($i = 1$; $i < n+1$; $i++$)}
    \For{($j = 1$; $j < m+1$; $j++$)}
      \If {$S_{1}[i-1] = S_{2}[j-1]$}
        \State $M[i][j] \gets M[i-1][j-1] + 1$
        \State $P[i][j] \gets '\nwarrow'$
      \ElsIf {$M[i][j-1] \geq M[i-1][j]$}
        \State $M[i][j] \gets M[i][j-1]$
        \State $P[i][j] \gets '\leftarrow'$
      \Else
        \State $M[i][j] \gets M[i-1][j]$
        \State $P[i][j] \gets '\downarrow'$
      \EndIf
    \EndFor
  \EndFor
  \State \Return $M, P$
\EndFunction
\end{algorithmic}
\end{algorithm}
\iffalse
\begin{algorithm} \begin{algorithm}
\caption{Backtrack the longest common subsequence} \caption{Backtrack the longest common subsequence}
\begin{algorithmic}[1] \begin{algorithmic}[1]
@ -60,7 +97,7 @@
\EndProcedure \EndProcedure
\end{algorithmic} \end{algorithmic}
\end{algorithm} \end{algorithm}
\fi
\end{document} \end{document}
\end{document} \end{document}