% sequence-algorithms/content/chapters/part2/1.tex
\chapter{Sequence alignment}
% Disabled draft of the Needleman-Wunsch pseudocode: this version records the
% chosen predecessor of every cell in a direction matrix P and replays P to
% print the alignment. The conditional below keeps it out of the typeset output.
\iffalse
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm}
\begin{algorithmic}[1]
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\Comment{$sub(a, b)$ is the substitution score and $gap\_penalty$ the cost of a deletion or an insertion, relative to the reference sequence $S_{1}$}
\State $M = $ Array($m+1$, $n+1$)
\Comment{Initialize the matrix first column and first row}
\State $P = $ Array($m$, $n$) \Comment{Store the direction of the cell we chose to build the next cell up on.}
\State $M[0][0] = 0$
\For {($i = 1$; $i < m+1$; $i++$)}
\State $M[i][0] = M[i-1][0] + gap\_penalty$
\EndFor
\For {($j = 1$; $j < n+1$; $j++$)}
\State $M[0][j] = M[0][j-1] + gap\_penalty$
\EndFor
\Comment{Fill the remaining matrix}
\For {($i = 1$; $i < m+1$; $i++$)}
\For {($j = 1$; $j < n+1$; $j++$)}
\State $delete = M[i-1][j] + gap\_penalty$
\State $insert = M[i][j-1] + gap\_penalty$
\State $substitute = M[i-1][j-1] + sub(S_{1}[i-1], S_{2}[j-1])$
\State $choice = \min \{delete, insert, substitute\}$
% The arrow points at the predecessor cell: diagonal for a substitution,
% the cell above (row i-1) for a deletion, the cell to the left (column j-1)
% for an insertion. ShowAlignment relies on exactly this convention.
\If {$substitute = choice$}
\State $P[i-1][j-1] = '\nwarrow'$
\ElsIf {$delete = choice$}
\State $P[i-1][j-1] = '\uparrow'$
\Else
\State $P[i-1][j-1] = '\leftarrow'$
\EndIf
\State $M[i][j] = choice$
\EndFor
\EndFor
\EndProcedure
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm (Backtrack)}
\begin{algorithmic}[1]
% ShowAlignment replays the direction matrix P written by FillMatrix, starting
% from the last cell, and builds both aligned strings from right to left.
\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $extend_{1} = ''$
\State $extend_{2} = ''$
\State $i = m$
\State $j = n$
\While{$i > 0$ and $j > 0$}
\If {$P[i-1][j-1] = '\nwarrow'$}
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
\State $i--$
\State $j--$
\ElsIf {$P[i-1][j-1] = '\uparrow'$}
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
\State $extend_{2} = '-' \circ extend_{2}$
\State $i--$
\Else
\State $extend_{1} = '-' \circ extend_{1}$
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
\State $j--$
\EndIf
\EndWhile
% One sequence is exhausted: pad the remainder of the other one with gaps.
\While{$i > 0$}
\State $extend_{1} = S_{1}[i-1] \circ extend_{1}$
\State $extend_{2} = '-' \circ extend_{2}$
\State $i--$
\EndWhile
\While{$j > 0$}
\State $extend_{1} = '-' \circ extend_{1}$
\State $extend_{2} = S_{2}[j-1] \circ extend_{2}$
\State $j--$
\EndWhile
\State \Call{print}{$extend_{1}$}
\State \Call{print}{$extend_{2}$}
\EndProcedure
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
\end{algorithmic}
\end{algorithm}
\fi
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, Build the matrix}
\begin{algorithmic}[1]
% FillMatrix builds the (m+1) x (n+1) cost matrix M. Each inner cell M[i][j]
% is the minimum over a substitution, an insertion and a deletion step.
% M is not returned; the backtracking procedures of this chapter read it afterwards.
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $M \gets $ Array($m+1$, $n+1$)
\State $M[0][0] \gets 0$
% First column: each further symbol of S_1 adds one gap penalty.
\For {$i \gets 1$ \textbf{to} $m$}
\State $M[i][0] \gets M[i-1][0] + \mathit{gap\_penalty}$
\EndFor
% First row: each further symbol of S_2 adds one gap penalty.
\For {$j \gets 1$ \textbf{to} $n$}
\State $M[0][j] \gets M[0][j-1] + \mathit{gap\_penalty}$
\EndFor
% Inner cells: keep the cheapest of the three possible predecessors.
\For {$i \gets 1$ \textbf{to} $m$}
\For {$j \gets 1$ \textbf{to} $n$}
\State $\mathit{delete} \gets M[i-1][j] + \mathit{gap\_penalty}$
\State $\mathit{insert} \gets M[i][j-1] + \mathit{gap\_penalty}$
\State $\mathit{substitute} \gets M[i-1][j-1] + \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$
\State $M[i][j] \gets \min \{\mathit{substitute}, \mathit{insert}, \mathit{delete}\}$
\EndFor
\EndFor
\EndProcedure
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
\begin{algorithmic}[1]
% BacktrackAlignment walks the filled matrix M from cell (m, n) back towards
% (0, 0) and rebuilds one alignment, inserting each new column at the front
% of the list. A column pairs a symbol of S_1 with a symbol of S_2;
% \varepsilon marks a gap.
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $\mathit{alignment} \gets \mathit{LinkedList}$
\State $i \gets m$
\State $j \gets n$
\While{$i > 0$ and $j > 0$}
\If {$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
\State $\mathit{tuple} \gets \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
\State $i \gets i - 1$
\State $j \gets j - 1$
% Deletion: this branch consumes S_1[i-1], so its predecessor is the cell
% above, M[i-1][j] (FillMatrix computes delete from that cell).
\ElsIf {$M[i-1][j] = M[i][j] - \mathit{gap\_penalty}$}
\State $\mathit{tuple} \gets \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\State $i \gets i - 1$
\Else
% Insertion: the only remaining predecessor is M[i][j-1].
\State $\mathit{tuple} \gets \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $j \gets j - 1$
\EndIf
\State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
\EndWhile
% One sequence is exhausted: pad the remainder of the other one with gaps.
\While{$i > 0$}
\State $\mathit{tuple} \gets \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\State $i \gets i - 1$
\State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
\EndWhile
\While{$j > 0$}
\State $\mathit{tuple} \gets \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $j \gets j - 1$
\State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
\EndWhile
% Hand the reconstructed alignment back; the original built it and dropped it.
\State \Return $\mathit{alignment}$
\EndProcedure
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Backtrack a single alignment in a recursive way}
\begin{algorithmic}[1]
% BacktrackRecurse returns one alignment of the prefixes S_1[0..i-1] and
% S_2[0..j-1] as a list of columns; \varepsilon marks a gap.
% The signature lists all five arguments that every call site passes.
\Function{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
\If {$i > 0$ and $j > 0$}
% The recursive call aligns the shorter prefixes, so the current column is
% the last one and is appended on the right.
\If {$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
\State $z \gets $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
\State $z \gets z \circ \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
\ElsIf {$M[i-1][j] + \mathit{gap\_penalty} = M[i][j]$}
\State $z \gets $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
\State $z \gets z \circ \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\Else
\State $z \gets $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
\State $z \gets z \circ \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\EndIf
\ElsIf {$i > 0$}
% S_2 is exhausted: the rest of S_1 faces gaps.
\State $z \gets $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
\State $z \gets z \circ \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\ElsIf {$j > 0$}
% S_1 is exhausted: the rest of S_2 faces gaps.
\State $z \gets $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
\State $z \gets z \circ \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\Else
% Base case, i = 0 and j = 0: the empty alignment.
\State $z \gets []$
\EndIf
% Single exit, so every branch returns its result.
\State \Return $z$
\EndFunction
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
\EndFunction
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\caption{Backtrack all the optimum alignments in a recursive way}
\begin{algorithmic}[1]
% BacktrackRecurse prints every alignment reachable from cell (i, j).
% z is the accumulator: the columns already chosen on the way down from (m, n).
% Each step prepends its column to z and recurses on the predecessor cell.
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$}
\If {$i > 0$ and $j > 0$}
% Three independent tests (not an if/else chain): every predecessor that
% matches the value of M[i][j] is explored.
\If {$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
\State $\mathit{value} \gets \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
\State $z' \gets \mathit{value} \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
\EndIf
\If {$M[i-1][j] + \mathit{gap\_penalty} = M[i][j]$}
\State $\mathit{value} \gets \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\State $z' \gets \mathit{value} \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
\EndIf
\If {$M[i][j-1] + \mathit{gap\_penalty} = M[i][j]$}
\State $\mathit{value} \gets \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $z' \gets \mathit{value} \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
\EndIf
\ElsIf {$i > 0$}
% S_2 is exhausted: the rest of S_1 faces gaps.
\State $\mathit{value} \gets \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\State $z' \gets \mathit{value} \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
\ElsIf {$j > 0$}
% S_1 is exhausted: the rest of S_2 faces gaps.
\State $\mathit{value} \gets \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $z' \gets \mathit{value} \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
\Else
% Cell (0, 0) reached: z holds one complete alignment.
\State \Call{print}{$z$}
\EndIf
\EndProcedure
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
% Start at the last cell with an empty accumulator; results are printed, not returned.
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
\EndProcedure
\end{algorithmic}
\end{algorithm}
% Illustration for the Needleman-Wunsch algorithms of this chapter; the
% graphic is loaded from figures/part2/needle.pdf at its natural size.
\begin{figure}
\centering
\includegraphics{figures/part2/needle.pdf}
\caption{Needleman-Wunsch global alignment matrix with an example of optimal path.}
\end{figure}