229 lines
7.9 KiB
TeX
229 lines
7.9 KiB
TeX
\chapter{Sequence alignment}
|
|
|
|
\iffalse
|
|
% Needleman-Wunsch matrix fill, variant that also records in P which
% neighbouring cell each entry of M was derived from.
%   M: (m+1) x (n+1) score matrix; row 0 and column 0 are the all-gap prefixes.
%   P: m x n direction matrix; the arrow for M[i][j] is stored at P[i-1][j-1].
% Arrow convention, matching the way ShowAlignment consumes P:
%   \nwarrow   substitution, derived from M[i-1][j-1] (consumes S1 and S2)
%   \uparrow   deletion,     derived from M[i-1][j]   (consumes S1 only)
%   \leftarrow insertion,    derived from M[i][j-1]   (consumes S2 only)
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm}
\begin{algorithmic}[1]
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
    \Comment{$\mathit{sub}(a, b)$ is the substitution score and $\mathit{gap\_penalty}$ the cost of an insertion or a deletion with respect to the reference sequence $S_{1}$}
    \State $M = $ Array($m+1$, $n+1$)
    \State $P = $ Array($m$, $n$)
    \Comment{Store the direction of the cell we chose to build the next cell up on.}
    \State $M[0][0] = 0$
    \Comment{Initialize the matrix first column and first row}
    \For {($i = 1$; $i < m+1$; $i++$)}
        \State $M[i][0] = M[i-1][0] + \mathit{gap\_penalty}$
    \EndFor
    \For {($j = 1$; $j < n+1$; $j++$)}
        \State $M[0][j] = M[0][j-1] + \mathit{gap\_penalty}$
    \EndFor
    % Fill the remaining matrix, row by row.
    \For {($i = 1$; $i < m+1$; $i++$)}
        \For {($j = 1$; $j < n+1$; $j++$)}
            \State $\mathit{delete} = M[i-1][j] + \mathit{gap\_penalty}$
            \State $\mathit{insert} = M[i][j-1] + \mathit{gap\_penalty}$
            \State $\mathit{substitute} = M[i-1][j-1] + \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$
            \State $\mathit{choice} = \min \{\mathit{delete}, \mathit{insert}, \mathit{substitute}\}$
            % On ties the substitution is preferred, then the deletion.
            \If {$\mathit{substitute} = \mathit{choice}$}
                \State $P[i-1][j-1] = '\nwarrow'$
            \ElsIf {$\mathit{delete} = \mathit{choice}$}
                \State $P[i-1][j-1] = '\uparrow'$
            \Else
                \State $P[i-1][j-1] = '\leftarrow'$
            \EndIf
            \State $M[i][j] = \mathit{choice}$
        \EndFor
    \EndFor
\EndProcedure
\end{algorithmic}
\end{algorithm}
|
|
|
|
% Rebuilds one alignment by walking the direction matrix P from its
% bottom-right cell back towards the origin, prepending one column per step.
% P is not a parameter: it is the matrix written by FillMatrix, which the
% driver statements at the end of this listing call first.
%   \nwarrow : pair S1[i-1] with S2[j-1], move diagonally
%   \uparrow : pair S1[i-1] with a gap, move up
%   otherwise: pair a gap with S2[j-1], move left
% Once one sequence is exhausted, what is left of the other is paired with gaps.
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm (Backtrack)}
\begin{algorithmic}[1]
\Procedure{ShowAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
    \State $\mathit{extend}_{1} = ''$
    \State $\mathit{extend}_{2} = ''$
    \State $i = m$
    \State $j = n$
    \While{$i > 0$ and $j > 0$}
        \If {$P[i-1][j-1] = '\nwarrow'$}
            \State $\mathit{extend}_{1} = S_{1}[i-1] \circ \mathit{extend}_{1}$
            \State $\mathit{extend}_{2} = S_{2}[j-1] \circ \mathit{extend}_{2}$
            \State $i--$
            \State $j--$
        \ElsIf {$P[i-1][j-1] = '\uparrow'$}
            \State $\mathit{extend}_{1} = S_{1}[i-1] \circ \mathit{extend}_{1}$
            \State $\mathit{extend}_{2} = '-' \circ \mathit{extend}_{2}$
            \State $i--$
        \Else
            \State $\mathit{extend}_{1} = '-' \circ \mathit{extend}_{1}$
            \State $\mathit{extend}_{2} = S_{2}[j-1] \circ \mathit{extend}_{2}$
            \State $j--$
        \EndIf
    \EndWhile
    % Leftover prefix of S1: only gaps can face it.
    \While{$i > 0$}
        \State $\mathit{extend}_{1} = S_{1}[i-1] \circ \mathit{extend}_{1}$
        \State $\mathit{extend}_{2} = '-' \circ \mathit{extend}_{2}$
        \State $i--$
    \EndWhile
    % Leftover prefix of S2: only gaps can face it.
    \While{$j > 0$}
        \State $\mathit{extend}_{1} = '-' \circ \mathit{extend}_{1}$
        \State $\mathit{extend}_{2} = S_{2}[j-1] \circ \mathit{extend}_{2}$
        \State $j--$
    \EndWhile
    \State \Call{print}{$\mathit{extend}_{1}$}
    \State \Call{print}{$\mathit{extend}_{2}$}
\EndProcedure
% Driver: fill the matrices, then print the alignment.
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
\end{algorithmic}
\end{algorithm}
|
|
|
|
\fi
|
|
|
|
% Needleman-Wunsch, first step: fill the (m+1) x (n+1) matrix M.
%   M[i][j] is computed for the prefixes S1[0..i-1] and S2[0..j-1].
%   Row 0 and column 0 accumulate one gap penalty per skipped symbol.
%   Every other cell takes the minimum of the three candidate moves, so the
%   values are treated as costs.
% No direction matrix is kept here: BacktrackAlignment recovers the path by
% re-testing the three moves against M.
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, Build the matrix}
\begin{algorithmic}[1]
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
    \State $M = $ Array($m+1$, $n+1$)
    \State $M[0][0] = 0$
    % First column: prefix of S1 against the empty prefix of S2.
    \For {($i = 1$; $i < m+1$; $i++$)}
        \State $M[i][0] = M[i-1][0] + \mathit{gap\_penalty}$
    \EndFor
    % First row: empty prefix of S1 against a prefix of S2.
    \For {($j = 1$; $j < n+1$; $j++$)}
        \State $M[0][j] = M[0][j-1] + \mathit{gap\_penalty}$
    \EndFor
    \For {($i = 1$; $i < m+1$; $i++$)}
        \For {($j = 1$; $j < n+1$; $j++$)}
            \State $\mathit{delete} = M[i-1][j] + \mathit{gap\_penalty}$
            \State $\mathit{insert} = M[i][j-1] + \mathit{gap\_penalty}$
            \State $\mathit{substitute} = M[i-1][j-1] + \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$
            \State $M[i][j] = \min \{\mathit{substitute}, \mathit{insert}, \mathit{delete}\}$
        \EndFor
    \EndFor
\EndProcedure
\end{algorithmic}
\end{algorithm}
|
|
|
|
% Needleman-Wunsch, second step: rebuild one alignment from the filled matrix.
% M is not a parameter: it is the matrix written by FillMatrix, which the
% driver statements at the end of this listing call first.
% Starting from M[m][n], each step tests which move can explain M[i][j]:
%   substitution: M[i-1][j-1] + sub(S1[i-1], S2[j-1]) = M[i][j]
%   deletion:     M[i-1][j]   + gap_penalty           = M[i][j]
%   insertion:    otherwise (M[i][j-1] + gap_penalty)
% Columns are inserted at position 0 because the walk goes from the end of
% the sequences to their beginning.
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
\begin{algorithmic}[1]
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
    \State $\mathit{alignment} = \mathit{LinkedList}$
    \State $i = m$
    \State $j = n$
    \While{$i > 0$ and $j > 0$}
        \If {$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
            \State $\mathit{tuple} = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
            \State $i--$
            \State $j--$
        \ElsIf {$M[i-1][j] = M[i][j] - \mathit{gap\_penalty}$}
            % Deletion: the cell above explains M[i][j], so only S1 advances.
            \State $\mathit{tuple} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
            \State $i--$
        \Else
            \State $\mathit{tuple} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
            \State $j--$
        \EndIf
        \State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
    \EndWhile
    % Leftover prefix of S1: only gaps can face it.
    \While{$i > 0$}
        \State $\mathit{tuple} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
        \State $i--$
        \State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
    \EndWhile
    % Leftover prefix of S2: only gaps can face it.
    \While{$j > 0$}
        \State $\mathit{tuple} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
        \State $j--$
        \State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
    \EndWhile
    \State \Return $\mathit{alignment}$
\EndProcedure
% Driver: fill the matrix, then rebuild the alignment.
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
\end{algorithmic}
\end{algorithm}
|
|
|
|
|
|
% Recursive reconstruction of a single alignment from a filled matrix M.
% BacktrackRecurse(S1, S2, M, i, j) returns the alignment of the prefixes
% S1[0..i-1] and S2[0..j-1] as a left-to-right list of columns; [] when both
% prefixes are empty.
% The recursive call yields the alignment of the shorter prefixes, so the
% column of the current cell is appended on the right (z \circ column).
% When several moves explain M[i][j], the substitution is preferred, then the
% deletion, then the insertion.
% Backtrack is the entry point and starts the recursion at cell (m, n).
\begin{algorithm}
\caption{Backtrack a single alignment in a recursive way}
\begin{algorithmic}[1]
\Function{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
    \If {$i > 0$ and $j > 0$}
        \If {$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
            \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
            \State $z = z \circ \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
        \ElsIf {$M[i-1][j] + \mathit{gap\_penalty} = M[i][j]$}
            \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
            \State $z = z \circ \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
        \Else
            \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
            \State $z = z \circ \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
        \EndIf
    \ElsIf {$i > 0$}
        % S2 is exhausted: the rest of S1 faces gaps.
        \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
        \State $z = z \circ \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
    \ElsIf {$j > 0$}
        % S1 is exhausted: the rest of S2 faces gaps.
        \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
        \State $z = z \circ \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
    \Else
        % Base case: both prefixes are empty.
        \State \Return []
    \EndIf
    \State \Return $z$
\EndFunction
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
    \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
\EndFunction
\end{algorithmic}
\end{algorithm}
|
|
|
|
% Recursive enumeration of every alignment that the matrix M can explain.
% BacktrackRecurse(S1, S2, M, i, j, z) walks from cell (i, j) towards (0, 0);
% z is the part of the alignment already built, covering the symbols to the
% right of the current cell, so each new column is prepended (value \circ z).
% The three tests are independent \If blocks, not an \ElsIf chain: every move
% that explains M[i][j] is followed, which is what yields all the alignments.
% Each branch builds its own z' and leaves z untouched for the next branch.
% An alignment is printed whenever the walk reaches cell (0, 0).
% Backtrack is the entry point and starts at cell (m, n) with an empty z.
\begin{algorithm}
\caption{Backtrack all the optimum alignments in a recursive way}
\begin{algorithmic}[1]
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$}
    \If {$i > 0$ and $j > 0$}
        \If {$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
            \State $\mathit{value} = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
            \State $z' = \mathit{value} \circ z$
            \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
        \EndIf
        \If {$M[i-1][j] + \mathit{gap\_penalty} = M[i][j]$}
            \State $\mathit{value} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
            \State $z' = \mathit{value} \circ z$
            \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
        \EndIf
        \If {$M[i][j-1] + \mathit{gap\_penalty} = M[i][j]$}
            \State $\mathit{value} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
            \State $z' = \mathit{value} \circ z$
            \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
        \EndIf
    \ElsIf {$i > 0$}
        % S2 is exhausted: the rest of S1 faces gaps.
        \State $\mathit{value} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
        \State $z' = \mathit{value} \circ z$
        \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
    \ElsIf {$j > 0$}
        % S1 is exhausted: the rest of S2 faces gaps.
        \State $\mathit{value} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
        \State $z' = \mathit{value} \circ z$
        \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
    \Else
        % Cell (0, 0) reached: z is a complete alignment.
        \State \Call{print}{$z$}
    \EndIf
\EndProcedure
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
    % Nothing is returned: the alignments are printed by the recursion.
    \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
\EndProcedure
\end{algorithmic}
\end{algorithm}
|
|
|
|
|
|
|
|
\begin{figure}
|
|
\centering
|
|
\includegraphics{figures/part2/needle.pdf}
|
|
\caption{Needleman-Wunsch global alignment matrix with an example of optimal path.}
|
|
\end{figure}
|