\chapter{Sequence alignment}
\iffalse
2024-03-25 10:38:20 +01:00
\begin { algorithm}
\caption { Needleman-Wunsch Algorithm}
\begin { algorithmic} [1]
\Procedure { FillMatrix} { $ S _ { 1 } $ : Array($ m $ ), $ S _ { 2 } $ : Array($ n $ )}
\Comment { $ sub ( a, b ) $ is the substitution score, $ del ( a ) $ and $ ins ( a ) $ are the deletion and insertion penalty, in regard with the reference $ S _ { 1 } $ sequence}
\State $ M = $ Array($ m + 1 $ , $ n + 1 $ )
\Comment { Initialize the matrix first column and first row}
\State $ P = $ Array($ m $ , $ n $ ) \Comment { Store the direction of the cell we chose to build the next cell up on.}
2024-03-25 12:04:13 +01:00
\State $ M [ 0 ] [ 0 ] = 0 $
\For { ($ i = 1 $ ; $ i < m + 1 $ ; $ i + + $ )}
\State $ M [ i ] [ 0 ] = M [ i - 1 ] [ 0 ] + gap \_ penalty $
2024-03-25 10:38:20 +01:00
\EndFor
2024-03-25 12:04:13 +01:00
\For { ($ j = 1 $ ; $ j < n + 1 $ ; $ j + + $ )}
\State $ M [ 0 ] [ j ] = M [ 0 ] [ j - 1 ] + gap \_ penalty $
2024-03-25 10:38:20 +01:00
\EndFor
\Comment { Fill the remaining matrix}
\For { ($ i = 1 $ ; $ i < m + 1 $ ; $ i + + $ )}
\For { ($ j = 1 $ ; $ j < n + 1 $ ; $ j + + $ )}
2024-03-25 12:04:13 +01:00
\State $ delete = M [ i - 1 ] [ j ] + gap \_ penalty $
\State $ insert = M [ i ] [ j - 1 ] + gap \_ penalty $
2024-03-25 10:38:20 +01:00
\State $ substitute = M [ i - 1 ] [ j - 1 ] + sub ( S _ { 1 } [ i - 1 ] , S _ { 2 } [ j - 1 ] ) $
2024-03-25 12:04:13 +01:00
\State $ choice = \min \{ delete, insert, substitute \} $
2024-03-25 10:38:20 +01:00
\If { $ substitute = choice $ }
\State $ P [ i - 1 ] [ j - 1 ] = ' \nwarrow ' $
2024-03-25 12:04:13 +01:00
\ElsIf{$delete = choice$}
\State $P[i-1][j-1] = '\uparrow'$
\Else
\State $P[i-1][j-1] = '\leftarrow'$
2024-03-25 10:38:20 +01:00
\EndIf
\State $ M [ i ] [ j ] = choice $
\EndFor
\EndFor
\EndProcedure
2024-03-25 12:04:13 +01:00
\end { algorithmic}
\end { algorithm}
\begin { algorithm}
\caption { Needleman-Wunsch Algorithm (Backtrack)}
\begin { algorithmic} [1]
2024-03-25 10:38:20 +01:00
\Procedure { ShowAlignment} { $ S _ { 1 } $ : Array($ m $ ), $ S _ { 2 } $ : Array($ n $ )}
\State $ extend _ { 1 } = '' $
\State $ extend _ { 2 } = '' $
\State $ i = m $
\State $ j = n $
\While { $ i > 0 $ and $ j > 0 $ }
\If { $ P [ i - 1 ] [ j - 1 ] = ' \nwarrow ' $ }
\State $ extend _ { 1 } = S _ { 1 } [ i - 1 ] \circ extend _ { 1 } $
\State $ extend _ { 2 } = S _ { 2 } [ j - 1 ] \circ extend _ { 2 } $
\State $ i - - $
\State $ j - - $
\ElsIf { $ P [ i - 1 ] [ j - 1 ] = ' \uparrow ' $ }
\State $ extend _ { 1 } = S _ { 1 } [ i - 1 ] \circ extend _ { 1 } $
2024-03-25 12:04:13 +01:00
\State $ extend _ { 2 } = ' - ' \circ extend _ { 2 } $
2024-03-25 10:38:20 +01:00
\State $ i - - $
\Else
2024-03-25 12:04:13 +01:00
\State $ extend _ { 1 } = ' - ' \circ extend _ { 1 } $
2024-03-25 10:38:20 +01:00
\State $ extend _ { 2 } = S _ { 2 } [ j - 1 ] \circ extend _ { 2 } $
\State $ j - - $
\EndIf
\EndWhile
2024-03-25 12:04:13 +01:00
\While { $ i > 0 $ }
\State $ extend _ { 1 } = S _ { 1 } [ i - 1 ] \circ extend _ { 1 } $
\State $ extend _ { 2 } = ' - ' \circ extend _ { 2 } $
\State $i--$
\EndWhile
\While { $ j > 0 $ }
\State $ extend _ { 1 } = ' - ' \circ extend _ { 1 } $
\State $ extend _ { 2 } = S _ { 2 } [ j - 1 ] \circ extend _ { 2 } $
\State $ j - - $
\EndWhile
2024-03-25 10:38:20 +01:00
\State \Call { print} { $ extend _ { 1 } $ }
\State \Call { print} { $ extend _ { 2 } $ }
\EndProcedure
\State \Call { FillMatrix} { $ S _ { 1 } $ , $ S _ { 2 } $ }
2024-03-25 12:04:13 +01:00
\State \Call { ShowAlignment} { $ S _ { 1 } $ , $ S _ { 2 } $ }
\end { algorithmic}
2024-04-02 14:51:44 +02:00
\end { algorithm}
\fi
% Needleman-Wunsch, step 1: fill the (m+1) x (n+1) matrix M.
%   * M[0][0] is 0; the first column and the first row accumulate one
%     gap penalty per step.
%   * Every other cell is the minimum over its three predecessors (diagonal
%     plus substitution score, left and upper neighbour plus gap penalty).
%     A minimum is taken, so sub(.,.) and gap_penalty act as costs.
% No direction matrix is kept: the backtracking algorithms of this chapter
% re-derive the chosen predecessor from the values stored in M.
\begin{algorithm}
  \caption{Needleman-Wunsch Algorithm, Build the matrix}
  \begin{algorithmic}[1]
    \Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
      \State $M = $ Array($m+1$, $n+1$)
      \State $M[0][0] = 0$
      % First column: only symbols of S_1 consumed so far.
      \For{($i = 1$; $i < m+1$; $i++$)}
        \State $M[i][0] = M[i-1][0] + \mathit{gap\_penalty}$
      \EndFor
      % First row: only symbols of S_2 consumed so far.
      \For{($j = 1$; $j < n+1$; $j++$)}
        \State $M[0][j] = M[0][j-1] + \mathit{gap\_penalty}$
      \EndFor
      % Interior cells; S_1 and S_2 are 0-indexed, hence the -1 offsets.
      \For{($i = 1$; $i < m+1$; $i++$)}
        \For{($j = 1$; $j < n+1$; $j++$)}
          \State $\mathit{delete} = M[i-1][j] + \mathit{gap\_penalty}$
          \State $\mathit{insert} = M[i][j-1] + \mathit{gap\_penalty}$
          \State $\mathit{substitute} = M[i-1][j-1] + \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$
          \State $M[i][j] = \min\{\mathit{substitute}, \mathit{insert}, \mathit{delete}\}$
        \EndFor
      \EndFor
    \EndProcedure
  \end{algorithmic}
\end{algorithm}
% Needleman-Wunsch, step 2: rebuild one alignment from the filled matrix M.
% Starting at cell (m, n), each iteration checks which predecessor produced
% M[i][j], emits the matching column, and moves to that predecessor:
%   diagonal  -> (S_1[i-1], S_2[j-1]),  i and j decrease
%   upper     -> (S_1[i-1], epsilon),   i decreases
%   left      -> (epsilon,  S_2[j-1]),  j decreases
% Columns are inserted at position 0 because the walk runs from the end of
% the sequences towards their start. The two trailing loops consume whatever
% is left of one sequence once the other one is exhausted.
\begin{algorithm}
  \caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
  \begin{algorithmic}[1]
    \Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
      \State $\mathit{alignment} = \mathit{LinkedList}$
      \State $i = m$
      \State $j = n$
      \While{$i > 0$ and $j > 0$}
        \If{$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
          \State $\mathit{tuple} = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
          \State $i--$
          \State $j--$
        % This branch consumes S_1[i-1], so the predecessor is the upper
        % cell M[i-1][j] (same test as in the recursive variants).
        \ElsIf{$M[i-1][j] = M[i][j] - \mathit{gap\_penalty}$}
          \State $\mathit{tuple} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
          \State $i--$
        \Else
          \State $\mathit{tuple} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
          \State $j--$
        \EndIf
        \State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
      \EndWhile
      \While{$i > 0$}
        \State $\mathit{tuple} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
        \State $i--$
        \State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
      \EndWhile
      \While{$j > 0$}
        \State $\mathit{tuple} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
        \State $j--$
        \State \Call{Insert}{0, $\mathit{alignment}$, $\mathit{tuple}$}
      \EndWhile
      \State \Return $\mathit{alignment}$
    \EndProcedure
    % Driver: fill the matrix first, then walk it backwards.
    \State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
    \State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
  \end{algorithmic}
\end{algorithm}
% Recursive reconstruction of a single alignment.
% BacktrackRecurse(i, j) returns the alignment of the first i symbols of S_1
% with the first j symbols of S_2; S_1, S_2 and the filled matrix M are
% shared inputs (see \Require), so only the cell coordinates are passed.
% Each call first recurses on the predecessor cell, then appends the column
% for the current cell: the recursive result covers the prefixes, so the
% new column belongs at the end. Cell (0, 0) yields the empty alignment.
\begin{algorithm}
  \caption{Backtrack a single alignment in a recursive way}
  \begin{algorithmic}[1]
    \Require $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)
    \Function{BacktrackRecurse}{$i$, $j$}
      \If{$i > 0$ and $j > 0$}
        \If{$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
          \State $z = $ \Call{BacktrackRecurse}{$i-1$, $j-1$}
          \State $z = z \circ \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
        \ElsIf{$M[i-1][j] + \mathit{gap\_penalty} = M[i][j]$}
          \State $z = $ \Call{BacktrackRecurse}{$i-1$, $j$}
          \State $z = z \circ \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
        \Else
          \State $z = $ \Call{BacktrackRecurse}{$i$, $j-1$}
          \State $z = z \circ \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
        \EndIf
      \ElsIf{$i > 0$}
        % S_2 is exhausted: the rest of S_1 is aligned against gaps.
        \State $z = $ \Call{BacktrackRecurse}{$i-1$, $j$}
        \State $z = z \circ \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
      \ElsIf{$j > 0$}
        % S_1 is exhausted: the rest of S_2 is aligned against gaps.
        \State $z = $ \Call{BacktrackRecurse}{$i$, $j-1$}
        \State $z = z \circ \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
      \Else
        % i = 0 and j = 0: nothing left to align.
        \State \Return []
      \EndIf
      \State \Return $z$
    \EndFunction
    \Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
      % Start from the bottom-right cell of the matrix.
      \State \Return \Call{BacktrackRecurse}{$m$, $n$}
    \EndFunction
  \end{algorithmic}
\end{algorithm}
% Recursive enumeration of every optimal alignment.
% z is the part of the alignment already built for the suffixes that follow
% cell (i, j); it starts empty in Backtrack and is printed when cell (0, 0)
% is reached. Unlike the single-alignment variant, the three predecessor
% tests are independent \If blocks (not \ElsIf): every predecessor that
% explains M[i][j] is explored, each with its own extended copy z' of z.
% Columns are prepended because the walk runs from the end of the sequences
% towards their start.
\begin{algorithm}
  \caption{Backtrack all the optimum alignments in a recursive way}
  \begin{algorithmic}[1]
    \Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$}
      \If{$i > 0$ and $j > 0$}
        \If{$M[i-1][j-1] = M[i][j] - \mathit{sub}(S_{1}[i-1], S_{2}[j-1])$}
          \State $\mathit{value} = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
          \State $z' = \mathit{value} \circ z$
          \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
        \EndIf
        \If{$M[i-1][j] + \mathit{gap\_penalty} = M[i][j]$}
          \State $\mathit{value} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
          \State $z' = \mathit{value} \circ z$
          \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
        \EndIf
        \If{$M[i][j-1] + \mathit{gap\_penalty} = M[i][j]$}
          \State $\mathit{value} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
          \State $z' = \mathit{value} \circ z$
          \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
        \EndIf
      \ElsIf{$i > 0$}
        % S_2 is exhausted: the rest of S_1 is aligned against gaps.
        \State $\mathit{value} = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
        \State $z' = \mathit{value} \circ z$
        \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
      \ElsIf{$j > 0$}
        % S_1 is exhausted: the rest of S_2 is aligned against gaps.
        \State $\mathit{value} = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
        \State $z' = \mathit{value} \circ z$
        \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
      \Else
        % i = 0 and j = 0: z is now a complete alignment.
        \State \Call{print}{$z$}
      \EndIf
    \EndProcedure
    \Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
      % Start from the bottom-right cell with an empty partial alignment.
      \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
    \EndProcedure
  \end{algorithmic}
\end{algorithm}
\begin{figure}
  \centering
  \includegraphics{figures/part2/needle.pdf}
  \caption{Needleman-Wunsch global alignment matrix with an example of an optimal path.}
\end{figure}