% sequence-algorithms/content/chapters/part1/4.tex

% 114 lines
% 3.1 KiB
% TeX

\chapter{Longest common subsequence}
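Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
In this case a longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$, of length $4$; it is not unique, since $\text{TCAT}$ is another common subsequence of the same length.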
% Algorithm: fill the (n+1) x (m+1) table M in which M[i][j] is computed from
% the three neighbouring cells M[i-1][j-1], M[i-1][j] and M[i][j-1].
% Row 0 and column 0 are set to 0 (empty prefixes).
% Input:  S_1 (length n) and S_2 (length m), both 0-indexed.
% Output: the filled table M; M[n][m] is the value for the full sequences.
\begin{algorithm}
  \caption{Construct a longest common subsequence matrix}
  \begin{algorithmic}[1]
    \Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
      % First index ranges over S_1 (0..n), second over S_2 (0..m).
      \State $M \gets $ Array($n+1$, $m+1$)
      \For{($i = 0$; $i < n+1$; $i++$)}
        \For{($j = 0$; $j < m+1$; $j++$)}
          \If{$i = 0$ or $j = 0$}
            \State $M[i][j] \gets 0$
          \Else
            % Table index i corresponds to the symbol S_1[i-1] (0-indexed arrays).
            \If{$S_{1}[i-1] = S_{2}[j-1]$}
              \State $\mathit{match} \gets M[i-1][j-1] + 1$
            \Else
              \State $\mathit{match} \gets M[i-1][j-1]$
            \EndIf
            \State $\mathit{gap}_{1} \gets M[i-1][j]$
            \State $\mathit{gap}_{2} \gets M[i][j-1]$
            \State $M[i][j] \gets \max \{ \mathit{match}, \mathit{gap}_{1}, \mathit{gap}_{2}\}$
          \EndIf
        \EndFor
      \EndFor
      \State \Return $M$
    \EndFunction
  \end{algorithmic}
\end{algorithm}
% Algorithm: same table M as LCSQ_Matrix, plus a table P that records, for each
% cell (i, j) with i, j >= 1, which neighbour the value was taken from:
%   `nwarrow'   : (i-1, j-1), the symbols S_1[i-1] and S_2[j-1] match;
%   `leftarrow' : (i, j-1);
%   `uparrow'   : (i-1, j).
% Input:  S_1 (length n) and S_2 (length m), both 0-indexed.
% Output: the pair (M, P). Row 0 and column 0 of P are never written.
\begin{algorithm}
  \caption{Construct a longest common subsequence matrix keeping the path in memory}
  \begin{algorithmic}[1]
    \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
      % First index ranges over S_1 (0..n), second over S_2 (0..m).
      \State $M \gets $ Array($n+1$, $m+1$)
      \State $P \gets $ Array($n+1$, $m+1$)
      % Boundary: an empty prefix has no common subsequence with anything.
      \For{($i = 0$; $i < n+1$; $i++$)}
        \State $M[i][0] \gets 0$
      \EndFor
      \For{($j = 0$; $j < m+1$; $j++$)}
        \State $M[0][j] \gets 0$
      \EndFor
      % Both loops start at 1, so no boundary test is needed inside them.
      \For{($i = 1$; $i < n+1$; $i++$)}
        \For{($j = 1$; $j < m+1$; $j++$)}
          \If{$S_{1}[i-1] = S_{2}[j-1]$}
            \State $M[i][j] \gets M[i-1][j-1] + 1$
            \State $P[i][j] \gets \text{`$\nwarrow$'}$
          \ElsIf{$M[i][j-1] \geq M[i-1][j]$}
            \State $M[i][j] \gets M[i][j-1]$
            \State $P[i][j] \gets \text{`$\leftarrow$'}$
          \Else
            % (i-1, j) is the cell above, consistent with `nwarrow' = (i-1, j-1).
            \State $M[i][j] \gets M[i-1][j]$
            \State $P[i][j] \gets \text{`$\uparrow$'}$
          \EndIf
        \EndFor
      \EndFor
      \State \Return $M, P$
    \EndFunction
  \end{algorithmic}
\end{algorithm}
% Algorithm: rebuild one longest common subsequence by walking the path table P
% from cell (n, m) back towards row 0 / column 0.
% Input:  S_1 (length n) and S_2 (length m), both 0-indexed.
% Output: array L of length M[n][m] holding the subsequence in left-to-right order.
\begin{algorithm}
  \caption{Backtrack the longest common subsequence}
  \begin{algorithmic}[1]
    \Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
      % LCSQ_Matrix_Path is the variant that returns both M and P.
      \State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
      \State $L \gets $ Array($M[n][m]$)
      % The walk visits matches from last to first, so L is filled from its end.
      \State $k \gets M[n][m] - 1$
      \State $i \gets n$
      \State $j \gets m$
      \While{$i > 0$ and $j > 0$}
        \If{$P[i][j] = \text{`$\nwarrow$'}$}
          % Table index i corresponds to the symbol S_1[i-1] (0-indexed array).
          \State $L[k] \gets S_{1}[i-1]$
          \State $i--$
          \State $j--$
          \State $k--$
        \ElsIf{$P[i][j] = \text{`$\leftarrow$'}$}
          \State $j--$
        \Else
          \State $i--$
        \EndIf
      \EndWhile
      \State \Return $L$
    \EndFunction
  \end{algorithmic}
\end{algorithm}
% Algorithm: print one longest common subsequence by recursing along the path
% table P from cell (n, m). Each matched symbol is printed after the recursive
% call returns, so the output appears in left-to-right order.
% Input:  S_1 (length n) and S_2 (length m), both 0-indexed.
% Output: none returned; the subsequence is printed symbol by symbol.
\begin{algorithm}
  \caption{Recursive reconstruction of the longest common subsequence}
  \begin{algorithmic}[1]
    \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
      % LCSQ_Matrix_Path is the variant that returns both M and P.
      \State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
      \State $i \gets n$
      \State $j \gets m$
      \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
    \EndProcedure
    \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
      % Base case: row 0 / column 0 of P hold no direction, so stop there.
      \If{$i = 0$ or $j = 0$}
        \State \Return
      \ElsIf{$P[i][j] = \text{`$\nwarrow$'}$}
        % Table index i corresponds to the symbol S_1[i-1] (0-indexed array).
        \State $l \gets S_{1}[i-1]$
        \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
        \State \texttt{print}($l$)
      \ElsIf{$P[i][j] = \text{`$\leftarrow$'}$}
        \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
      \Else
        \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
      \EndIf
    \EndProcedure
  \end{algorithmic}
\end{algorithm}