refactor: Improve sectioning
This commit is contained in:
parent
e64a1d711a
commit
c9b2710f98
|
@ -1,5 +1,5 @@
|
||||||
build/
|
build/
|
||||||
.bak*
|
**/.bak*
|
||||||
.auctex-auto
|
.auctex-auto
|
||||||
|
|
||||||
## Core latex/pdflatex auxiliary files:
|
## Core latex/pdflatex auxiliary files:
|
||||||
|
|
|
@ -1,63 +1 @@
|
||||||
\chapter{Back to basics}
|
\part{Motifs algorithms}
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Search an element in an array}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
|
||||||
\For {($i = 0$; $i < n$; $i++$)}
|
|
||||||
\If {$A[i] = E$}
|
|
||||||
\State \Return \True
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\State \Return \False
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Search an element in an array using a while loop}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
|
||||||
\State $i \gets 0$
|
|
||||||
\While {$i < n$}
|
|
||||||
\If {$A[i] = E$}
|
|
||||||
\State \Return \True
|
|
||||||
\EndIf
|
|
||||||
\State $i \gets i + 1$
|
|
||||||
\EndWhile
|
|
||||||
\State
|
|
||||||
\Return
|
|
||||||
\False
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Search an element in an array using a while loop (bis)}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
|
||||||
% \Comment{Version ``preffered" by the professor}
|
|
||||||
\State $i \gets 0$
|
|
||||||
\While {$i < n$ and $A[i] \neq E$}
|
|
||||||
\State $i \gets i + 1$
|
|
||||||
\EndWhile
|
|
||||||
\If {$i = n$}
|
|
||||||
\State
|
|
||||||
\Return \False \Else \State \Return \True \EndIf
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Count the occurrences of an element in an array}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{Search}{$A$: Array($n$), $E$: element} \State $c \gets 0$
|
|
||||||
\For{($i = 0$; $i < n$; $i++$)}
|
|
||||||
\If {$A[i] = E$}
|
|
||||||
\State $c \gets c + 1$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\State \Return $c$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
\chapter{Motif}
|
\chapter{Motif}
|
||||||
|
|
||||||
|
\section{Searching a substring in a string}
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Brute-force search of a motif in a sequence}
|
\caption{Brute-force search of a motif in a sequence}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
|
@ -72,3 +74,369 @@
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
\section{Using matrices to search motifs}
|
||||||
|
|
||||||
|
Let $S_{1}$ and $S_{2}$ be two sequences.
|
||||||
|
|
||||||
|
$S_{1} = $ ACGUUCC
|
||||||
|
$S_{2} = $ GUU
|
||||||
|
|
||||||
|
\begin{table}
|
||||||
|
\centering
|
||||||
|
\begin{tabular}{c|ccccccc}
|
||||||
|
& A & C & G & U & U & C & C \\
|
||||||
|
\hline
|
||||||
|
G & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
|
||||||
|
U & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\
|
||||||
|
U & 0 & 0 & 0 & 1 & 1 & 0 & 0
|
||||||
|
\end{tabular}
|
||||||
|
\caption{Comparison matrix}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
|
||||||
|
Let $n = |S_{1}|$, $m = |S_{2}|$
|
||||||
|
The complexity of this algorithm is $\mathcal{O}(n \cdot m)$ to build the matrix; it also requires finding the diagonals, and is thus a bit less efficient than \autoref{alg:naive-motif-matching}.
|
||||||
|
|
||||||
|
|
||||||
|
To find repetitions, we can use a comparison matrix with a single sequence against itself. A repetition would appear as a diagonal of ones, not on the main diagonal.
|
||||||
|
|
||||||
|
Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
|
||||||
|
|
||||||
|
|
||||||
|
\begin{table}
|
||||||
|
\includegraphics{./figures/part1/comparison_matrix_repetitions.pdf}
|
||||||
|
\caption{Comparison matrix for $\mathit{seq} = $ ``ACGUUACGUUGUU''}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Construct a comparison matrix}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{ComparisonMatrix}{$S$: Array($n$)}
|
||||||
|
\State $M \gets $ Array($n$, $n$)
|
||||||
|
\For{($i = 0$; $i < n$; $i++$)}
|
||||||
|
\For{$j = 0$; $j < n$; $j++$}
|
||||||
|
\If {$S[i] = S[j]$}
|
||||||
|
\State $M[i][j] = 1$
|
||||||
|
\Else
|
||||||
|
\State $M[i][j] = 0$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\State \Return $M$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Construct the top half of a comparison matrix}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{ComparisonMatrix}{$S$: Array($n$)}
|
||||||
|
\State $M \gets$ Array($n$,$n$)
|
||||||
|
\For{($i = 0$; $i < n$; $i++$)}
|
||||||
|
\For{($j = i$; $j < n$; $j++$)}
|
||||||
|
\If {$S[i] = S[j]$}
|
||||||
|
\State $M[i][j] = 1$
|
||||||
|
\Else
|
||||||
|
\State $M[i][j] = 0$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\State \Return $M$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Find repetitions (with a set of visited segments)}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{FindRepetions}{$S$: Array($n$)}
|
||||||
|
\Returns{A list of start and end positions for repeated sequences}
|
||||||
|
\State $M = $ \Call{ComparisonMatrix}{S}
|
||||||
|
\State $pos = \{\}$
|
||||||
|
\State $visited = \{\}$
|
||||||
|
\For {($i_{start} = 0$; $i_{start} < n$; $i_{start}++$)}
|
||||||
|
\For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)}
|
||||||
|
\If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$}
|
||||||
|
\State $i = i_{start}$
|
||||||
|
\State $j = j_{start}$
|
||||||
|
\While {$M[i][j] = 1$}
|
||||||
|
\State $i++$
|
||||||
|
\State $j++$
|
||||||
|
\State $visited = visited \cup \{(i, j)\}$
|
||||||
|
\EndWhile
|
||||||
|
\State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Find repetitions with an exploration of diagonals}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{FindRepetions}{$S$: Array($n$)}
|
||||||
|
\Returns{A list of start and end positions for repeated sequences}
|
||||||
|
\State $M$ = \Call{ComparisonMatrix}{S}
|
||||||
|
\State $pos = \{\}$
|
||||||
|
\For {($diag = 1$; $diag < n$; $diag++$)}
|
||||||
|
\State $j = diag$
|
||||||
|
\State $i = 0$
|
||||||
|
\While {$i < n$ and $j < n$}
|
||||||
|
\If {$M[i][j] = 1$}
|
||||||
|
\State $i_{start} = i$
|
||||||
|
\State $j_{start} = j$
|
||||||
|
\While {$i < n$ and $j < n$ and $M[i][j] = 1$}
|
||||||
|
\State $i++$
|
||||||
|
\State $j++$
|
||||||
|
\EndWhile
|
||||||
|
\State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$
|
||||||
|
\EndIf
|
||||||
|
\State $i++$
|
||||||
|
\State $j++$
|
||||||
|
\State
|
||||||
|
\EndWhile
|
||||||
|
\EndFor
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Find repetitions with an exploration of diagonals, without nested while}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{FindRepetions}{$S$: Array($n$)}
|
||||||
|
\Returns{A list of start positions for repeated sequences and match length}
|
||||||
|
\State $M$ = \Call{ComparisonMatrix}{S}
|
||||||
|
\State $pos = \{\}$
|
||||||
|
\For {($diag = 1$; $diag < n$; $diag++$)}
|
||||||
|
\State $j = diag$
|
||||||
|
\State $i = 0$
|
||||||
|
\State $l = 0$
|
||||||
|
\While {$i < n$ and $j < n$}
|
||||||
|
\If {$M[i][j] = 1$}
|
||||||
|
\State $l++$
|
||||||
|
\Else
|
||||||
|
\If {$l > 0$}
|
||||||
|
\State $pos = pos \cup \{(i-l,j-l,l)\}$
|
||||||
|
\State $l = 0$
|
||||||
|
\EndIf
|
||||||
|
\EndIf
|
||||||
|
\State $i++$
|
||||||
|
\State $j++$
|
||||||
|
\EndWhile
|
||||||
|
\If {$l > 0$}
|
||||||
|
\State $pos = pos \cup \{(i-l,j-l,l)\}$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\State \Return $pos$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Find repetitions}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{FindRepetions}{$S$: Array($n$)}
|
||||||
|
\Returns{A list of start and end positions for repeated sequences}
|
||||||
|
\State $M$ = \Call{ComparisonMatrix}{S}
|
||||||
|
\State $pos = \{\}$
|
||||||
|
\For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$}
|
||||||
|
\For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$}
|
||||||
|
\If{$M[i_{start}][j_{start}] = 1$}
|
||||||
|
\State $i = i_{start}$
|
||||||
|
\State $j = j_{start}$
|
||||||
|
\While {$M[i][j] = 1$}
|
||||||
|
\State $M[i][j] = 0$ \Comment{Ensure that the segment is not explored again}
|
||||||
|
\State $i++$
|
||||||
|
\State $j++$
|
||||||
|
\EndWhile
|
||||||
|
\State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Automata}
|
||||||
|
|
||||||
|
|
||||||
|
An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$
|
||||||
|
\begin{itemize}
|
||||||
|
\item $S$ the set of states
|
||||||
|
\item $s_{0}$ the initial state
|
||||||
|
\item $T$ the set of terminal states
|
||||||
|
\item $\Sigma$ the alphabet
|
||||||
|
\item $f$ the transition function $f: (s_{1}, c) \to s_{2}$
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$
|
||||||
|
|
||||||
|
\begin{definition}[Deterministic automaton]
|
||||||
|
An automaton is deterministic if, for each pair $(p, a) \in S \times \Sigma$, there exists at most one state $q$ such that $f(p, a) = q$.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Complete automaton]
|
||||||
|
An automaton is complete if, for each pair $(p, a) \in S \times \Sigma$, there exists at least one state $q$ such that $f(p, a) = q$.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Check whether a word belongs to a language for which we have an automaton}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{WordInLanguage}{$W$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||||
|
\Returns{A Boolean valued to \True{} if the word is recognized by the language automaton}
|
||||||
|
\State $s \gets s_{0}$
|
||||||
|
\State $i \gets 0$
|
||||||
|
\While {$i < n$}
|
||||||
|
\State $a \gets W[i]$
|
||||||
|
\If {$\exists f(s, a)$}
|
||||||
|
\State $s \gets f(s, a)$
|
||||||
|
\Else
|
||||||
|
\State \Return \False
|
||||||
|
\EndIf
|
||||||
|
\State $i++$
|
||||||
|
\EndWhile
|
||||||
|
\If {$s \in T$}
|
||||||
|
\State \Return \True
|
||||||
|
\Else
|
||||||
|
\State \Return \False
|
||||||
|
\EndIf
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\subsection{Suffix Automaton}
|
||||||
|
|
||||||
|
Let $S = $ AACTACT
|
||||||
|
|
||||||
|
A suffix automaton recognizes all suffixes of a given sequence.
|
||||||
|
|
||||||
|
|
||||||
|
The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
|
||||||
|
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf}
|
||||||
|
\caption{Suffix automaton for $S = $ AACTACT}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
|
||||||
|
\caption{Suffix automaton for $S = $ TCATCATT}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Check if a sequence contains a motif in $\mathcal{O}(m)$, using a suffix automaton built from the sequence}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||||
|
\Returns{Boolean valued to \True{} if the motif is in the sequence}
|
||||||
|
\State $s \gets s_{0}$
|
||||||
|
\State $i \gets 0$
|
||||||
|
\While {$i < m$ and $\exists f(s, W[i])$}
|
||||||
|
\State $s \gets f(s, W[i])$
|
||||||
|
\State $i++$
|
||||||
|
\EndWhile
|
||||||
|
\If {$i = m$}
|
||||||
|
\State \Return \True
|
||||||
|
\Else
|
||||||
|
\State \Return \False
|
||||||
|
\EndIf
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton from the sequence is $\mathcal{O}(n)$ and searching the motif is $\mathcal{O}(m)$.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Automata for motif search}
|
||||||
|
|
||||||
|
Let $M$ be a motif $M = $ ACAT.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics{./figures/part1/motif_search_automaton.pdf}
|
||||||
|
\caption{Motif search automaton for $M = $ ACAT}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
The alphabet of the motif is the same as the alphabet of the sequence.
|
||||||
|
The search automaton is complete.
|
||||||
|
If there exists a letter $c$ in the sequence that is not
|
||||||
|
in the motif alphabet, we can make a virtual transition from
|
||||||
|
each state to the initial state whenever we encounter an unknown letter.
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Search a motif in a sequence with an automaton}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{SearchMotif}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
|
||||||
|
\Returns{A set of positions where the motif has been found}
|
||||||
|
\State $s \gets s_0$
|
||||||
|
\State $i \gets 0$
|
||||||
|
\State $pos \gets \{\}$
|
||||||
|
\While {$i < n$} % $\exists f(s, S[i])$ We assume $S$ and $P$ are formed on the same alphabet, so we could remove the second check, as $A$ is complete
|
||||||
|
\If {$s \in T$}
|
||||||
|
\State $pos \gets pos \cup \{ i - m \}$
|
||||||
|
\EndIf
|
||||||
|
\State $s \gets f(s, S[i])$
|
||||||
|
\State $i++$
|
||||||
|
\EndWhile
|
||||||
|
\State \Return $pos$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
|
||||||
|
\Returns{A Boolean valued to \True{} if the prefix of size $m-1$ of the motif has been found in the sequence}
|
||||||
|
\State $s \gets s_0$
|
||||||
|
\State $i \gets 0$
|
||||||
|
\State $T_{new} \gets \{\}$
|
||||||
|
\For {$s \in S$}
|
||||||
|
\For {$a \in \Sigma$}
|
||||||
|
\For {$t \in T$}
|
||||||
|
\If {$\exists f(s, a)$ and $f(s, a) = t$}
|
||||||
|
\State $T_{new} \gets T_{new} \cup \{s\}$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\EndFor
|
||||||
|
\While {$i < n$}
|
||||||
|
\If {$s \in T_{new}$}
|
||||||
|
\State \Return \True
|
||||||
|
\EndIf
|
||||||
|
\State $s \gets f(s, S[i])$
|
||||||
|
\State $i++$
|
||||||
|
\EndWhile
|
||||||
|
\State \Return \False
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$, knowing the sequence of the motif}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
|
||||||
|
\Returns{A Boolean valued to \True{} if the prefix of size $m-1$ of the motif has been found in the sequence}
|
||||||
|
\State $s \gets s_0$
|
||||||
|
\State $i \gets 0$
|
||||||
|
\While {$i < n$ and $f(s, P[m-1]) \notin T$}
|
||||||
|
\State $s \gets f(s, S[i])$
|
||||||
|
\State $i++$
|
||||||
|
\EndWhile
|
||||||
|
\If{$f(s, P[m-1]) \in T$}
|
||||||
|
\State \Return \True
|
||||||
|
\Else
|
||||||
|
\State \Return \False
|
||||||
|
\EndIf
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
|
@ -1,50 +1,25 @@
|
||||||
\chapter{Matrices}
|
\chapter{Longest common subsequence}
|
||||||
|
|
||||||
Let $S_{1}$ and $S_{2}$ be two sequences.
|
|
||||||
|
|
||||||
$S_{1} = $ ACGUUCC
|
|
||||||
$S_{2} = $ GUU
|
|
||||||
|
|
||||||
\begin{table}
|
|
||||||
\centering
|
|
||||||
\begin{tabular}{c|ccccccc}
|
|
||||||
& A & C & G & U & U & C & C \\
|
|
||||||
\hline
|
|
||||||
G & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
|
|
||||||
U & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\
|
|
||||||
U & 0 & 0 & 0 & 1 & 1 & 0 & 0
|
|
||||||
\end{tabular}
|
|
||||||
\caption{Comparison matrix}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
|
|
||||||
Let $n = |S_{1}|$, $m = |S_{2}|$
|
|
||||||
The complexity of this algorithm is $\mathcal{O}(n \cdot m)$ to build the matrix, and it requires also to find the diagonals and thus it is a bit less efficient than the \autoref{alg:naive-motif-matching}.
|
|
||||||
|
|
||||||
|
|
||||||
To find repetitions, we can use a comparison matrix with a single sequence against itself. A repetition would appear as a diagonal of ones, not on the main diagonal.
|
|
||||||
|
|
||||||
Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
|
|
||||||
|
|
||||||
|
|
||||||
\begin{table}
|
|
||||||
\includegraphics{./figures/part1/comparison_matrix_repetitions.pdf}
|
|
||||||
\caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"}
|
|
||||||
\end{table}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
|
||||||
|
In this case, the longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$.
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Construct a comparison matrix}
|
\caption{Construct a longest common subsequence matrix}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Function{ComparisonMatrix}{$S$: Array($n$)}
|
\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
\State $M \gets $ Array($n$, $n$)
|
\State $M \gets $ Array($n+1$, $m+1$)
|
||||||
\For{($i = 0$; $i < n$; $i++$)}
|
\For{($i = 0$; $i < n+1$; $i++$)}
|
||||||
\For{$j = 0$; $j < n$; $j++$}
|
\For{$j = 0$; $j < m+1$; $j++$}
|
||||||
\If {$S[i] = S[j]$}
|
\If {$i = 0$ or $j = 0$}
|
||||||
\State $M[i][j] = 1$
|
|
||||||
\Else
|
|
||||||
\State $M[i][j] = 0$
|
\State $M[i][j] = 0$
|
||||||
|
\Else
|
||||||
|
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
||||||
|
\State $match = M[i-1][j-1] + 1$
|
||||||
|
\Else
|
||||||
|
\State $match = M[i-1][j-1]$
|
||||||
|
\EndIf
|
||||||
|
\State $gap_{1} = M[i-1][j]$
|
||||||
|
\State $gap_{2} = M[i][j-1]$
|
||||||
|
\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
|
||||||
\EndIf
|
\EndIf
|
||||||
\EndFor
|
\EndFor
|
||||||
\EndFor
|
\EndFor
|
||||||
|
@ -54,228 +29,88 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Construct the top half of a comparison matrix}
|
\caption{Construct a longest common subsequence matrix keeping the path in memory}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Function{ComparisonMatrix}{$S$: Array($n$)}
|
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
\State $M \gets$ Array($n$,$n$)
|
\State $M \gets $ Array($n+1$, $m+1$)
|
||||||
\For{($i = 0$; $i < n$; $i++$)}
|
\State $P \gets $ Array($n+1$, $m+1$)
|
||||||
\For{j=i; j < n; j++}
|
\For {($i = 0$; $i < n+1$; $i++$)}
|
||||||
\If {S[i] = S[j]}
|
\State $M[i][0] \gets 0$
|
||||||
\State M[i][j] = 1
|
\EndFor
|
||||||
|
\For {($j = 0$; $j < m+1$; $j++$)}
|
||||||
|
\State $M[0][j] \gets 0$
|
||||||
|
\EndFor
|
||||||
|
\For{($i = 1$; $i < n+1$; $i++$)}
|
||||||
|
\For{($j = 1$; $j < m+1$; $j++$)}
|
||||||
|
\If {$i = 0$ or $j = 0$}
|
||||||
|
\State $M[i][j] = 0$
|
||||||
\Else
|
\Else
|
||||||
\State M[i][j] = 0
|
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
||||||
\EndIf
|
\State $M[i][j] \gets M[i-1][j-1] + 1$
|
||||||
\EndFor
|
\State $P[i][j] \gets '\nwarrow'$
|
||||||
\EndFor
|
\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
|
||||||
\State \Return M
|
\State $M[i][j] \gets M[i][j-1]$
|
||||||
\EndFunction
|
\State $P[i][j] \gets '\leftarrow'$
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Find repetitions (with a set of visited segments)}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{FindRepetions}{$S$: Array($n$)}
|
|
||||||
\Returns{A list of start and end positions for repeated sequences}
|
|
||||||
\State $M = $ \Call{ComparisonMatrix}{S}
|
|
||||||
\State $pos = \{\}$
|
|
||||||
\State $visited = \{\}$
|
|
||||||
\For {($i_{start} = 0$; $i_{start} < n$; $i_{start}++$)}
|
|
||||||
\For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)}
|
|
||||||
\If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$}
|
|
||||||
\State $i = i_{start}$
|
|
||||||
\State $j = j_{start}$
|
|
||||||
\While {$M[i][j] = 1$}
|
|
||||||
\State $i++$
|
|
||||||
\State $j++$
|
|
||||||
\State $visited = visited \cup \{(i, j)\}$
|
|
||||||
\EndWhile
|
|
||||||
\State $pos = pos \cup \{(i_{start}, i), (j_{start},j)\}$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Find repetitions with an exploration of diagonals}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{FindRepetions}{$S$: Array($n$)}
|
|
||||||
\Returns{A list of start and end positions for repeted sequences}
|
|
||||||
\State $M$ = \Call{ComparisonMatrix}{S}
|
|
||||||
\State $pos = \{\}$
|
|
||||||
\For {($diag = 1$; $diag < n$; $diag++$)}
|
|
||||||
\State $j = diag$
|
|
||||||
\State $i = 0$
|
|
||||||
\While {$i < n$ and $j < n$}
|
|
||||||
\If {$M[i][j] = 1$}
|
|
||||||
\State $i_{start} = i$
|
|
||||||
\State $j_{start} = j$
|
|
||||||
\While {$i < n$ and $j < n$ and $M[i][j] = 1$}
|
|
||||||
\State i++
|
|
||||||
\State j++
|
|
||||||
\EndWhile
|
|
||||||
\State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$
|
|
||||||
\EndIf
|
|
||||||
\State $i++$
|
|
||||||
\State $j++$
|
|
||||||
\State
|
|
||||||
\EndWhile
|
|
||||||
\EndFor
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Find repetitions with an exploration of diagonals, without nested while}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{FindRepetions}{$S$: Array($n$)}
|
|
||||||
\Returns{A list of start positions for repeted sequences and match length}
|
|
||||||
\State $M$ = \Call{ComparisonMatrix}{S}
|
|
||||||
\State $pos = \{\}$
|
|
||||||
\For {($diag = 1$; $diag < n$; $diag++$)}
|
|
||||||
\State $j = diag$
|
|
||||||
\State $i = 0$
|
|
||||||
\State $l = 0$
|
|
||||||
\While {$i < n$ and $j < n$}
|
|
||||||
\If {$M[i][j] = 1$}
|
|
||||||
\State $l++$
|
|
||||||
\Else
|
\Else
|
||||||
\If {$l > 0$}
|
\State $M[i][j] \gets M[i-1][j]$
|
||||||
\State $pos = pos \cup \{(i-l,j-l,l)\}$
|
\State $P[i][j] \gets '\downarrow'$
|
||||||
\State $l = 0$
|
|
||||||
\EndIf
|
\EndIf
|
||||||
\EndIf
|
\EndIf
|
||||||
\State $i++$
|
|
||||||
\State $j++$
|
|
||||||
\EndWhile
|
|
||||||
\If {$l > 0$}
|
|
||||||
\State $pos = pos \cup \{((i-l,j-l,l))\}$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\State \Return $pos$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Find repetitions}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{FindRepetions}{$S$: Array($n$)}
|
|
||||||
\Returns{A list of start and end positions for repeted sequences}
|
|
||||||
\State $M$ = \Call{ComparisonMatrix}{S}
|
|
||||||
\State $pos = \{\}$
|
|
||||||
\For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$}
|
|
||||||
\For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$}
|
|
||||||
\If{$M[i_{start}][j_{start}] = 1$}
|
|
||||||
\State $i = i_{start}$
|
|
||||||
\State $j = j_{start}$
|
|
||||||
\While {$M[i][j] = 1$}
|
|
||||||
\State $M[i][j] = 0$ \Comment{Ensure that the segment is not explored again}
|
|
||||||
\State $i++$
|
|
||||||
\State $j++$
|
|
||||||
\EndWhile
|
|
||||||
\State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
\EndFor
|
||||||
\EndFor
|
\EndFor
|
||||||
|
\State \Return $M, P$
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
\section{Automata}
|
|
||||||
|
|
||||||
|
|
||||||
An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$
|
|
||||||
\begin{itemize}
|
|
||||||
\item $S$ the set of states
|
|
||||||
\item $s_{0}$ the initial state
|
|
||||||
\item $T$ the set of terminal states
|
|
||||||
\item $\Sigma$ the alphabet
|
|
||||||
\item $f$ the transition function $f: (s_{1}, c) \to s_{2}$
|
|
||||||
\end{itemize}
|
|
||||||
|
|
||||||
\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$
|
|
||||||
|
|
||||||
\begin{definition}[Deterministic automaton]
|
|
||||||
An automaton is deterministic, if for each couple $(p, a) \in S \times \Sigma$ it exists at most a state $q$ such as $f(p, q) = q$
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{definition}[Complete automaton]
|
|
||||||
An automaton is complete, if for each couple $(p, a) \in S \times \Sigma$ it exists at least a state $q$ such as $f(p, q) = q$.
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Check wether a word belong to a language for which we have an automaton}
|
\caption{Backtrack the longest common subsequence}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Function{WordInLanguage}{$W$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
\Returns{A Boolean valued to \True{} if the word is recognized by the language automaton}
|
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
||||||
\State $s \gets s_{0}$
|
\State $L \gets $ Array($M[n][m]$)
|
||||||
\State $i \gets 0$
|
\State $k \gets 0$
|
||||||
\While {$i < n$}
|
\State $i \gets n$
|
||||||
\State $a \gets W[i]$
|
\State $j \gets m$
|
||||||
\If {$\exists f(s, a)$}
|
\While{$i > 0$ and $j > 0$}
|
||||||
\State $s \gets f(s, a)$
|
\If {$P[i][j] = '\nwarrow' $}
|
||||||
|
\State $L[k] \gets S_{1}[i-1]$
|
||||||
|
\State $i--$
|
||||||
|
\State $j--$
|
||||||
|
\State $k++$
|
||||||
|
\ElsIf {$P[i][j] = '\leftarrow'$}
|
||||||
|
\State $j--$
|
||||||
\Else
|
\Else
|
||||||
\State \Return \False
|
\State $i--$
|
||||||
\EndIf
|
\EndIf
|
||||||
\State i++
|
|
||||||
\EndWhile
|
\EndWhile
|
||||||
\If {$s \in T$}
|
\State \Return $L$
|
||||||
\State \Return \True
|
|
||||||
\Else
|
|
||||||
\State \Return \False
|
|
||||||
\EndIf
|
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
\section{Suffix Automaton}
|
\iffalse
|
||||||
|
\begin{algorithm}
|
||||||
Let $S = $ AACTACT
|
\caption{Recursive reconstruction of the longest common subsequence}
|
||||||
|
|
||||||
A suffix automata recognize all suffix of a given sequence.
|
|
||||||
|
|
||||||
|
|
||||||
The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
|
|
||||||
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf}
|
|
||||||
\caption{Suffix automaton for $S = $ AACTACT}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
|
|
||||||
\caption{Suffix automaton for $S = $ TCATCATT}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton}
|
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
||||||
\Returns{Boolean valued to \True{} if the motif is in the sequence}
|
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
||||||
\State $s \gets s_{0}$
|
\State $i \gets n$
|
||||||
\State $i \gets 0$
|
\State $j \gets m$
|
||||||
\While {$i < m$ and $\exists f(s, W[i])$}
|
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
|
||||||
\State $s \gets f(s, W[i])$
|
\EndProcedure
|
||||||
\State $i++$
|
|
||||||
\EndWhile
|
|
||||||
\If {$i=n$}
|
|
||||||
\State \Return \True
|
|
||||||
\Else
|
|
||||||
\State \Return \False
|
|
||||||
\EndIf
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$
|
|
||||||
|
|
||||||
|
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
|
||||||
|
\If {$P[i][j] = '\nwarrow' $}
|
||||||
|
\State $l \gets S_{1}[i-1]$
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
|
||||||
|
\State \texttt{print}($l$)
|
||||||
|
\ElsIf {$P[i][j] = '\leftarrow'$}
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
|
||||||
|
\Else
|
||||||
|
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
|
||||||
|
\EndIf
|
||||||
|
\EndProcedure
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
\fi
|
||||||
|
|
|
@ -1,84 +0,0 @@
|
||||||
\chapter{Automata for motif search}
|
|
||||||
|
|
||||||
Let $M$ be a motif $M = $ ACAT.
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includegraphics{./figures/part1/motif_search_automaton.pdf}
|
|
||||||
\caption{Motif search automaton for $M = $ ACAT}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
The alphabet of the motif is the same as the alphabet of the sequence.
|
|
||||||
The search automaton is complete.
|
|
||||||
If there exists a letter $c$ in the sequence that is not
|
|
||||||
in the motif alphabet, we can make a virtual transition from
|
|
||||||
each state to the initial state whenever we encounter an unknown letter.
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Search a motif in a sequence with an automaton}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{SearchMotif}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
|
|
||||||
\Returns{A set of positions where the motif has been found}
|
|
||||||
\State $s \gets s_0$
|
|
||||||
\State $i \gets 0$
|
|
||||||
\State $pos \gets \{\}$
|
|
||||||
\While {$i < n$} % $\exists f(s, S[i])$ We assume $S$ and $P$ are formed on the same alphabet, so we could remove the second check, as $A$ is complete
|
|
||||||
\If {$s \in T$}
|
|
||||||
\State $pos \gets pos \cup \{ i - m \}$
|
|
||||||
\EndIf
|
|
||||||
\State $s \gets f(s, S[i])$
|
|
||||||
\State $i++$
|
|
||||||
\EndWhile
|
|
||||||
\State \Return $pos$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
|
|
||||||
\Returns{Boolean valued to \True{} if the prefix of size $m-1$ of the motif is found in the sequence}
|
|
||||||
\State $s \gets s_0$
|
|
||||||
\State $i \gets 0$
|
|
||||||
\State $T_{new} \gets \{\}$
|
|
||||||
\For {$s \in S$}
|
|
||||||
\For {$a \in \Sigma$}
|
|
||||||
\For {$t \in T$}
|
|
||||||
\If {$\exists f(s, a)$ and $f(s, a) = t$}
|
|
||||||
\State $T_{new} \gets T_{new} \cup \{ s \}$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\While {$i < n$}
|
|
||||||
\If {$s \in T_{new}$}
|
|
||||||
\State \Return \True
|
|
||||||
\EndIf
|
|
||||||
\State $s \gets f(s, S[i])$
|
|
||||||
\State $i++$
|
|
||||||
\EndWhile
|
|
||||||
\State \Return \False
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$, knowing the sequence of the motif}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
|
|
||||||
\Returns{Boolean valued to \True{} if the prefix of size $m-1$ of the motif is found in the sequence}
|
|
||||||
\State $s \gets s_0$
|
|
||||||
\State $i \gets 0$
|
|
||||||
\While {$i < n$ and $f(s, P[m-1]) \notin T$}
|
|
||||||
\State $s \gets f(s, S[i])$
|
|
||||||
\State $i++$
|
|
||||||
\EndWhile
|
|
||||||
\If{$f(s, P[m-1]) \in T$}
|
|
||||||
\State \Return \True
|
|
||||||
\Else
|
|
||||||
\State \Return \False
|
|
||||||
\EndIf
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
|
@ -1,116 +0,0 @@
|
||||||
\chapter{Longest common subsequence}
|
|
||||||
|
|
||||||
Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
|
|
||||||
In this case, the longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$.
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Construct a longest common subsequence matrix}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M \gets $ Array($m+1$, $n+1$)
|
|
||||||
\For{($i = 0$; $i < n+1$; $i++$)}
|
|
||||||
\For{($j = 0$; $j < m+1$; $j++$)}
|
|
||||||
\If {$i = 0$ or $j = 0$}
|
|
||||||
\State $M[i][j] = 0$
|
|
||||||
\Else
|
|
||||||
\If {$S_{1}[i] = S_{2}[j]$}
|
|
||||||
\State $match = M[i-1][j-1] + 1$
|
|
||||||
\Else
|
|
||||||
\State $match = M[i-1][j-1]$
|
|
||||||
\EndIf
|
|
||||||
\State $gap_{1} = M[i-1][j]$
|
|
||||||
\State $gap_{2} = M[i][j-1]$
|
|
||||||
\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\State \Return $M$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Construct a longest common subsequence matrix keeping the path in memory}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M \gets $ Array($m+1$, $n+1$)
|
|
||||||
\State $P \gets $ Array($m+1$, $n+1$)
|
|
||||||
\For {($i = 0$; $i < n+1$; $i++$)}
|
|
||||||
\State $M[i][0] \gets 0$
|
|
||||||
\EndFor
|
|
||||||
\For {($j = 0$; $j < m+1$; $j++$)}
|
|
||||||
\State $M[0][j] \gets 0$
|
|
||||||
\EndFor
|
|
||||||
\For{($i = 1$; $i < n+1$; $i++$)}
|
|
||||||
\For{($j = 1$; $j < m+1$; $j++$)}
|
|
||||||
\If {$i = 0$ or $j = 0$}
|
|
||||||
\State $M[i][j] = 0$
|
|
||||||
\Else
|
|
||||||
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
|
||||||
\State $M[i][j] \gets M[i-1][j-1] + 1$
|
|
||||||
\State $P[i][j] \gets '\nwarrow'$
|
|
||||||
\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
|
|
||||||
\State $M[i][j] \gets M[i][j-1]$
|
|
||||||
\State $P[i][j] \gets '\leftarrow'$
|
|
||||||
\Else
|
|
||||||
\State $M[i][j] \gets M[i-1][j]$
|
|
||||||
\State $P[i][j] \gets '\downarrow'$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\State \Return $M, P$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Backtrack the longest common subsequence}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
|
||||||
\State $L \gets Array(M[n][m])$
|
|
||||||
\State $k \gets 0$
|
|
||||||
\State $i \gets n$
|
|
||||||
\State $j \gets m$
|
|
||||||
\While{$i > 0$ and $j > 0$}
|
|
||||||
\If {$P[i][j] = '\nwarrow' $}
|
|
||||||
\State $L[k] \gets S_{1}[i]$
|
|
||||||
\State $i--$
|
|
||||||
\State $j--$
|
|
||||||
\State $k++$
|
|
||||||
\ElsIf {$P[i][j] = '\leftarrow'$}
|
|
||||||
\State $j--$
|
|
||||||
\Else
|
|
||||||
\State $i--$
|
|
||||||
\EndIf
|
|
||||||
\EndWhile
|
|
||||||
\State \Return $L$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\iffalse
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Recursive reconstruction of the longest common subsequence}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
|
|
||||||
\State $i \gets n$
|
|
||||||
\State $j \gets m$
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
|
|
||||||
\EndProcedure
|
|
||||||
|
|
||||||
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
|
|
||||||
\If {$P[i][j] = '\nwarrow' $}
|
|
||||||
\State $l \gets S_{1}[i]$
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
|
|
||||||
\State \texttt{print}($l$)
|
|
||||||
\ElsIf {$P[i][j] = '\leftarrow'$}
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
|
|
||||||
\Else
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
|
|
||||||
\EndIf
|
|
||||||
\EndProcedure
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
\fi
|
|
|
@ -1,116 +0,0 @@
|
||||||
\chapter{Longest common subsequence}
|
|
||||||
|
|
||||||
Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$.
|
|
||||||
In this case, the longest common subsequence of $S_{1}$ and $S_{2}$ is $\text{TCTA}$.
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Construct a longest common subsequence matrix}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M \gets $ Array($m+1$, $n+1$)
|
|
||||||
\For{($i = 0$; $i < n+1$; $i++$)}
|
|
||||||
\For{($j = 0$; $j < m+1$; $j++$)}
|
|
||||||
\If {$i = 0$ or $j = 0$}
|
|
||||||
\State $M[i][j] = 0$
|
|
||||||
\Else
|
|
||||||
\If {$S_{1}[i] = S_{2}[j]$}
|
|
||||||
\State $match = M[i-1][j-1] + 1$
|
|
||||||
\Else
|
|
||||||
\State $match = M[i-1][j-1]$
|
|
||||||
\EndIf
|
|
||||||
\State $gap_{1} = M[i-1][j]$
|
|
||||||
\State $gap_{2} = M[i][j-1]$
|
|
||||||
\State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\State \Return $M$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Construct a longest common subsequence matrix keeping the path in memory}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M \gets $ Array($m+1$, $n+1$)
|
|
||||||
\State $P \gets $ Array($m+1$, $n+1$)
|
|
||||||
\For {($i = 0$; $i < n+1$; $i++$)}
|
|
||||||
\State $M[i][0] \gets 0$
|
|
||||||
\EndFor
|
|
||||||
\For {($j = 0$; $j < m+1$; $j++$)}
|
|
||||||
\State $M[0][j] \gets 0$
|
|
||||||
\EndFor
|
|
||||||
\For{($i = 1$; $i < n+1$; $i++$)}
|
|
||||||
\For{($j = 1$; $j < m+1$; $j++$)}
|
|
||||||
\If {$i = 0$ or $j = 0$}
|
|
||||||
\State $M[i][j] = 0$
|
|
||||||
\Else
|
|
||||||
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
|
||||||
\State $M[i][j] \gets M[i-1][j-1] + 1$
|
|
||||||
\State $P[i][j] \gets '\nwarrow'$
|
|
||||||
\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
|
|
||||||
\State $M[i][j] \gets M[i][j-1]$
|
|
||||||
\State $P[i][j] \gets '\leftarrow'$
|
|
||||||
\Else
|
|
||||||
\State $M[i][j] \gets M[i-1][j]$
|
|
||||||
\State $P[i][j] \gets '\downarrow'$
|
|
||||||
\EndIf
|
|
||||||
\EndIf
|
|
||||||
\EndFor
|
|
||||||
\EndFor
|
|
||||||
\State \Return $M, P$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Backtrack the longest common subsequence}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M, P \gets $ \Call{LCSQ\_Matrix\_Path}{$S_{1}$, $S_{2}$}
|
|
||||||
\State $L \gets Array(M[n][m])$
|
|
||||||
\State $k \gets 0$
|
|
||||||
\State $i \gets n$
|
|
||||||
\State $j \gets m$
|
|
||||||
\While{$i > 0$ and $j > 0$}
|
|
||||||
\If {$P[i][j] = '\nwarrow' $}
|
|
||||||
\State $L[k] \gets S_{1}[i]$
|
|
||||||
\State $i--$
|
|
||||||
\State $j--$
|
|
||||||
\State $k++$
|
|
||||||
\ElsIf {$P[i][j] = '\leftarrow'$}
|
|
||||||
\State $j--$
|
|
||||||
\Else
|
|
||||||
\State $i--$
|
|
||||||
\EndIf
|
|
||||||
\EndWhile
|
|
||||||
\State \Return $L$
|
|
||||||
\EndFunction
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\iffalse
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Recursive reconstruction of the longest common subsequence}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
|
|
||||||
\State $i \gets n$
|
|
||||||
\State $j \gets m$
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
|
|
||||||
\EndProcedure
|
|
||||||
|
|
||||||
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
|
|
||||||
\If {$P[i][j] = '\nwarrow' $}
|
|
||||||
\State $l \gets S_{1}[i]$
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
|
|
||||||
\State \texttt{print}($l$)
|
|
||||||
\ElsIf {$P[i][j] = '\leftarrow'$}
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
|
|
||||||
\Else
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
|
|
||||||
\EndIf
|
|
||||||
\EndProcedure
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
\fi
|
|
|
@ -1,6 +1,6 @@
|
||||||
\part{Sequence alignment}
|
\part{Sequence alignment}
|
||||||
|
|
||||||
\section{Similarity between sequences}
|
\chapter{Definitions}
|
||||||
|
|
||||||
A function $d$ is a distance between two sequences $x$ and $y$ in an alphabet $\Sigma$ if
|
A function $d$ is a distance between two sequences $x$ and $y$ in an alphabet $\Sigma$ if
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
\chapter{Section alignment}
|
\chapter{Sequence alignment}
|
||||||
|
|
||||||
\section{Needleman - Wunsch algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Needleman-Wunsch Algorithm}
|
\caption{Needleman-Wunsch Algorithm}
|
||||||
|
@ -82,7 +80,7 @@
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Needleman-Wunsch Algorithm (Backtrack) }
|
\caption{Needleman-Wunsch Algorithm, using proper notation }
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||||
\State $M = $ Array($m+1$, $n+1$)
|
\State $M = $ Array($m+1$, $n+1$)
|
||||||
|
@ -143,3 +141,82 @@
|
||||||
\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
|
\State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$}
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Backtrack a single alignment in a recursive way}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
|
||||||
|
\Function{BacktrackRecurse}{$i$, $j$}
|
||||||
|
\If {$i > 0$ and $j > 0$}
|
||||||
|
\State $substitute = M[i-1][j-1]$
|
||||||
|
\State $delete = M[i-1][j]$
|
||||||
|
\State $insert = M[i][j-1]$
|
||||||
|
\State $min = \min \{ substitute, delete, insert \}$
|
||||||
|
\If {$substitute = min$}
|
||||||
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
|
||||||
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
|
\ElsIf {$delete = min$}
|
||||||
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
||||||
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
||||||
|
\Else
|
||||||
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
|
||||||
|
\State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
|
\EndIf
|
||||||
|
\ElsIf {$i > 0$}
|
||||||
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
||||||
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
||||||
|
\ElsIf {$j > 0$}
|
||||||
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
|
||||||
|
\State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
|
\Else
|
||||||
|
\State \Return []
|
||||||
|
\EndIf
|
||||||
|
\State \Return $z$
|
||||||
|
\EndFunction
|
||||||
|
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||||
|
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Backtrack all the optimum alignments in a recursive way}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$}
|
||||||
|
\If {$i > 0$ and $j > 0$}
|
||||||
|
\State $substitute = M[i-1][j-1]$
|
||||||
|
\State $delete = M[i-1][j]$
|
||||||
|
\State $insert = M[i][j-1]$
|
||||||
|
\State $min = \min \{ substitute, delete, insert \}$
|
||||||
|
\If {$substitute = min$}
|
||||||
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\If {$delete = min$}
|
||||||
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\If {$insert = min$}
|
||||||
|
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\ElsIf {$i > 0$}
|
||||||
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
||||||
|
\ElsIf {$j > 0$}
|
||||||
|
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\State \Call{print}{$z$}
|
||||||
|
\EndProcedure
|
||||||
|
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||||
|
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
|
||||||
|
\EndProcedure
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
\part*{Introduction}
|
||||||
|
\chapter*{Back to basics}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Search an element in an array}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||||
|
\For {($i = 0$; $i < n$; $i++$)}
|
||||||
|
\If {$A[i] = E$}
|
||||||
|
\State \Return \True
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\State \Return \False
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Search an element in an array using a while loop}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||||
|
\State $i \gets 0$
|
||||||
|
\While {$i < n$}
|
||||||
|
\If {$A[i] = E$}
|
||||||
|
\State \Return \True
|
||||||
|
\EndIf
|
||||||
|
\State $i \gets i + 1$
|
||||||
|
\EndWhile
|
||||||
|
\State
|
||||||
|
\Return
|
||||||
|
\False
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Search an element in an array using a while loop (bis)}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||||
|
% \Comment{Version ``preferred'' by the professor}
|
||||||
|
\State $i \gets 0$
|
||||||
|
\While {$i < n$ and $A[i] \neq E$}
|
||||||
|
\State $i \gets i + 1$
|
||||||
|
\EndWhile
|
||||||
|
\If {$i = n$}
|
||||||
|
\State
|
||||||
|
\Return \False \Else \State \Return \True \EndIf
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Count the occurrences of an element in an array}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Function{Search}{$A$: Array($n$), $E$: element} \State $c \gets 0$
|
||||||
|
\For{($i = 0$; $i < n$; $i++$)}
|
||||||
|
\If {$A[i] = E$}
|
||||||
|
\State $c \gets c + 1$
|
||||||
|
\EndIf
|
||||||
|
\EndFor
|
||||||
|
\State \Return $c$
|
||||||
|
\EndFunction
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
161
tmp.tex
161
tmp.tex
|
@ -9,95 +9,98 @@
|
||||||
\algnewcommand{\NIL}{\textbf{\texttt{NIL}}}
|
\algnewcommand{\NIL}{\textbf{\texttt{NIL}}}
|
||||||
\algnewcommand{\NULL}{\textbf{\texttt{null}}}
|
\algnewcommand{\NULL}{\textbf{\texttt{null}}}
|
||||||
\input{definitions.tex}
|
\input{definitions.tex}
|
||||||
|
\usepackage{mathtools}
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Construct a longest common subsequence matrix keeping the path in memory}
|
\caption{Backtrack a single alignment in a recursive way}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
|
||||||
\State $M \gets $ Array($m+1$, $n+1$)
|
\Function{BacktrackRecurse}{$i$, $j$}
|
||||||
\State $P \gets $ Array($m+1$, $n+1$)
|
\If {$i > 0$ and $j > 0$}
|
||||||
\For {($i = 0$; $i < n+1$, $i++$)}
|
\State $substitute = M[i-1][j-1]$
|
||||||
\State $M[i][0] \gets 0$
|
\State $delete = M[i-1][j]$
|
||||||
\EndFor
|
\State $insert = M[i][j-1]$
|
||||||
\For {($j = 0$; $j < m+1$; $j+$)}
|
\State $min = \min \{ substitute, delete, insert \}$
|
||||||
\State $M[0][j] \gets 0$
|
\If {$substitute = min$}
|
||||||
\EndFor
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
|
||||||
\For{($i = 1$; $i < n+1$; $i++$)}
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
\For{($j = 1$; $j < m+1$; $j++$)}
|
\ElsIf {$delete = min$}
|
||||||
\If {$i = 1$ or $j = 0$}
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
||||||
\State $M[i][j] = 0$
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
||||||
\Else
|
\Else
|
||||||
\If {$S_{1}[i-1] = S_{2}[j-1]$}
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
|
||||||
\State $M[i][j] \gets M[i-1][j-1] + 1$
|
\State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
\State $P[i][j] \gets '\nwarrow'$
|
|
||||||
\ElsIf {$M[i][j-1] \geq M[i-1][j]$}
|
|
||||||
\State $M[i][j] \gets M[i][j-1]$
|
|
||||||
\State $P[i][j] \gets '\leftarrow'$
|
|
||||||
\Else
|
|
||||||
\State $M[i][j] \gets M[i-1][j]$
|
|
||||||
\State $P[i][j] \gets '\downarrow'$
|
|
||||||
\EndIf
|
\EndIf
|
||||||
\EndIf
|
\ElsIf {$i > 0$}
|
||||||
\EndFor
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
||||||
\EndFor
|
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
||||||
\State \Return $M, P$
|
\ElsIf {$j > 0$}
|
||||||
|
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$}
|
||||||
|
\State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||||
|
\Else
|
||||||
|
\State \Return []
|
||||||
|
\EndIf
|
||||||
|
\State \Return $z$
|
||||||
|
\EndFunction
|
||||||
|
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||||
|
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
|
\begin{algorithm}
|
||||||
|
\caption{Backtrack all the optimum alignments in a recursive way}
|
||||||
|
\begin{algorithmic}[1]
|
||||||
|
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$}
|
||||||
|
\If {$i > 0$ and $j > 0$}
|
||||||
|
\State $substitute = M[i-1][j-1]$
|
||||||
|
\State $delete = M[i-1][j]$
|
||||||
|
\State $insert = M[i][j-1]$
|
||||||
|
\State $min = \min \{ substitute, delete, insert \}$
|
||||||
|
\If {$substitute = min$}
|
||||||
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\If {$delete = min$}
|
||||||
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\If {$insert = min$}
|
||||||
|
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\ElsIf {$i > 0$}
|
||||||
|
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
||||||
|
\ElsIf {$j > 0$}
|
||||||
|
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||||
|
\State $z' = value \circ z$
|
||||||
|
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||||
|
\EndIf
|
||||||
|
\State \Call{print}{$z$}
|
||||||
|
\EndProcedure
|
||||||
|
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||||
|
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
|
||||||
|
\EndProcedure
|
||||||
|
\end{algorithmic}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
\iffalse
|
\iffalse
|
||||||
\begin{algorithm}
|
\Function{AppendToAll}{$value$, $set$}
|
||||||
\caption{Backtrack the longest common subsequence}
|
\Returns {A new set containing every element of $set$ with $value$ prepended to it}
|
||||||
\begin{algorithmic}[1]
|
\State $res = \{\}$
|
||||||
\Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
\For {$element \in set$}
|
||||||
\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
|
\State $element = value \circ element$
|
||||||
\State $L \gets Array(M[n][m])$
|
\State $res = res \cup element$
|
||||||
\State $k \gets 0$
|
\EndFor
|
||||||
\State $i \gets n$
|
\State \Return $res$
|
||||||
\State $j \gets m$
|
|
||||||
\While{$i > 0$ and $j > 0$}
|
|
||||||
\If {$P[i][j] = '\nwarrow' $}
|
|
||||||
\State $L[k] \gets S_{1}[i]$
|
|
||||||
\State $i--$
|
|
||||||
\State $j--$
|
|
||||||
\State $k++$
|
|
||||||
\ElsIf {$P[i][j] = '\leftarrow'$}
|
|
||||||
\State $j--$
|
|
||||||
\Else
|
|
||||||
\State $i--$
|
|
||||||
\EndIf
|
|
||||||
\EndWhile
|
|
||||||
\State \Return $L$
|
|
||||||
\EndFunction
|
\EndFunction
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
|
|
||||||
\begin{algorithm}
|
|
||||||
\caption{Recursive reconstruction of the longest common subsequence}
|
|
||||||
\begin{algorithmic}[1]
|
|
||||||
\Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)}
|
|
||||||
\State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$}
|
|
||||||
\State $i \gets n$
|
|
||||||
\State $j \gets m$
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j$}
|
|
||||||
\EndProcedure
|
|
||||||
|
|
||||||
\Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$}
|
|
||||||
\If {$P[i][j] = '\nwarrow' $}
|
|
||||||
\State $l \gets S_{1}[i]$
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$}
|
|
||||||
\State \texttt{print}($l$)
|
|
||||||
\ElsIf {$P[i][j] = '\leftarrow'$}
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$}
|
|
||||||
\Else
|
|
||||||
\State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$}
|
|
||||||
\EndIf
|
|
||||||
\EndProcedure
|
|
||||||
\end{algorithmic}
|
|
||||||
\end{algorithm}
|
|
||||||
\fi
|
\fi
|
||||||
\end{document}
|
|
||||||
|
|
||||||
\end{document}
|
|
||||||
|
|
Loading…
Reference in New Issue