Improve typography and formatting
This commit is contained in:
parent
a35cd3ea43
commit
5f909eb955
|
@ -1,4 +1,5 @@
|
|||
build/
|
||||
.bak*
|
||||
.auctex-auto
|
||||
|
||||
## Core latex/pdflatex auxiliary files:
|
||||
|
|
|
@ -1,58 +1,63 @@
|
|||
\chapter{Back to basics}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Search an element in an array}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||
\For {($i = 0$; $i < n$; $i++$)}
|
||||
\If {$A[i] = E$}
|
||||
\State \Return \True
|
||||
\EndIf
|
||||
\EndFor
|
||||
\State \Return \False
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\caption{Search an element in an array}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||
\For {($i = 0$; $i < n$; $i++$)}
|
||||
\If {$A[i] = E$}
|
||||
\State \Return \True
|
||||
\EndIf
|
||||
\EndFor
|
||||
\State \Return \False
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Search an element in an array using a while loop}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||
\State $i \gets 0$
|
||||
\While {$i < n$}
|
||||
\If {$A[i] = E$}
|
||||
\State \Return \True
|
||||
\EndIf
|
||||
\State $i \gets i + 1$
|
||||
\EndWhile
|
||||
\State
|
||||
\Return
|
||||
\False
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\caption{Search an element in an array using a while loop}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||
\State $i \gets 0$
|
||||
\While {$i < n$}
|
||||
\If {$A[i] = E$}
|
||||
\State \Return \True
|
||||
\EndIf
|
||||
\State $i \gets i + 1$
|
||||
\EndWhile
|
||||
\State
|
||||
\Return
|
||||
\False
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Search an element in an array using a while loop (bis)}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||
% \Comment{Version ``preffered" by the professor}
|
||||
\State $i \gets 0$
|
||||
\While {$i < n$ and $A[i] \neq E$}
|
||||
\State $i \gets i + 1$
|
||||
\EndWhile
|
||||
\If {$i = n$}
|
||||
\State
|
||||
\Return \False \Else \State \Return \True \EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\caption{Search an element in an array using a while loop (bis)}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element}
|
||||
% \Comment{Version ``preferred'' by the professor}
|
||||
\State $i \gets 0$
|
||||
\While {$i < n$ and $A[i] \neq E$}
|
||||
\State $i \gets i + 1$
|
||||
\EndWhile
|
||||
\If {$i = n$}
|
||||
\State
|
||||
\Return \False \Else \State \Return \True \EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Count the occurrences of an element in an array}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element} \State $c \gets 0$
|
||||
\For{($i = 0$; $i < n$; $i++$)} \If {A[i] $ = $ E} \State $c \gets c + 1$ \EndIf
|
||||
\EndFor \State \Return $c$ \EndFunction
|
||||
\end{algorithmic}
|
||||
\caption{Count the occurrences of an element in an array}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{Search}{$A$: Array($n$), $E$: element} \State $c \gets 0$
|
||||
\For{($i = 0$; $i < n$; $i++$)}
|
||||
\If {$A[i] = E$}
|
||||
\State $c \gets c + 1$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\State \Return $c$
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
|
|
@ -1,77 +1,74 @@
|
|||
\chapter{Motif}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Brute-force search of a motif in a sequence}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindMotif}{S: Array(n), M: Array(m)} {
|
||||
\Returns{list of position}
|
||||
\State $pos \gets \{\}$
|
||||
\State $i \gets 0$
|
||||
\While {$i < n - m + 1$} {
|
||||
\State $j \gets 0$
|
||||
\While {$j < m$ and S[i+j] $ = $ M[j]} {
|
||||
\State $j++$
|
||||
}
|
||||
\EndWhile
|
||||
\If {$j = m$}
|
||||
\State $pos \gets pos \cup \{i\}$
|
||||
\EndIf
|
||||
\State $i++$
|
||||
}
|
||||
\EndWhile
|
||||
\State \Return $pos$
|
||||
}
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\label{alg:naive-motif-matching}
|
||||
\caption{Brute-force search of a motif in a sequence}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindMotif}{$S$: Array($n$), $M$: Array($m$)}
|
||||
\Returns{a list of positions}
|
||||
\State $pos \gets \{\}$
|
||||
\State $i \gets 0$
|
||||
\While {$i < n - m + 1$}
|
||||
\State $j \gets 0$
|
||||
\While {$j < m$ and $S[i+j] = M[j]$}
|
||||
\State $j++$
|
||||
\EndWhile
|
||||
\If {$j = m$}
|
||||
\State $pos \gets pos \cup \{i\}$
|
||||
\EndIf
|
||||
\State $i++$
|
||||
\EndWhile
|
||||
\State \Return $pos$
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\label{alg:naive-motif-matching}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Knuth-Morris-Pratt algorithm}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{KMP\_Search}{S: Array(n), M: Array(m)}
|
||||
\Returns{Integer}
|
||||
\State $table \gets \Call{KMP\_Table}{M}$
|
||||
\State $c \gets 0$ \Comment{Count the number of matches}
|
||||
\State $i \gets 0$
|
||||
\State $j \gets 0$
|
||||
\While {$i < n$}
|
||||
\If{$S[i] = M[i]$}
|
||||
\State $i \gets i + 1$
|
||||
\State $j \gets j + 1$
|
||||
\EndIf
|
||||
\If {$j = m$}
|
||||
\State $c \gets c + 1$
|
||||
\State $j \gets table[j-1]$
|
||||
\ElsIf {$j < n$ and $M[j] \neq S[i]$}
|
||||
\If {$j \neq 0$}
|
||||
\State $j \gets table[j-1]$
|
||||
\Else
|
||||
\State $i \gets i + 1$
|
||||
\EndIf
|
||||
\EndIf`
|
||||
\EndWhile
|
||||
\State \Return $c$
|
||||
\EndFunction
|
||||
\caption{Knuth-Morris-Pratt algorithm}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{KMP\_Search}{$S$: Array($n$), $M$: Array($m$)}
|
||||
\Returns{Integer}
|
||||
\State $table \gets$ \Call{KMP\_Table}{$M$}
|
||||
\State $c \gets 0$ \Comment{Count the number of matches}
|
||||
\State $i \gets 0$
|
||||
\State $j \gets 0$
|
||||
\While {$i < n$}
|
||||
\If{$S[i] = M[j]$}
|
||||
\State $i \gets i + 1$
|
||||
\State $j \gets j + 1$
|
||||
\EndIf
|
||||
\If {$j = m$}
|
||||
\State $c \gets c + 1$
|
||||
\State $j \gets table[j-1]$
|
||||
\ElsIf {$i < n$ and $M[j] \neq S[i]$}
|
||||
\If {$j \neq 0$}
|
||||
\State $j \gets table[j-1]$
|
||||
\Else
|
||||
\State $i \gets i + 1$
|
||||
\EndIf
|
||||
\EndIf
|
||||
\EndWhile
|
||||
\State \Return $c$
|
||||
\EndFunction
|
||||
|
||||
\Function{KMP\_Table}{M: Array(m)}
|
||||
\State \textbf{Returns} Array(m)
|
||||
\State $previous \gets 0$
|
||||
\State $table \gets $ array of zeros of size m
|
||||
\For {$i = 0$; $i < m$; $i++$}
|
||||
\If {$M[i] = M[previous]$}
|
||||
\State $previous \gets previous + 1$
|
||||
\State $table[i] \gets previous$
|
||||
\State $i \gets i + 1$
|
||||
\Else
|
||||
\If {$previous = 0$}
|
||||
\State $previous \gets table[previous - 1]$
|
||||
\Else
|
||||
\State $table[i] \gets 0$
|
||||
\State $i \gets 1$
|
||||
\EndIf
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\Function{KMP\_Table}{M: Array(m)}
|
||||
\State \textbf{Returns} Array(m)
|
||||
\State $previous \gets 0$
|
||||
\State $table \gets $ array of zeros of size m
|
||||
\For {$i = 0$; $i < m$; $i++$}
|
||||
\If {$M[i] = M[previous]$}
|
||||
\State $previous \gets previous + 1$
|
||||
\State $table[i] \gets previous$
|
||||
\State $i \gets i + 1$
|
||||
\Else
|
||||
\If {$previous \neq 0$}
|
||||
\State $previous \gets table[previous - 1]$
|
||||
\Else
|
||||
\State $table[i] \gets 0$
|
||||
\State $i \gets i + 1$
|
||||
\EndIf
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
|
|
@ -0,0 +1,281 @@
|
|||
\chapter{Matrices}
|
||||
|
||||
Let $S_{1}$ and $S_{2}$ be two sequences.
|
||||
|
||||
$S_{1} = $ ACGUUCC
|
||||
$S_{2} = $ GUU
|
||||
|
||||
\begin{table}
|
||||
\centering
|
||||
\begin{tabular}{c|ccccccc}
|
||||
& A & C & G & U & U & C & C \\
|
||||
\hline
|
||||
G & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
|
||||
U & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\
|
||||
U & 0 & 0 & 0 & 1 & 1 & 0 & 0
|
||||
\end{tabular}
|
||||
\caption{Comparison matrix}
|
||||
\end{table}
|
||||
|
||||
|
||||
Let $n = |S_{1}|$, $m = |S_{2}|$
|
||||
The complexity of this algorithm is $\mathcal{O}(n \cdot m)$ to build the matrix, and it also requires finding the diagonals, so it is slightly less efficient than \autoref{alg:naive-motif-matching}.
|
||||
|
||||
|
||||
To find repetitions, we can use a comparison matrix with a single sequence against itself. A repetition would appear as a diagonal of ones, not on the main diagonal.
|
||||
|
||||
Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
|
||||
|
||||
|
||||
\begin{table}
|
||||
\includegraphics{figures/part1/comparison_matrix_repetitions.pdf}
|
||||
\caption{Comparison matrix for $seq = $ ``ACGUUACGUUGUU''}
|
||||
\end{table}
|
||||
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Construct a comparison matrix}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{ComparisonMatrix}{$S$: Array($n$)}
|
||||
\State $M \gets $ Array($n$, $n$)
|
||||
\For{($i = 0$; $i < n$; $i++$)}
|
||||
\For{$j = 0$; $j < n$; $j++$}
|
||||
\If {$S[i] = S[j]$}
|
||||
\State $M[i][j] = 1$
|
||||
\Else
|
||||
\State $M[i][j] = 0$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\State \Return $M$
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
|
||||
\end{algorithm}
|
||||
\begin{algorithm}
|
||||
\caption{Construct the top half of a comparison matrix}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{ComparisonMatrix}{$S$: Array($n$)}
|
||||
\State $M \gets$ Array($n$,$n$)
|
||||
\For{($i = 0$; $i < n$; $i++$)}
|
||||
\For{($j = i$; $j < n$; $j++$)}
|
||||
\If {$S[i] = S[j]$}
|
||||
\State $M[i][j] = 1$
|
||||
\Else
|
||||
\State $M[i][j] = 0$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\State \Return $M$
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions (with a set of visited segments)}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetitions}{$S$: Array($n$)}
|
||||
\Returns{A list of start and end positions for repeated sequences}
|
||||
\State $M = $ \Call{ComparisonMatrix}{S}
|
||||
\State $pos = \{\}$
|
||||
\State $visited = \{\}$
|
||||
\For {($i_{start} = 0$; $i_{start} < n$; $i_{start}++$)}
|
||||
\For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)}
|
||||
\If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$}
|
||||
\State $i = i_{start}$
|
||||
\State $j = j_{start}$
|
||||
\While {$M[i][j] = 1$}
|
||||
\State $i++$
|
||||
\State $j++$
|
||||
\State $visited = visited \cup \{(i, j)\}$
|
||||
\EndWhile
|
||||
\State $pos = pos \cup \{(i_{start}, i), (j_{start},j)\}$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions with an exploration of diagonals}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetitions}{$S$: Array($n$)}
|
||||
\Returns{A list of start and end positions for repeated sequences}
|
||||
\State $M$ = \Call{ComparisonMatrix}{S}
|
||||
\State $pos = \{\}$
|
||||
\For {($diag = 1$; $diag < n$; $diag++$)}
|
||||
\State $j = diag$
|
||||
\State $i = 0$
|
||||
\While {$i < n$ and $j < n$}
|
||||
\If {$M[i][j] = 1$}
|
||||
\State $i_{start} = i$
|
||||
\State $j_{start} = j$
|
||||
\While {$i < n$ and $j < n$ and $M[i][j] = 1$}
|
||||
\State $i++$
|
||||
\State $j++$
|
||||
\EndWhile
|
||||
\State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$
|
||||
\EndIf
|
||||
\State $i++$
|
||||
\State $j++$
|
||||
\State
|
||||
\EndWhile
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions with an exploration of diagonals, without nested while}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetitions}{$S$: Array($n$)}
|
||||
\Returns{A list of start positions for repeated sequences and match length}
|
||||
\State $M$ = \Call{ComparisonMatrix}{S}
|
||||
\State $pos = \{\}$
|
||||
\For {($diag = 1$; $diag < n$; $diag++$)}
|
||||
\State $j = diag$
|
||||
\State $i = 0$
|
||||
\State $l = 0$
|
||||
\While {$i < n$ and $j < n$}
|
||||
\If {$M[i][j] = 1$}
|
||||
\State $l++$
|
||||
\Else
|
||||
\If {$l > 0$}
|
||||
\State $pos = pos \cup \{(i-l,j-l,l)\}$
|
||||
\State $l = 0$
|
||||
\EndIf
|
||||
\EndIf
|
||||
\State $i++$
|
||||
\State $j++$
|
||||
\EndWhile
|
||||
\If {$l > 0$}
|
||||
\State $pos = pos \cup \{(i-l,j-l,l)\}$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\State \Return $pos$
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetitions}{$S$: Array($n$)}
|
||||
\Returns{A list of start and end positions for repeated sequences}
|
||||
\State $M$ = \Call{ComparisonMatrix}{S}
|
||||
\State $pos = \{\}$
|
||||
\For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$}
|
||||
\For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$}
|
||||
\If{$M[i_{start}][j_{start}] = 1$}
|
||||
\State $i = i_{start}$
|
||||
\State $j = j_{start}$
|
||||
\While {$M[i][j] = 1$}
|
||||
\State $M[i][j] = 0$ \Comment{Ensure that the segment is not explored again}
|
||||
\State $i++$
|
||||
\State $j++$
|
||||
\EndWhile
|
||||
\State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\section{Automata}
|
||||
|
||||
|
||||
An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$
|
||||
\begin{itemize}
|
||||
\item $S$ the set of states
|
||||
\item $s_{0}$ the initial state
|
||||
\item $T$ the set of terminal states
|
||||
\item $\Sigma$ the alphabet
|
||||
\item $f$ the transition function $f: (s_{1}, c) \to s_{2}$
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$
|
||||
|
||||
\begin{definition}[Deterministic automaton]
|
||||
An automaton is deterministic if, for each couple $(p, a) \in S \times \Sigma$, there exists at most one state $q$ such that $f(p, a) = q$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}[Complete automaton]
|
||||
An automaton is complete if, for each couple $(p, a) \in S \times \Sigma$, there exists at least one state $q$ such that $f(p, a) = q$.
|
||||
\end{definition}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Check whether a word belongs to a language for which we have an automaton}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{WordInLanguage}{$W$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||
\Returns{A Boolean valued to \True{} if the word is recognized by the language automaton}
|
||||
\State $s \gets s_{0}$
|
||||
\State $i \gets 0$
|
||||
\While {$i < n$}
|
||||
\State $a \gets W[i]$
|
||||
\If {$\exists f(s, a)$}
|
||||
\State $s \gets f(s, a)$
|
||||
\Else
|
||||
\State \Return \False
|
||||
\EndIf
|
||||
\State $i++$
|
||||
\EndWhile
|
||||
\If {$s \in T$}
|
||||
\State \Return \True
|
||||
\Else
|
||||
\State \Return \False
|
||||
\EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\section{Suffix Automaton}
|
||||
|
||||
Let $S = $ AACTACT
|
||||
|
||||
A suffix automaton recognizes all suffixes of a given sequence.
|
||||
|
||||
|
||||
The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
|
||||
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{figures/part1/minimal_suffix_automaton_exercise.pdf}
|
||||
\caption{Suffix automaton for $S = $ AACTACT}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
|
||||
\caption{Suffix automaton for $S = $ TCATCATT}
|
||||
\end{figure}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Check if a sequence matches a motif, from a suffix automaton $\mathcal{O}(m)$}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||
\Returns{Boolean valued to \True{} if the motif is in the sequence}
|
||||
\State $s \gets s_{0}$
|
||||
\State $i \gets 0$
|
||||
\While {$i < m$ and $\exists f(s, W[i])$}
|
||||
\State $s \gets f(s, W[i])$
|
||||
\State $i++$
|
||||
\EndWhile
|
||||
\If {$i = m$}
|
||||
\State \Return \True
|
||||
\Else
|
||||
\State \Return \False
|
||||
\EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(n)$ and checking the motif is $\mathcal{O}(m)$.
|
||||
|
|
@ -1,289 +0,0 @@
|
|||
\chapter{Matrices}
|
||||
|
||||
Let $S_{1}$ and $S_{2}$ be two sequences.
|
||||
|
||||
$S_{1} = $ ACGUUCC
|
||||
$S_{2} = $ GUU
|
||||
|
||||
\begin{table}
|
||||
\centering
|
||||
\begin{tabular}{c|ccccccc}
|
||||
& A & C & G & U & U & C & C \\
|
||||
\hline
|
||||
G & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\
|
||||
U & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\
|
||||
U & 0 & 0 & 0 & 1 & 1 & 0 & 0
|
||||
\end{tabular}
|
||||
\caption{Comparison matrix}
|
||||
\end{table}
|
||||
|
||||
|
||||
Let $n = |S_{1}|$, $m = |S_{2}|$
|
||||
The complexity of this algorithm is $\mathcal{O}(n \cdot m)$ to build the matrix, and it requires also to find the diagonals and thus it is a bit less efficient than the \autoref{alg:naive-motif-matching}.
|
||||
|
||||
|
||||
To find repetitions, we can use a comparison matrix with a single sequence against itself. A repetition would appear as a diagonal of ones, not on the main diagonal.
|
||||
|
||||
Let $S = $ ACGUUACGUU. Let's write the comparison matrix.
|
||||
|
||||
|
||||
\begin{table}
|
||||
\includegraphics{figures/part1/comparison_matrix_repetitions.pdf}
|
||||
\caption{Comparison matrix for seq="ACGUUACGUUGUU"}
|
||||
\end{table}
|
||||
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Construct a comparison matrix}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{ComparisonMatrix}{S: Array(n)}
|
||||
\State $M \gets Array(n, n)$
|
||||
\For{$i = 0$; $i < n$; $i++$}
|
||||
\For{$j=0$; $j < n$; $j++$}
|
||||
\If {S[i] $ = $ S[j]}
|
||||
\State $M[i][j] = 1$
|
||||
\Else
|
||||
\State $M[i][j] = 0$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\State \Return $M$
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
|
||||
\end{algorithm}
|
||||
\begin{algorithm}
|
||||
\caption{Construct the top half of comparison matrix}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{ComparisonMatrix}{S: Array(n)}
|
||||
\State $M \gets Array(n,n)$
|
||||
\For{i = 0; i < n; i++}
|
||||
\For{j=i; j < n; j++}
|
||||
\If {S[i] = S[j]}
|
||||
\State M[i][j] = 1
|
||||
\Else
|
||||
\State M[i][j] = 0
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\State \Return M
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions (with the set of visited segments)}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetions}{S: Array(n)}
|
||||
\Returns{A list of start and end positions for repeted sequences}
|
||||
\State M = \Call{ComparisonMatrix}{S}
|
||||
\State pos = \{\}
|
||||
\State visited = \{\}
|
||||
\For {i_{start} = 0; i_{start} < n; i_{start}++}
|
||||
\For {j_{start} = i_{start}+1; j_{start} < n; j_{start}++}
|
||||
\If{M[i_{start}][j_{start}] = 1 and (i_{start}, j_{start}) \notin visited}
|
||||
\State i = i_{start}
|
||||
\State j = j_{start}
|
||||
\While {M[i][j] = 1}
|
||||
\State i++
|
||||
\State j++
|
||||
\State visited = visited \cup \{(i, j)\}
|
||||
\EndWhile
|
||||
\State pos = pos \cup \{(i_{start}, i), (j_{start},j)\}
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions with an exploration of diagonals}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetions}{S: Array(n)}
|
||||
\Returns{A list of start and end positions for repeted sequences}
|
||||
\State M = \Call{ComparisonMatrix}{S}
|
||||
\State $pos = \{\}$
|
||||
\For {diag = 1; diag < n; diag++}
|
||||
\State j = diag
|
||||
\State i = 0
|
||||
\While {i < n and j < n}
|
||||
\If {M[i][j] = 1}
|
||||
\State $i_{start} = i$
|
||||
\State $j_{start} = j$
|
||||
\While {i < n and j < n and M[i][j] = 1 }
|
||||
\State i++
|
||||
\State j++
|
||||
\EndWhile
|
||||
\State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1)\}$
|
||||
\EndIf
|
||||
\State i++
|
||||
\State j++
|
||||
\State
|
||||
\EndWhile
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions with an exploration of diagonals, without nested while}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetions}{S: Array(n)}
|
||||
\Returns{A list of start positions for repeted sequences and match length}
|
||||
\State M = \Call{ComparisonMatrix}{S}
|
||||
\State $pos = \{\}$
|
||||
\For {diag = 1; diag < n; diag++}
|
||||
\State j = diag
|
||||
\State i = 0
|
||||
\State l = 0
|
||||
\While {i < n and j < n}
|
||||
\If {M[i][j] = 1}
|
||||
\State l++
|
||||
\Else
|
||||
\If {l > 0}
|
||||
\State $pos = pos \cup \{((i-l,j-l,l)\}$
|
||||
\State l = 0
|
||||
\EndIf
|
||||
\EndIf
|
||||
\State i++
|
||||
\State j++
|
||||
\EndWhile
|
||||
\If {$l > 0$}
|
||||
\State $pos = pos \cup \{((i-l,j-l,l)\}$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\State \Return pos
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Find repetitions}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{FindRepetions}{S: Array(n)}
|
||||
\Returns{A list of start and end positions for repeted sequences}
|
||||
\State M = \Call{ComparisonMatrix}{S}
|
||||
\State pos = \{\}
|
||||
\For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$}
|
||||
\For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$}
|
||||
\If{$M[i_{start}][j_{start}] = 1$}
|
||||
\State $i = i_{start}$
|
||||
\State $j = j_{start}$
|
||||
\While {M[i][j] = 1}
|
||||
\State M[i][j] = 0 \Comment{Ensure that the segment is not explored again}
|
||||
\State i++
|
||||
\State j++
|
||||
\EndWhile
|
||||
\State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$
|
||||
\EndIf
|
||||
\EndFor
|
||||
\EndFor
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
\section{Automata}
|
||||
|
||||
|
||||
An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\langle$
|
||||
\begin{itemize}
|
||||
\item $S$ the set of states
|
||||
\item $s_{0}$ the initial state
|
||||
\item $T$ the set of terminal states
|
||||
\item $\Sigma$ the alphabet
|
||||
\item $f$ the transition function $f: (s_{1}, c) \to s_{2}$
|
||||
\end{itemize}
|
||||
|
||||
\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$
|
||||
|
||||
\begin{definition}[Deterministic automaton]
|
||||
An automaton is deterministic, if for each couple $(p, a) \in S \times \Sigma$ it exists at most a state $q$ such as $f(p, q) = q$
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}[Complete automaton]
|
||||
An automaton is complete, if for each couple $(p, a) \in S \times \Sigma$ it exists at least a state $q$ such as $f(p, q) = q$.
|
||||
\end{definition}
|
||||
\begin{algorithm}
|
||||
\caption{Check wether a word belong to a language for which we have an automaton}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{WordInLanguage}{W: Array(n), A: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||
\Returns{A Boolean valued to \True{} if the word is recognized by the language automaton}
|
||||
\State $s \gets s_{0}$
|
||||
\State $i \gets 0$
|
||||
\While {i < n} {
|
||||
|
||||
\State $a \gets W[i]$
|
||||
\If {$\exists f(s, a)$} {
|
||||
\State $s \gets f(s, a)$
|
||||
\Else
|
||||
\State \Return \False
|
||||
}
|
||||
\EndIf
|
||||
\State i++
|
||||
}
|
||||
\EndWhile
|
||||
\If {$s \in T$} {
|
||||
\State \Return \True
|
||||
}
|
||||
\Else {
|
||||
|
||||
\State \Return \False
|
||||
}
|
||||
\EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\section{Suffix Automaton}
|
||||
|
||||
Let $S = $ AACTACT
|
||||
|
||||
A suffix automata recognize all suffix of a given sequence.
|
||||
|
||||
|
||||
The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$.
|
||||
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{figures/part1/minimal_suffix_automaton_exercise.pdf}
|
||||
\caption{Suffix automaton for $S = $ AACTACT}
|
||||
\end{figure}
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{figures/part1/minimal_suffix_automaton_exercise_bis.pdf}
|
||||
\caption{Suffix automaton for $S = $ TCATCATT}
|
||||
\end{figure}
|
||||
|
||||
\end{definition}
|
||||
\begin{algorithm}
|
||||
\begin{algorithm}
|
||||
\caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$}
|
||||
\begin{algorithmic}[1]
|
||||
\Function{CheckMotifInSuffixAutomaton}{W: Array(m), A: $\langle S, s_{0}, T, \Sigma,f \rangle$}
|
||||
\Returns{Boolean valued to \True{} if the motif is in the sequence}
|
||||
\State $s \gets s_{0}$
|
||||
\State $i \gets 0$
|
||||
\While {i < m and $\exists f(s, W[i])$}
|
||||
\State $s \gets f(s, W[i])$
|
||||
\State i++
|
||||
\EndWhile
|
||||
\If {i=n}
|
||||
\State \Return \True
|
||||
\Else
|
||||
\State \Return \False
|
||||
\EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$
|
||||
\end{algorithm}
|
|
@ -1,15 +0,0 @@
|
|||
defaultIndent: " "
|
||||
specialBeginEnd:
|
||||
If:
|
||||
begin: '\\If'
|
||||
middle: '\\ElsIf'
|
||||
end: '\\EndIf'
|
||||
lookForThis: 1
|
||||
While:
|
||||
begin: '\\While'
|
||||
end: '\\EndWhile'
|
||||
lookForThis: 1
|
||||
For:
|
||||
begin: '\\For'
|
||||
end: '\\EndFor'
|
||||
specialBeforeCommand: 1
|
Loading…
Reference in New Issue