2024-03-15 11:40:26 +01:00
|
|
|
\chapter{Automata for motif search}
|
|
|
|
|
|
|
|
Let $M$ be the motif ACAT.
|
|
|
|
|
|
|
|
\begin{figure}
|
|
|
|
\centering
|
2024-03-19 13:11:18 +01:00
|
|
|
\includegraphics{./figures/part1/motif_search_automaton.pdf}
|
2024-03-15 11:40:26 +01:00
|
|
|
\caption{Motif search automaton for $M = $ ACAT}
|
|
|
|
\end{figure}
|
|
|
|
|
|
|
|
We assume that the alphabet of the motif is the same as the alphabet of the sequence.
|
|
|
|
The search automaton is complete.
|
|
|
|
If there exists a letter $c$ in the sequence that is not
|
|
|
|
in the motif alphabet, we can make a virtual transition from
|
|
|
|
each state to the initial state whenever we encounter an unknown letter.
|
|
|
|
|
|
|
|
\begin{algorithm}
    \caption{Search a motif in a sequence with an automaton}
    \begin{algorithmic}[1]
        % Runs the motif automaton $A$ over the sequence $S$ and collects the
        % position of every occurrence of the motif $P$.
        %   S : sequence to scan, indexed from 0 to n-1
        %   A : automaton; its state set is written Q (not S) so that it does
        %       not clash with the sequence S, s_0 is the initial state, T the
        %       set of terminal states, Sigma the alphabet, f the transitions
        %   P : motif of length m
        % Returns the set of indices i - m, where i is the number of letters
        % consumed when a terminal state is reached (i.e. the 0-based start
        % of the m letters that were just read).
        \Function{SearchMotif}{$S$: Array($n$), $A$: $\langle Q, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
            \Returns{A set of positions where the motif has been found}
            \State $s \gets s_0$
            \State $i \gets 0$
            \State $\mathit{pos} \gets \{\}$
            % No test on the existence of $f(s, S[i])$ is needed: we assume $S$
            % and $P$ are formed on the same alphabet and $A$ is complete.
            \While {$i < n$}
                % $s$ is the state reached after reading $S[0..i-1]$.
                \If {$s \in T$}
                    \State $\mathit{pos} \gets \mathit{pos} \cup \{ i - m \}$
                \EndIf
                \State $s \gets f(s, S[i])$
                \State $i \gets i + 1$
            \EndWhile
            % The loop tests the state *before* consuming $S[i]$, so the state
            % reached after the last letter must be tested here; otherwise an
            % occurrence ending at the very end of $S$ would be missed.
            \If {$s \in T$}
                \State $\mathit{pos} \gets \mathit{pos} \cup \{ i - m \}$
            \EndIf
            \State \Return $\mathit{pos}$
        \EndFunction
    \end{algorithmic}
\end{algorithm}
|
|
|
|
|
|
|
|
\begin{algorithm}
    \caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$ }
    \begin{algorithmic}[1]
        % Decides whether, while scanning $S$, the automaton reaches a state
        % from which a single transition leads to a terminal state.
        % This variant does not look at the letters of $P$: it derives the
        % set $T_{\mathit{new}}$ of such states from the transition function.
        %   S : sequence to scan, indexed from 0 to n-1
        %   A : automaton; its state set is written Q (not S) so that it does
        %       not clash with the sequence S
        %   P : motif of length m (unused here, kept for a uniform signature)
        \Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle Q, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
            \Returns{\True{} if such a state is reached while reading $S$, \False{} otherwise}
            % Step 1: collect every state having at least one transition into $T$.
            % The loop variable is $q$ so that the current state $s$ used by
            % the scan below is not overwritten.
            \State $T_{\mathit{new}} \gets \{\}$
            \For {$q \in Q$}
                \For {$a \in \Sigma$}
                    \If {$f(q, a)$ is defined and $f(q, a) \in T$}
                        \State $T_{\mathit{new}} \gets T_{\mathit{new}} \cup \{ q \}$
                    \EndIf
                \EndFor
            \EndFor
            % Step 2: scan $S$ and stop as soon as a state of $T_{\mathit{new}}$ is reached.
            \State $s \gets s_0$
            \State $i \gets 0$
            \While {$i < n$ and $s \notin T_{\mathit{new}}$}
                \State $s \gets f(s, S[i])$
                \State $i \gets i + 1$
            \EndWhile
            % Testing after the loop also covers the state reached once the
            % last letter of $S$ has been consumed.
            \If {$s \in T_{\mathit{new}}$}
                \State \Return \True
            \Else
                \State \Return \False
            \EndIf
        \EndFunction
    \end{algorithmic}
\end{algorithm}
|
|
|
|
|
|
|
|
\begin{algorithm}
    \caption{Check if a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$, knowing the sequence of the motif}
    \begin{algorithmic}[1]
        % Decides whether, while scanning $S$, the automaton reaches a state
        % from which reading the last letter of the motif, $P[m-1]$, leads to
        % a terminal state.
        %   S : sequence to scan, indexed from 0 to n-1
        %   A : automaton; its state set is written Q (not S) so that it does
        %       not clash with the sequence S
        %   P : motif of length m; only its last letter $P[m-1]$ is used
        \Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle Q, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)}
            \Returns{\True{} if such a state is reached while reading $S$, \False{} otherwise}
            \State $s \gets s_0$
            \State $i \gets 0$
            % Advance until the sequence is exhausted or the current state is
            % one transition on $P[m-1]$ away from a terminal state.
            \While {$i < n$ and $f(s, P[m-1]) \notin T$}
                \State $s \gets f(s, S[i])$
                \State $i \gets i + 1$
            \EndWhile
            % The loop may have stopped because $i = n$, so the condition is
            % tested again on the final state.
            \If{$f(s, P[m-1]) \in T$}
                \State \Return \True
            \Else
                \State \Return \False
            \EndIf
        \EndFunction
    \end{algorithmic}
\end{algorithm}
|