diff --git a/.gitignore b/.gitignore index 6573436..b7d2d7a 100755 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ build/ -.bak* +**/.bak* .auctex-auto ## Core latex/pdflatex auxiliary files: diff --git a/content/chapters/part1/0.tex b/content/chapters/part1/0.tex index 8a6fa1d..88d3966 100755 --- a/content/chapters/part1/0.tex +++ b/content/chapters/part1/0.tex @@ -1,63 +1 @@ -\chapter{Back to basics} - -\begin{algorithm} - \caption{Search an element in an array} - \begin{algorithmic}[1] - \Function{Search}{$A$: Array($n$), $E$: element} - \For {($i = 0$; $i < n$; $i++$)} - \If {$A[i] = E$} - \State \Return \True - \EndIf - \EndFor - \State \Return \False - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Search an element in an array using a while loop} - \begin{algorithmic}[1] - \Function{Search}{$A$: Array($n$), $E$: element} - \State $i \gets 0$ - \While {$i < n$} - \If {$A[i] = E$} - \State \Return \True - \EndIf - \State $i \gets i + 1$ - \EndWhile - \State - \Return - \False - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Search an element in an array using a while loop (bis)} - \begin{algorithmic}[1] - \Function{Search}{$A$: Array($n$), $E$: element} - % \Comment{Version ``preffered" by the professor} - \State $i \gets 0$ - \While {$i < n$ and $A[i] \neq E$} - \State $i \gets i + 1$ - \EndWhile - \If {$i = n$} - \State - \Return \False \Else \State \Return \True \EndIf - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Count the occurrences of an element in an array} - \begin{algorithmic}[1] - \Function{Search}{$A$: Array($n$), $E$: element} \State $c \gets 0$ - \For{($i = 0$; $i < n$; $i++$)} - \If {$A[i] = E$} - \State $c \gets c + 1$ - \EndIf - \EndFor - \State \Return $c$ - \EndFunction - \end{algorithmic} -\end{algorithm} +\part{Motifs algorithms} diff --git a/content/chapters/part1/1.tex b/content/chapters/part1/1.tex index b62c32e..46a8710 100644 
--- a/content/chapters/part1/1.tex +++ b/content/chapters/part1/1.tex @@ -1,5 +1,7 @@ \chapter{Motif} +\section{Searching a substring in a string} + \begin{algorithm} \caption{Brute-force search of a motif in a sequence} \begin{algorithmic}[1] @@ -72,3 +74,369 @@ \EndFunction \end{algorithmic} \end{algorithm} + +\section{Using matrices to search motifs} + +Let $S_{1}$ and $S_{2}$ be two sequences. + +$S_{1} = $ ACGUUCC +$S_{2} = $ GUU + +\begin{table} + \centering + \begin{tabular}{c|ccccccc} + & A & C & G & U & U & C & C \\ + \hline + G & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ + U & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\ + U & 0 & 0 & 0 & 1 & 1 & 0 & 0 + \end{tabular} + \caption{Comparison matrix} +\end{table} + + +Let $n = |S_{1}|$, $m = |S_{2}|$ +The complexity of this algorithm is $\mathcal{O}(n \cdot m)$ to build the matrix, and it requires also to find the diagonals and thus it is a bit less efficient than the \autoref{alg:naive-motif-matching}. + + +To find repetitions, we can use a comparison matrix with a single sequence against itself. A repetition would appear as a diagonal of ones, not on the main diagonal. + +Let $S = $ ACGUUACGUU. Let's write the comparison matrix. 
+ + +\begin{table} + \includegraphics{./figures/part1/comparison_matrix_repetitions.pdf} + \caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"} +\end{table} + +\begin{algorithm} + \caption{Construct a comparison matrix} + \begin{algorithmic}[1] + \Function{ComparisonMatrix}{$S$: Array($n$)} + \State $M \gets $ Array($n$, $n$) + \For{($i = 0$; $i < n$; $i++$)} + \For{$j = 0$; $j < n$; $j++$} + \If {$S[i] = S[j]$} + \State $M[i][j] = 1$ + \Else + \State $M[i][j] = 0$ + \EndIf + \EndFor + \EndFor + \State \Return $M$ + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Construct the top half of a comparison matrix} + \begin{algorithmic}[1] + \Function{ComparisonMatrix}{$S$: Array($n$)} + \State $M \gets$ Array($n$,$n$) + \For{($i = 0$; $i < n$; $i++$)} + \For{j=i; j < n; j++} + \If {S[i] = S[j]} + \State M[i][j] = 1 + \Else + \State M[i][j] = 0 + \EndIf + \EndFor + \EndFor + \State \Return M + \EndFunction + \end{algorithmic} +\end{algorithm} + + + +\begin{algorithm} + \caption{Find repetitions (with a set of visited segments)} + \begin{algorithmic}[1] + \Function{FindRepetions}{$S$: Array($n$)} + \Returns{A list of start and end positions for repeated sequences} + \State $M = $ \Call{ComparisonMatrix}{S} + \State $pos = \{\}$ + \State $visited = \{\}$ + \For {($i_{start} = 0$; $i_{start} < n$; $i_{start}++$)} + \For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)} + \If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$} + \State $i = i_{start}$ + \State $j = j_{start}$ + \While {$M[i][j] = 1$} + \State $i++$ + \State $j++$ + \State $visited = visited \cup \{(i, j)\}$ + \EndWhile + \State $pos = pos \cup \{(i_{start}, i), (j_{start},j)\}$ + \EndIf + \EndFor + \EndFor + \EndFunction + \end{algorithmic} +\end{algorithm} + + +\begin{algorithm} + \caption{Find repetitions with an exploration of diagonals} + \begin{algorithmic}[1] + \Function{FindRepetions}{$S$: Array($n$)} + \Returns{A list of start 
and end positions for repeted sequences} + \State $M$ = \Call{ComparisonMatrix}{S} + \State $pos = \{\}$ + \For {($diag = 1$; $diag < n$; $diag++$)} + \State $j = diag$ + \State $i = 0$ + \While {$i < n$ and $j < n$} + \If {$M[i][j] = 1$} + \State $i_{start} = i$ + \State $j_{start} = j$ + \While {$i < n$ and $j < n$ and $M[i][j] = 1$} + \State i++ + \State j++ + \EndWhile + \State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$ + \EndIf + \State $i++$ + \State $j++$ + \State + \EndWhile + \EndFor + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Find repetitions with an exploration of diagonals, without nested while} + \begin{algorithmic}[1] + \Function{FindRepetions}{$S$: Array($n$)} + \Returns{A list of start positions for repeted sequences and match length} + \State $M$ = \Call{ComparisonMatrix}{S} + \State $pos = \{\}$ + \For {($diag = 1$; $diag < n$; $diag++$)} + \State $j = diag$ + \State $i = 0$ + \State $l = 0$ + \While {$i < n$ and $j < n$} + \If {$M[i][j] = 1$} + \State $l++$ + \Else + \If {$l > 0$} + \State $pos = pos \cup \{(i-l,j-l,l)\}$ + \State $l = 0$ + \EndIf + \EndIf + \State $i++$ + \State $j++$ + \EndWhile + \If {$l > 0$} + \State $pos = pos \cup \{((i-l,j-l,l))\}$ + \EndIf + \EndFor + \State \Return $pos$ + \EndFunction + \end{algorithmic} +\end{algorithm} + + +\begin{algorithm} + \caption{Find repetitions} + \begin{algorithmic}[1] + \Function{FindRepetions}{$S$: Array($n$)} + \Returns{A list of start and end positions for repeted sequences} + \State $M$ = \Call{ComparisonMatrix}{S} + \State $pos = \{\}$ + \For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$} + \For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$} + \If{$M[i_{start}][j_{start}] = 1$} + \State $i = i_{start}$ + \State $j = j_{start}$ + \While {$M[i][j] = 1$} + \State $M[i][j] = 0$ \Comment{Ensure that the segment is not explored again} + \State $i++$ + \State $j++$ + \EndWhile + \State $pos = pos \cup \{((i_{start}, i-1), 
(j_{start},j-1))\}$ + \EndIf + \EndFor + \EndFor + \EndFunction + \end{algorithmic} +\end{algorithm} + + +\section{Automata} + + +An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$ +\begin{itemize} + \item $S$ the set of states + \item $s_{0}$ the initial state + \item $T$ the set of terminal states + \item $\Sigma$ the alphabet + \item $f$ the transition function $f: (s_{1}, c) \to s_{2}$ +\end{itemize} + +\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$ + +\begin{definition}[Deterministic automaton] + An automaton is deterministic, if for each couple $(p, a) \in S \times \Sigma$ it exists at most a state $q$ such as $f(p, q) = q$ +\end{definition} + +\begin{definition}[Complete automaton] + An automaton is complete, if for each couple $(p, a) \in S \times \Sigma$ it exists at least a state $q$ such as $f(p, q) = q$. +\end{definition} + +\begin{algorithm} + \caption{Check wether a word belong to a language for which we have an automaton} + \begin{algorithmic}[1] + \Function{WordInLanguage}{$W$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} + \Returns{A Boolean valued to \True{} if the word is recognized by the language automaton} + \State $s \gets s_{0}$ + \State $i \gets 0$ + \While {$i < n$} + \State $a \gets W[i]$ + \If {$\exists f(s, a)$} + \State $s \gets f(s, a)$ + \Else + \State \Return \False + \EndIf + \State i++ + \EndWhile + \If {$s \in T$} + \State \Return \True + \Else + \State \Return \False + \EndIf + \EndFunction + \end{algorithmic} +\end{algorithm} + +\subsection{Suffix Automaton} + +Let $S = $ AACTACT + +A suffix automata recognize all suffix of a given sequence. + + +The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$. 
+ + +\begin{figure} + \centering + \includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf} + \caption{Suffix automaton for $S = $ AACTACT} +\end{figure} + +\begin{figure} + \centering + \includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf} + \caption{Suffix automaton for $S = $ TCATCATT} +\end{figure} + +\begin{algorithm} + \caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton} + \begin{algorithmic}[1] + \Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} + \Returns{Boolean valued to \True{} if the motif is in the sequence} + \State $s \gets s_{0}$ + \State $i \gets 0$ + \While {$i < m$ and $\exists f(s, W[i])$} + \State $s \gets f(s, W[i])$ + \State $i++$ + \EndWhile + \If {$i=n$} + \State \Return \True + \Else + \State \Return \False + \EndIf + \EndFunction + \end{algorithmic} +\end{algorithm} +The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$ + + + +\subsection{Automata for motif search} + +Let $M$ be a motif $M = $ ACAT. + +\begin{figure} + \centering + \includegraphics{./figures/part1/motif_search_automaton.pdf} + \caption{Motif search automaton for $M = $ ACAT} +\end{figure} + +The alphabet of motif is the same as the alphabet of the sequence. +The search automaton is complete. +If the there exists a letter $c$ in the sequence that is not +in the motif alphabet, we can make a virtual transition from +each state to the initial state whenever we encounter an unknown letter. 
+ +\begin{algorithm} + \caption{Search a motif in a sequence with an automaton} + \begin{algorithmic}[1] + \Function{SearchMotif}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)} + \Returns{A set of positions where the motif has been found} + \State $s \gets s_0$ + \State $i \gets 0$ + \State $pos \gets \{\}$ + \While {$i < n$} % $\exists f(s, S[i])$ We assume $S$ and $P$ are formed on the same alphabet, so we could remove the second check, as $A$ is complete + \If {$s \in T$} + \State $pos \gets pos \cup \{ i - m \}$ + \EndIf + \State $s \gets f(s, S[i])$ + \State $i++$ + \EndWhile + \State \Return $pos$ + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Check if the a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$ } + \begin{algorithmic}[1] + \Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)} + \Returns{A set of positions where the motif has been found} + \State $s \gets s_0$ + \State $i \gets 0$ + \State $T_{new} \gets \{\}$ + \For {$s \in S$} + \For {$a \in \Sigma$} + \For {$t \in T$} + \If {$\exists f(s, a)$ and $f(s, a) = t$} + \State $T_{new} \gets T_{new} \cup s$ + \EndIf + \EndFor + \EndFor + \EndFor + \While {$i < n$} + \If {$s \in T_{new}$} + \State \Return \True + \EndIf + \State $s \gets f(s, S[i])$ + \State $i++$ + \EndWhile + \State \Return \False + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Check if the a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$, knowing the sequence of the motif} + \begin{algorithmic}[1] + \Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)} + \Returns{A set of positions where the motif has been found} + \State $s \gets s_0$ + \State $i \gets 0$ + \While {$i < n$ and $f(s, P[m-1]) \notin T$} + \State $s \gets f(s, S[i])$ + \State 
$i++$ + \EndWhile + \If{$f(s, P[m-1]) \in T$} + \State \Return \True + \Else + \State \Return \False + \EndIf + \EndFunction + \end{algorithmic} +\end{algorithm} \ No newline at end of file diff --git a/content/chapters/part1/2.tex b/content/chapters/part1/2.tex index c5ae351..0f35a14 100644 --- a/content/chapters/part1/2.tex +++ b/content/chapters/part1/2.tex @@ -1,52 +1,27 @@ -\chapter{Matrices} - -Let $S_{1}$ and $S_{2}$ be two sequences. - -$S_{1} = $ ACGUUCC -$S_{2} = $ GUU - -\begin{table} - \centering - \begin{tabular}{c|ccccccc} - & A & C & G & U & U & C & C \\ - \hline - G & 0 & 0 & 1 & 0 & 0 & 0 & 0 \\ - U & 0 & 0 & 0 & 1 & 1 & 0 & 0 \\ - U & 0 & 0 & 0 & 1 & 1 & 0 & 0 - \end{tabular} - \caption{Comparison matrix} -\end{table} - - -Let $n = |S_{1}|$, $m = |S_{2}|$ -The complexity of this algorithm is $\mathcal{O}(n \cdot m)$ to build the matrix, and it requires also to find the diagonals and thus it is a bit less efficient than the \autoref{alg:naive-motif-matching}. - - -To find repetitions, we can use a comparison matrix with a single sequence against itself. A repetition would appear as a diagonal of ones, not on the main diagonal. - -Let $S = $ ACGUUACGUU. Let's write the comparison matrix. - - -\begin{table} - \includegraphics{./figures/part1/comparison_matrix_repetitions.pdf} - \caption{Comparison matrix for $seq = $``ACGUUACGUUGUU"} -\end{table} - - +\chapter{Longest common subsequence} +Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$. +In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$. 
\begin{algorithm} - \caption{Construct a comparison matrix} + \caption{Construct a longest common subsequence matrix} \begin{algorithmic}[1] - \Function{ComparisonMatrix}{$S$: Array($n$)} - \State $M \gets $ Array($n$, $n$) - \For{($i = 0$; $i < n$; $i++$)} - \For{$j = 0$; $j < n$; $j++$} - \If {$S[i] = S[j]$} - \State $M[i][j] = 1$ - \Else - \State $M[i][j] = 0$ - \EndIf - \EndFor + \Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} + \State $M \gets $ Array($m+1$, $n+1$) + \For{($i = 0$; $i < n+1$; $i++$)} + \For{$j = 0$; $j < m+1$; $j++$} + \If {$i = 0$ or $j = 0$} + \State $M[i][j] = 0$ + \Else + \If {$S_{1}[i] = S_{2}[j]$} + \State $match = M[i-1][j-1] + 1$ + \Else + \State $match = M[i-1][j-1]$ + \EndIf + \State $gap_{1} = M[i-1][j]$ + \State $gap_{2} = M[i][j-1]$ + \State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$ + \EndIf + \EndFor \EndFor \State \Return $M$ \EndFunction @@ -54,228 +29,88 @@ Let $S = $ ACGUUACGUU. Let's write the comparison matrix. \end{algorithm} \begin{algorithm} - \caption{Construct the top half of a comparison matrix} + \caption{Construct a longest common subsequence matrix keeping the path in memory} \begin{algorithmic}[1] - \Function{ComparisonMatrix}{$S$: Array($n$)} - \State $M \gets$ Array($n$,$n$) - \For{($i = 0$; $i < n$; $i++$)} - \For{j=i; j < n; j++} - \If {S[i] = S[j]} - \State M[i][j] = 1 + \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} + \State $M \gets $ Array($m+1$, $n+1$) + \State $P \gets $ Array($m+1$, $n+1$) + \For {($i = 0$; $i < n+1$, $i++$)} + \State $M[i][0] \gets 0$ + \EndFor + \For {($j = 0$; $j < m+1$; $j+$)} + \State $M[0][j] \gets 0$ + \EndFor + \For{($i = 1$; $i < n+1$; $i++$)} + \For{($j = 1$; $j < m+1$; $j++$)} + \If {$i = 1$ or $j = 0$} + \State $M[i][j] = 0$ \Else - \State M[i][j] = 0 + \If {$S_{1}[i-1] = S_{2}[j-1]$} + \State $M[i][j] \gets M[i-1][j-1] + 1$ + \State $P[i][j] \gets '\nwarrow'$ + \ElsIf {$M[i][j-1] \geq M[i-1][j]$} + \State $M[i][j] \gets 
M[i][j-1]$ + \State $P[i][j] \gets '\leftarrow'$ + \Else + \State $M[i][j] \gets M[i-1][j]$ + \State $P[i][j] \gets '\downarrow'$ \EndIf + \EndIf \EndFor \EndFor - \State \Return M - \EndFunction - \end{algorithmic} -\end{algorithm} - - - -\begin{algorithm} - \caption{Find repetitions (with a set of visited segments)} - \begin{algorithmic}[1] - \Function{FindRepetions}{$S$: Array($n$)} - \Returns{A list of start and end positions for repeated sequences} - \State $M = $ \Call{ComparisonMatrix}{S} - \State $pos = \{\}$ - \State $visited = \{\}$ - \For {($i_{start} = 0$; $i_{start} < n$; $i_{start}++$)} - \For {($j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$)} - \If{$M[i_{start}][j_{start}] = 1$ and $(i_{start}, j_{start}) \notin visited$} - \State $i = i_{start}$ - \State $j = j_{start}$ - \While {$M[i][j] = 1$} - \State $i++$ - \State $j++$ - \State $visited = visited \cup \{(i, j)\}$ - \EndWhile - \State $pos = pos \cup \{(i_{start}, i), (j_{start},j)\}$ - \EndIf - \EndFor - \EndFor - \EndFunction - \end{algorithmic} -\end{algorithm} - - -\begin{algorithm} - \caption{Find repetitions with an exploration of diagonals} - \begin{algorithmic}[1] - \Function{FindRepetions}{$S$: Array($n$)} - \Returns{A list of start and end positions for repeted sequences} - \State $M$ = \Call{ComparisonMatrix}{S} - \State $pos = \{\}$ - \For {($diag = 1$; $diag < n$; $diag++$)} - \State $j = diag$ - \State $i = 0$ - \While {$i < n$ and $j < n$} - \If {$M[i][j] = 1$} - \State $i_{start} = i$ - \State $j_{start} = j$ - \While {$i < n$ and $j < n$ and $M[i][j] = 1$} - \State i++ - \State j++ - \EndWhile - \State $pos = pos \cup \{((i_{start},i-1),(j_{start},j-1))\}$ - \EndIf - \State $i++$ - \State $j++$ - \State - \EndWhile - \EndFor + \State \Return $M, P$ \EndFunction \end{algorithmic} \end{algorithm} \begin{algorithm} - \caption{Find repetitions with an exploration of diagonals, without nested while} + \caption{Backtrack the longest common subsequence} 
\begin{algorithmic}[1] - \Function{FindRepetions}{$S$: Array($n$)} - \Returns{A list of start positions for repeted sequences and match length} - \State $M$ = \Call{ComparisonMatrix}{S} - \State $pos = \{\}$ - \For {($diag = 1$; $diag < n$; $diag++$)} - \State $j = diag$ - \State $i = 0$ - \State $l = 0$ - \While {$i < n$ and $j < n$} - \If {$M[i][j] = 1$} - \State $l++$ - \Else - \If {$l > 0$} - \State $pos = pos \cup \{(i-l,j-l,l)\}$ - \State $l = 0$ - \EndIf - \EndIf - \State $i++$ - \State $j++$ + \Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} + \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} + \State $L \gets Array(M[n][m])$ + \State $k \gets 0$ + \State $i \gets n$ + \State $j \gets m$ + \While{$i > 0$ and $j > 0$} + \If {$P[i][j] = '\nwarrow' $} + \State $L[k] \gets S_{1}[i]$ + \State $i--$ + \State $j--$ + \State $k++$ + \ElsIf {$P[i][j] = '\leftarrow'$} + \State $j--$ + \Else + \State $i--$ + \EndIf \EndWhile - \If {$l > 0$} - \State $pos = pos \cup \{((i-l,j-l,l))\}$ - \EndIf - \EndFor - \State \Return $pos$ + \State \Return $L$ \EndFunction \end{algorithmic} \end{algorithm} +\iffalse + \begin{algorithm} + \caption{Recursive reconstruction of the longest common subsequence} + \begin{algorithmic}[1] + \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} + \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} + \State $i \gets n$ + \State $j \gets m$ + \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$} + \EndProcedure -\begin{algorithm} - \caption{Find repetitions} - \begin{algorithmic}[1] - \Function{FindRepetions}{$S$: Array($n$)} - \Returns{A list of start and end positions for repeted sequences} - \State $M$ = \Call{ComparisonMatrix}{S} - \State $pos = \{\}$ - \For {$i_{start} = 0$; $i_{start} < n$; $i_{start}++$} - \For {$j_{start} = i_{start}+1$; $j_{start} < n$; $j_{start}++$} - \If{$M[i_{start}][j_{start}] = 1$} - \State $i = i_{start}$ - \State $j = j_{start}$ - \While {$M[i][j] = 1$} - \State $M[i][j] = 0$ \Comment{Ensure 
that the segment is not explored again} - \State $i++$ - \State $j++$ - \EndWhile - \State $pos = pos \cup \{((i_{start}, i-1), (j_{start},j-1))\}$ - \EndIf - \EndFor - \EndFor - \EndFunction - \end{algorithmic} -\end{algorithm} - - -\section{Automata} - - -An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$ -\begin{itemize} - \item $S$ the set of states - \item $s_{0}$ the initial state - \item $T$ the set of terminal states - \item $\Sigma$ the alphabet - \item $f$ the transition function $f: (s_{1}, c) \to s_{2}$ -\end{itemize} - -\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$ - -\begin{definition}[Deterministic automaton] - An automaton is deterministic, if for each couple $(p, a) \in S \times \Sigma$ it exists at most a state $q$ such as $f(p, q) = q$ -\end{definition} - -\begin{definition}[Complete automaton] - An automaton is complete, if for each couple $(p, a) \in S \times \Sigma$ it exists at least a state $q$ such as $f(p, q) = q$. -\end{definition} - -\begin{algorithm} - \caption{Check wether a word belong to a language for which we have an automaton} - \begin{algorithmic}[1] - \Function{WordInLanguage}{$W$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} - \Returns{A Boolean valued to \True{} if the word is recognized by the language automaton} - \State $s \gets s_{0}$ - \State $i \gets 0$ - \While {$i < n$} - \State $a \gets W[i]$ - \If {$\exists f(s, a)$} - \State $s \gets f(s, a)$ - \Else - \State \Return \False - \EndIf - \State i++ - \EndWhile - \If {$s \in T$} - \State \Return \True - \Else - \State \Return \False - \EndIf - \EndFunction - \end{algorithmic} -\end{algorithm} - -\section{Suffix Automaton} - -Let $S = $ AACTACT - -A suffix automata recognize all suffix of a given sequence. - - -The suffix language of $S$ is $\{S, ACTACT, CTACT, TACT, ACT, CT, T\}$. 
- - -\begin{figure} - \centering - \includegraphics{./figures/part1/minimal_suffix_automaton_exercise.pdf} - \caption{Suffix automaton for $S = $ AACTACT} -\end{figure} - -\begin{figure} - \centering - \includegraphics{./figures/part1/minimal_suffix_automaton_exercise_bis.pdf} - \caption{Suffix automaton for $S = $ TCATCATT} -\end{figure} - -\begin{algorithm} - \caption{Check if a sequences matches a motif, from a suffix automaton $\mathcal{O}(m)$, built from the automaton} - \begin{algorithmic}[1] - \Function{CheckMotifInSuffixAutomaton}{$W$: Array($m$), $A$: $\langle S, s_{0}, T, \Sigma,f \rangle$} - \Returns{Boolean valued to \True{} if the motif is in the sequence} - \State $s \gets s_{0}$ - \State $i \gets 0$ - \While {$i < m$ and $\exists f(s, W[i])$} - \State $s \gets f(s, W[i])$ - \State $i++$ - \EndWhile - \If {$i=n$} - \State \Return \True - \Else - \State \Return \False - \EndIf - \EndFunction - \end{algorithmic} -\end{algorithm} -The complexity of the pattern matching algorithm is $\mathcal{O}(n + m)$, because building the automaton is $\mathcal{O}(m)$ - + \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$} + \If {$P[i][j] = '\nwarrow' $} + \State $l \gets S_{1}[i]$ + \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$} + \State \texttt{print}($l$) + \ElsIf {$P[i][j] = '\leftarrow'$} + \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$} + \Else + \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$} + \EndIf + \EndProcedure + \end{algorithmic} + \end{algorithm} +\fi diff --git a/content/chapters/part1/3.tex b/content/chapters/part1/3.tex deleted file mode 100644 index bddbe5a..0000000 --- a/content/chapters/part1/3.tex +++ /dev/null @@ -1,84 +0,0 @@ -\chapter{Automata for motif search} - -Let $M$ be a motif $M = $ ACAT. - -\begin{figure} - \centering - \includegraphics{./figures/part1/motif_search_automaton.pdf} - \caption{Motif search automaton for $M = $ ACAT} -\end{figure} - -The alphabet of motif is the same as the alphabet of the sequence. 
-The search automaton is complete. -If the there exists a letter $c$ in the sequence that is not -in the motif alphabet, we can make a virtual transition from -each state to the initial state whenever we encounter an unknown letter. - -\begin{algorithm} - \caption{Search a motif in a sequence with an automaton} - \begin{algorithmic}[1] - \Function{SearchMotif}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)} - \Returns{A set of positions where the motif has been found} - \State $s \gets s_0$ - \State $i \gets 0$ - \State $pos \gets \{\}$ - \While {$i < n$} % $\exists f(s, S[i])$ We assume $S$ and $P$ are formed on the same alphabet, so we could remove the second check, as $A$ is complete - \If {$s \in T$} - \State $pos \gets pos \cup \{ i - m \}$ - \EndIf - \State $s \gets f(s, S[i])$ - \State $i++$ - \EndWhile - \State \Return $pos$ - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Check if the a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$ } - \begin{algorithmic}[1] - \Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)} - \Returns{A set of positions where the motif has been found} - \State $s \gets s_0$ - \State $i \gets 0$ - \State $T_{new} \gets \{\}$ - \For {$s \in S$} - \For {$a \in \Sigma$} - \For {$t \in T$} - \If {$\exists f(s, a)$ and $f(s, a) = t$} - \State $T_{new} \gets T_{new} \cup s$ - \EndIf - \EndFor - \EndFor - \EndFor - \While {$i < n$} - \If {$s \in T_{new}$} - \State \Return \True - \EndIf - \State $s \gets f(s, S[i])$ - \State $i++$ - \EndWhile - \State \Return \False - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Check if the a motif automaton recognizes only the prefix of size $m-1$ of a motif $P$ of size $m$, knowing the sequence of the motif} - \begin{algorithmic}[1] - \Function{SearchMotifLastPrefix}{$S$: Array($n$), $A$: $\langle S, 
s_{0}, T, \Sigma, f \rangle$, $P$: Array($m$)} - \Returns{A set of positions where the motif has been found} - \State $s \gets s_0$ - \State $i \gets 0$ - \While {$i < n$ and $f(s, P[m-1]) \notin T$} - \State $s \gets f(s, S[i])$ - \State $i++$ - \EndWhile - \If{$f(s, P[m-1]) \in T$} - \State \Return \True - \Else - \State \Return \False - \EndIf - \EndFunction - \end{algorithmic} -\end{algorithm} \ No newline at end of file diff --git a/content/chapters/part1/4.bak0 b/content/chapters/part1/4.bak0 deleted file mode 100644 index bf53b9c..0000000 --- a/content/chapters/part1/4.bak0 +++ /dev/null @@ -1,116 +0,0 @@ -\chapter{Longest common subsequence} - -Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$. -In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$. -\begin{algorithm} - \caption{Construct a longest common subsequence matrix} - \begin{algorithmic}[1] - \Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M \gets $ Array($m+1$, $n+1$) - \For{($i = 0$; $i < n+1$; $i++$)} - \For{$j = 0$; $j < m+1$; $j++$} - \If {$i = 0$ or $j = 0$} - \State $M[i][j] = 0$ - \Else - \If {$S_{1}[i] = S_{2}[j]$} - \State $match = M[i-1][j-1] + 1$ - \Else - \State $match = M[i-1][j-1]$ - \EndIf - \State $gap_{1} = M[i-1][j]$ - \State $gap_{2} = M[i][j-1]$ - \State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$ - \EndIf - \EndFor - \EndFor - \State \Return $M$ - \EndFunction - \end{algorithmic} - \end{algorithm} - -\begin{algorithm} - \caption{Construct a longest common subsequence matrix keeping the path in memory} - \begin{algorithmic}[1] - \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M \gets $ Array($m+1$, $n+1$) - \State $P \gets $ Array($m+1$, $n+1$) - \For {($i = 0$; $i < n+1$, $i++$)} - \State $M[i][0] \gets 0$ - \EndFor - \For {($j = 0$; $j < m+1$; $j+$)} - \State $M[0][j] \gets 0$ - \EndFor - \For{($i = 1$; $i < n+1$; $i++$)} - \For{($j = 1$; $j < m+1$; $j++$)} - \If {$i = 1$ or $j = 0$} 
- \State $M[i][j] = 0$ - \Else - \If {$S_{1}[i-1] = S_{2}[j-1]$} - \State $M[i][j] \gets M[i-1][j-1] + 1$ - \State $P[i][j] \gets '\nwarrow'$ - \ElsIf {$M[i][j-1] \geq M[i-1][j]$} - \State $M[i][j] \gets M[i][j-1]$ - \State $P[i][j] \gets '\leftarrow'$ - \Else - \State $M[i][j] \gets M[i-1][j]$ - \State $P[i][j] \gets '\downarrow'$ - \EndIf - \EndFor - \EndFor - \State \Return $M, P$ - \EndFunction - \end{algorithmic} - \end{algorithm} - -\begin{algorithm} - \caption{Backtrack the longest common subsequence} - \begin{algorithmic}[1] - \Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} - \State $L \gets Array(M[n][m])$ - \State $k \gets 0$ - \State $i \gets n$ - \State $j \gets m$ - \While{$i > 0$ and $j > 0$} - \If {$P[i][j] = '\nwarrow' $} - \State $L[k] \gets S_{1}[i]$ - \State $i--$ - \State $j--$ - \State $k++$ - \ElsIf {$P[i][j] = '\leftarrow'$} - \State $j--$ - \Else - \State $i--$ - \EndIf - \EndWhile - \State \Return $L$ - \EndFunction - \end{algorithmic} -\end{algorithm} - -\iffalse - -\begin{algorithm} - \caption{Recursive reconstruction of the longest common subsequence} - \begin{algorithmic}[1] - \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} - \State $i \gets n$ - \State $j \gets m$ - \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$} - \EndProcedure - - \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$} - \If {$P[i][j] = '\nwarrow' $} - \State $l \gets S_{1}[i]$ - \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$} - \State \texttt{print}($l$) - \ElsIf {$P[i][j] = '\leftarrow'$} - \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$} - \Else - \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$} - \EndIf - \EndProcedure - \end{algorithmic} -\end{algorithm} -\fi diff --git a/content/chapters/part1/4.tex b/content/chapters/part1/4.tex deleted file mode 100644 index 0f35a14..0000000 --- a/content/chapters/part1/4.tex +++ 
/dev/null @@ -1,116 +0,0 @@ -\chapter{Longest common subsequence} - -Let $S_{1} = \text{ATCTGAT}$ and $S_{2} = \text{TGCATA}$. -In this case the longest common subsequence of $S_{1}$ and $S_{2}$ is $TCTA$. -\begin{algorithm} - \caption{Construct a longest common subsequence matrix} - \begin{algorithmic}[1] - \Function{LCSQ\_Matrix}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M \gets $ Array($m+1$, $n+1$) - \For{($i = 0$; $i < n+1$; $i++$)} - \For{$j = 0$; $j < m+1$; $j++$} - \If {$i = 0$ or $j = 0$} - \State $M[i][j] = 0$ - \Else - \If {$S_{1}[i] = S_{2}[j]$} - \State $match = M[i-1][j-1] + 1$ - \Else - \State $match = M[i-1][j-1]$ - \EndIf - \State $gap_{1} = M[i-1][j]$ - \State $gap_{2} = M[i][j-1]$ - \State $M[i][j] = \max \{ match, gap_{1}, gap_{2}\}$ - \EndIf - \EndFor - \EndFor - \State \Return $M$ - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Construct a longest common subsequence matrix keeping the path in memory} - \begin{algorithmic}[1] - \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M \gets $ Array($m+1$, $n+1$) - \State $P \gets $ Array($m+1$, $n+1$) - \For {($i = 0$; $i < n+1$, $i++$)} - \State $M[i][0] \gets 0$ - \EndFor - \For {($j = 0$; $j < m+1$; $j+$)} - \State $M[0][j] \gets 0$ - \EndFor - \For{($i = 1$; $i < n+1$; $i++$)} - \For{($j = 1$; $j < m+1$; $j++$)} - \If {$i = 1$ or $j = 0$} - \State $M[i][j] = 0$ - \Else - \If {$S_{1}[i-1] = S_{2}[j-1]$} - \State $M[i][j] \gets M[i-1][j-1] + 1$ - \State $P[i][j] \gets '\nwarrow'$ - \ElsIf {$M[i][j-1] \geq M[i-1][j]$} - \State $M[i][j] \gets M[i][j-1]$ - \State $P[i][j] \gets '\leftarrow'$ - \Else - \State $M[i][j] \gets M[i-1][j]$ - \State $P[i][j] \gets '\downarrow'$ - \EndIf - \EndIf - \EndFor - \EndFor - \State \Return $M, P$ - \EndFunction - \end{algorithmic} -\end{algorithm} - -\begin{algorithm} - \caption{Backtrack the longest common subsequence} - \begin{algorithmic}[1] - \Function{LCSQ}{$S_{1}$: Array($n$), 
$S_{2}$: Array($m$)} - \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} - \State $L \gets Array(M[n][m])$ - \State $k \gets 0$ - \State $i \gets n$ - \State $j \gets m$ - \While{$i > 0$ and $j > 0$} - \If {$P[i][j] = '\nwarrow' $} - \State $L[k] \gets S_{1}[i]$ - \State $i--$ - \State $j--$ - \State $k++$ - \ElsIf {$P[i][j] = '\leftarrow'$} - \State $j--$ - \Else - \State $i--$ - \EndIf - \EndWhile - \State \Return $L$ - \EndFunction - \end{algorithmic} -\end{algorithm} - -\iffalse - \begin{algorithm} - \caption{Recursive reconstruction of the longest common subsequence} - \begin{algorithmic}[1] - \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} - \State $i \gets n$ - \State $j \gets m$ - \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$} - \EndProcedure - - \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$} - \If {$P[i][j] = '\nwarrow' $} - \State $l \gets S_{1}[i]$ - \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$} - \State \texttt{print}($l$) - \ElsIf {$P[i][j] = '\leftarrow'$} - \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$} - \Else - \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$} - \EndIf - \EndProcedure - \end{algorithmic} - \end{algorithm} -\fi diff --git a/content/chapters/part2/0.tex b/content/chapters/part2/0.tex index 7a7255c..036319b 100644 --- a/content/chapters/part2/0.tex +++ b/content/chapters/part2/0.tex @@ -1,6 +1,6 @@ \part{Sequence alignment} -\section{Simililarity between sequences} +\chapter{Definitions} A function $d$ is a distance between two sequences $x$ and $y$ in an alphabet $\Sigma$ if \begin{itemize} diff --git a/content/chapters/part2/1.tex b/content/chapters/part2/1.tex index 6acf367..0986eae 100644 --- a/content/chapters/part2/1.tex +++ b/content/chapters/part2/1.tex @@ -1,6 +1,4 @@ -\chapter{Section alignment} - -\section{Needleman - Wunsch algorithm} +\chapter{Sequence alignment} \begin{algorithm} \caption{Needleman-Wunsch Algorithm} @@ -82,7 
+80,7 @@ \end{algorithm} \begin{algorithm} - \caption{Needleman-Wunsch Algorithm (Backtrack) } + \caption{Needleman-Wunsch Algorithm, using proper notation } \begin{algorithmic}[1] \Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)} \State $M = $ Array($m+1$, $n+1$) @@ -143,3 +141,82 @@ \State \Call{BacktrackAlignment}{$S_{1}$, $S_{2}$} \end{algorithmic} \end{algorithm} + + +\begin{algorithm} + \caption{Backtrack a single alignment in a recursive way} + \begin{algorithmic}[1] + \State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), + \Function{BacktrackRecurse}{$i$, $j$} + \If {$i > 0$ and $j > 0$} + \State $substitute = M[i-1][j-1]$ + \State $delete = M[i-1][j]$ + \State $insert = M[i][j-1]$ + \State $min = \min \{ substitute, delete, insert \}$ + \If {$substitute = min$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$} + \State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$ + \ElsIf {$delete = min$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$} + \State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$ + \Else + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$} + \State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$ + \EndIf + \ElsIf {$i > 0$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$} + \State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$ + \ElsIf {$j > 0$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$} + \State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$ + \Else + \State \Return [] + \EndIf + \State \Return $z$ + \EndFunction + \Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} + \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$} + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} +
\caption{Backtrack all the optimum alignments in a recursive way} + \begin{algorithmic}[1] + \Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$} + \If {$i > 0$ and $j > 0$} + \State $substitute = M[i-1][j-1]$ + \State $delete = M[i-1][j]$ + \State $insert = M[i][j-1]$ + \State $min = \min \{ substitute, delete, insert \}$ + \If {$substitute = min$} + \State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$} + \EndIf + \If {$delete = min$} + \State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$} + \EndIf + \If {$insert = min$} + \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} + \EndIf + \ElsIf {$i > 0$} + \State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$} + \ElsIf {$j > 0$} + \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} + \Else + \State \Call{print}{$z$} + \EndIf + \EndProcedure + \Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} + \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []} + \EndProcedure + \end{algorithmic} +\end{algorithm} diff --git a/content/introduction.tex b/content/introduction.tex index e69de29..b0eb2f0 100755 --- a/content/introduction.tex +++ b/content/introduction.tex @@ -0,0 +1,64 @@ +\part*{Introduction} +\chapter*{Back to basics} + +\begin{algorithm} + \caption{Search an element in an array} + \begin{algorithmic}[1]
+ \Function{Search}{$A$: Array($n$), $E$: element} + \For {($i = 0$; $i < n$; $i++$)} + \If {$A[i] = E$} + \State \Return \True + \EndIf + \EndFor + \State \Return \False + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Search an element in an array using a while loop} + \begin{algorithmic}[1] + \Function{Search}{$A$: Array($n$), $E$: element} + \State $i \gets 0$ + \While {$i < n$} + \If {$A[i] = E$} + \State \Return \True + \EndIf + \State $i \gets i + 1$ + \EndWhile + \State + \Return + \False + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Search an element in an array using a while loop (bis)} + \begin{algorithmic}[1] + \Function{Search}{$A$: Array($n$), $E$: element} + % \Comment{Version ``preferred'' by the professor} + \State $i \gets 0$ + \While {$i < n$ and $A[i] \neq E$} + \State $i \gets i + 1$ + \EndWhile + \If {$i = n$} + \State + \Return \False \Else \State \Return \True \EndIf + \EndFunction + \end{algorithmic} +\end{algorithm} + +\begin{algorithm} + \caption{Count the occurrences of an element in an array} + \begin{algorithmic}[1] + \Function{Count}{$A$: Array($n$), $E$: element} \State $c \gets 0$ + \For{($i = 0$; $i < n$; $i++$)} + \If {$A[i] = E$} + \State $c \gets c + 1$ + \EndIf + \EndFor + \State \Return $c$ + \EndFunction + \end{algorithmic} +\end{algorithm} diff --git a/main.pdf b/main.pdf index f9cf970..2f81c3d 100644 --- a/main.pdf +++ b/main.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:129c003255cb3de4bdd4777c1ac9769a6b4cd1633a15cade1611eb3e8a2ad71a -size 333748 +oid sha256:dbbf82cae2b489561f68b9ae243686124a41f634e527717a82979966e728a612 +size 335755 diff --git a/tmp.pdf b/tmp.pdf index 9f6b6c3..e4d06c9 100644 --- a/tmp.pdf +++ b/tmp.pdf @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf9d5f869f3ee9e50f21d2cb73785aa5336477c399af4ce70ebaab1f4d6d557c -size 56195 +oid
sha256:895284137c8cb7723fec3e88d18ac4a36db3920c84867e358220af7229ff0c0d +size 73988 diff --git a/tmp.tex b/tmp.tex index 1b2b75b..5d55adb 100644 --- a/tmp.tex +++ b/tmp.tex @@ -9,95 +9,98 @@ \algnewcommand{\NIL}{\textbf{\texttt{NIL}}} \algnewcommand{\NULL}{\textbf{\texttt{null}}} \input{definitions.tex} - +\usepackage{mathtools} \begin{document} \begin{algorithm} - \caption{Construct a longest common subsequence matrix keeping the path in memory} + \caption{Backtrack a single alignment in a recursive way} \begin{algorithmic}[1] - \Function{LCSQ\_Matrix\_Path}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M \gets $ Array($m+1$, $n+1$) - \State $P \gets $ Array($m+1$, $n+1$) - \For {($i = 0$; $i < n+1$, $i++$)} - \State $M[i][0] \gets 0$ - \EndFor - \For {($j = 0$; $j < m+1$; $j+$)} - \State $M[0][j] \gets 0$ - \EndFor - \For{($i = 1$; $i < n+1$; $i++$)} - \For{($j = 1$; $j < m+1$; $j++$)} - \If {$i = 1$ or $j = 0$} - \State $M[i][j] = 0$ - \Else - \If {$S_{1}[i-1] = S_{2}[j-1]$} - \State $M[i][j] \gets M[i-1][j-1] + 1$ - \State $P[i][j] \gets '\nwarrow'$ - \ElsIf {$M[i][j-1] \geq M[i-1][j]$} - \State $M[i][j] \gets M[i][j-1]$ - \State $P[i][j] \gets '\leftarrow'$ + \State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), + \Function{BacktrackRecurse}{$i$, $j$} + \If {$i > 0$ and $j > 0$} + \State $substitute = M[i-1][j-1]$ + \State $delete = M[i-1][j]$ + \State $insert = M[i][j-1]$ + \State $min = \min \{ substitute, delete, insert \}$ + \If {$substitute = min$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$} + \State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$ + \ElsIf {$delete = min$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$} + \State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$ + \Else + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$} + \State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} 
\circ z$ + \EndIf + \ElsIf {$i > 0$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$} + \State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$ + \ElsIf {$j > 0$} + \State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$} + \State $z = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix} \circ z$ \Else - \State $M[i][j] \gets M[i-1][j]$ - \State $P[i][j] \gets '\downarrow'$ + \State \Return [] \EndIf -\EndIf - \EndFor - \EndFor - \State \Return $M, P$ + \State \Return $z$ + \EndFunction + \Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} + \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$} \EndFunction \end{algorithmic} \end{algorithm} +\begin{algorithm} + \caption{Backtrack all the optimum alignments in a recursive way} + \begin{algorithmic}[1] + \Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$, $z$} + \If {$i > 0$ and $j > 0$} + \State $substitute = M[i-1][j-1]$ + \State $delete = M[i-1][j]$ + \State $insert = M[i][j-1]$ + \State $min = \min \{ substitute, delete, insert \}$ + \If {$substitute = min$} + \State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$} + \EndIf + \If {$delete = min$} + \State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$} + \EndIf + \If {$insert = min$} + \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} + \EndIf + \ElsIf {$i > 0$} + \State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$} +
\ElsIf {$j > 0$} + \State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$ + \State $z' = value \circ z$ + \State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$} + \Else + \State \Call{print}{$z$} + \EndIf + \EndProcedure + \Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)} + \State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []} + \EndProcedure + \end{algorithmic} +\end{algorithm} + +\end{document} + +\end{document} \iffalse - \begin{algorithm} - \caption{Backtrack the longest common subsequence} - \begin{algorithmic}[1] - \Function{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} - \State $L \gets Array(M[n][m])$ - \State $k \gets 0$ - \State $i \gets n$ - \State $j \gets m$ - \While{$i > 0$ and $j > 0$} - \If {$P[i][j] = '\nwarrow' $} - \State $L[k] \gets S_{1}[i]$ - \State $i--$ - \State $j--$ - \State $k++$ - \ElsIf {$P[i][j] = '\leftarrow'$} - \State $j--$ - \Else - \State $i--$ - \EndIf - \EndWhile - \State \Return $L$ - \EndFunction - \end{algorithmic} - \end{algorithm} - - \begin{algorithm} - \caption{Recursive reconstruction of the longest common subsequence} - \begin{algorithmic}[1] - \Procedure{LCSQ}{$S_{1}$: Array($n$), $S_{2}$: Array($m$)} - \State $M, P \gets $ \Call{LCSQ\_Matrix}{$S_{1}$, $S_{2}$} - \State $i \gets n$ - \State $j \gets m$ - \State \Call{Aux}{$P$, $S_{1}$, $i$, $j$} - \EndProcedure - - \Procedure{Aux}{$P$: Array($n+1$, $m+1$), $S_{1}$: Array($n$), $i$, $j$} - \If {$P[i][j] = '\nwarrow' $} - \State $l \gets S_{1}[i]$ - \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j-1$} - \State \texttt{print}($l$) - \ElsIf {$P[i][j] = '\leftarrow'$} - \State \Call{Aux}{$P$, $S_{1}$, $i$, $j-1$} - \Else - \State \Call{Aux}{$P$, $S_{1}$, $i-1$, $j$} - \EndIf - \EndProcedure - \end{algorithmic} - \end{algorithm} + \Function{AppendToAll}{$value$, $set$} + \Returns {A new set with all elements from $set$ with value
appended first to them } + \State $res = \{\}$ + \For {$element \in set$} + \State $element = value \circ element$ + \State $res = res \cup element$ + \EndFor + \State \Return $res$ + \EndFunction \fi -\end{document} - -\end{document}