Compare commits
No commits in common. "020c1b749745ec1b20e5ba1a9c5f4b540baa397b" and "c9b2710f98891ee2a43f25fdd31b4f65c65418d5" have entirely different histories.
020c1b7497
...
c9b2710f98
|
@ -1,5 +1,5 @@
|
|||
build/
|
||||
**/*.bak*
|
||||
**/.bak*
|
||||
.auctex-auto
|
||||
|
||||
## Core latex/pdflatex auxiliary files:
|
||||
|
|
|
@ -279,11 +279,11 @@ An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$
|
|||
\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$
|
||||
|
||||
\begin{definition}[Deterministic automaton]
|
||||
An automaton is deterministic if, for each pair $(p, a) \in S \times \Sigma$, there exists at most one state $q$ such that $f(p, a) = q$.
|
||||
An automaton is deterministic if, for each pair $(p, a) \in S \times \Sigma$, there exists at most one state $q$ such that $f(p, a) = q$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}[Complete automaton]
|
||||
An automaton is complete if, for each pair $(p, a) \in S \times \Sigma$, there exists at least one state $q$ such that $f(p, a) = q$.
|
||||
An automaton is complete if, for each pair $(p, a) \in S \times \Sigma$, there exists at least one state $q$ such that $f(p, a) = q$.
|
||||
\end{definition}
|
||||
|
||||
\begin{algorithm}
|
||||
|
@ -439,4 +439,4 @@ each state to the initial state whenever we encounter an unknown letter.
|
|||
\EndIf
|
||||
\EndFunction
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
\end{algorithm}
|
|
@ -1,6 +1,5 @@
|
|||
\chapter{Sequence alignment}
|
||||
|
||||
\iffalse
|
||||
\begin{algorithm}
|
||||
\caption{Needleman-Wunsch Algorithm}
|
||||
\begin{algorithmic}[1]
|
||||
|
@ -78,12 +77,10 @@
|
|||
\State \Call{FillMatrix}{$S_{1}$, $S_{2}$}
|
||||
\State \Call{ShowAlignment}{$S_{1}$, $S_{2}$}
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
\fi
|
||||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Needleman-Wunsch Algorithm, Build the matrix}
|
||||
\caption{Needleman-Wunsch Algorithm, using proper notation }
|
||||
\begin{algorithmic}[1]
|
||||
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||
\State $M = $ Array($m+1$, $n+1$)
|
||||
|
@ -109,7 +106,7 @@
|
|||
\end{algorithm}
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
|
||||
\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
|
||||
\begin{algorithmic}[1]
|
||||
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
|
||||
\State $alignment = LinkedList$
|
||||
|
@ -152,10 +149,14 @@
|
|||
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
|
||||
\Function{BacktrackRecurse}{$i$, $j$}
|
||||
\If {$i > 0$ and $j > 0$}
|
||||
\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
|
||||
\State $substitute = M[i-1][j-1]$
|
||||
\State $delete = M[i-1][j]$
|
||||
\State $insert = M[i][j-1]$
|
||||
\State $min = \min \{ substitute, delete, insert \}$
|
||||
\If {$substitute = min$}
|
||||
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
|
||||
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
|
||||
\ElsIf {$M[i-1][j] + gap\_penalty = M[i][j]$}
|
||||
\ElsIf {$delete = min$}
|
||||
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
|
||||
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
|
||||
\Else
|
||||
|
@ -171,9 +172,7 @@
|
|||
\Else
|
||||
\State \Return []
|
||||
\EndIf
|
||||
\Else
|
||||
\State \Return $z$
|
||||
\EndIf
|
||||
\EndFunction
|
||||
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
|
||||
|
@ -186,17 +185,21 @@
|
|||
\begin{algorithmic}[1]
|
||||
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
|
||||
\If {$i > 0$ and $j > 0$}
|
||||
\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
|
||||
\State $substitute = M[i-1][j-1]$
|
||||
\State $delete = M[i-1][j]$
|
||||
\State $insert = M[i][j-1]$
|
||||
\State $min = \min \{ substitute, delete, insert \}$
|
||||
\If {$substitute = min$}
|
||||
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
|
||||
\State $z' = value \circ z$
|
||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
|
||||
\EndIf
|
||||
\If {$M[i-1][j] + gap\_penalty = M[i][j]$}
|
||||
\If {$delete = min$}
|
||||
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
|
||||
\State $z' = value \circ z$
|
||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
|
||||
\EndIf
|
||||
\If {$M[i][j-1] + gap\_penalty = M[i][j]$}
|
||||
\If {$insert = min$}
|
||||
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||
\State $z' = value \circ z$
|
||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||
|
@ -209,20 +212,11 @@
|
|||
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
|
||||
\State $z' = value \circ z$
|
||||
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
|
||||
\Else
|
||||
\State \Call{print}{$z$}
|
||||
\EndIf
|
||||
\State \Call{print}{$z$}
|
||||
\EndProcedure
|
||||
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
|
||||
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
|
||||
\EndProcedure
|
||||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics{figures/part2/needle.pdf}
|
||||
\caption{Needleman-Wunsch global alignment matrix with an example of optimal path.}
|
||||
\end{figure}
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
needle = require("./needle")
|
||||
|
||||
--- Return a shallow copy of `t`.
-- Every key/value pair visited by `pairs(t)` is copied into a fresh table;
-- values that are themselves tables are shared with the original, not cloned.
-- NOTE(review): this adds a field to the standard `table` library; the name
-- is kept unchanged so existing callers keep working.
-- @param t table to copy
-- @return a new table with the same top-level contents
function table.shallow_copy(t)
  local copy = {}
  for key, value in pairs(t) do
    copy[key] = value
  end
  return copy
end
|
||||
|
||||
-- Build a new alignment made of `column` followed by every column of
-- `alignment`; the input alignment is left untouched so that sibling
-- branches of the search never share a mutable table.
local function prepend_column(alignment, column)
  local extended = { column }
  for index = 1, #alignment do
    extended[index + 1] = alignment[index]
  end
  return extended
end

--- Trace the depth-first enumeration of the optimal alignments of two strings.
-- Starting from the bottom-right cell, every predecessor cell that explains
-- the current score (substitution, deletion, insertion) is pushed on a LIFO
-- work list together with the alignment built so far. Once a border of the
-- matrix is reached only the remaining gap move is followed.
-- Assumes `matrix` is indexed from 0 and was filled so that `matrix[i][j]`
-- scores the first `i` characters of `seq1` against the first `j` characters
-- of `seq2` (as `needle_matrix` does) -- TODO confirm for other producers.
-- @param matrix dynamic-programming score matrix
-- @param seq1 first sequence (string)
-- @param seq2 second sequence (string)
-- @return list of visited states `{i, j, alignment}` in visiting order, where
--         `alignment` is a list of `{char_from_seq1, char_from_seq2}` columns
--         and '-' stands for a gap
function multiple_path_backtrack_trace(matrix, seq1, seq2)
  local gap = needle.gap_penalty
  local trace = {}
  -- Pending states; pushing and popping at the end keeps both operations O(1)
  -- while preserving last-in-first-out order.
  local stack = { { string.len(seq1), string.len(seq2), {} } }

  while #stack > 0 do
    local state = table.remove(stack)
    trace[#trace + 1] = state
    local i, j, alignment = state[1], state[2], state[3]

    if i > 0 and j > 0 then
      -- Cell (i, j) corresponds to the i-th / j-th characters (1-based).
      local nt1 = string.sub(seq1, i, i)
      local nt2 = string.sub(seq2, j, j)
      local score = matrix[i][j]
      if score == matrix[i - 1][j - 1] + needle.sub(nt1, nt2) then
        stack[#stack + 1] = { i - 1, j - 1, prepend_column(alignment, { nt1, nt2 }) }
      end
      if score == matrix[i - 1][j] + gap then
        stack[#stack + 1] = { i - 1, j, prepend_column(alignment, { nt1, '-' }) }
      end
      if score == matrix[i][j - 1] + gap then
        stack[#stack + 1] = { i, j - 1, prepend_column(alignment, { '-', nt2 }) }
      end
    elseif i > 0 then
      -- Left border: only characters of seq1 remain, each facing a gap.
      local nt1 = string.sub(seq1, i, i)
      stack[#stack + 1] = { i - 1, j, prepend_column(alignment, { nt1, '-' }) }
    elseif j > 0 then
      -- Top border: only characters of seq2 remain, each facing a gap.
      local nt2 = string.sub(seq2, j, j)
      stack[#stack + 1] = { i, j - 1, prepend_column(alignment, { '-', nt2 }) }
    end
  end

  return trace
end
|
||||
|
||||
--- Render an alignment as a two-row LaTeX `pmatrix`.
-- The first row lists the first element of every column, the second row the
-- second element; cells are separated by " & ".
-- @param alignment list of two-element columns `{top, bottom}`
-- @return LaTeX source of the matrix (string)
function repr_alignment(alignment)
  local top, bottom = {}, {}
  for index, vector in ipairs(alignment) do
    top[index] = vector[1]
    bottom[index] = vector[2]
  end
  -- Joining once per row avoids rebuilding the string for every cell.
  return [[\begin{pmatrix}]]
    .. table.concat(top, " & ")
    .. [[\\]] .. " \n"
    .. table.concat(bottom, " & ")
    .. [[\end{pmatrix}]]
end
|
||||
|
||||
--- Render a backtracking trace as a one-column LaTeX `tabular`.
-- Each visited state becomes one row of the form
-- `$\langle i, j, <alignment matrix>\rangle$` followed by `\hline`.
-- @param trace list of states `{i, j, alignment}`
-- @return LaTeX source of the table (string)
function trace_repr(trace)
  local rows = { [[\begin{tabular}{|c|} \\ \hline ]] }
  for _, call in ipairs(trace) do
    local i, j, alignment = call[1], call[2], call[3]
    rows[#rows + 1] = [[ $\langle ]] .. i .. ", " .. j .. ", "
      .. repr_alignment(alignment) .. [[\rangle$ ]]
    rows[#rows + 1] = [[\\ \hline]]
  end
  rows[#rows + 1] = [[\end{tabular}]]
  return table.concat(rows)
end
|
||||
|
||||
--- Script entry point: align two fixed sequences and print the search trace.
-- Prints the number of visited states, then the LaTeX table describing them.
function main()
  local seq1 = "ATCTGAT"
  local seq2 = "TGCATA"
  local matrix = needle.needle_matrix(seq1, seq2)
  local trace = multiple_path_backtrack_trace(matrix, seq1, seq2)
  print(#trace)
  print(trace_repr(trace))
end

main()
|
|
@ -1,197 +0,0 @@
|
|||
-- Scoring scheme read by the functions of this module; scores are minimised.
gap_penalty = 1       -- cost added for a deletion or an insertion
mismatch_penalty = 1  -- cost added when the two compared characters differ
match_penalty = 0     -- cost added when the two compared characters are equal
|
||||
|
||||
--- Build the Needleman-Wunsch score matrix of two strings.
-- The result has (n1 + 1) x (n2 + 1) cells indexed from 0, where n1 and n2
-- are the lengths of `seq1` and `seq2`. `matrix[i][j]` is the minimal cost of
-- aligning the first `i` characters of `seq1` with the first `j` characters
-- of `seq2` under the module-level penalties.
-- @param seq1 first sequence (string)
-- @param seq2 second sequence (string)
-- @return table of rows, `matrix[0..n1][0..n2]`
function needle_matrix(seq1, seq2)
  local n1 = string.len(seq1)
  local n2 = string.len(seq2)
  local matrix = {}

  -- Row 0: aligning the empty prefix of seq1 costs one gap per character.
  matrix[0] = { [0] = 0 }
  for j = 1, n2 do
    matrix[0][j] = j * gap_penalty
  end

  for i = 1, n1 do
    local above = matrix[i - 1]
    -- Column 0 of each row is the same border condition for seq2.
    local row = { [0] = i * gap_penalty }
    local char1 = string.sub(seq1, i, i)
    for j = 1, n2 do
      local diagonal
      if char1 == string.sub(seq2, j, j) then
        diagonal = above[j - 1] + match_penalty
      else
        diagonal = above[j - 1] + mismatch_penalty
      end
      local delete = above[j] + gap_penalty
      local insert = row[j - 1] + gap_penalty
      row[j] = math.min(diagonal, delete, insert)
    end
    matrix[i] = row
  end

  return matrix
end
|
||||
|
||||
--- Emit TikZ code for an empty alignment grid with one backtracking path.
-- Output is sent line by line through `tex.print`: one boxed node per grid
-- cell, one label per character of each sequence, then a gray poly-line that
-- starts at cell (n1-1, n2-1) and repeatedly moves to the cheapest of the
-- diagonal, upper and left neighbours.
-- @param seq1 first sequence (string), drawn along the x axis
-- @param seq2 second sequence (string), drawn along the y axis
function draw_needle_matrix(seq1, seq2)
  local matrix = needle_matrix(seq1, seq2)
  local n1 = string.len(seq1)
  local n2 = string.len(seq2)

  -- Grid cells.
  -- NOTE(review): the node body is emitted empty, as before; the original
  -- call also passed matrix[i][j] without a placeholder -- confirm whether the
  -- score was meant to be displayed.
  for i = 0, n1 - 1 do
    for j = 0, n2 - 1 do
      tex.print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j))
    end
  end

  -- Sequence labels.
  for i = 1, n1 do
    tex.print(string.format("\\node at (%d, -%d) {%s};", i - 1, -1, string.sub(seq1, i, i)))
  end
  for i = 1, n2 do
    tex.print(string.format("\\node at (%d, -%d) {%s};", -1, i - 1, string.sub(seq2, i, i)))
  end

  -- Path from the bottom-right cell towards the top-left one.
  local i, j = n1 - 1, n2 - 1
  tex.print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
  while i > 0 and j > 0 do
    local diagonal = matrix[i - 1][j - 1]
    local up = matrix[i - 1][j]
    local left = matrix[i][j - 1]
    local value = math.min(diagonal, up, left)
    -- Ties are resolved in favour of the diagonal, then the upper neighbour.
    if value == diagonal then
      i = i - 1
      j = j - 1
    elseif value == up then
      i = i - 1
    else
      j = j - 1
    end
    tex.print(string.format(" (%d, -%d) -- ", i, j))
  end
  tex.print("(0, 0) -- (-1, 1);")
end
|
||||
|
||||
-- Return true when `val` equals one of the sequence elements of `tab`
-- (those visited by `ipairs`), false otherwise.
local function has_value(tab, val)
  for _, value in ipairs(tab) do
    if value == val then
      return true
    end
  end
  return false
end
|
||||
|
||||
--- Substitution cost of aligning `a` with `b`.
-- @param a first character
-- @param b second character
-- @return `match_penalty` when the two values are equal, `mismatch_penalty`
--         otherwise
function sub(a, b)
  if a == b then
    return match_penalty
  end
  return mismatch_penalty
end
|
||||
|
||||
--- Tell whether cell (i, j) could have been reached from cell (k, l)
-- while the dynamic-programming matrix was being filled.
-- A diagonal move is accepted when the score difference equals the
-- substitution cost of the characters at positions `i` and `j`; any other
-- move is accepted when the score difference equals `gap_penalty`.
-- The function does not itself check that (k, l) is adjacent to (i, j);
-- callers are expected to pass neighbouring cells only.
-- @param matrix score matrix indexed from 0
-- @param i row of the target cell
-- @param j column of the target cell
-- @param k row of the candidate predecessor
-- @param l column of the candidate predecessor
-- @param seq1 first sequence (string)
-- @param seq2 second sequence (string)
-- @return true or false
function check_path(matrix, i, j, k, l, seq1, seq2)
  local score = matrix[i][j]
  if i == k + 1 and j == l + 1 then
    -- diagonal
    local cost = sub(string.sub(seq1, i, i), string.sub(seq2, j, j))
    return score == matrix[k][l] + cost
  end
  -- Use the configured gap cost so this test agrees with how the matrix
  -- was filled, whatever value `gap_penalty` holds.
  return score == matrix[k][l] + gap_penalty
end
|
||||
|
||||
--- Build TikZ code showing the score matrix as a graph.
-- The returned string contains, in this order: one node per matrix cell
-- holding its score (cells on one backtracking path are filled gray), the
-- character labels of both sequences, and an arrow from every cell to each
-- neighbouring cell accepted by `check_path`.
-- @param seq1 first sequence (string), laid out along the x axis
-- @param seq2 second sequence (string), laid out along the y axis
-- @return TikZ source (string)
function draw_needle_matrix_graph(seq1, seq2)
  local matrix = needle_matrix(seq1, seq2)
  local n1 = string.len(seq1)
  local n2 = string.len(seq2)

  -- Candidate moves, tried in this order: diagonal, up, left.
  local steps = {
    { -1, -1 },
    { -1, 0 },
    { 0, -1 },
  }

  -- Node name of a cell; kept local so it does not leak into the globals.
  local function coordinate(i, j)
    return i .. "_" .. j
  end

  -- Walk one path back from the bottom-right cell, recording visited cells
  -- as keys so membership can be tested without scanning a list.
  local on_path = {}
  do
    local i, j = n1, n2
    while i >= 0 and j >= 0 do
      on_path[coordinate(i, j)] = true
      -- Falls back to the diagonal move when no neighbour is accepted, which
      -- is what takes the walk out of the matrix after cell (0, 0).
      local chosen = steps[1]
      for _, step in ipairs(steps) do
        local k = i + step[1]
        local l = j + step[2]
        if k >= 0 and l >= 0 and check_path(matrix, i, j, k, l, seq1, seq2) then
          chosen = step
          break
        end
      end
      i = i + chosen[1]
      j = j + chosen[2]
    end
  end

  -- Pieces of TikZ code, joined once at the end.
  local code = {}

  -- Draw the matrix as tikz nodes holding the matrix values.
  for i = 0, n1 do
    for j = 0, n2 do
      local options = ""
      if on_path[coordinate(i, j)] then
        options = "[fill=gray, draw, minimum size=1]"
      end
      code[#code + 1] = "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
    end
  end

  -- Add nucleotide labels.
  for i = 1, n1 do
    local nt = string.sub(seq1, i, i)
    code[#code + 1] = "\\node at (" .. i .. "," .. 1 .. ")" .. "{$" .. nt .. "$};"
  end
  for i = 1, n2 do
    local nt = string.sub(seq2, i, i)
    code[#code + 1] = "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ " .. nt .. "$};"
  end

  -- Arrows from every cell to each predecessor it may have come from.
  for i = 0, n1 do
    for j = 0, n2 do
      for _, step in ipairs(steps) do
        local k = i + step[1]
        local l = j + step[2]
        if k >= 0 and l >= 0 and check_path(matrix, i, j, k, l, seq1, seq2) then
          code[#code + 1] = "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate(k, l) .. ");"
        end
      end
    end
  end

  return table.concat(code)
end
|
||||
|
||||
-- print(draw_needle_matrix_graph("ATGC", "TAGCGA"))
|
||||
|
||||
-- Public interface of the module.
return {
  draw = draw_needle_matrix_graph,
  gap_penalty = gap_penalty,
  mismatch_penalty = mismatch_penalty,
  match_penalty = match_penalty,
  needle_matrix = needle_matrix,
  sub = sub,
}
|
BIN
figures/part2/needle.pdf (Stored with Git LFS)
BIN
figures/part2/needle.pdf (Stored with Git LFS)
Binary file not shown.
|
@ -1,20 +0,0 @@
|
|||
\documentclass[tikz]{standalone}
|
||||
|
||||
\usepackage{luacode}
|
||||
\usepackage{tikz}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{tikzpicture}
|
||||
\newcommand{\seqone}{TGCATA}
|
||||
\newcommand{\seqtwo}{ATCTGAT}
|
||||
\begin{luacode}
|
||||
local needle = require("needle")
|
||||
seq1 = \luastring{\seqone}
|
||||
seq2 = \luastring{\seqtwo}
|
||||
local tikz_code = needle.draw(seq1, seq2)
|
||||
tex.print(tikz_code)
|
||||
\end{luacode}
|
||||
\end{tikzpicture}
|
||||
|
||||
\end{document}
|
Loading…
Reference in New Issue