Compare commits

..

3 Commits

Author SHA1 Message Date
Samuel Ortion 020c1b7497 fix: Definition on automata 2024-04-02 15:05:56 +02:00
Samuel Ortion 80f4669d23 fix: Needleman-Wunsch backtrack was faulty 2024-04-02 14:51:44 +02:00
Samuel Ortion 1953baa198 fix: Use correct backtrack algo
Co-authored-by: Naïa Périnelle <naia.perinelle@laposte.net>
2024-03-29 10:05:13 +01:00
8 changed files with 354 additions and 23 deletions

2
.gitignore vendored
View File

@ -1,5 +1,5 @@
build/
**/.bak*
**/*.bak*
.auctex-auto
## Core latex/pdflatex auxiliary files:

View File

@ -279,11 +279,11 @@ An automaton is a tuple $\langle S, s_{0}, T, \Sigma,f\rangle$
\paragraph{Example} Given the language $L$ on the alphabet $\Sigma = \{A, C, T\}$, $L = \{A^{*}, CTT, CA^{*}\}$
\begin{definition}[Deterministic automaton]
An automaton is deterministic, if for each couple $(p, a) \in S \times \Sigma$ it exists at most a state $q$ such as $f(p, q) = q$
An automaton is deterministic, if for each couple $(p, a) \in S \times \Sigma$ it exists at most a state $q$ such as $f(p, a) = q$
\end{definition}
\begin{definition}[Complete automaton]
An automaton is complete, if for each couple $(p, a) \in S \times \Sigma$ it exists at least a state $q$ such as $f(p, q) = q$.
An automaton is complete, if for each couple $(p, a) \in S \times \Sigma$ it exists at least a state $q$ such as $f(p, a) = q$.
\end{definition}
\begin{algorithm}

View File

@ -1,5 +1,6 @@
\chapter{Sequence alignment}
\iffalse
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm}
\begin{algorithmic}[1]
@ -79,8 +80,10 @@
\end{algorithmic}
\end{algorithm}
\fi
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, using proper notation }
\caption{Needleman-Wunsch Algorithm, Build the matrix}
\begin{algorithmic}[1]
\Procedure{FillMatrix}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $M = $ Array($m+1$, $n+1$)
@ -106,7 +109,7 @@
\end{algorithm}
\begin{algorithm}
\caption{Needleman-Wunsch Algorithm, using proper notation (Backtrack)}
\caption{Needleman-Wunsch Algorithm, reconstruct the alignment}
\begin{algorithmic}[1]
\Procedure{BacktrackAlignment}{$S_{1}$: Array($m$), $S_{2}$: Array($n$)}
\State $alignment = LinkedList$
@ -149,14 +152,10 @@
\State $S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$),
\Function{BacktrackRecurse}{$i$, $j$}
\If {$i > 0$ and $j > 0$}
\State $substitute = M[i-1][j-1]$
\State $delete = M[i-1][j]$
\State $insert = M[i][j-1]$
\State $min = \min \{ substitute, delete, insert \}$
\If {$substitute = min$}
\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$}
\State $z = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix} \circ z$
\ElsIf {$delete = min$}
\ElsIf {$M[i-1][j] + gap\_penalty = M[i][j]$}
\State $z = $ \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$}
\State $z = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix} \circ z$
\Else
@ -172,7 +171,9 @@
\Else
\State \Return []
\EndIf
\Else
\State \Return $z$
\EndIf
\EndFunction
\Function{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$}
@ -185,21 +186,17 @@
\begin{algorithmic}[1]
\Procedure{BacktrackRecurse}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$), $i$, $j$}
\If {$i > 0$ and $j > 0$}
\State $substitute = M[i-1][j-1]$
\State $delete = M[i-1][j]$
\State $insert = M[i][j-1]$
\State $min = \min \{ substitute, delete, insert \}$
\If {$substitute = min$}
\If {$M[i-1][j-1] = M[i][j] - sub(S_{1}[i-1], S_{2}[j-1])$}
\State $value = \begin{pmatrix} S_{1}[i-1] \\ S_{2}[j-1] \end{pmatrix}$
\State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j-1$, $z'$}
\EndIf
\If {$delete = min$}
\If {$M[i-1][j] + gap\_penalty = M[i][j]$}
\State $value = \begin{pmatrix} S_{1}[i-1] \\ \varepsilon \end{pmatrix}$
\State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i-1$, $j$, $z'$}
\EndIf
\If {$insert = min$}
\If {$M[i][j-1] + gap\_penalty = M[i][j]$}
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
@ -212,11 +209,20 @@
\State $value = \begin{pmatrix} \varepsilon \\ S_{2}[j-1] \end{pmatrix}$
\State $z' = value \circ z$
\State \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $i$, $j-1$, $z'$}
\EndIf
\Else
\State \Call{print}{$z$}
\EndIf
\EndProcedure
\Procedure{Backtrack}{$S_{1}$: Array($m$), $S_{2}$: Array($n$), $M$: Array($m+1$, $n+1$)}
\State \Return \Call{BacktrackRecurse}{$S_{1}$, $S_{2}$, $M$, $m$, $n$, []}
\EndProcedure
\end{algorithmic}
\end{algorithm}
\begin{figure}
\centering
\includegraphics{figures/part2/needle.pdf}
\caption{Needleman-Wunsch global alignment matrix with an example of optimal path.}
\end{figure}

View File

@ -0,0 +1,105 @@
needle = require("./needle")
function table.shallow_copy(t)
local t2 = {}
for k,v in pairs(t) do
t2[k] = v
end
return t2
end
function multiple_path_backtrack_trace(matrix, seq1, seq2)
local stack = {}
local m=string.len(seq1)
local n=string.len(seq2)
local i=m
local j=n
table.insert(stack, 1, {i, j, {}})
local trace = {}
while #stack ~= 0 do
local state = table.remove(stack, 1)
table.insert(trace, #trace+1, state)
local i=state[1]
local j=state[2]
local alignment = state[3]
if (i > 0 and j > 0) then
local nt1 = string.sub(seq1, i-1, i-1)
local nt2 = string.sub(seq2, j-1, j-1)
if (matrix[i][j] == matrix[i-1][j-1] + needle.sub(nt1, nt2)) then
local new_alignment = table.shallow_copy(alignment)
table.insert(new_alignment, 1, {nt1, nt2})
table.insert(stack, 1, {i - 1, j - 1, new_alignment})
end
if (matrix[i][j] == matrix[i-1][j] + needle.gap_penalty) then
local new_alignment = table.shallow_copy(alignment)
table.insert(new_alignment, 1, {nt1, '-'})
table.insert(stack, 1, {i-1, j, new_alignment})
end
if (matrix[i][j] == matrix[i][j-1] + needle.gap_penalty) then
local new_alignment = table.shallow_copy(alignment)
table.insert(new_alignment, 1, {'-', nt2})
table.insert(stack, 1, {i, j-1, new_alignment})
end
end
if (i > 0) then
local nt1 = string.sub(seq1, i-1, i-1)
local new_alignment = table.shallow_copy(alignment)
table.insert(new_alignment, 1, {nt1, '-'})
table.insert(stack, 1, {i-1, j, new_alignment})
end
if (j > 0) then
local nt2 = string.sub(seq2, j-1, j-1)
local new_alignment = table.shallow_copy(alignment)
table.insert(new_alignment, 1, {'-', nt2})
table.insert(stack, 1, {i, j-1, new_alignment})
end
end
return trace
end
function repr_alignment(alignment)
local repr = [[\begin{pmatrix}]]
for i, vector in ipairs(alignment) do
repr = repr .. vector[1]
if i < #alignment then
repr = repr .. " & "
end
end
repr = repr .. [[\\]] .. " \n"
for i, vector in ipairs(alignment) do
repr = repr .. vector[2]
if i < #alignment then
repr = repr .. " & "
end
end
repr = repr .. [[\end{pmatrix}]]
return repr
end
function trace_repr(trace)
local repr = ""
-- for stack_index, stack in ipairs(trace) do
-- repr = repr .. "iteration " .. stack_index .. " :" .. [[\\]]
repr = repr .. [[\begin{tabular}{|c|} \\ \hline ]]
for call_index, call in ipairs(trace) do
local i = call[1]
local j = call[2]
local aligment = call[3]
repr = repr .. [[ $\langle ]] .. i ..", " .. j .. ", " .. repr_alignment(alignment).. [[\rangle$ ]]
repr = repr .. [[\\ \hline]]
end
repr = repr .. [[\end{tabular}]]
-- end
return repr
end
function main()
local seq1 = "ATCTGAT"
local seq2 = "TGCATA"
local matrix = needle.needle_matrix(seq1, seq2)
local trace = multiple_path_backtrack_trace(matrix, seq1, seq2)
print(#trace)
print(trace_repr(trace))
end
main()

197
figures/part2/needle.lua Executable file
View File

@ -0,0 +1,197 @@
gap_penalty = 1
mismatch_penalty = 1
match_penalty = 0
function needle_matrix(seq1, seq2)
local n1 = string.len(seq1)
local n2 = string.len(seq2)
-- Create a n1 x n2 matrix
local matrix = {}
for i=0,n1 do
matrix[i] = {}
for j=0,n2 do
matrix[i][j] = 0
end
end
-- Fill first row and first column
for i=1,n1 do
matrix[i][0] = i * gap_penalty
end
for i=1,n2 do
matrix[0][i] = i * gap_penalty
end
-- Fill the rest of the matrix
local match, delete, insert
for i=1,n1 do
for j=1,n2 do
if string.sub(seq1, i, i) == string.sub(seq2, j, j) then
match = matrix[i-1][j-1] + match_penalty
else
match = matrix[i-1][j-1] + mismatch_penalty
end
delete = matrix[i-1][j] + gap_penalty
insert = matrix[i][j-1] + gap_penalty
matrix[i][j] = math.min(match, delete, insert)
end
end
return matrix
end
function draw_needle_matrix(seq1, seq2)
-- tex.print(string.format(" Path: %s -> %s", seq1, seq2))
matrix = needle_matrix(seq1, seq2)
n1 = string.len(seq1)
n2 = string.len(seq2)
-- Draw the matrix as tikz nodes
for i=0,n1-1 do
for j=0,n2-1 do
tex.print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j]))
end
end
-- Draw the sequence labels
for i=1,n1 do
tex.print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i)))
end
for i=1,n2 do
tex.print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i)))
end
-- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step
local i, j, value, previous_value
i = n1-1
j = n2-1
tex.print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j))
while i > 0 and j > 0 do
value = math.min(matrix[i-1][j-1], table[i-1][j], table[i][j-1])
if value == matrix[i-1][j-1] then
i = i - 1
j = j - 1
elseif value == matrix[i-1][j] then
i = i - 1
else
j = j - 1
end
tex.print(string.format(" (%d, -%d) -- ", i, j))
end
tex.print(string.format("(0, 0) -- (-1, 1);", i, j))
end
local function has_value (tab, val)
for index, value in ipairs(tab) do
if value == val then
return true
end
end
return false
end
function sub(a, b)
if (a==b) then
return match_penalty
else
return mismatch_penalty
end
end
-- Returns true if it could have passed from k, l, to i, j
-- during dynamic programming matrix building
function check_path(matrix, i, j, k, l, seq1, seq2)
-- diagonal
if ((i == k + 1) and (j == l + 1)) then
if (matrix[i][j] == matrix[k][l] + sub(string.sub(seq1, i, i), string.sub(seq2, j, j))) then
return true
end
elseif (matrix[i][j] == matrix[k][l] + 1) then
return true
end
return false
end
function draw_needle_matrix_graph(seq1, seq2)
local matrix = needle_matrix(seq1, seq2)
local tikz_code = ""
function coordinate(i, j)
return i .. "_" .. j
end
local steps = {
{-1, -1},
{-1, 0},
{0, -1}
}
local n1 = string.len(seq1)
local n2 = string.len(seq2)
local path = {}
local i = n1
local j = n2
while i >= 0 and j >= 0 do
path[#path+1] = coordinate(i, j)
local min = matrix[i][j]
local min_step = steps[1]
for index, step in ipairs(steps) do
local k = i + step[1]
local l = j + step[2]
if k >= 0 and l >= 0 and check_path(matrix, i, j, k, l, seq1, seq2) then
min_step = step
min = matrix[k][l]
break
end
end
i = i + min_step[1]
j = j + min_step[2]
-- print(i, j)
end
-- Draw the matrix as tikz node with matrix value
for i=0,n1 do
for j=0,n2 do
local options = ""
if has_value(path, coordinate(i, j)) then
options = "[fill=gray, draw, minimum size=1]"
end
tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};"
end
end
-- Add nucleotide labels
for i=1,n1 do
local nt = string.sub(seq1, i, i)
tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};"
end
for i=1,n2 do
local nt = string.sub(seq2, i, i)
tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};"
end
-- For seq2
for i=0,n1 do
for j=0,n2 do
local min = math.huge
for index, step in ipairs(steps) do
local k = i + step[1]
local l = j + step[2]
if k >= 0 and l >= 0 and matrix[k][l] < min then
min = matrix[k][l]
end
end
-- local highlighted = false
for index, step in ipairs(steps) do
local k = i + step[1]
local l = j + step[2]
if k >= 0 and l >= 0 and check_path(matrix, i, j, k, l, seq1, seq2) then
tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");"
end
end
end
end
return tikz_code
end
-- print(draw_needle_matrix_graph("ATGC", "TAGCGA"))
return {
draw=draw_needle_matrix_graph,
gap_penalty=gap_penalty,
mismatch_penalty=mismatch_penalty,
match_penalty=match_penalty,
needle_matrix=needle_matrix,
sub=sub
}

BIN
figures/part2/needle.pdf (Stored with Git LFS) Normal file

Binary file not shown.

20
figures/part2/needle.tex Executable file
View File

@ -0,0 +1,20 @@
\documentclass[tikz]{standalone}
\usepackage{luacode}
\usepackage{tikz}
\begin{document}
\begin{tikzpicture}
\newcommand{\seqone}{TGCATA}
\newcommand{\seqtwo}{ATCTGAT}
\begin{luacode}
local needle = require("needle")
seq1 = \luastring{\seqone}
seq2 = \luastring{\seqtwo}
local tikz_code = needle.draw(seq1, seq2)
tex.print(tikz_code)
\end{luacode}
\end{tikzpicture}
\end{document}

BIN
main.pdf (Stored with Git LFS)

Binary file not shown.