From 1953baa1980d7b5cc8881927cc4db00799e957c6 Mon Sep 17 00:00:00 2001 From: Samuel Ortion Date: Fri, 29 Mar 2024 10:05:13 +0100 Subject: [PATCH] fix: Use correct backtrack algo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Naïa Périnelle --- figures/part2/needle-backtrack-stack.lua | 22 +++ figures/part2/needle.lua | 195 +++++++++++++++++++++++ figures/part2/needle.pdf | 3 + figures/part2/needle.tex | 20 +++ 4 files changed, 240 insertions(+) create mode 100644 figures/part2/needle-backtrack-stack.lua create mode 100755 figures/part2/needle.lua create mode 100644 figures/part2/needle.pdf create mode 100755 figures/part2/needle.tex diff --git a/figures/part2/needle-backtrack-stack.lua b/figures/part2/needle-backtrack-stack.lua new file mode 100644 index 0000000..e23e2fd --- /dev/null +++ b/figures/part2/needle-backtrack-stack.lua @@ -0,0 +1,22 @@ +needle = require("./needle") + +function multiple_path_backtrack_trace(matrix, seq1, seq2) + local stack = {} + local m=string.len(seq1) + local n=string.len(seq2) + local i=m + local j=n + table.insert(1, {i, j, nil}) + while #stack ~= 0 do + local state = table.remove(stack, 1) + local i=state[1] + local j=state[2] + local alignment = state[3] + end +end + +function main() + local seq1 = "ATCTGAT" + local seq2 = "TGCATA" + local matrix = needle.needle_matrix(seq1, seq2) +end diff --git a/figures/part2/needle.lua b/figures/part2/needle.lua new file mode 100755 index 0000000..c819bce --- /dev/null +++ b/figures/part2/needle.lua @@ -0,0 +1,195 @@ +gap_penalty = 1 +mismatch_penalty = 1 +match_penalty = 0 + +function needle_matrix(seq1, seq2) + + local n1 = string.len(seq1) + local n2 = string.len(seq2) + -- Create a n1 x n2 matrix + local matrix = {} + for i=0,n1 do + matrix[i] = {} + for j=0,n2 do + matrix[i][j] = 0 + end + end + -- Fill first row and first column + for i=1,n1 do + matrix[i][0] = i * gap_penalty + end + for i=1,n2 do + matrix[0][i] = i * gap_penalty + end + -- Fill the rest of the matrix + local match, delete, insert + for i=1,n1 do + for j=1,n2 do + if string.sub(seq1, i, i) == string.sub(seq2, j, j) then + match = matrix[i-1][j-1] + match_penalty + else + match = matrix[i-1][j-1] + mismatch_penalty + end + delete = matrix[i-1][j] + gap_penalty + insert = matrix[i][j-1] + gap_penalty + matrix[i][j] = math.min(match, delete, insert) + end + end + return matrix +end + +function draw_needle_matrix(seq1, seq2) + -- tex.print(string.format(" Path: %s -> %s", seq1, seq2)) + matrix = needle_matrix(seq1, seq2) + n1 = string.len(seq1) + n2 = string.len(seq2) + -- Draw the matrix as tikz nodes + for i=0,n1-1 do + for j=0,n2-1 do + tex.print(string.format("\\node[draw, minimum width=1cm, minimum height=1cm] at (%d, -%d) {};", i, j, matrix[i][j])) + end + end + -- Draw the sequence labels + for i=1,n1 do + tex.print(string.format("\\node at (%d, -%d) {%s};", i-1, -1, string.sub(seq1, i, i))) + end + for i=1,n2 do + tex.print(string.format("\\node at (%d, -%d) {%s};", -1, i-1, string.sub(seq2, i, i))) + end + -- Add a path from the bottom right corner to the top left corner, following the minimum of the three possible moves at each step + local i, j, value, previous_value + i = n1-1 + j = n2-1 + tex.print(string.format("\\draw[-,line width=2, gray] (%d, -%d) --", i, j)) + while i > 0 and j > 0 do + value = math.min(matrix[i-1][j-1], table[i-1][j], table[i][j-1]) + if value == matrix[i-1][j-1] then + i = i - 1 + j = j - 1 + elseif value == matrix[i-1][j] then + i = i - 1 + else + j = j - 1 + end + tex.print(string.format(" (%d, -%d) -- ", i, j)) + end + tex.print(string.format("(0, 0) -- (-1, 1);", i, j)) +end + +local function has_value (tab, val) + for index, value in ipairs(tab) do + if value == val then + return true + end + end + + return false +end + +function sub(a, b) + if (a==b) then + return match_penalty + else + return mismatch_penalty + end +end + +-- Returns true if it could have passed from k, l, to i, j +-- during dynamic programming matrix building +function check_path(matrix, i, j, k, l, seq1, seq2) + -- diagonal + if ((i == k + 1) and (j == l + 1)) then + if (matrix[i][j] == matrix[k][l] + sub(string.sub(seq1, i, i), string.sub(seq2, j, j))) then + return true + end + elseif (matrix[i][j] == matrix[k][l] + 1) then + return true + end + return false +end + +function draw_needle_matrix_graph(seq1, seq2) + local matrix = needle_matrix(seq1, seq2) + local tikz_code = "" + function coordinate(i, j) + return i .. "_" .. j + end + local steps = { + {-1, -1}, + {-1, 0}, + {0, -1} + } + + local n1 = string.len(seq1) + local n2 = string.len(seq2) + local path = {} + local i = n1 + local j = n2 + while i >= 0 and j >= 0 do + path[#path+1] = coordinate(i, j) + local min = matrix[i][j] + local min_step = steps[1] + for index, step in ipairs(steps) do + local k = i + step[1] + local l = j + step[2] + if k >= 0 and l >= 0 and check_path(matrix, i, j, k, l, seq1, seq2) then + min_step = step + min = matrix[k][l] + break + end + end + i = i + min_step[1] + j = j + min_step[2] +-- print(i, j) + end + -- Draw the matrix as tikz node with matrix value + for i=0,n1 do + for j=0,n2 do + local options = "" + if has_value(path, coordinate(i, j)) then + options = "[fill=gray, draw, minimum size=1]" + end + tikz_code = tikz_code .. "\\node" .. options .. " (" .. coordinate(i, j) .. ") at (" .. i .. ", " .. -j .. ")" .. " {" .. matrix[i][j] .. "};" + end + end + -- Add nucleotide labels + for i=1,n1 do + local nt = string.sub(seq1, i, i) + tikz_code = tikz_code .. "\\node at (".. i .. "," .. 1 .. ")" .. "{$" .. nt .."$};" + end + for i=1,n2 do + local nt = string.sub(seq2, i, i) + tikz_code = tikz_code .. "\\node at (" .. -1 .. ", " .. -i .. ")" .. "{$ ".. nt .."$};" + end + -- For seq2 + for i=0,n1 do + for j=0,n2 do + local min = math.huge + for index, step in ipairs(steps) do + local k = i + step[1] + local l = j + step[2] + if k >= 0 and l >= 0 and matrix[k][l] < min then + min = matrix[k][l] + end + end +-- local highlighted = false + for index, step in ipairs(steps) do + local k = i + step[1] + local l = j + step[2] + if k >= 0 and l >= 0 and check_path(matrix, i, j, k, l, seq1, seq2) then + tikz_code = tikz_code .. "\\draw[->] (" .. coordinate(i, j) .. ")" .. " -- " .. "(" .. coordinate (k, l) .. ");" + end + end + end + end + return tikz_code +end + +-- print(draw_needle_matrix_graph("ATGC", "TAGCGA")) + +return { + draw=draw_needle_matrix_graph, + gap_penalty=gap_penalty, + mismatch_penalty=mismatch_penalty, + match_penalty=match_penalty +} diff --git a/figures/part2/needle.pdf b/figures/part2/needle.pdf new file mode 100644 index 0000000..c032ef7 --- /dev/null +++ b/figures/part2/needle.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a190228082d5d244929900a3dd22ba34f727ea1aae9226362f421a7670741416 +size 14729 diff --git a/figures/part2/needle.tex b/figures/part2/needle.tex new file mode 100755 index 0000000..b7a2d75 --- /dev/null +++ b/figures/part2/needle.tex @@ -0,0 +1,20 @@ +\documentclass[tikz]{standalone} + +\usepackage{luacode} +\usepackage{tikz} + +\begin{document} + +\begin{tikzpicture} + \newcommand{\seqone}{TGCATA} + \newcommand{\seqtwo}{ATCTGAT} + \begin{luacode} + local needle = require("needle") + seq1 = \luastring{\seqone} + seq2 = \luastring{\seqtwo} + local tikz_code = needle.draw(seq1, seq2) + tex.print(tikz_code) + \end{luacode} +\end{tikzpicture} + +\end{document}