feat: Exercise considered done
This commit is contained in:
commit
b5cdf0a828
|
@ -0,0 +1 @@
|
|||
data filter=lfs diff=lfs merge=lfs -text
|
|
@ -0,0 +1,2 @@
|
|||
build
|
||||
Testing
|
|
@ -0,0 +1,39 @@
|
|||
# Build configuration for the dnapp executable and its GoogleTest suite.
#
# FetchContent_MakeAvailable() first shipped with CMake 3.14, so that is the
# oldest release able to configure this project.
cmake_minimum_required(VERSION 3.14)
project(dnapp)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g3 -ggdb")

set(SRCS src/main.cpp)

add_executable(${PROJECT_NAME} ${SRCS})

# Warnings are errors for the main executable.
if(MSVC)
  target_compile_options(${PROJECT_NAME} PRIVATE /W4 /WX)
else()
  target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wextra -Wpedantic -Werror)
endif()

# Google Test
include(FetchContent)
FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
enable_testing()

add_executable(
  Sequence_test
  tests/Sequence_test.cpp
)
target_link_libraries(
  Sequence_test
  GTest::gtest_main
)

include(GoogleTest)
gtest_discover_tests(Sequence_test)
|
|
@ -0,0 +1,22 @@
|
|||
# Convenience wrappers around the CMake / Conan workflow.
#
# No target produces a file named after itself (and `build` is also the name of
# the build directory), so every target is declared phony.
.PHONY: all build conan-install conan-build run test

all: build run

build:
	cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
	cmake --build build

conan-install:
	conan install . --output-folder=build --build=missing

conan-build: conan-install
	cmake -S . -B build/ -DCMAKE_TOOLCHAIN_FILE=conan_toolchain.cmake -DCMAKE_BUILD_TYPE=Release
	cmake --build build

run:
	./build/dnapp -i ./data/sample.fa

test:
	cmake -S . -B build
	cmake --build build
	cd build && ctest --output-on-failure
|
|
@ -0,0 +1,24 @@
|
|||
# DNA++
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Create a base class Sequence that represents a generic sequence. Include the following attributes and methods:
|
||||
1. Attributes:
|
||||
1. sequence: A string representing the sequence itself.
|
||||
2. Methods:
|
||||
1. `__init__(self, sequence)`: Initialize the sequence.
|
||||
2. `get_length(self)`: Return the length of the sequence.
|
||||
3. `get_sequence(self)`: Return the sequence as a string.
|
||||
2. Create a subclass DNASequence that inherits from the Sequence class. Add the following methods:
|
||||
1. `get_complement(self)`: Return the complementary DNA sequence. A pairs with T, and C pairs with G.
|
||||
2. `transcribe_to_rna(self)`: Return the RNA sequence by replacing all T's with U's.
|
||||
3. Create another subclass ProteinSequence that inherits from the Sequence class. Add a method `translate_to_protein(self)` that translates the DNA sequence into a protein
|
||||
sequence using the genetic code (you can use a dictionary to map codons to amino acids).
|
||||
4. Provide an example DNA sequence and demonstrate the functionality of your program by:
|
||||
1. Creating a DNA sequence object.
|
||||
2. Getting its complement and transcribed RNA sequence.
|
||||
3. Translating it into a protein sequence.
|
||||
|
||||
Hints:
|
||||
* You can create a dictionary to represent the genetic code, where keys are codons (triplets of nucleotides), and values are the corresponding amino acids.
|
||||
* The complement of a DNA sequence can be found by replacing A with T, T with A, C with G, and G with C.
|
|
@ -0,0 +1,2 @@
|
|||
>BBa_J45004 SAM:benzoic acid/salicylic acid carboxyl methyltransferase I; converts salicylic acid to methyl sali
|
||||
atggaagttgttgaagttcttcacatgaatggaggaaatggagacagtagctatgcaaacaattctttggttcagcaaaaggtgattctcatgacaaagccaataactgagcaagccatgattgatctctacagcagcctctttccagaaaccttatgcattgcagatttgggttgttctttgggagctaacactttcttggtggtctcacagcttgttaaaatagtagaaaaagaacgaaaaaagcatggttttaagtctccagagttttattttcacttcaatgatcttcctggcaatgattttaatacactttttcagtcactgggggcatttcaagaagatttgagaaagcatataggggaaagctttggtccatgttttttcagtggagtgcctggttcattttatactagacttttcccttccaaaagtttacattttgtttactcctcctacagtctcatgtggctatctcaggtgcctaatgggattgaaaataacaagggaaacatttacatggcaagaacaagccctctaagtgttattaaagcatactacaagcaatatgaaatagatttttcaaattttctcaagtaccgttcagaggaattgatgaaaggtggaaagatggtgttaacactcctaggtagagaaagtgaggatcctactagcaaagaatgctgttacatttgggagcttctagccatggccctcaataagttggttgaagagggattgataaaagaagagaaagtagatgcattcaatattcctcaatacacaccatcaccagcagaagtaaagtacatagttgagaaggaaggatcattcaccattaatcgcttggaaacatcaagagttcattggaatgcttctaataatgagaagaatggtggttacaatgtgtcaaggtgcatgagagctgtggctgagcctttgcttgtcagccactttgacaaggaattgatggatttagtgttccacaagtacgaagagattgtttctgattgcatgtccaaagagaatactgagtttataaatgtcatcatctccttgaccaaaataaattaa
|
|
@ -0,0 +1,98 @@
|
|||
/** Implement the genetic code as a thread-safe Singleton.
|
||||
*
|
||||
* @see https://refactoring.guru/design-patterns/singleton/cpp/example#example-1
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <complex>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
|
||||
#include "Sequence.hpp"
|
||||
#include "strutils.hpp"
|
||||
|
||||
|
||||
class GeneticCode {
|
||||
|
||||
|
||||
private:
|
||||
|
||||
static GeneticCode *pinstance_;
|
||||
static std::mutex mutex_;
|
||||
|
||||
protected:
|
||||
|
||||
std::unordered_map<std::string, char> codon_table;
|
||||
GeneticCode(): GeneticCode("./resources/codons.txt") {}
|
||||
|
||||
GeneticCode(std::string input_filename) {
|
||||
std::ifstream input(input_filename);
|
||||
if (!input.good()) {
|
||||
throw std::invalid_argument("Error: could not read codon table from " + input_filename + " file.");
|
||||
}
|
||||
load_genetic_code(input);
|
||||
}
|
||||
|
||||
|
||||
void load_genetic_code(std::ifstream &input) {
|
||||
if (!input.good()) {
|
||||
throw std::invalid_argument("Error: Could not read the codon table from the text file.");
|
||||
}
|
||||
std::string line;
|
||||
while(input.good()) {
|
||||
std::getline(input, line);
|
||||
std::vector<std::string> fields = split_string(line, '\t');
|
||||
std::string codon = fields.at(0);
|
||||
char amino_acid = fields.at(2)[0];
|
||||
GeneticCode::codon_table.insert({codon, amino_acid});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
GeneticCode(GeneticCode &other) = delete;
|
||||
void operator=(const GeneticCode &) = delete;
|
||||
static GeneticCode *GetInstance();
|
||||
static GeneticCode *GetInstance(std::string input_filename);
|
||||
|
||||
|
||||
char get_amino_acid(std::string codon) {
|
||||
replace_all(codon, 'U', 'T');
|
||||
if (codon_table.find(codon) != codon_table.end()) {
|
||||
return codon_table.at(codon);
|
||||
} else {
|
||||
throw std::invalid_argument("Error: codon '" + codon + "' not found in genetic code");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
GeneticCode* GeneticCode::pinstance_{nullptr};
|
||||
std::mutex GeneticCode::mutex_;
|
||||
|
||||
/**
 * Return the shared GeneticCode, creating it with the default constructor on
 * the first call.
 *
 * mutex_ is held for the whole call, so concurrent first calls create only
 * one instance. The instance is never deleted.
 *
 * Defined `inline` because this definition lives in a header.
 *
 * @return pointer to the shared instance (never null on return).
 */
inline GeneticCode *GeneticCode::GetInstance()
{
    std::lock_guard<std::mutex> lock(mutex_);
    if (pinstance_ == nullptr)
    {
        pinstance_ = new GeneticCode();
    }
    return pinstance_;
}
|
||||
|
||||
/**
 * Return the shared GeneticCode, creating it from @p input_filename on the
 * first call.
 *
 * If an instance already exists it is returned as is and @p input_filename is
 * ignored, so the codon file must be chosen before any other GetInstance()
 * call. mutex_ is held for the whole call.
 *
 * Defined `inline` because this definition lives in a header.
 *
 * @param input_filename path of the codon table to load on first use.
 * @return pointer to the shared instance (never null on return).
 */
inline GeneticCode *GeneticCode::GetInstance(std::string input_filename)
{
    std::lock_guard<std::mutex> lock(mutex_);
    if (pinstance_ == nullptr)
    {
        pinstance_ = new GeneticCode(input_filename);
    }
    return pinstance_;
}
|
|
@ -0,0 +1,92 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "GeneticCode.hpp"
|
||||
#include "strutils.hpp"
|
||||
|
||||
|
||||
/**
 * Generic biological sequence: a thin wrapper around the residue string.
 *
 * Both accessors are const so they can be called on const objects and through
 * const references.
 */
class Sequence{

public:

    /** Store @p sequence as given; no validation or case change is applied. */
    Sequence(std::string sequence): sequence(sequence) {}

    /** @return number of characters in the sequence. */
    unsigned int get_length() const {
        // std::string::length() returns size_t; the narrowing is made explicit.
        return static_cast<unsigned int>(this->sequence.length());
    }

    /** @return a copy of the stored sequence. */
    std::string get_sequence() const {
        return this->sequence;
    }

protected:

    std::string sequence;
};
|
||||
|
||||
|
||||
class ProteinSequence: public Sequence {
|
||||
|
||||
using Sequence::Sequence;
|
||||
};
|
||||
|
||||
class RNASequence: public Sequence {
|
||||
|
||||
using Sequence::Sequence;
|
||||
|
||||
public:
|
||||
|
||||
ProteinSequence translate_to_protein() {
|
||||
GeneticCode *code = GeneticCode::GetInstance();
|
||||
std::string protein_sequence = "";
|
||||
for (unsigned int i=0; i < this->sequence.length()-2; i+=3) {
|
||||
std::string codon = this->sequence.substr(i, 3);
|
||||
char amino_acid = code->get_amino_acid(codon);
|
||||
protein_sequence += amino_acid;
|
||||
}
|
||||
return ProteinSequence(protein_sequence);
|
||||
}
|
||||
};
|
||||
|
||||
class DNASequence: public Sequence {
|
||||
|
||||
using Sequence::Sequence;
|
||||
public:
|
||||
DNASequence get_complement() {
|
||||
std::string complement = "";
|
||||
for (char &c: this->sequence) {
|
||||
complement += DNASequence::complement(c);
|
||||
}
|
||||
return DNASequence(complement);
|
||||
}
|
||||
|
||||
static char complement(char c) {
|
||||
if (c == 'A') {
|
||||
return 'T';
|
||||
} else if (c == 'T') {
|
||||
return 'A';
|
||||
} else if (c == 'G') {
|
||||
return 'C';
|
||||
} else if (c == 'C') {
|
||||
return 'G';
|
||||
} else {
|
||||
throw std::invalid_argument("Error: nucleotide " + std::to_string(c) + " has no complement");
|
||||
}
|
||||
}
|
||||
|
||||
RNASequence transcribe_to_rna() {
|
||||
std::string rna_sequence = this->sequence.substr(0);
|
||||
replace_all(rna_sequence, 'T', 'U');
|
||||
return RNASequence(rna_sequence);
|
||||
}
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
/** Simple FASTA file format reader
|
||||
*
|
||||
* @author The BioloGeeks Team
|
||||
* @date 2023-09-30
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
/** One FASTA entry: the header text and the concatenated sequence lines. */
class Record {
public:
    std::string name, seq;

    /** Build a record from its header text and its sequence. */
    Record(std::string name, std::string seq) : name(name), seq(seq) {}
};
|
||||
|
||||
namespace fasta {
|
||||
|
||||
/**
|
||||
* @see https://www.rosettacode.org/wiki/FASTA_format?section=10#C++
|
||||
*
|
||||
*/
|
||||
std::vector<Record> read_file(std::ifstream &input) {
|
||||
std::string line, name, content;
|
||||
std::vector<Record> Records;
|
||||
// Read fasta file lines and append each FASTA records
|
||||
// to the Records vector.
|
||||
while (input.good()) {
|
||||
std::getline(input, line);
|
||||
if (line[0] == '>') {
|
||||
if (!name.empty()) {
|
||||
Record Record(name, content);
|
||||
Records.push_back(Record);
|
||||
name.clear();
|
||||
content.clear();
|
||||
}
|
||||
name = line.substr(1);
|
||||
} else {
|
||||
content += line;
|
||||
}
|
||||
}
|
||||
|
||||
if (!name.empty()) {
|
||||
Record Record(name, content);
|
||||
Records.push_back(Record);
|
||||
}
|
||||
return Records;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
#pragma once
|
||||
#include <algorithm>
#include <cctype>
#include <functional>
#include <sstream>
#include <string>
#include <vector>
|
||||
|
||||
/**
 * Replace, in place, every occurrence of @p from in @p str with @p to.
 *
 * Declared `inline` because it is defined in a header and would otherwise be
 * a duplicate symbol as soon as two translation units include it.
 *
 * @param str  string to modify.
 * @param from character to look for.
 * @param to   character written in its place.
 */
inline void replace_all(std::string &str, char from, char to) {
    for (char &current : str) {
        if (current == from) {
            current = to;
        }
    }
}
|
||||
|
||||
|
||||
/**
 * Split @p str on every occurrence of @p delim.
 *
 * Empty fields between two consecutive delimiters are kept, a single trailing
 * delimiter does not produce a final empty field, and an empty input yields
 * an empty vector (this is how std::getline tokenises).
 *
 * The input is taken by const reference because it is never modified, so
 * const strings and temporaries are accepted. Declared `inline` because it is
 * defined in a header.
 *
 * @param str   text to split.
 * @param delim separator character.
 * @return the fields, in order.
 */
inline std::vector<std::string> split_string(const std::string &str, char delim) {
    std::istringstream stream(str);
    std::vector<std::string> fields;
    std::string item;
    while (std::getline(stream, item, delim)) {
        fields.push_back(item);
    }
    return fields;
}
|
||||
|
||||
/**
 * Convert @p str to upper case in place, using std::toupper on each character.
 *
 * Declared `inline` because it is defined in a header.
 *
 * @param str string to modify.
 */
inline void uppercase_string(std::string &str) {
    std::transform(str.cbegin(), str.cend(),
                   str.begin(),  // write to the same location
                   // std::toupper must receive a value representable as
                   // unsigned char; its int result is narrowed back explicitly.
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
}
|
|
@ -0,0 +1,64 @@
|
|||
AAA Lys K Lysine
|
||||
AAC Asn N Asparagine
|
||||
AAG Lys K Lysine
|
||||
AAT Asn N Asparagine
|
||||
ACA Thr T Threonine
|
||||
ACC Thr T Threonine
|
||||
ACG Thr T Threonine
|
||||
ACT Thr T Threonine
|
||||
AGA Arg R Arginine
|
||||
AGC Ser S Serine
|
||||
AGG Arg R Arginine
|
||||
AGT Ser S Serine
|
||||
ATA Ile I Isoleucine
|
||||
ATC Ile I Isoleucine
|
||||
ATG Met M Methionine
|
||||
ATT Ile I Isoleucine
|
||||
CAA Gln Q Glutamine
|
||||
CAC His H Histidine
|
||||
CAG Gln Q Glutamine
|
||||
CAT His H Histidine
|
||||
CCA Pro P Proline
|
||||
CCC Pro P Proline
|
||||
CCG Pro P Proline
|
||||
CCT Pro P Proline
|
||||
CGA Arg R Arginine
|
||||
CGC Arg R Arginine
|
||||
CGG Arg R Arginine
|
||||
CGT Arg R Arginine
|
||||
CTA Leu L Leucine
|
||||
CTC Leu L Leucine
|
||||
CTG Leu L Leucine
|
||||
CTT Leu L Leucine
|
||||
GAA Glu E Glutamic_acid
|
||||
GAC Asp D Aspartic_acid
|
||||
GAG Glu E Glutamic_acid
|
||||
GAT Asp D Aspartic_acid
|
||||
GCA Ala A Alanine
|
||||
GCC Ala A Alanine
|
||||
GCG Ala A Alanine
|
||||
GCT Ala A Alanine
|
||||
GGA Gly G Glycine
|
||||
GGC Gly G Glycine
|
||||
GGG Gly G Glycine
|
||||
GGT Gly G Glycine
|
||||
GTA Val V Valine
|
||||
GTC Val V Valine
|
||||
GTG Val V Valine
|
||||
GTT Val V Valine
|
||||
TAA Stp O Stop
|
||||
TAC Tyr Y Tyrosine
|
||||
TAG Stp O Stop
|
||||
TAT Tyr Y Tyrosine
|
||||
TCA Ser S Serine
|
||||
TCC Ser S Serine
|
||||
TCG Ser S Serine
|
||||
TCT Ser S Serine
|
||||
TGA Stp O Stop
|
||||
TGC Cys C Cysteine
|
||||
TGG Trp W Tryptophan
|
||||
TGT Cys C Cysteine
|
||||
TTA Leu L Leucine
|
||||
TTC Phe F Phenylalanine
|
||||
TTG Leu L Leucine
|
||||
TTT Phe F Phenylalanine
|
|
@ -0,0 +1,53 @@
|
|||
/** Basic DNA sequence
|
||||
*/
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <ios>
|
||||
#include <iostream>
|
||||
#include <type_traits>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../include/Sequence.hpp"
|
||||
#include "../include/GeneticCode.hpp"
|
||||
#include "../include/fasta.hpp"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
std::string codon_filename = "resources/codons.txt";
|
||||
std::string fasta_filename = "";
|
||||
int opt;
|
||||
while ((opt = getopt (argc, argv, "i:c:h")) != -1) {
|
||||
switch(opt) {
|
||||
case 'i':
|
||||
fasta_filename = optarg;
|
||||
break;
|
||||
case 'c':
|
||||
codon_filename = optarg;
|
||||
break;
|
||||
case '?':
|
||||
case 'h':
|
||||
default:
|
||||
std::cout << "Usage: " << argv[0] << "-i <dna-fasta>" << std::endl;
|
||||
break;
|
||||
case -1:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fasta_filename.length() == 0) {
|
||||
std::cerr << "Error: no fasta file given" << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
GeneticCode::GetInstance(codon_filename);
|
||||
std::ifstream fasta_ifstream(fasta_filename);
|
||||
std::vector<Record> fasta_records = fasta::read_file(fasta_ifstream);
|
||||
Record fasta_record(fasta_records.at(0));
|
||||
std::string sequence(fasta_record.seq);
|
||||
uppercase_string(sequence);
|
||||
DNASequence dna_sequence(sequence);
|
||||
RNASequence rna_sequence = dna_sequence.transcribe_to_rna();
|
||||
ProteinSequence protein_sequence = rna_sequence.translate_to_protein();
|
||||
std::cout << dna_sequence.get_sequence() << std::endl;
|
||||
std::cout << rna_sequence.get_sequence() << std::endl;
|
||||
std::cout << protein_sequence.get_sequence() << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
#include <gtest/gtest.h>
|
||||
|
||||
#include "../include/Sequence.hpp"
|
||||
|
||||
// Transcription replaces T with U, so "ATG" must become "AUG".
TEST(TranscriptionTest, BasicTranscription) {
    const std::string expected_rna = "AUG";
    DNASequence dna(std::string("ATG"));
    RNASequence rna = dna.transcribe_to_rna();
    const std::string actual_rna = rna.get_sequence();
    EXPECT_EQ(expected_rna, actual_rna);
}
|
||||
|
||||
// Two ATG codons must translate to two methionines ("MM").
TEST(TranslationTest, BasicTranslation) {
    // Create the shared genetic code from the codon table one directory up.
    GeneticCode::GetInstance("../resources/codons.txt");
    const std::string expected_protein = "MM";
    DNASequence dna(std::string("ATGATG"));
    ProteinSequence protein = dna.transcribe_to_rna().translate_to_protein();
    const std::string actual_protein = protein.get_sequence();
    EXPECT_EQ(expected_protein, actual_protein);
}
|
Loading…
Reference in New Issue