% Bibliography database in biblatex format (fields such as date, journaltitle
% and langid are used, and values contain UTF-8 text), so it is meant to be
% processed with biblatex and Biber rather than classic BibTeX.
% Doubled braces in titles protect words whose capitalisation must be kept.

@article{altschulBasicLocalAlignment1990,
  title = {Basic Local Alignment Search Tool},
  author = {Altschul, Stephen F. and Gish, Warren and Miller, Webb and Myers, Eugene W. and Lipman, David J.},
  date = {1990-10-05},
  journaltitle = {Journal of Molecular Biology},
  shortjournal = {Journal of Molecular Biology},
  volume = {215},
  number = {3},
  pages = {403--410},
  issn = {0022-2836},
  doi = {10.1016/S0022-2836(05)80360-2},
  url = {https://www.sciencedirect.com/science/article/pii/S0022283605803602},
  urldate = {2023-04-30},
  abstract = {A new approach to rapid sequence comparison, basic local alignment search tool (BLAST), directly approximates alignments that optimize a measure of local similarity, the maximal segment pair (MSP) score. Recent mathematical results on the stochastic properties of MSP scores allow an analysis of the performance of this method as well as the statistical significance of alignments it generates. The basic algorithm is simple and robust; it can be implemented in a number of ways and applied in a variety of contexts including straight-forward DNA and protein sequence database searches, motif searches, gene identification searches, and in the analysis of multiple regions of similarity in long DNA sequences. In addition to its flexibility and tractability to mathematical analysis, BLAST is an order of magnitude faster than existing sequence comparison tools of comparable sensitivity.},
  langid = {english}
}

@article{blancWidespreadPaleopolyploidyModel2004,
  title = {Widespread {{Paleopolyploidy}} in {{Model Plant Species Inferred}} from {{Age Distributions}} of {{Duplicate Genes}}},
  author = {Blanc, Guillaume and Wolfe, Kenneth H.},
  date = {2004-07-02},
  journaltitle = {The Plant Cell},
  shortjournal = {The Plant Cell},
  volume = {16},
  number = {7},
  pages = {1667--1678},
  issn = {1040-4651},
  doi = {10.1105/tpc.021345},
  url = {https://doi.org/10.1105/tpc.021345},
  urldate = {2025-01-09},
  abstract = {It is often anticipated that many of today's diploid plant species are in fact paleopolyploids. Given that an ancient large-scale duplication will result in an excess of relatively old duplicated genes with similar ages, we analyzed the timing of duplication of pairs of paralogous genes in 14 model plant species. Using EST contigs (unigenes), we identified pairs of paralogous genes in each species and used the level of synonymous nucleotide substitution to estimate the relative ages of gene duplication. For nine of the investigated species (wheat [Triticum aestivum], maize [Zea mays], tetraploid cotton [Gossypium hirsutum], diploid cotton [G. arboretum], tomato [Lycopersicon esculentum], potato [Solanum tuberosum], soybean [Glycine max], barrel medic [Medicago truncatula], and Arabidopsis thaliana), the age distributions of duplicated genes contain peaks corresponding to short evolutionary periods during which large numbers of duplicated genes were accumulated. Large-scale duplications (polyploidy or aneuploidy) are strongly suspected to be the cause of these temporal peaks of gene duplication. However, the unusual age profile of tandem gene duplications in Arabidopsis indicates that other scenarios, such as variation in the rate at which duplicated genes are deleted, must also be considered.},
  langid = {english}
}

% NOTE(review): this record has no journal and no date, so it cannot be a
% complete article entry. The title format suggests a doctoral thesis; it is
% typed as a thesis here on that assumption. Confirm, then add the missing
% institution, type and date fields.
@thesis{lariviereMethodesBioinformatiquesDanalyse,
  title = {Méthodes bioinformatiques d'analyse de l'histoire évolutive des familles de gènes : intégration de données, indices évolutifs, et analyses fonctionnelles appliquées aux familles de gènes impliquées dans la réponse des plantes aux stress environnementaux},
  author = {Lariviere, Delphine},
  langid = {french}
}

@report{le-hoangEtudeTranscriptomiqueGenes2017,
  type = {Internship Report},
  title = {Etude transcriptomique des gènes dupliqués en tandem (TAG) chez Arabidopsis thaliana},
  author = {Lê-Hoang, Julie},
  date = {2017},
  institution = {Laboratoire de Mathématiques et Modélisation d'Évry},
  langid = {french}
}

@article{liTranscriptionalAnalysisHighly2008,
  title = {Transcriptional Analysis of Highly Syntenic Regions between {{Medicago}} Truncatula and {{Glycine}} Max Using Tiling Microarrays},
  author = {Li, Lei and He, Hang and Zhang, Juan and Wang, Xiangfeng and Bai, Sulan and Stolc, Viktor and Tongprasit, Waraporn and Young, Nevin D. and Yu, Oliver and Deng, Xing-Wang},
  date = {2008-03-19},
  journaltitle = {Genome Biology},
  shortjournal = {Genome Biol},
  volume = {9},
  number = {3},
  pages = {R57},
  issn = {1474-760X},
  doi = {10.1186/gb-2008-9-3-r57},
  url = {https://doi.org/10.1186/gb-2008-9-3-r57},
  urldate = {2025-01-08},
  abstract = {Legumes are the third largest family of flowering plants and are unique among crop species in their ability to fix atmospheric nitrogen. As a result of recent genome sequencing efforts, legumes are now one of a few plant families with extensive genomic and transcriptomic data available in multiple species. The unprecedented complexity and impending completeness of these data create opportunities for new approaches to discovery.},
  langid = {english},
  keywords = {Additional Data File,Medicago Truncatula,Organ Type,Syntenic Region,Tiling Array}
}

@book{ohnoEvolutionGeneDuplication1970,
  title = {Evolution by {{Gene Duplication}}},
  author = {Ohno, Susumu},
  date = {1970},
  publisher = {Springer Berlin Heidelberg},
  location = {Berlin, Heidelberg},
  doi = {10.1007/978-3-642-86659-3},
  url = {http://link.springer.com/10.1007/978-3-642-86659-3},
  urldate = {2024-03-21},
  isbn = {978-3-642-86661-6},
  langid = {english}
}

@article{pfeilPlacingPaleopolyploidyRelation2005,
  title = {Placing {{Paleopolyploidy}} in {{Relation}} to {{Taxon Divergence}}: {{A Phylogenetic Analysis}} in {{Legumes Using}} 39 {{Gene Families}}},
  shorttitle = {Placing {{Paleopolyploidy}} in {{Relation}} to {{Taxon Divergence}}},
  author = {Pfeil, B E and Schlueter, J A and Shoemaker, R C and Doyle, J J},
  date = {2005-06-01},
  journaltitle = {Systematic Biology},
  shortjournal = {Systematic Biology},
  volume = {54},
  number = {3},
  pages = {441--454},
  issn = {1063-5157},
  doi = {10.1080/10635150590945359},
  url = {https://doi.org/10.1080/10635150590945359},
  urldate = {2025-01-15},
  abstract = {Young polyploid events are easily diagnosed by various methods, but older polyploid events become increasingly difficult to identify as chromosomal rearrangements, tandem gene or partial chromosome duplications, changes in substitution rates among duplicated genes, pseudogenization or locus loss, and interlocus interactions complicate the means of inferring past genetic events. Genomic data have provided valuable information about the polyploid history of numerous species, but on their own fail to show whether related species, each with a polyploid past, share a particular polyploid event. A phylogenetic approach provides a powerful method to determine this but many processes may mislead investigators. These processes can affect individual gene trees, but most likely will not affect all genes, and almost certainly will not affect all genes in the same way. Thus, a multigene approach, which combines the large-scale aspect of genomics with the resolution of phylogenetics, has the power to overcome these difficulties and allow us to infer genomic events further into the past than would otherwise be possible. Previous work using synonymous distances among gene pairs within species has shown evidence for large-scale duplications in the legumes Glycine max and Medicago truncatula. We present a case study using 39 gene families, each with three or four members in G. max and the putative orthologues in M. truncatula, rooted using Arabidopsis thaliana. We tested whether the gene duplications in these legumes occurred separately in each lineage after their divergence (Hypothesis 1), or whether they share a round of gene duplications (Hypothesis 2). Many more gene family topologies supported Hypothesis 2 over Hypothesis 1 (11 and 2, respectively), even after synonymous distance analysis revealed that some topologies were providing misleading results. Only ca. 33\% of genes examined support either hypothesis, which strongly suggests that single gene family approaches may be insufficient when studying ancient events with nuclear DNA. Our results suggest that G. max and M. truncatula, along with approximately 7000 other legume species from the same clade, share an ancient round of gene duplications, either due to polyploidy or to some other process.},
  langid = {english}
}

% The tool name g:Profiler starts with a lowercase letter and contains a colon;
% it is brace-protected as one unit so that neither title casing nor automatic
% short-title splitting at the colon can mangle it.
@article{reimandProfilerWebServer2016,
  title = {{{g:Profiler}}—a Web Server for Functional Interpretation of Gene Lists (2016 Update)},
  shorttitle = {{{g:Profiler}}},
  author = {Reimand, Jüri and Arak, Tambet and Adler, Priit and Kolberg, Liis and Reisberg, Sulev and Peterson, Hedi and Vilo, Jaak},
  date = {2016-07-08},
  journaltitle = {Nucleic Acids Research},
  shortjournal = {Nucleic Acids Research},
  volume = {44},
  number = {W1},
  pages = {W83--W89},
  issn = {0305-1048},
  doi = {10.1093/nar/gkw199},
  url = {https://doi.org/10.1093/nar/gkw199},
  urldate = {2025-01-16},
  abstract = {Functional enrichment analysis is a key step in interpreting gene lists discovered in diverse high-throughput experiments. g:Profiler studies flat and ranked gene lists and finds statistically significant Gene Ontology terms, pathways and other gene function related terms. Translation of hundreds of gene identifiers is another core feature of g:Profiler. Since its first publication in 2007, our web server has become a popular tool of choice among basic and translational researchers. Timeliness is a major advantage of g:Profiler as genome and pathway information is synchronized with the Ensembl database in quarterly updates. g:Profiler supports 213 species including mammals and other vertebrates, plants, insects and fungi. The 2016 update of g:Profiler introduces several novel features. We have added further functional datasets to interpret gene lists, including transcription factor binding site predictions, Mendelian disease annotations, information about protein expression and complexes and gene mappings of human genetic polymorphisms. Besides the interactive web interface, g:Profiler can be accessed in computational pipelines using our R package, Python interface and BioJS component. g:Profiler is freely available at http://biit.cs.ut.ee/gprofiler/.},
  langid = {english}
}

@article{schlueterFractionationSyntenyGenomic2008,
  title = {Fractionation of {{Synteny}} in a {{Genomic Region Containing Tandemly Duplicated Genes}} across {{Glycine}} Max, {{Medicago}} Truncatula, and {{Arabidopsis}} Thaliana},
  author = {Schlueter, Jessica A. and Scheffler, Brian E. and Jackson, Scott and Shoemaker, Randy C.},
  date = {2008-07-01},
  journaltitle = {Journal of Heredity},
  shortjournal = {Journal of Heredity},
  volume = {99},
  number = {4},
  pages = {390--395},
  issn = {0022-1503},
  doi = {10.1093/jhered/esn010},
  url = {https://doi.org/10.1093/jhered/esn010},
  urldate = {2025-01-08},
  abstract = {Extended comparison of gene sequences found on homeologous soybean Bacterial Artificial Chromosomes to Medicago truncatula and Arabidopsis thaliana genomic sequences demonstrated a network of synteny within conserved regions interrupted by gene addition and/or deletions. Consolidation of gene order among all 3 species provides a picture of ancestral gene order. The observation supports a genome history of fractionation resulting from gene loss/addition and rearrangement. In all 3 species, clusters of N-hydroxycinnamoyl/benzoyltransferase genes were identified in tandemly duplicated clusters. Parsimony-based gene trees suggest that the genes within the arrays have independently undergone tandem duplication in each species.},
  langid = {english}
}

@article{schmutzGenomeSequencePalaeopolyploid2010,
  title = {Genome Sequence of the Palaeopolyploid Soybean},
  author = {Schmutz, Jeremy and Cannon, Steven B. and Schlueter, Jessica and Ma, Jianxin and Mitros, Therese and Nelson, William and Hyten, David L. and Song, Qijian and Thelen, Jay J. and Cheng, Jianlin and Xu, Dong and Hellsten, Uffe and May, Gregory D. and Yu, Yeisoo and Sakurai, Tetsuya and Umezawa, Taishi and Bhattacharyya, Madan K. and Sandhu, Devinder and Valliyodan, Babu and Lindquist, Erika and Peto, Myron and Grant, David and Shu, Shengqiang and Goodstein, David and Barry, Kerrie and Futrell-Griggs, Montona and Abernathy, Brian and Du, Jianchang and Tian, Zhixi and Zhu, Liucun and Gill, Navdeep and Joshi, Trupti and Libault, Marc and Sethuraman, Anand and Zhang, Xue-Cheng and Shinozaki, Kazuo and Nguyen, Henry T. and Wing, Rod A. and Cregan, Perry and Specht, James and Grimwood, Jane and Rokhsar, Dan and Stacey, Gary and Shoemaker, Randy C. and Jackson, Scott A.},
  date = {2010-01},
  journaltitle = {Nature},
  volume = {463},
  number = {7278},
  pages = {178--183},
  publisher = {Nature Publishing Group},
  issn = {1476-4687},
  doi = {10.1038/nature08670},
  url = {https://www.nature.com/articles/nature08670},
  urldate = {2025-01-15},
  abstract = {Soybean (Glycine max) is one of the most important crop plants for seed protein and oil content, and for its capacity to fix atmospheric nitrogen through symbioses with soil-borne microorganisms. We sequenced the 1.1-gigabase genome by a whole-genome shotgun approach and integrated it with physical and high-density genetic maps to create a chromosome-scale draft sequence assembly. We predict 46,430 protein-coding genes, 70\% more than Arabidopsis and similar to the poplar genome which, like soybean, is an ancient polyploid (palaeopolyploid). About 78\% of the predicted genes occur in chromosome ends, which comprise less than one-half of the genome but account for nearly all of the genetic recombination. Genome duplications occurred at approximately 59 and 13 million years ago, resulting in a highly duplicated genome with nearly 75\% of the genes present in multiple copies. The two duplication events were followed by gene diversification and loss, and numerous chromosome rearrangements. An accurate soybean genome sequence will facilitate the identification of the genetic basis of many soybean traits, and accelerate the creation of improved soybean varieties.},
  langid = {english},
  keywords = {DNA sequencing,Plant genetics}
}

% The scraped web-page suffix naming the hosting site was removed from the
% title. NOTE(review): the doi below was derived from the publisher item
% identifier embedded in the url; confirm that it resolves to this chapter.
@incollection{stuparInsightsSoybeanGlycine2013,
  title = {Insights from the {{Soybean}} ({{Glycine}} Max and {{Glycine}} Soja) {{Genome}}: {{Past}}, {{Present}}, and {{Future}}},
  booktitle = {Advances in {{Agronomy}}},
  author = {Stupar, Robert M. and Specht, James E.},
  date = {2013},
  volume = {118},
  pages = {177--204},
  doi = {10.1016/B978-0-12-405942-9.00004-9},
  url = {https://www.sciencedirect.com/science/article/abs/pii/B9780124059429000049},
  urldate = {2025-01-08},
  langid = {english}
}

@article{thomasPANTHERMakingGenomescale2022,
  title = {{{PANTHER}}: {{Making}} Genome-Scale Phylogenetics Accessible to All},
  shorttitle = {{{PANTHER}}},
  author = {Thomas, Paul D. and Ebert, Dustin and Muruganujan, Anushya and Mushayahama, Tremayne and Albou, Laurent-Philippe and Mi, Huaiyu},
  date = {2022},
  journaltitle = {Protein Science},
  volume = {31},
  number = {1},
  pages = {8--22},
  issn = {1469-896X},
  doi = {10.1002/pro.4218},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/pro.4218},
  urldate = {2025-01-16},
  abstract = {Phylogenetics is a powerful tool for analyzing protein sequences, by inferring their evolutionary relationships to other proteins. However, phylogenetics analyses can be challenging: they are computationally expensive and must be performed carefully in order to avoid systematic errors and artifacts. Protein Analysis THrough Evolutionary Relationships (PANTHER; http://pantherdb.org) is a publicly available, user-focused knowledgebase that stores the results of an extensive phylogenetic reconstruction pipeline that includes computational and manual processes and quality control steps. First, fully reconciled phylogenetic trees (including ancestral protein sequences) are reconstructed for a set of “reference” protein sequences obtained from fully sequenced genomes of organisms across the tree of life. Second, the resulting phylogenetic trees are manually reviewed and annotated with function evolution events: inferred gains and losses of protein function along branches of the phylogenetic tree. Here, we describe in detail the current contents of PANTHER, how those contents are generated, and how they can be used in a variety of applications. The PANTHER knowledgebase can be downloaded or accessed via an extensive API. In addition, PANTHER provides software tools to facilitate the application of the knowledgebase to common protein sequence analysis tasks: exploring an annotated genome by gene function; performing “enrichment analysis” of lists of genes; annotating a single sequence or large batch of sequences by homology; and assessing the likelihood that a genetic variant at a particular site in a protein will have deleterious effects.},
  langid = {english},
  keywords = {gene ontology,genome analysis,hidden Markov model,molecular evolution,omics data analysis,phylogenetic tree,protein function annotation,protein function evolution}
}

@incollection{vandongenUsingMCLExtract2012a,
  title = {Using {{MCL}} to {{Extract Clusters}} from {{Networks}}},
  booktitle = {Bacterial {{Molecular Networks}}},
  author = {Van Dongen, Stijn and Abreu-Goodger, Cei},
  editor = {Van Helden, Jacques and Toussaint, Ariane and Thieffry, Denis},
  date = {2012},
  volume = {804},
  pages = {281--295},
  publisher = {Springer New York},
  location = {New York, NY},
  doi = {10.1007/978-1-61779-361-5_15},
  url = {http://link.springer.com/10.1007/978-1-61779-361-5_15},
  urldate = {2024-04-11},
  isbn = {978-1-61779-360-8 978-1-61779-361-5},
  langid = {english}
}

@article{yangEstimatingSynonymousNonsynonymous2000a,
  title = {Estimating {{Synonymous}} and {{Nonsynonymous Substitution Rates Under Realistic Evolutionary Models}}},
  author = {Yang, Ziheng and Nielsen, Rasmus},
  date = {2000-01-01},
  journaltitle = {Molecular Biology and Evolution},
  shortjournal = {Molecular Biology and Evolution},
  volume = {17},
  number = {1},
  pages = {32--43},
  issn = {0737-4038},
  doi = {10.1093/oxfordjournals.molbev.a026236},
  url = {https://doi.org/10.1093/oxfordjournals.molbev.a026236},
  urldate = {2024-12-29},
  abstract = {Approximate methods for estimating the numbers of synonymous and nonsynonymous substitutions between two DNA sequences involve three steps: counting of synonymous and nonsynonymous sites in the two sequences, counting of synonymous and nonsynonymous differences between the two sequences, and correcting for multiple substitutions at the same site. We examine complexities involved in those steps and propose a new approximate method that takes into account two major features of DNA sequence evolution: transition/transversion rate bias and base/codon frequency bias. We compare the new method with maximum likelihood, as well as several other approximate methods, by examining infinitely long sequences, performing computer simulations, and analyzing a real data set. The results suggest that when there are transition/transversion rate biases and base/codon frequency biases, previously described approximate methods for estimating the nonsynonymous/synonymous rate ratio may involve serious biases, and the bias can be both positive and negative. The new method is, in general, superior to earlier approximate methods and may be useful for analyzing large data sets, although maximum likelihood appears to always be the method of choice.},
  langid = {english}
}