Merge branch 'dev'

2023-09-27 18:06:58 +02:00 · 2023-09-27 18:06:58 +02:00 · 4bf97cb0fd
parent b2d7fc3af6 7f89b6c842
commit 4bf97cb0fd
19 changed files with 508 additions and 29 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1 @@
 main.pdf filter=lfs diff=lfs merge=lfs -text
--- a/6
+++ b/6
@ -0,0 +1,6 @@
 # Extra command-line flags passed to every lualatex invocation below.
 options=-shell-escape -file-line-error
 # Default target: build the main document.
 all: main.pdf
 # Pattern rule: build X.pdf from X.tex with a single lualatex run
 # ($< expands to the .tex prerequisite).
 %.pdf: %.tex
 	lualatex $(options) $<
--- a/content/chapters/1.tex
+++ b/content/chapters/1.tex
@ -0,0 +1,168 @@
 \chapter{Unsupervised Learning}
 \begin{definition}[Precision Medicine]
  Design of treatment for a given patient, based on genomic data.
 \end{definition}
 \begin{definition}[Hierarchical clustering]
 \end{definition}
 Gene expression time series: look for genes with similar expression footprint.
 \paragraph{Representation of data}
 \begin{itemize}
  \item Tables;
  \item Trees / Graphs;
  \item Time series\dots
 \end{itemize}
 \begin{figure}
  \includestandalone{figures/plots/genes_expression_timeseries}
  \caption{Example of gene expression time series}
 \end{figure}
 \section{Distances and Similarities}
 \begin{property}[Distance]
  \begin{description}
    \item[non-negativity] $d(i, j) \geq 0$
    \item[isolation] $d(i, i) = 0$
    \item[symmetry] $d(i, j) = d(j, i)$
    \item[triangle inequality] $d(i, j) \leq d(i, h) + d(h, j)$
  \end{description}
 \end{property}
 \begin{definition}[Dissimilarity]
  A distance that is not required to satisfy the triangle inequality.
 \end{definition}
 \begin{definition}[Similarity]
  Function $s$ from $X \times X$ to $\RR_+$ such that:
  \begin{enumerate}
    \item $s$ is symmetric: $\forall (x, y) \in X \times X,\ s(x, y) = s(y, x)$;
    \item $\forall (x, y) \in X \times X$ with $x \neq y$: $s(x, x) = s(y, y) > s(x, y)$.
  \end{enumerate}
 \end{definition}
 \begin{exercise}
  Let $d(x, y)$ be the distance, $d(x, y) \in [0, +\infty[$.
  What should be the similarity measure $S(x, y) = f(d(x, y))$ that satisfies the following property:
  \[
    \forall (x, y) \in X \times X,\ x \neq y \: | \: S(x, x) > S(x, y)
  \]
  having $S(x, y) \leq M$, $S(x, y) \in ]0, M]$.
 \end{exercise}
 $d(x, y) \geq 0 \: \forall (x, y)$
 \begin{equation}
  S(x, y) = \frac{M}{d(x, y) + 1}
  \label{eq:similarity-first}
 \end{equation}
 In \cref{eq:similarity-first}, $S(x, y)$ takes its values in $]0, M]$:
 \begin{align}
  \lim_{n \to \infty} \frac{M}{n + 1} &= 0, & \lim_{n \to 0} \frac{M}{n + 1} &= M.
 \end{align}
 \section{Data Representation}
 \paragraph{Data matrix}
 \paragraph{Distance matrix}
 \[
  \begin{bmatrix}
    0 \\
    d(2, 1) & 0 \\
    d(3, 1) & d(3, 2) & 0 \\
    \vdots & \vdots & \ddots \\
    d(n, 1) & d(n,2) & \dots & \dots & 0
  \end{bmatrix}
 \]
 \begin{table}
  \centering
  \begin{tabular}{c|cc}
    &$s_{1}$ & $s_{2}$ \\
    \hline
    $p_{1}$ & 0 & 1 \\
    $p_{2}$ & 1 & 0 \\
    $p_{3}$ & 3 & 2 \\
    \end{tabular}
  \caption{Example data matrix: 2 symptoms for 3 patients.}
 \end{table}
 \begin{definition}[Minkowski distance]
  \[
    L_p (x, y) = \left(\abs{x_1 - y_1}^p + \abs{x_2 - y_2}^p + \ldots + \abs{x_d - y_d}^p\right)^{\sfrac{1}{p}} = \left(\sum_{i=1}^d \abs{x_i - y_i}^p\right)^{\sfrac{1}{p}}
  \]
  where $p$ is a positive integer.
 \end{definition}
 \begin{definition}[Manhattan distance]
  \[
  L_1(x, y) = \sum_{i=1}^d \abs{x_i - y_i}
  \]
 \end{definition}
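 \begin{definition}[Euclidean distance]
  Let $A$ and $B$ be two points, with $(x_{A}, y_{A})$ and $(x_{B}, y_{B})$ their respective coordinates; the Euclidean distance between $A$ and $B$ is
  \[
    d(A, B) = \sqrt{(x_{B} - x_{A})^2 + (y_{B} - y_{A})^2}.
  \]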
 \end{definition}
 If $p=2$, $L_2$ is the Euclidean distance:
 \begin{definition}[Euclidean distance]
  \[
    d(x, y) = \sqrt{\abs{x_1 - y_1}^2 + \abs{x_2 - y_2}^2 + \ldots + \abs{x_d - y_d}^2}
  \]
 \end{definition}
 We can add weights
 \subsection{K-means}
 The cost function is minimized:
 \[
  \operatorname{Cost}(C) = \sum_{i=1}^{k} \dots
 \]
 \begin{algorithm}[H]
  Choose the number of clusters $k$.
  Choose randomly $k$ means.
  For each point, compute the distance between the point and each mean.
  We allocate the point to the cluster represented by the closest center.
  We set each mean to the center of its cluster, and reiterate.
  \caption{$K$-means algorithm}
 \end{algorithm}
 \begin{exercise}
  We have six genes:
  \begin{table}[H]
    \centering
    \begin{tabular}{ccccccc}
      \toprule
      & $g_{1}$ & $g_{2}$ & $g_{3}$ & $g_{4}$ & $g_{5}$  & $g_{6}$ \\
      \midrule
      $\times 10^{-2}$ & 10 & 12 & 9 & 15 & 17 & 18 \\
      \bottomrule
    \end{tabular}
    \caption{Sample values for six gene expressions.}
  \end{table}
  With $k=2$ and $m_{1} = 10 \cdot 10^{-2}$ and $m_{2} = 9 \cdot 10^{-2}$ the two initial randomly chosen means, run the $k$-means algorithm.
 \end{exercise}
 \begin{figure}
  \centering
  \includegraphics[scale=1]{figures/plots/kmeans.pdf}
  \caption{$k$-means states at each of the 3 steps}
 \end{figure}
--- a/content/chapters/include.tex
+++ b/content/chapters/include.tex
@ -11,9 +11,7 @@
 		}
 }
-
+\includechapters{}{2}
 \includechapters{part1}{2}
 % \includechapters{part2}{2}
--- a/content/chapters/part1/1.tex
+++ b/content/chapters/part1/1.tex
--- a/content/genes_expression_timeseries.tex
+++ b/content/genes_expression_timeseries.tex
@ -0,0 +1,19 @@
 \documentclass[tikz,a4paper]{standalone}
 \usepackage{tikz}
 \begin{document}
 \usetikzlibrary{datavisualization}
 \begin{tikzpicture}
  \datavisualization[visualize as smooth line]
  data {
    x, y
    2, 1,
    3, 2,
    4, 1.5
  };
 \end{tikzpicture}
 \end{document}
--- a/figures/euclidian_distance.tex
+++ b/figures/euclidian_distance.tex
@ -0,0 +1,19 @@
 \documentclass[tikz]{standalone}
 \usepackage{tikz}
 \usepackage{tkz-euclide}
 \begin{document}
 \begin{tikzpicture}[scale=1]
  \tkzInit[xmax=5,ymax=5]
  \tkzDrawX[>=latex]
  \tkzDrawY[>=latex]
  \tkzDefPoints()
 \end{tikzpicture}
 \end{document}
--- a/figures/plots/.gitattributes
+++ b/figures/plots/.gitattributes
@ -0,0 +1,2 @@
 genes_expression_timeseries.pdf filter=lfs diff=lfs merge=lfs -text
 kmeans.pdf filter=lfs diff=lfs merge=lfs -text
--- a/figures/plots/genes_expression_timeseries.pdf
+++ b/figures/plots/genes_expression_timeseries.pdf
--- a/figures/plots/genes_expression_timeseries.tex
+++ b/figures/plots/genes_expression_timeseries.tex
@ -0,0 +1,45 @@
 \documentclass[tikz]{standalone}
 \usepackage{tikz}
 \begin{document}
 \usetikzlibrary{datavisualization}
 \begin{tikzpicture}
  \datavisualization data group {genes} = {
    data[set=gene1] {
      x, y
      0, 1,
      1, 2,
      2, 1.5
    }
    data[set=gene2] {
      x, y
      0, 1.5,
      1, 2.25,
      2, 1.75
    }
    data[set=gene3] {
      x, y
      0, 0.25,
      1, 0.26,
      2, 0.7
    }
    data[set=gene4] {
      x, y
      0, 0.5,
      1, 0.25,
      2, 1
    }
  };
  \datavisualization [
  school book axes, all axes={unit length=7.5mm},
  visualize as smooth line/.list={gene1, gene2, gene3, gene4},
  style sheet=strong colors,
  x axis={label=$t$},
  y axis={label={expression}}]
 data group {genes};
 \end{tikzpicture}
 \end{document}
--- a/figures/plots/kmeans.pdf
+++ b/figures/plots/kmeans.pdf
--- a/figures/plots/kmeans.tex
+++ b/figures/plots/kmeans.tex
@ -0,0 +1,54 @@
 \documentclass[margin=0.5cm]{standalone}
 \usepackage{tikz}
 \usepackage{pyluatex}
 \usepackage{pgf}
 \begin{document}
 \begin{python}
 # Script run inside the pyluatex `python` environment: runs 1-D k-means on six
 # sample values and prints the resulting figure as PGF code on stdout.
 # NOTE(review): the `# %%` markers look like notebook-style cell separators;
 # the same code appears in notebooks/kmeans1d.ipynb.
 # %%
 import io
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.cluster import KMeans
 # %%
 # Inline "name,value" records, one per line.
 data = """g1,10
 g2,12
 g3,9
 g4,15
 g5,17
 g6,18"""
 # Keep only the integer value (second comma-separated field) of each record.
 points =[int(row.split(",")[1]) for row in data.split("\n")]
 # Wrap each value in its own list to get one row per sample with a single feature.
 X = np.array([[point] for point in points])
 # Explicit starting centers handed to KMeans through `init`.
 initial_means = [[10], [9]]
 # Bare expression: has no effect when run as a script.
 points
 # %%
 # Cluster centers recorded for each iteration budget (max_iter = 1, 2, 3).
 kmeans_values = []
 for i in range(1,4): 
    # Same data and same initial centers every time; only max_iter changes,
    # and n_init=1 requests a single run from `initial_means`.
    kmeans = KMeans(n_clusters=2, random_state=42, max_iter=i, init=initial_means, n_init=1)
    kmeans.fit(X)
    kmeans_values.append(kmeans.cluster_centers_)
 # %%
 # One subplot per recorded state, stacked vertically with a shared x axis.
 fig, axs = plt.subplots(len(kmeans_values), 1, sharex=True)
 for i, centroids in enumerate(kmeans_values):
    ax = axs[i]
    # Centers are drawn as 'x' markers, data points as small black dots;
    # both use the constant y value i within their subplot.
    ax.scatter(centroids, [i]*len(centroids), marker='x')
    ax.scatter(points, [i]*len(points), s=2, color="black")
    ax.axis('off')
 # Render the figure as PGF into an in-memory text buffer and print it.
 # NOTE(review): presumably pyluatex inserts this printed output into the
 # document — confirm against the pyluatex documentation.
 with io.StringIO() as file:
    fig.savefig(file, format="pgf", bbox_inches="tight", pad_inches=0.1)
    print(file.getvalue())
 \end{python}
 \begin{tikzpicture}
 \end{tikzpicture}
 \end{document}
--- a/content/chapters/part1/0.tex
+++ b/content/chapters/part1/0.tex
--- a/main.pdf
+++ b/main.pdf
--- a/main.tex
+++ b/main.tex
@ -1,46 +1,69 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Course of None
+% Course on "Data-mining and Machine Learning" - GENIOMHE - M1-S1
-% 
+%
-% Author: Samuel ORTION <samuel@ortion.fr> 
+% Author: Samuel Ortion <samuel@ortion.fr>
-% Version: 0.0.1
+% Version: 0.1.0
 % Date: 2023
 % Licence: CC-By-SA 4.0+ International 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\documentclass[
+\documentclass[twoside=false,fontsize=10pt,fleqn]{scrbook}
-	a4paper,
+\usepackage{mus}
-	10pt,
+\usepackage{standalone}
-	fleqn,
+\titlehead{GENIOMHE}
-	oneside
+\title{Data-mining and\newline{}Machine Learning}
-]{talpa}
+\subtitle{}
 \author{Samuel Ortion}
 \date{Fall 2023}
 \teacher{Farida Zerhaoui}
 \cursus{GENIOMHE}
 \university{Université d'Évry val d'Essonne -- Université Paris-Saclay}
 \semester{M1 - S1}
-\input{colors.tex}
+\input{definitions}
-\input{meta.tex}
+\input{preamble}
 \input{definitions.tex}
 \hypersetup{
-	pdftitle={
+	pdftitle={Course - Data-mining and Machine Learning},
-		Course - None
+	pdfauthor={Samuel Ortion},
 	},
 	pdfauthor={
 		Samuel Ortion
 	},
 	pdfsubject={},
-	pdfkeywords={},
+	pdfkeywords={GENIOMHE, Master, bioinformatics, machine learning, statistics, data},
 	pdfcreator={LaTeX}
 }
-% \addbibressource{bibliography.bib}
+\usepackage{ccicons}
 \usepackage[
    type={CC},
    modifier={by-sa},
    version={4.0},
 ]{doclicense}
 \addbibresource{references.bib}
 \makeindex
 \begin{document}
-\tableofcontents
+\setkomafont{fullpagetitle}{\fontsize{1.5cm}{3em}\fontseries{b}\selectfont}
 \maketitlefullpage
-% \input{content/introduction.tex}
+{
 	\hypersetup{
 		linkcolor=black
 	}
 	\tableofcontents
 }
-\input{content/chapters/include.tex}
+\doclicenseThis%
-% \input{content/conclusion.tex}
+% \input{content/introduction}
-\end{document}
+\input{content/chapters/include}
 % \input{content/conclusion}
 \nocite{*}
 \printbibliography%
 % \printglossary%
 \end{document}
--- a/notebooks/kmeans1d.ipynb
+++ b/notebooks/kmeans1d.ipynb
@ -0,0 +1,109 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.cluster import KMeans"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[10, 12, 9, 15, 17, 18]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "\n",
    "data = \"\"\"g1,10\n",
    "g2,12\n",
    "g3,9\n",
    "g4,15\n",
    "g5,17\n",
    "g6,18\"\"\"\n",
    "\n",
    "points =[int(row.split(\",\")[1]) for row in data.split(\"\\n\")]\n",
    "X = np.array([[point] for point in points])\n",
    "initial_means = [[10], [9]]\n",
    "points"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "kmeans_values = []\n",
    "for i in range(1,4): \n",
    "    kmeans = KMeans(n_clusters=2, random_state=42, max_iter=i, init=initial_means, n_init=1)\n",
    "    kmeans.fit(X)\n",
    "    kmeans_values.append(kmeans.cluster_centers_)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAGFCAYAAABg2vAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAMpklEQVR4nO3dMU9Va77H8f+54cgRjBhsRpPDJDsZKS0GM+z2vgUL7Ow8mel4JSbTGC2spZj7Fm41kRAtKGUSEr0JTCMBIm6QnXALMxxRi3N0b9Zi/T6fbkPifh6f9Tz5Zu219YeTk5OTAgBi/VfTAwAAmiUGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgXCMxsH94XNt7g6/+bntvUPuHx+c8IqDtnBswPuceA/uHx3X/6VotPV6trd2zG3trd1BLj1fr/tM1Gxs45dyA8Tr3GDg4Gtbbdx/qzc77uvfk1429tTuoe09W683O+3r77kMdHA3Pe2hASzk3YLzOPQZuzFyuZw8Wa252qt7svK+lR/+s//nftVp69M96s/O+5man6tmDxboxc/m8h/ZdhsNhbWxs1HDoMGoD69Etn58b956s1svXO6chcF7nhuuqXbqyHm2YRyPPDNy89nFj/zwzWWt//1vd/e+/1Nrf/1Y/z0zWsweLdfPaxQuBfr9f8/Pz1e/3L/yFedFZj276z7nxnyC4++j5mRAY97nhumqXrqxHW+bR2LcJbl67XMuLV+vDv/9VVVUf/v2vWl68euFCoKpqc3OzXrx4UVVVL168qM3NzYZHlM16dNfNa5fr4dLtMz97uHT7XM4N11W7dGU92jKPxmJga3dQD1f369If/lRVVZf+8Kd6uLr/xcNBF0Gv16uFhYWqqrpz5071er2GR5TNenTX1u6gllfWz/xseWX9XM4N11W7dGU92jKPH05OTk7O+00/fejn55nJWl68Wg9X9+v/9o7O7ZbfqA2Hw9rc3Kxer1cTExNNDyee9eieT8+Nudmperh0u5ZX1s/9owLXVXt0ZT3aMI9zj4HtvY9fA/p8A3++0Vd+uXgPEQLj4dyA8Tr3jwmmJyfq+pVLX5T8pw8HXb9yqaYnL27lAaPl3IDxauRjgv3D4zo4Gn614Lf3BjU9OVFXf/rxvIcFtJhzA8ankRgAANrDf1QEAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADh
xAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhOtsDOwfHtf23uCrv9veG9T+4fE5jwjg65xXNK2TMbB/eFz3n67V0uPV2to9u8G2dge19Hi17j9ds8GAxjmvaINOxsDB0bDevvtQb3be170nv26wrd1B3XuyWm923tfbdx/q4GjY8EiBdM4r2qDxGBgOh7WxsVHD4egu9Bszl+vZg8Wam5063WAvX++cbqy52al69mCxbsxcHtl7jmMefDvrwTh05bzqiq7s8zbMo9EYGA6H1e/3a35+vvr9/kj/Im5eO7vB7j56fmZj3bw22hAY1zz4/awH49CV86orurLP2zKPH05OTk4aeeeq2tjYqPn5+dPXr169qlu3bo30PV6+3qm7j56fvv7HX/v15z/OjvQ9zmMe/HbWg3HoynnVFV3Z522ZR6N3Bnq9Xi0sLFRV1Z07d6rX6430z9/aHdTyyvqZny2vrH/xkM73Gvc8+H2sB+PQlfOqK7qyz9syj0bvDFR9vEWyublZvV6vJiYmRvbnfvrwzdzsVD1cul3LK+tj/ahgHPPg21gPxqEr51VXdGWft2EejcfAOGzvffw6zucb6fMNt/KLh3KAZjmvaIPGv00wDtOTE3X9yqUvivrTh3SuX7lU05MXtySBbnBe0QadvDNQ9fEf8jg4Gn61pLf3BjU9OVFXf/qxgZEBnOW8ommdjQEA4Lfp5McEAMBvJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGKgQfuHx7W9N/jq77b3BrV/eHzOIwIYL+deO4mBhuwfHtf9p2u19Hi1tnbPboyt3UEtPV6t+0/XbAygM5x77SUGGnJwNKy37z7Um533de/Jrxtja3dQ956s1pud9/X23Yc6OBo2PFKA0XDutVfjMTAcDmtjY6OGw4u9+L93HjdmLtezB4s1Nzt1ujFevt453RBzs1P17MFi3Zi5POaRd1NXrivaxXX1fUZ97nVlPdowj0ZjYDgcVr/fr/n5+er3+xd2Qb91Hjevnd0Ydx89P7Mhbl4TAt+iK9cV7eK6Go1RnXtdWY+2zOOHk5OTk0beuao2NjZqfn7+9PWrV6/q1q1bTQ3nm33vPF6+3qm7j56fvv7HX/v15z/OjnSMSbpyXdEurqvR+t5zryvr0ZZ5NHpnoNfr1cLCQlVV3blzp3q9XpPD+WbfM4+t3UEtr6yf+dnyyvoXD9fw23XluqJdXFejM4pzryvr0ZZ5NHpnoOrjLZLNzc3q9Xo1MTHR
5FC+y7fM49OHZuZmp+rh0u1aXln3UcEIdOW6ol1cV99vlOdeV9ajDfNoPAZSbe99/BrN5xvg842y8ouHCIFucO61V+PfJkg1PTlR169c+qKEP3245vqVSzU9eXFrF+BTzr32cmegQfuHx3VwNPxqAW/vDWp6cqKu/vRjAyMDGA/nXjuJAQAI52MCAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAI9/8lWKkDA/pOhQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 3 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, axs = plt.subplots(len(kmeans_values), 1, sharex=True)\n",
    "for i, centroids in enumerate(kmeans_values):\n",
    "    ax = axs[i]\n",
    "    ax.scatter(centroids, [i]*len(centroids), marker='x')\n",
    "    ax.scatter(points, [i]*len(points), s=2, color=\"black\")\n",
    "    ax.axis('off')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "geniomhe-ml",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/preamble.tex
+++ b/preamble.tex
@ -0,0 +1,2 @@
 \usepackage{mus-learn}
 \usepackage{xfrac}
--- a/references.bib
+++ b/references.bib
@ -0,0 +1,25 @@
@book{geron_hands-machine_2019,
  edition   = {2},
  title     = {Hands-On Machine Learning with Scikit-Learn, Keras, and {TensorFlow}},
  abstract  = {Through a recent series of breakthroughs, deep learning has boosted the entire field of machine learning. Now, even programmers who know close to nothing about this technology can use simple, … - Selection from Hands-On Machine Learning with Scikit-Learn, Keras, and {TensorFlow}, 2nd Edition [Book]},
  publisher = {O'{REILLY}},
  author    = {Géron, Aurélien},
  date      = {2019},
  langid    = {english},
  note      = {{ISBN}: 9781098125974}
 }
@collection{witten_data_2011,
  location  = {Boston},
  edition   = {4},
  title     = {Data Mining - Practical Machine Learning Tools and Techniques},
  isbn      = {978-0-12-374856-0},
  series    = {The Morgan Kaufmann Series in Data Management Systems},
  publisher = {Morgan Kaufmann},
  editor    = {Witten, Ian H. and Frank, Eibe and Hall, Mark A.},
  urldate   = {2023-06-16},
  date      = {2011-01-01},
  langid    = {english},
  doi       = {10.1016/B978-0-12-374856-0.00018-3}
 }
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
 scikit-learn
 numpy
		`@ -0,0 +1 @@`
							`main.pdf filter=lfs diff=lfs merge=lfs -text`
		`@ -0,0 +1,2 @@`
							`genes_expression_timeseries.pdf filter=lfs diff=lfs merge=lfs -text`
							`kmeans.pdf filter=lfs diff=lfs merge=lfs -text`
		`@ -0,0 +1,2 @@`
							`\usepackage{mus-learn}`
							`\usepackage{xfrac}`