Compare commits

...

3 Commits

Author SHA1 Message Date
Samuel Ortion 4bf97cb0fd Merge branch 'dev' 2023-09-27 18:06:58 +02:00
Samuel Ortion b2d7fc3af6 rm .vscode 2023-09-27 18:06:50 +02:00
Samuel Ortion 7f89b6c842 feat: Add a pgf-plot scikit-learn KMeans figure 2023-09-27 18:02:07 +02:00
20 changed files with 508 additions and 32 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
main.pdf filter=lfs diff=lfs merge=lfs -text

View File

@ -1,3 +0,0 @@
{
"ansible.python.interpreterPath": "/bin/python"
}

6
Makefile Normal file
View File

@ -0,0 +1,6 @@
# Build the course PDF with LuaLaTeX.
# -shell-escape allows the TeX run to start external programs;
# -file-line-error reports errors in file:line:message form.
options=-shell-escape -file-line-error

# `all` names a goal, not a file: declare it phony so that a stray file
# called `all` can never make the target look up to date.
.PHONY: all
all: main.pdf

# Pattern rule: compile foo.tex into foo.pdf ($< is the .tex prerequisite).
%.pdf: %.tex
	lualatex $(options) $<

168
content/chapters/1.tex Normal file
View File

@ -0,0 +1,168 @@
\chapter{Unsupervised Learning}
\begin{definition}[Precision Medicine]
Design of treatment for a given patient, based on genomic data.
\end{definition}
\begin{definition}[Hierarchical clustering]
\end{definition}
Gene expression time series: look for genes with similar expression footprint.
\paragraph{Representation of data}
\begin{itemize}
\item Tables;
\item Trees / Graphs;
\item Time series\dots
\end{itemize}
\begin{figure}
\includestandalone{figures/plots/genes_expression_timeseries}
\caption{Example of gene expression time series}
\end{figure}
\section{Distances and Similarities}
\begin{property}[Distance]
\begin{description}
\item[non-negativity] $d(i, j) \geq 0$
\item[isolation] $d(i, i) = 0$
\item[symmetry] $d(i, j) = d(j, i)$
\item[triangular inequality] $d(i, j) \leq d(i, h) + d(h, j)$
\end{description}
\end{property}
\begin{definition}[Dissimilarity]
Distance without triangular inequality.
\end{definition}
\begin{definition}[Similarity]
Function $s$ from $X \times X$ to $\RR_+$ such that:
\begin{enumerate}
\item $s$ is symmetric: $\forall (x, y) \in X \times X,\ s(x, y) = s(y, x)$;
\item $\forall (x, y) \in X \times X$ with $x \neq y$, $s(x, x) = s(y, y) > s(x, y)$.
\end{enumerate}
\end{definition}
\begin{exercise}
Let $d(x, y)$ be the distance, $d(x, y) \in [0, +\infty[$.
What should be the similarity measure $S(x, y) = f(d(x, y))$ that satisfies the following property:
\[
\forall (x, y) \in X \times X,\ x \neq y \: | \: S(x, x) > S(x, y)
\]
having $S(x, y) \leq M$, $S(x, y) \in ]0, M]$.
\end{exercise}
$d(x, y) \geq 0 \: \forall (x, y)$
\begin{equation}
S(x, y) = \frac{M}{d(x, y) + 1}
\label{eq:similarity-first}
\end{equation}
In \cref{eq:similarity-first}, $S(x, y)$ takes its values in $]0, M]$: it equals $M$ when $d(x, y) = 0$ and tends to $0$ as $d(x, y)$ grows.
\begin{align}
\lim_{n \to \infty} \frac{M}{n + 1} &= 0, & \lim_{n \to 0} \frac{M}{n + 1} &= M.
\end{align}
\section{Data Representation}
\paragraph{Data matrix}
\paragraph{Distance matrix}
\[
\begin{bmatrix}
0 \\
d(2, 1) & 0 \\
d(3, 1) & d(3, 2) & 0 \\
\vdots & \vdots & \ddots \\
d(n, 1) & d(n,2) & \dots & \dots & 0
\end{bmatrix}
\]
\begin{table}
\centering
\begin{tabular}{c|cc}
&$s_{1}$ & $s_{2}$ \\
\hline
$p_{1}$ & 0 & 1 \\
$p_{2}$ & 1 & 0 \\
$p_{3}$ & 3 & 2 \\
\end{tabular}
\caption{Example data matrix: 2 symptoms for 3 patients.}
\end{table}
\begin{definition}[Minkowski distance]
\[
L_p (x, y) = \left(\abs{x_1 - y_1}^p + \abs{x_2 - y_2}^p + \ldots + \abs{x_d - y_d}^p\right)^{\sfrac{1}{p}} = \left(\sum_{i=1}^d \abs{x_i - y_i}^p\right)^{\sfrac{1}{p}}
\]
where $p$ is a positive integer.
\end{definition}
\begin{definition}[Manhattan distance]
\[
L_1(x, y) = \sum_{i=1}^d \abs{x_i - y_i}
\]
\end{definition}
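\begin{definition}[Euclidean distance]
Let $A$ and $B$ be two points, with $(x_{A}, y_{A})$ and $(x_{B}, y_{B})$ their respective coordinates; the Euclidean distance between them is
\[
d(A, B) = \sqrt{(x_{B} - x_{A})^2 + (y_{B} - y_{A})^2}.
\]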
\end{definition}
If $p=2$, $L_2$ is the Euclidean distance:
\begin{definition}[Euclidean distance]
\[
d(x, y) = \sqrt{\abs{x_1 - y_1}^2 + \abs{x_2 - y_2}^2 + \ldots + \abs{x_d - y_d}^2}
\]
\end{definition}
We can add weights
\subsection{K-means}
The cost function is minimized:
\[
\mathrm{Cost}(C) = \sum_{i=1}^{k} \sum_{x \in C_i} d(x, m_i)^2,
\]
where $C_i$ denotes the $i$-th cluster and $m_i$ its mean.
\begin{algorithm}[H]
Choose the number of clusters $k$.
Randomly choose $k$ initial means.
For each point, compute the distance between the point and each mean.
We allocate the point to the cluster represented by the closest center.
We set each mean to the center of its cluster, and reiterate.
\caption{$K$-means algorithm}
\end{algorithm}
\begin{exercise}
We have six genes:
\begin{table}[H]
\centering
\begin{tabular}{ccccccc}
\toprule
& $g_{1}$ & $g_{2}$ & $g_{3}$ & $g_{4}$ & $g_{5}$ & $g_{6}$ \\
\midrule
$\times 10^{-2}$ & 10 & 12 & 9 & 15 & 17 & 18 \\
\bottomrule
\end{tabular}
\caption{Sample values for six gene expressions.}
\end{table}
With $k=2$ and $m_{1} = 10 \cdot 10^{-2}$ and $m_{2} = 9 \cdot 10^{-2}$ the two initial randomly chosen means, run the $k$-means algorithm.
\end{exercise}
\begin{figure}
\centering
\includegraphics[scale=1]{figures/plots/kmeans.pdf}
\caption{$k$-means states at each of the 3 steps}
\end{figure}

View File

@ -11,9 +11,7 @@
} }
} }
\includechapters{}{2}
\includechapters{part1}{2}
% \includechapters{part2}{2} % \includechapters{part2}{2}

View File

@ -0,0 +1,19 @@
\documentclass[tikz,a4paper]{standalone}
\usepackage{tikz}
\begin{document}
\usetikzlibrary{datavisualization}
\begin{tikzpicture}
\datavisualization[visualize as smooth line]
data {
x, y
2, 1,
3, 2,
4, 1.5
};
\end{tikzpicture}
\end{document}

View File

@ -0,0 +1,19 @@
\documentclass[tikz]{standalone}
\usepackage{tikz}
\usepackage{tkz-euclide}
\begin{document}
\begin{tikzpicture}[scale=1]
\tkzInit[xmax=5,ymax=5]
\tkzDrawX[>=latex]
\tkzDraw[>=latex]
\tkzDefPoints()
\end{tikzpicture}
\end{document}

2
figures/plots/.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
genes_expression_timeseries.pdf filter=lfs diff=lfs merge=lfs -text
kmeans.pdf filter=lfs diff=lfs merge=lfs -text

BIN
figures/plots/genes_expression_timeseries.pdf (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,45 @@
\documentclass[tikz]{standalone}
\usepackage{tikz}
\begin{document}
\usetikzlibrary{datavisualization}
\begin{tikzpicture}
\datavisualization data group {genes} = {
data[set=gene1] {
x, y
0, 1,
1, 2,
2, 1.5
}
data[set=gene2] {
x, y
0, 1.5,
1, 2.25,
2, 1.75
}
data[set=gene3] {
x, y
0, 0.25,
1, 0.26,
2, 0.7
}
data[set=gene4] {
x, y
0, 0.5,
1, 0.25,
2, 1
}
};
\datavisualization [
school book axes, all axes={unit length=7.5mm},
visualize as smooth line/.list={gene1, gene2, gene3, gene4},
style sheet=strong colors,
x axis={label=$t$},
y axis={label={expression}}]
data group {genes};
\end{tikzpicture}
\end{document}

BIN
figures/plots/kmeans.pdf (Stored with Git LFS) Normal file

Binary file not shown.

54
figures/plots/kmeans.tex Normal file
View File

@ -0,0 +1,54 @@
\documentclass[margin=0.5cm]{standalone}
\usepackage{tikz}
\usepackage{pyluatex}
\usepackage{pgf}
\begin{document}
\begin{python}
# One-dimensional k-means on the six gene-expression values of the exercise.
# The cluster centres obtained with max_iter = 1, 2 and 3 are drawn on three
# stacked axes and the figure is printed to stdout as PGF code.
import io

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# One "name,value" row per gene.
data = """g1,10
g2,12
g3,9
g4,15
g5,17
g6,18"""

# Keep only the numeric column; KMeans expects a 2-D (n_samples, n_features) array.
points = [int(row.split(",")[1]) for row in data.split("\n")]
X = np.array([[point] for point in points])

# Starting means given in the exercise, as an explicit (n_clusters, n_features) array.
initial_means = np.array([[10], [9]])

# Refit from the same starting means with a growing iteration budget, so that
# each entry holds the centres reached after 1, 2 and 3 iterations at most.
kmeans_values = []
for n_iterations in range(1, 4):
    kmeans = KMeans(
        n_clusters=2,
        random_state=42,
        max_iter=n_iterations,
        init=initial_means,
        n_init=1,
    )
    kmeans.fit(X)
    kmeans_values.append(kmeans.cluster_centers_)

# squeeze=False always returns a 2-D array of axes, so the indexing below
# does not depend on how many steps are plotted.
fig, axs = plt.subplots(len(kmeans_values), 1, sharex=True, squeeze=False)
for i, (ax, centroids) in enumerate(zip(axs[:, 0], kmeans_values)):
    ax.scatter(centroids, [i] * len(centroids), marker='x')
    ax.scatter(points, [i] * len(points), s=2, color="black")
    ax.axis('off')

# Render to an in-memory text buffer and print it.
with io.StringIO() as file:
    fig.savefig(file, format="pgf", bbox_inches="tight", pad_inches=0.1)
    print(file.getvalue())

# Release the figure once it has been written out.
plt.close(fig)
\end{python}
\begin{tikzpicture}
\end{tikzpicture}
\end{document}

BIN
main.pdf

Binary file not shown.

View File

@ -1,46 +1,69 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Course of None % Course on "Data-mining and Machine Learning" - GENIOMHE - M1-S1
% %
% Author: Samuel ORTION <samuel@ortion.fr> % Author: Samuel Ortion <samuel@ortion.fr>
% Version: 0.0.1 % Version: 0.1.0
% Date: 2023 % Date: 2023
% Licence: CC-By-SA 4.0+ International % Licence: CC-By-SA 4.0+ International
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[ \documentclass[twoside=false,fontsize=10pt,fleqn]{scrbook}
a4paper, \usepackage{mus}
10pt, \usepackage{standalone}
fleqn, \titlehead{GENIOMHE}
oneside \title{Data-mining and\newline{}Machine Learning}
]{talpa} \subtitle{}
\author{Samuel Ortion}
\date{Fall 2023}
\teacher{Farida Zerhaoui}
\cursus{GENIOMHE}
\university{Université d'Évry val d'Essonne -- Université Paris-Saclay}
\semester{M1 - S1}
\input{colors.tex} \input{definitions}
\input{meta.tex} \input{preamble}
\input{definitions.tex}
\hypersetup{ \hypersetup{
pdftitle={ pdftitle={Course - Data-mining and Machine Learning},
Course - None pdfauthor={Samuel Ortion},
},
pdfauthor={
Samuel Ortion
},
pdfsubject={}, pdfsubject={},
pdfkeywords={}, pdfkeywords={GENIOMHE, Master, bioinformatics, machine learning, statistics, data},
pdfcreator={LaTeX} pdfcreator={LaTeX}
} }
% \addbibressource{bibliography.bib} \usepackage{ccicons}
\usepackage[
type={CC},
modifier={by-sa},
version={4.0},
]{doclicense}
\addbibresource{references.bib}
\makeindex \makeindex
\begin{document} \begin{document}
\setkomafont{fullpagetitle}{\fontsize{1.5cm}{3em}\fontseries{b}\selectfont}
\maketitlefullpage
{
\hypersetup{
linkcolor=black
}
\tableofcontents \tableofcontents
}
% \input{content/introduction.tex} \doclicenseThis%
\input{content/chapters/include.tex} % \input{content/introduction}
% \input{content/conclusion.tex} \input{content/chapters/include}
% \input{content/conclusion}
\nocite{*}
\printbibliography%
% \printglossary%
\end{document} \end{document}

109
notebooks/kmeans1d.ipynb Normal file
View File

@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import io\n",
"\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.cluster import KMeans"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[10, 12, 9, 15, 17, 18]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"\n",
"data = \"\"\"g1,10\n",
"g2,12\n",
"g3,9\n",
"g4,15\n",
"g5,17\n",
"g6,18\"\"\"\n",
"\n",
"points =[int(row.split(\",\")[1]) for row in data.split(\"\\n\")]\n",
"X = np.array([[point] for point in points])\n",
"initial_means = [[10], [9]]\n",
"points"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"kmeans_values = []\n",
"for i in range(1,4): \n",
" kmeans = KMeans(n_clusters=2, random_state=42, max_iter=i, init=initial_means, n_init=1)\n",
" kmeans.fit(X)\n",
" kmeans_values.append(kmeans.cluster_centers_)\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAGFCAYAAABg2vAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAMpklEQVR4nO3dMU9Va77H8f+54cgRjBhsRpPDJDsZKS0GM+z2vgUL7Ow8mel4JSbTGC2spZj7Fm41kRAtKGUSEr0JTCMBIm6QnXALMxxRi3N0b9Zi/T6fbkPifh6f9Tz5Zu219YeTk5OTAgBi/VfTAwAAmiUGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgXCMxsH94XNt7g6/+bntvUPuHx+c8IqDtnBswPuceA/uHx3X/6VotPV6trd2zG3trd1BLj1fr/tM1Gxs45dyA8Tr3GDg4Gtbbdx/qzc77uvfk1429tTuoe09W683O+3r77kMdHA3Pe2hASzk3YLzOPQZuzFyuZw8Wa252qt7svK+lR/+s//nftVp69M96s/O+5man6tmDxboxc/m8h/ZdhsNhbWxs1HDoMGoD69Etn58b956s1svXO6chcF7nhuuqXbqyHm2YRyPPDNy89nFj/zwzWWt//1vd/e+/1Nrf/1Y/z0zWsweLdfPaxQuBfr9f8/Pz1e/3L/yFedFZj276z7nxnyC4++j5mRAY97nhumqXrqxHW+bR2LcJbl67XMuLV+vDv/9VVVUf/v2vWl68euFCoKpqc3OzXrx4UVVVL168qM3NzYZHlM16dNfNa5fr4dLtMz97uHT7XM4N11W7dGU92jKPxmJga3dQD1f369If/lRVVZf+8Kd6uLr/xcNBF0Gv16uFhYWqqrpz5071er2GR5TNenTX1u6gllfWz/xseWX9XM4N11W7dGU92jKPH05OTk7O+00/fejn55nJWl68Wg9X9+v/9o7O7ZbfqA2Hw9rc3Kxer1cTExNNDyee9eieT8+Nudmperh0u5ZX1s/9owLXVXt0ZT3aMI9zj4HtvY9fA/p8A3++0Vd+uXgPEQLj4dyA8Tr3jwmmJyfq+pVLX5T8pw8HXb9yqaYnL27lAaPl3IDxauRjgv3D4zo4Gn614Lf3BjU9OVFXf/rxvIcFtJhzA8ankRgAANrDf1QEAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhB
MDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhOtsDOwfHtf23uCrv9veG9T+4fE5jwjg65xXNK2TMbB/eFz3n67V0uPV2to9u8G2dge19Hi17j9ds8GAxjmvaINOxsDB0bDevvtQb3be170nv26wrd1B3XuyWm923tfbdx/q4GjY8EiBdM4r2qDxGBgOh7WxsVHD4egu9Bszl+vZg8Wam5063WAvX++cbqy52al69mCxbsxcHtl7jmMefDvrwTh05bzqiq7s8zbMo9EYGA6H1e/3a35+vvr9/kj/Im5eO7vB7j56fmZj3bw22hAY1zz4/awH49CV86orurLP2zKPH05OTk4aeeeq2tjYqPn5+dPXr169qlu3bo30PV6+3qm7j56fvv7HX/v15z/OjvQ9zmMe/HbWg3HoynnVFV3Z522ZR6N3Bnq9Xi0sLFRV1Z07d6rX6430z9/aHdTyyvqZny2vrH/xkM73Gvc8+H2sB+PQlfOqK7qyz9syj0bvDFR9vEWyublZvV6vJiYmRvbnfvrwzdzsVD1cul3LK+tj/ahgHPPg21gPxqEr51VXdGWft2EejcfAOGzvffw6zucb6fMNt/KLh3KAZjmvaIPGv00wDtOTE3X9yqUvivrTh3SuX7lU05MXtySBbnBe0QadvDNQ9fEf8jg4Gn61pLf3BjU9OVFXf/qxgZEBnOW8ommdjQEA4Lfp5McEAMBvJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGKgQfuHx7W9N/jq77b3BrV/eHzOIwIYL+deO4mBhuwfHtf9p2u19Hi1tnbPboyt3UEtPV6t+0/XbAygM5x77SUGGnJwNKy37z7Um533de/Jrxtja3dQ956s1pud9/X23Yc6OBo2PFKA0XDutVfjMTAcDmtjY6OGw4u9+L93HjdmLtezB4s1Nzt1ujFevt453RBzs1P17MFi3Zi5POaRd1NXrivaxXX1fUZ97nVlPdowj0ZjYDgcVr/fr/n5+er3+xd2Qb91Hjevnd0Ydx89P7Mhbl4TAt+iK9cV7eK6Go1RnXtdWY+2zOOHk5OTk0beuao2NjZqfn7+9PWrV6/q1q1bTQ3nm33vPF6+3qm7j56fvv7HX/v15z/OjnSMSbpyXdEurqvR+t5zryvr0ZZ5NHpnoNfr1cLCQlVV3blzp3q9XpPD+WbfM4+t3UEtr6yf+dnyyvoXD9fw23XluqJdXFejM4pzryvr0ZZ5NHpnoOrjLZLNzc3q9Xo1MTHR5FC+y7
fM49OHZuZmp+rh0u1aXln3UcEIdOW6ol1cV99vlOdeV9ajDfNoPAZSbe99/BrN5xvg842y8ouHCIFucO61V+PfJkg1PTlR169c+qKEP3245vqVSzU9eXFrF+BTzr32cmegQfuHx3VwNPxqAW/vDWp6cqKu/vRjAyMDGA/nXjuJAQAI52MCAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAI9/8lWKkDA/pOhQAAAABJRU5ErkJggg==",
"text/plain": [
"<Figure size 640x480 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, axs = plt.subplots(len(kmeans_values), 1, sharex=True)\n",
"for i, centroids in enumerate(kmeans_values):\n",
" ax = axs[i]\n",
" ax.scatter(centroids, [i]*len(centroids), marker='x')\n",
" ax.scatter(points, [i]*len(points), s=2, color=\"black\")\n",
" ax.axis('off')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "geniomhe-ml",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,2 @@
\usepackage{mus-learn}
\usepackage{xfrac}

25
references.bib Normal file
View File

@ -0,0 +1,25 @@
@book{geron_hands-machine_2019,
edition = {2},
title = {Hands-On Machine Learning with Scikit-Learn, Keras, and {TensorFlow}},
abstract = {Through a recent series of breakthroughs, deep learning has boosted the entire field of machine learning. Now, even programmers who know close to nothing about this technology can use simple, … - Selection from Hands-On Machine Learning with Scikit-Learn, Keras, and {TensorFlow}, 2nd Edition [Book]},
publisher = {O'{REILLY}},
author = {Géron, Aurélien},
date = {2019},
langid = {english},
note = {{ISBN}: 9781098125974}
}
@collection{witten_data_2011,
location = {Boston},
edition = {4},
title = {Data Mining - Practical Machine Learning Tools and Techniques},
isbn = {978-0-12-374856-0},
series = {The Morgan Kaufmann Series in Data Management Systems},
publisher = {Morgan Kaufmann},
editor = {Witten, Ian H. and Frank, Eibe and Hall, Mark A.},
urldate = {2023-06-16},
date = {2011-01-01},
langid = {english},
doi = {10.1016/B978-0-12-374856-0.00018-3}
}

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
scikit-learn
numpy