feat: Reinitialize commit

This commit is contained in:
Samuel Ortion 2023-11-16 16:47:14 +01:00
commit 9af6993a28
42 changed files with 3009 additions and 0 deletions

2
.gitattributes vendored Executable file
View File

@ -0,0 +1,2 @@
main.pdf filter=lfs diff=lfs merge=lfs -text
**/*.pdf filter=lfs diff=lfs merge=lfs -text

304
.gitignore vendored Executable file
View File

@ -0,0 +1,304 @@
build/
.auctex-auto
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are the stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

6
Makefile Executable file
View File

@ -0,0 +1,6 @@
options=-shell-escape -file-line-error
all: main.pdf
%.pdf: %.tex
	lualatex $(options) $<

18
content/chapters/include.tex Executable file
View File

@ -0,0 +1,18 @@
%----------------------------------------
% CHAPTERS
%----------------------------------------
\newcommand{\includechapters}[2]{%
\foreach \i in {0, ..., #2} {%
\edef\FileName{content/chapters/#1/\i}%
\IfFileExists{\FileName}{%
\input{\FileName}%
}{}%
}
}
\includechapters{part1}{4}
\includechapters{part2}{2}
% \includechapters{part3}{1}

1
content/chapters/part1/0.tex Executable file
View File

@ -0,0 +1 @@
\part{}

653
content/chapters/part1/1.tex Executable file
View File

@ -0,0 +1,653 @@
\chapter{Linear Model}
\section{Simple Linear Regression}
\[
Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i
\]
\[
\Y = \X \beta + \varepsilon.
\]
\[
\begin{pmatrix}
Y_1 \\
Y_2 \\
\vdots \\
Y_n
\end{pmatrix}
=
\begin{pmatrix}
1 & X_1 \\
1 & X_2 \\
\vdots & \vdots \\
1 & X_n
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\vdots \\
\varepsilon_n
\end{pmatrix}
\]
\paragraph*{Assumptions}
\begin{enumerate}[label={\color{primary}{($A_\arabic*$)}}]
\item $\varepsilon_i$ are independent;
\item $\varepsilon_i$ are identically distributed;
\item $\varepsilon_i$ are i.i.d $\sim \Norm(0, \sigma^2)$ (homoscedasticity).
\end{enumerate}
\section{Generalized Linear Model}
\[
g(\EE(Y)) = X \beta
\]
with $g$ being
\begin{itemize}
\item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for Boolean (binary) responses,
\item Poisson regression: $g(v) = \log(v)$, for instance for count variables.
\end{itemize}
\subsection{Penalized Regression}
When the number of variables is large, e.g.\ when the number of explanatory variables exceeds the number of observations ($p \gg n$, with $p$ the number of explanatory variables and $n$ the number of observations), we cannot estimate the parameters by ordinary least squares.
In order to estimate the parameters, we add penalties (additional terms) to the criterion:
Lasso regression, Ridge regression, Elastic Net, etc.
\[
Y = X \beta + \varepsilon,
\]
is noted equivalently as
\[
\begin{pmatrix}
y_1 \\
y_2 \\
y_3 \\
y_4
\end{pmatrix}
= \begin{pmatrix}
1 & x_{11} & x_{12} \\
1 & x_{21} & x_{22} \\
1 & x_{31} & x_{32} \\
1 & x_{41} & x_{42}
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1 \\
\beta_2
\end{pmatrix} +
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\varepsilon_3 \\
\varepsilon_4
\end{pmatrix}.
\]
\section{Parameter Estimation}
\subsection{Simple Linear Regression}
\subsection{General Case}
If $\X^T\X$ is invertible, the OLS estimator is:
\begin{equation}
\hat{\beta} = (\X^T\X)^{-1} \X^T \Y
\end{equation}
\subsection{Ordinary Least Squares Algorithm}
We want to minimize the distance between $\X\beta$ and $\Y$:
\[
\min \norm{\Y - \X\beta}^2
\]
(See \autoref{ch:elements-of-linear-algebra}).
\begin{align*}
\Rightarrow& \X \hat{\beta} = proj^{(1, \X)} \Y\\
\Rightarrow& \forall v \in w,\, \scalar{v, y} = \scalar{v, proj^w(y)}\\
\Rightarrow& \forall i: \\
& \X_i \Y = \X_i \X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\
\Rightarrow& \X^T \Y = \X^T \X \hat{\beta} \\
\Rightarrow& {\color{gray}(\X^T \X)^{-1}} \X^T \Y = {\color{gray}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\
\Rightarrow& \hat{\beta} = (\X^T\X)^{-1} \X^T \Y
\end{align*}
This formula comes from the orthogonal projection of $\Y$ onto the vector subspace spanned by the explanatory variables $\X$:
$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
If $H$ is the projection matrix onto the subspace generated by $\X$, then $H\Y$ is the projection of $\Y$ on this subspace, which corresponds to $\X\hat{\beta}$.
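As a quick numerical check, the closed-form estimator can be compared with the output of R's lm() on simulated data (a minimal sketch; all variable names are illustrative):

# Minimal sketch: closed-form OLS estimate versus lm(), on simulated data
set.seed(1)
n <- 100
x <- runif(n)
y <- 1.25 + 4 * x + rnorm(n)
X <- cbind(1, x)                                  # design matrix with an intercept column
beta_hat <- solve(crossprod(X), crossprod(X, y))  # (X^T X)^{-1} X^T Y
beta_hat
coef(lm(y ~ x))                                   # same estimates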
\section{Sum of squares}
$\Y - \X \hat{\beta} \perp \X \hat{\beta} - \bar{\Y} \One$ if $\One \in V$, so
\[
\underbrace{\norm{\Y - \bar{\Y}\One}^2}_{\text{Total SS}} = \underbrace{\norm{\Y - \X \hat{\beta}}^2}_{\text{Residual SS}} + \underbrace{\norm{\X \hat{\beta} - \bar{\Y} \One}^2}_{\text{Explained SS}}
\]
\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}}
\begin{definition}[$R^2$]
\[
0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
\] proportion of variation of $\Y$ explained by the model.
\end{definition}
\begin{figure}
\centering
\includegraphics{figures/schemes/orthogonal_projection.pdf}
\caption{Orthogonal projection of $\Y$ on the plane generated by the basis described by $\X$. $\color{blue}a$ corresponds to $\norm{\X\hat{\beta} - \bar{\Y}\One}^2$, $\color{blue}b$ corresponds to $\norm{\hat{\varepsilon}}^2 = \norm{\Y - \X\hat{\beta}}^2$ and $\color{blue}c$ corresponds to $\norm{\Y - \bar{\Y}\One}^2$.}
\label{fig:scheme-orthogonal-projection}
\end{figure}
\begin{figure}
\centering
\includegraphics{figures/schemes/ordinary_least_squares.pdf}
\caption{Ordinary least squares and regression line with simulated data.}
\label{fig:ordinary-least-squares}
\end{figure}
\begin{definition}[Model dimension]
Let $\M$ be a model.
The dimension of $\M$ is the dimension of the subspace generated by $\X$, that is the number of parameters in the $\beta$ vector.
\textit{Nb.} The dimension of the model is not the total number of parameters of the model, as $\sigma^2$ is also one of the model parameters.
\end{definition}
\section{Gaussian vectors}
\begin{definition}[Normal distribution]
$X \sim \Norm(\mu, \sigma^{2})$, with density function $f$
\[
f(x) = \frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}(\frac{x-\mu}{\sigma})^{2}}
\]
\end{definition}
\begin{definition}[Gaussian vector]
A random vector $\Y \in \RR[n]$ is a gaussian vector if every linear combination of its components is a gaussian random variable.
\end{definition}
\begin{property}
$m = \EE(Y) = (m_1, \ldots, m_n)^T$, where $m_i = \EE(Y_i)$
\[
\Y \sim \Norm_n(m, \Sigma)
\]
where $\Sigma$ is the variance-covariance matrix:
\[
\Sigma = \EE\left[(\Y -m)(\Y - m)^T\right].
\]
\end{property}
\begin{remark}
\[
\Cov(Y_i, Y_i) = \Var(Y_i)
\]
\end{remark}
\begin{definition}[Covariance]
\[
\Cov(Y_i, Y_j) = \EE\left((Y_i-\EE(Y_i))(Y_j-\EE(Y_j))\right)
\]
\end{definition}
When two variable are linked, the covariance is large.
If two variables $X, Y$ are independent, $\Cov(X, Y) = 0$.
\begin{definition}[Correlation coefficient]
\[
\Cor(Y_i, Y_j) = \frac{\EE\left((Y_i-\EE(Y_i))(Y_j-\EE(Y_j))\right)}{\sqrt{\EE\left((Y_i - \EE(Y_i))^2\right) \cdot \EE\left((Y_j - \EE(Y_j))^2\right)}}
\]
\end{definition}
Covariance is very sensitive to the scale of the variables. For instance, if we measure a distance in millimeters, the covariance will be larger than if the same distance is expressed in meters. The correlation coefficient, which is a normalized covariance, is therefore useful to compare values across scales.
\begin{remark}
\begin{align*}
\Cov(Y_i, Y_i) &= \EE((Y_i - \EE(Y_i)) (Y_i - \EE(Y_i))) \\
&= \EE((Y_i - \EE(Y_i))^2) \\
&= \Var(Y_i)
\end{align*}
\end{remark}
\begin{equation}
\Sigma = \begin{pNiceMatrix}
\VVar(Y_1) & & & &\\
& \Ddots & & & \\
& \Cov(Y_i, Y_j) & \VVar(Y_i) & & \\
& & & \Ddots & \\
& & & & \VVar(Y_n)
\end{pNiceMatrix}
\end{equation}
\begin{definition}[Identity matrix]
\[
\mathcal{I}_n = \begin{pNiceMatrix}
1 & 0 & 0 \\
0 & \Ddots & 0\\
0 & 0 & 1
\end{pNiceMatrix}
\]
\end{definition}
\begin{theorem}[Cochran Theorem (Consequence)]
\label{thm:cochran}
Let $\mathbf{Z}$ be a gaussian vector: $\mathbf{Z} \sim \Norm_n(0_n, I_n)$.
\begin{itemize}
\item If $V_1, V_2$ are orthogonal subspaces of $\RR[n]$ with dimensions $n_1, n_2$ such that
\[
\RR[n] = V_1 \overset{\perp}{\oplus} V_2,
\]
\item if $Z_1, Z_2$ are the orthogonal projections of $\mathbf{Z}$ on $V_1$ and $V_2$, i.e. $Z_1 = \Pi_{V_1}(\mathbf{Z}) = \Pi_1 \mathbf{Z}$ and $Z_2 = \Pi_{V_2} (\mathbf{Z}) = \Pi_2 \mathbf{Z}$ ($\Pi_{1}$ and $\Pi_{2}$ being the projection matrices onto $V_1$ and $V_2$),
then:
\item $Z_{1}$, $Z_{2}$ are independent gaussian vectors, $Z_{1} \sim \Norm_{n} (0_{n}, \Pi_{1})$ and $Z_{2} \sim \Norm_{n}(0_{n}, \Pi_{2})$.
In particular $\norm{Z_{1}}^{2} \sim \chi^{2}(n_{1})$ and $\norm{Z_{2}}^{2} \sim \chi^{2}(n_{2})$.
\end{itemize}
Here $Z_{1} = \Pi_{V_1}(\Z)$ is the projection of $\Z$ on the subspace $V_1$.
\dots
\end{theorem}
\begin{property}[Estimators properties in the linear model]
According to \autoref{thm:cochran},
\[
\hat{m} \text{ is independent from $\hat{\sigma}^2$}
\]
\[
\norm{\Y - \Pi_V(\Y)}^2 = \norm{\varepsilon - \Pi_{V}(\varepsilon)}^{2} = \norm{\Pi_{V}^{\perp} (\varepsilon)}^{2}
\]
$\hat{m} = \X \hat{\beta}$ is the estimator of the mean.
\end{property}
\begin{definition}[Chi 2 distribution]
If $X_1, \ldots, X_n$ are i.i.d.\ $\sim \Norm(0, 1)$, then
\[
X_1^2 + \ldots + X_n^2 \sim \chi_n^2
\]
\end{definition}
\subsection{Estimator's properties}
\[
\Pi_V = \X(\X^T\X)^{-1} \X^T
\]
\begin{align*}
\hat{m} &= \X \hat{\beta} = \X(\X^T\X)^{-1} \X^T \Y \\
\intertext{so}
&= \Pi_V \Y
\end{align*}
According to the Cochran theorem, we can deduce that the estimator of the predicted value, $\hat{m}$, is independent of $\hat{\sigma}^2$.
All the sums of squares follow a $\chi^2$ distribution.
\subsection{Estimators properties}
\begin{itemize}
\item $\hat{m}$ is an unbiased estimator of $m$;
\item $\EE(\hat{\sigma}^{2}) = \sigma^{2}(n-q)/n$: $\hat{\sigma}^{2}$ is a biased estimator of $\sigma^{2}$.
\[
S^{2} = \frac{1}{n-q} \norm{\Y - \Pi_{V}(\Y)}^{2}
\]
is an unbiased estimator of $\sigma^2$.
\end{itemize}
We can derive statistical test from these properties.
\section{Statistical tests}
\subsection{Student $t$-test}
\[
\frac{\hat{\theta}-\theta}{\sqrt{\frac{\widehat{\VVar}(\hat{\theta})}{n}}} \underset{H_0}{\sim} t_{n-q}
\]
where $\hat{\theta}$ is the estimator of $\theta$ and $\widehat{\VVar}(\hat{\theta})$ is its estimated variance.
\paragraph{Estimation of $\sigma^2$}
A biased estimator of $\sigma^2$ is:
\[
\hat{\sigma}^2 = \frac{1}{n} \norm{\Y - \Pi_V(\Y)}^2
\]
$S^2$ is the unbiased estimator of $\sigma^2$:
\begin{align*}
S^2 &= \frac{1}{n-q} \norm{\Y - \Pi_V(\Y)}^2 \\
&= \frac{1}{n-q} \sum_{i=1}^n (Y_i - (\X\hat{\beta})_i)^2
\end{align*}
\begin{remark}[On $\hat{m}$]
\begin{align*}
&\Y = \X \beta + \varepsilon \\
\Leftrightarrow& \EE(\Y) = \X \beta
\end{align*}
\end{remark}
\section{Student test of nullity of a parameter}
Let $\beta_j$ be a parameter, the tested hypotheses are as follows:
\[
\begin{cases}
(H_0): \beta_j = 0 \\
(H_1): \beta_j \neq 0
\end{cases}
\]
Under the null hypothesis:
\[
\frac{\hat{\beta}_j - \beta_j}{S \sqrt{(\X^T \X)^{-1}_{j,j}}} \sim \St(n-q).
\]
The test statistic is:
\[
W_n = \frac{\hat{\beta}_j}{S \sqrt{(\X^T\X)^{-1}_{j,j}}} \underset{H_0}{\sim} \St(n-q).
\]
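In R, these per-coefficient $t$-statistics and their $p$-values are what summary() reports for a fitted linear model (a sketch; 'dat' is a hypothetical data frame with columns y, x1, x2, x3):

fit <- lm(y ~ x1 + x2 + x3, data = dat)
summary(fit)$coefficients   # estimate, std. error, t value (W_n) and p-value for each beta_j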
$\hat{\beta}$ is a multinormal vector.
Let's consider a vector of 4 values:
\begin{align*}
\begin{pmatrix}
\hat{\beta}_0 \\
\hat{\beta}_1 \\
\hat{\beta}_2 \\
\hat{\beta}_3
\end{pmatrix}
\sim \Norm_4 \left( \begin{pmatrix}
\beta_0 \\
\beta_1 \\
\beta_2 \\
\beta_3
\end{pmatrix} ;
\sigma^2 \left(\X^T \X\right)^{-1}
\right)
\end{align*}
Let $\M$ be the following model
\begin{align*}
Y_i &= \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + \beta_3 X_{3i} + \varepsilon_i
\end{align*}
Why can't we use the following simpler model to test each parameter value (here for $X_2$)?
\[
Y_i = \theta_0 + \theta_1 X_{2i} + \varepsilon_i
\]
We can't use such a model: we would probably run into a confounding factor. Even if we are only interested in the relationship of $X_2$ with $Y$, we have to fit the whole model.
\begin{example}[Confounding parameter]
Let $Y$ be a variable related to lung cancer. Let $X_1$ be the smoking status, and $X_2$ the variable `alcohol' (for instance the quantity of alcohol drunk per week).
If we only fit the model $\M: Y_i = \theta_0 + \theta_1 X_{2i} + \varepsilon_i$, we could conclude that there is a relationship between alcohol and lung cancer, because alcohol consumption and smoking are strongly related. If we had fitted the model $\M: Y_i = \theta_0 + \theta_1 X_{1i} + \theta_2 X_{2i} + \varepsilon_i$, we might indeed have found no significant relationship between $X_2$ and $Y$.
\end{example}
\begin{definition}[Student law]
Let $X$ and $Y$ be two random variables such that $X \indep Y$, $X \sim \Norm(0, 1)$ and $Y \sim \chi_n^2$, then
\[
\frac{X}{\sqrt{Y/n}} \sim \St(n)
\]
\end{definition}
\subsection{Model comparison}
\begin{definition}[Nested models]
\end{definition}
Let $\M_2$ and $\M_4$ be two models:
$\M_2: Y_i = \beta_0 + \beta_3 X_{3_i} + \varepsilon_i$
$\M_4: Y_i = \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + \beta_3 X_{3i} + \varepsilon_i$
$\M_2$ is nested in $\M_4$.
\paragraph*{Principle} We compare the residual variances of the two models, that is, the variance that is not explained by the model.
The better the model, the smaller the residual variance.
If everything were explained by the model, the residual variance would be zero.
Here $\M_4$ holds all the information found in $\M_2$ plus additional information. In the worst case it is at least as good as $\M_2$.
\subsection{Fisher $F$-test of model comparison}
Let $\M_q$ and $\M_{q'}$ be two models such that $\dim(\M_q) = q$, $\dim(\M_{q'}) = q'$, $q > q'$ and $\M_{q'}$ is nested in $\M_q$.
\paragraph{Tested hypotheses}
\[
\begin{cases}
(H_0): \M_{q'} \text{ is the proper model} \\
(H_1): \M_q \text{ is a better model}
\end{cases}
\]
\begin{description}
\item[ESS] Estimated Sum of Squares
\item[RSS] Residual Sum of Squares
\item[EMS] Estimated Mean Square
\item[RMS] Residual Mean Square
\end{description}
\[
ESS = RSS(\M_{q'}) - RSS(\M_q)
\]
\[
RSS(\M) = \norm{\Y - \X\hat{\beta}}^2 = \sum_{i=1}^n \hat{\varepsilon}_i^2
\]
\[
EMS = \frac{ESS}{q - q'}
\]
\[
RMS = \frac{RSS(\M_q)}{n-q}
\]
Under the null hypothesis:
\[
F = \frac{EMS}{RMS} \underset{H_0}{\sim} \Fish(q-q'; n-q)
\]
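In R, this comparison of nested models is carried out by anova() (a sketch; the formulas refer to a hypothetical data frame 'dat'):

m2 <- lm(y ~ x3, data = dat)             # nested model M_2
m4 <- lm(y ~ x1 + x2 + x3, data = dat)   # full model M_4
anova(m2, m4)                            # F = EMS / RMS with (q - q', n - q) degrees of freedom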
\section{Model validity}
Assumptions:
\begin{itemize}
\item $\X$ is a full rank matrix;
\item Residuals are i.i.d. $\varepsilon \sim \Norm(0_n, \sigma^2 \mathcal{I}_n)$;
\end{itemize}
We also have to look for influential observations.
\subsection{$\X$ is full rank}
To check that the rank of the matrix is $p+1$, we can compute the eigenvalues of the correlation matrix of $\X$. If there is a perfect linear relationship between two variables (two columns of $\X$), one of the eigenvalues is zero. In practice, we never get an exactly zero eigenvalue, so we consider the condition index $\kappa = \frac{\lambda_1}{\lambda_p}$, the ratio between the largest and the smallest eigenvalues, with $\lambda_1 \geq \lambda_2 \geq \ldots \geq \lambda_p$ the eigenvalues.
If all eigenvalues are different from 0, $\X^T \X$ can be inverted, but if some of them are close to zero the estimated parameter variances will be large, so the estimation of the parameters will not be reliable.
\paragraph{Variance Inflation Factor}
Perform a regression of each of the predictors against the other predictors.
If there is a strong linear relationship between a predictor and the others, this is reflected by a large coefficient of determination $R^2_j$ (the amount of variance explained by this auxiliary regression), which means that there is a strong relationship between the predictors.
We do this for all predictors, and for predictor $j = 1, \ldots, p$, the variance inflation factor is:
\[
VIF_j = \frac{1}{1-R^2_j}.
\]
\subparagraph*{Rule}
If $VIF > 10$ or $VIF > 100$\dots
In case of multicollinearity, we have to remove the variables one by one until there is no longer multicollinearity.
Variables have to be removed based on statistical results and through discussion with experimenters.
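A minimal sketch of the computation described above (the car package also provides a vif() function, but the manual version makes the definition explicit; 'fit' is a hypothetical fitted linear model):

# VIF_j = 1 / (1 - R^2_j), with R^2_j from the regression of predictor j on the other predictors
X <- model.matrix(fit)[, -1]    # predictor matrix, without the intercept column
vif_manual <- sapply(seq_len(ncol(X)), function(j) {
  r2 <- summary(lm(X[, j] ~ X[, -j]))$r.squared
  1 / (1 - r2)
})
vif_manual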
\subsection{Residuals analysis}
\paragraph*{Assumption}
\[
\varepsilon \sim \Norm_n(0_n, \sigma^2 I_n)
\]
\paragraph{Normality of the residuals} If $\varepsilon_i$ ($i=1, \ldots, n$) could be observed we could build a QQ-plot of $\varepsilon_i / \sigma$ against quantiles of $\Norm(0, 1)$.
Only the residual errors $\hat{e}_i$ can be observed:
Let $e_i^*$ be the studentized residuals, considered as estimators of the $\varepsilon_i$:
\[
e_i^* = \frac{\hat{e}_i}{\sqrt{\hat{\sigma}^2_{(i)}(1-H_{ii})}}
\]
\begin{align*}
\hat{Y} &= X \hat{\beta} \\
&= X \left( (X^TX)^{-1} X^T Y\right) \\
&= \underbrace{X (X^TX)^{-1} X^T}_{H} Y
\end{align*}
\paragraph{Centered residuals} If $(1, \ldots, 1)^T$ belongs to the column space of $\X$, the residuals are centered ($\sum_i \hat{e}_i = 0$) by construction.
\paragraph{Independence} We do not have a statistical test for independence in R; instead, we plot the residuals $e$ against the fitted values $\X \hat{\beta}$.
\paragraph{Homoscedasticity} Plot $\sqrt{\abs{e^*}}$ against the fitted values $\X \hat{\beta}$.
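A sketch of these residual diagnostics in R; rstudent() returns the studentized residuals $e_i^*$ ('fit' is a hypothetical fitted model):

plot(fitted(fit), rstudent(fit))              # structure / independence check
qqnorm(rstudent(fit)); qqline(rstudent(fit))  # normality of the residuals
plot(fitted(fit), sqrt(abs(rstudent(fit))))   # homoscedasticity (scale-location plot)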
\paragraph{Influential observations}
We make the distinction between observations:
\begin{itemize}
\item With a residual that is too large
$\rightarrow$ Influence on the estimation of $\sigma^2$
\item Which are too isolated
$\rightarrow$ Influence on the estimation of $\beta$
\end{itemize}
\[
e_i^* \sim \St(n-p-1)
\]
\subparagraph*{Rule} We consider an observation to be aberrant if:
\[
e_i^* > F^{-1}_{\St(n-p-1)}(1-\alpha)
\]
the quantile of order $1-\alpha$, $\alpha$ often being set to $1/n$; alternatively, the threshold is simply set to 2.
\paragraph{Leverage} The leverage of observation $i$ is the diagonal term $H_{ii}$ of the orthogonal projection matrix $H$.
\begin{property}
\begin{itemize}
\item $0 \leq H_{ii} \leq 1$
\item $\sum_i H_{ii} = p$
\end{itemize}
\end{property}
\subparagraph*{Rule} We consider that the observation is aberrant if the leverage is ??.
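In R, the leverages $H_{ii}$ and a classical influence measure are available directly (a sketch; the 2 * mean(h) cut-off is only a common rule of thumb, not taken from these notes):

h <- hatvalues(fit)        # leverages H_ii; sum(h) equals the number of parameters
d <- cooks.distance(fit)   # combines residual size and leverage
which(h > 2 * mean(h))     # rule-of-thumb flag for isolated (high-leverage) observations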
\paragraph{Non-linearity}
\section{Model Selection}
We want to select the best model with the smallest number of predictors.
When models have too many explanatory variables, the power of statistical tests decreases.
Different methods:
\begin{itemize}
\item Comparison of nested models;
\item Information criteria;
\item Method based on the prediction error.
\end{itemize}
\subsection{Information criteria}
\subsubsection{Likelihood}
\begin{definition}[Likelihood]
The probability of observing what we observed under a particular model.
\[
L_n (\M(k))
\]
\end{definition}
\begin{definition}[Akaike Information Criterion]
\[
AIC(\M(k)) = -2 \log L_n (\M(k)) + 2k.
\]
$2k$ is a penalty term, favoring smaller models.
\end{definition}
\begin{definition}[Bayesian Information Criterion]
\[
BIC(\M(k)) = -2 \log L_n (\M(k)) + \log(n) k.
\]
$\log(n) k$ is a penalty.
\end{definition}
Usually the $AIC$ penalty is smaller than the $BIC$ one, thus the $AIC$ criterion tends to select models with more variables than the $BIC$ criterion.
\subsection{Stepwise}
\begin{description}
\item[forward] Add predictors iteratively, beginning with the most contributing one.
\item[backward] Remove predictors iteratively.
\item[stepwise] Combination of forward and backward selection. We start with no predictors and add them one at a time; before adding a predictor, we check whether all previously added predictors remain meaningful.
\end{description}
The problem with this iterative procedure is that at each step we perform a test, so we should adjust the confidence level for multiple testing.
In practice, the multiple testing problem is not taken into account in these approaches.
We can use information criteria or model comparison in these methods.
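In R, step() performs this iterative selection with an information criterion: k = 2 gives the AIC penalty and k = log(n) the BIC penalty (a sketch; 'dat' is a hypothetical data frame):

full <- lm(y ~ ., data = dat)
step(full, direction = "backward")                 # backward selection by AIC
step(lm(y ~ 1, data = dat), direction = "both",    # stepwise, starting from the null model
     scope = formula(full), k = log(nrow(dat)))    # k = log(n) corresponds to BIC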
\section{Predictions}
Let $X_i$ be the $i$-th row of the matrix $\X$. The observed value $Y_i$ can be estimated by:
\[
\hat{Y}_i = (\X \hat{\beta})_i = X_i \hat{\beta}
\]
\begin{align*}
\EE (\hat{Y}_i) &= (\X \beta)_i = X_i \beta \\
\sigma^{-1} (\hat{\beta} - \beta) &\sim \Norm_{p+1} (0_{p+1}, (\X^T \X)^{-1}), \qquad \text{and} \\
\Var(\hat{Y}_i) &= \sigma^2 X_i (\X^T \X)^{-1} X_i^T, \\
S^2 &= \frac{1}{n-q} \norm{\Y - \X \hat{\beta}}^2.
\end{align*}
\paragraph{Prediction Confidence Interval}
We can build confidence interval for predicted values $(\X \hat{\beta})_i$
\dots
\paragraph{Prediction error of $Y$}
\paragraph{Prediction interval for a new observation $Y_{n+1}$}
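In R, both intervals are produced by predict() on a fitted model (a sketch; 'new' is a hypothetical data frame of new observations):

predict(fit, newdata = new, interval = "confidence")  # confidence interval for (X beta)_i
predict(fit, newdata = new, interval = "prediction")  # prediction interval for a new Y_{n+1}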

186
content/chapters/part1/2.tex Executable file
View File

@ -0,0 +1,186 @@
\chapter{Generalized Linear Model}
\begin{example}
\begin{description}
\item[Ex. 1 - Credit Card Default]
Let $Y_i$ be a boolean random variable following a Bernoulli distribution.
\item[Ex. 2 - Horseshoe Crabs]
Let $Y_i$ be the number of satellite males.
$Y_i$ can be described as following a Poisson distribution.
\end{description}
\end{example}
\begin{remark}
A Poisson distribution can be viewed as an approximation of the binomial distribution when $n$ is large and $p$ is small.
\end{remark}
We will consider the following relation:
\[
\EE(Y_i) = g^{-1}(X_i \beta),
\]
equivalently:
\[
g(\EE(Y_i)) = X_i \beta.
\]
\begin{itemize}
\item $\beta$ is estimated by the maximum likelihood;
\item $g$ is called the link function.
\end{itemize}
\begin{remark}
In the standard linear model, the OLS estimator is the maximum likelihood estimator.
\end{remark}
\section{Logistic Regression}
\begin{align*}
& \log\left(\frac{\Pi}{1 - \Pi}\right) = \X \beta \\
\Leftrightarrow & e^{\ln \frac{\Pi}{1 - \Pi}} = e^{\X \beta} \\
\Leftrightarrow & \frac{\Pi}{1 - \Pi} = e^{\X \beta} \\
\Leftrightarrow & \Pi = (1 - \Pi) e^{\X\beta} \\
\Leftrightarrow & \Pi = e^{\X \beta} - \Pi e^{\X\beta} \\
\Leftrightarrow & \Pi + \Pi e^{\X\beta} = e^{\X \beta} \\
\Leftrightarrow & \Pi (1 + e^{\X\beta}) = e^{\X \beta} \\
\Leftrightarrow & \Pi = \frac{e^{\X\beta}}{1 + e^{\X \beta}}
\end{align*}
\section{Maximum Likelihood estimator}
Likelihood: the probability of observing what we observed.
Estimate $\beta$ by $\hat{\beta}$ such that $\forall \beta \in \RR[p+1]$:
\[
L_n (\hat{\beta}) \geq L_n (\beta)
\]
These estimators are consistent, but not necessarily unbiased.
\section{Test for each single coordinate}
\begin{example}[Payment Default]
Let $Y_i$ be the default value for individual $i$.
\[
\log (\frac{\Pi (X)}{1 - \Pi (X)}) = \beta_0 + \beta_1 \text{student} + \beta_2 \text{balance} + \beta_3 \text{income}
\]
In this example, only $\beta_0$ and $\beta_2$ are significantly different from 0.
\end{example}
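A minimal sketch of fitting such a logistic regression in R ('default_data' is a hypothetical data frame whose columns match the example):

fit <- glm(default ~ student + balance + income, data = default_data, family = binomial)
summary(fit)                           # Wald test for each coordinate beta_j
head(predict(fit, type = "response"))  # fitted probabilities Pi = e^{X beta} / (1 + e^{X beta})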
\begin{remark}
We do not add $\varepsilon_i$, because $\log(\frac{\Pi (X)}{1 - \Pi (X)})$ corresponds to the expectation.
\end{remark}
\subsection{Comparison of nested models}
To test $H_0:\: \beta_1 = \ldots = \beta_p = 0$, we use the likelihood ratio test:
\[
T_n = -2 \log (\mathcal{L}^{\texttt{null}}) + 2 \log (\mathcal{L}(\hat{\beta})) \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2(p).
\]
\begin{remark}[Family of Tests]
\begin{itemize}
\item Comparison of estimated values and values under the null hypothesis;
\item Likelihood ratio test;
\item Based on the slope on the derivative.
\end{itemize}
\end{remark}
\section{Relative risk}
$RR(j)$ is the probability of having the disease conditional on the predictor value $X_{i_1}$, over the probability of having the disease conditional on the predictor value $X_{i_2}$.
\[
RR(j) = \frac{\Prob(Y_{i_1} = 1 \: | \: X_{i_1})}{\Prob(Y_{i_2} = 1 \: | \: X_{i_2})} = \frac{\EE(Y_{i_1})}{\EE(Y_{i_2})}.
\]
$\pi(X_i)$ is the probability of having the disease, according to $X_i$.
The relative risk can be written as\dots
\section{Odds}
Quantity providing a measure of the likelihood of a particular outcome:
\[
odds = \frac{\pi(X_i)}{1 - \pi(X_i)}
\]
\[
odds = \exp(X_i \beta)
\]
The odds is the ratio of the probability of having the disease (if $Y$ represents the disease) over the probability of not having it.
\section{Odds Ratio}
\begin{align*}
OR(j) =\frac{odds(X_{i_1})}{odds(X_{i_2})} & = \frac{\frac{\pi(X_{i_1})}{1 - \pi(X_{i_1})}}{\frac{\pi(X_{i_2})}{1 - \pi(X_{i_2})}}
\end{align*}
The OR can be written as:
\[
OR(j) = \exp(\beta_j)
\]
\begin{exercise}
Show that $OR(j) = \exp(\beta_j)$.
\end{exercise}
\begin{align*}
OR(j) & = \frac{odds(X_{i_1})}{odds(X_{i_2})} \\
& = \frac{\exp(X_{i_1} \beta)}{\exp(X_{i_2} \beta)} \\
\end{align*}
\[
\log \left(
\frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})}\right)
= \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)}
\]
Similarly
\[
\log \left(
\frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right)
= \beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)}
\]
We substract both equations:
\begin{align*}
&\log \left(
\frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})} \right) - \log \left(\frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right) \\
& = \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)} - \left(\beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)}\right) \\
& = \log OR(j) \\
& = \cancel{(\beta_0 - \beta_0)} + \beta_1 \cancel{(X_1^{(1)} - X_1^{(2)})} + \beta_2 \cancel{(X_2^{(1)} - X_2^{(2)})} + \ldots + \beta_j \cancelto{1}{(X_j^{(1)} - X_j^{(2)})} + \ldots + \beta_p \cancel{(X_p^{(1)} - X_p^{(2)})} \\
&\Leftrightarrow \log (OR_j) = \beta_j \\
&\Leftrightarrow OR(j) = \exp(\beta_j)
\end{align*}
OR is not equal to RR in general; they are approximately equal when the probability of the event is small.
If OR is significantly different from 1, the $\exp(\beta_j)$ is significantly different from 1, thus $\beta_j$ is significantly different from 0.
If a predictor has more than two classes, we do not know what the difference $X_{i_1} - X_{i_2}$ means. We have to take a reference class, and successively compare each class with the reference class.
$\hat{\pi}(X_{n+1}) = \widehat{\Prob}(Y = 1 \mid X_{n+1})$ is the predicted probability for a new individual.
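In R, odds ratios and (Wald-based) confidence intervals are obtained by exponentiating the logistic-regression coefficients (a sketch, reusing the hypothetical fit above):

exp(coef(fit))             # OR(j) = exp(beta_j)
exp(confint.default(fit))  # Wald confidence intervals transferred to the OR scale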
\section{Poisson model}
Let $Y_{i} \sim \mathcal{P}(\lambda_{i})$, corresponding to a count.
\begin{align*}
\EE(Y_{i}) & = g^{-1}(X_{i} \beta) \\
\Leftrightarrow g(\EE(Y_{i})) & = X_{i} \beta
\end{align*}
where $g(x) = \ln(x)$, and $g^{-1}(x) = e^{x}$.
\[
\lambda_{i} = \EE(Y_{i}) = \Var(Y_{i})
\]
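A matching sketch for the Poisson regression in R ('crab_data' is a hypothetical data frame for the horseshoe crab example):

fit_pois <- glm(satellites ~ width, data = crab_data, family = poisson)
exp(coef(fit_pois))   # multiplicative effect on the expected count E(Y_i)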

26
content/chapters/part1/3.tex Executable file
View File

@ -0,0 +1,26 @@
\chapter{Tests Reminders}
\section{\texorpdfstring{$\chi^2$}{chi2} test of independence}
[...]
\section{\texorpdfstring{$\chi^2$}{chi2} test of goodness of fit}
Check whether the observations are consistent with a particular distribution.
\begin{example}[Mendel experiments]
Let $AB$, $Ab$, $aB$, $ab$ be the four possible genotypes of peas: colors and grain shape.
\begin{tabular}{cccc}
\toprule
AB & Ab & aB & ab \\
\midrule
315 & 108 & 101 & 32 \\
\bottomrule
\end{tabular}
\end{example}
The test statistic is:
\[
D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2_{k-1}
\]
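A sketch of this goodness-of-fit test in R on the counts above, assuming the classical Mendelian 9:3:3:1 ratio as the null distribution:

obs <- c(AB = 315, Ab = 108, aB = 101, ab = 32)
chisq.test(obs, p = c(9, 3, 3, 1) / 16)   # k - 1 = 3 degrees of freedom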

125
content/chapters/part1/4.tex Executable file
View File

@ -0,0 +1,125 @@
\chapter{Regularized regressions}
Let $\Y$ be a vector of observations and $\X$ a matrix of dimension $n \times (p+1)$.
Suppose the real model is:
\[
\Y = \X^{m^{*}} \beta^{m^{*}} + \varepsilon^{m^{*}} = \X^{*} \beta^{*} + \varepsilon^{*}.
\]
If $p$ is large compared to $n$:
\begin{itemize}
\item $\hat{\beta} = (\X^{T}\X)^{-1} \X^{T} \Y$ is not defined as $\X^{T}\X$ is not invertible.
$m^{*}$ is the number of true predictors, that is, the number of predictors with non-zero coefficients.
\item
\item
\end{itemize}
\section{Ridge regression}
Instead of minimizing the mean square error, we want to minimize the following regularized expression:
\[
\hat{\beta}^{\text{ridge}}_{\lambda} = \argmin_{\beta \in \RR[p]} \norm{Y - X \beta}^{2} + \lambda \sum_{j=1}^{p} \beta_{j}^{2}
\]
where $\lambda$ is used to calibrate the regularization;
it is a way to favor solutions with small values of the parameters.
\[
\sum_{j=1}^{p} \beta_{j}^{2} = \norm{\beta}^{2}
\]
is the classical squared norm of the vector.
\section{Cross validation}
\subsection{Leave-one-out \textit{jackknife}}
\begin{example}
Let $\M_{0}$ be the model $Y_{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2}X_{2i} + \beta_{3} X_{3i}$
The model will be:
\[
\begin{pmatrix}
y_{1} \\
y_{2} \\
y_{3} \\
y_{4} \\
y_{5}
\end{pmatrix} =
\beta_{0} + \beta_{1} \begin{pmatrix}
x_{11} \\
x_{12} \\
x_{13} \\
x_{14} \\
x_{15}
\end{pmatrix}
+ \beta_{2} \begin{pmatrix}
x_{21} \\
x_{22} \\
x_{23} \\
x_{24} \\
x_{25}
\end{pmatrix}
+
\beta_{3} \begin{pmatrix}
x_{31} \\
x_{32} \\
x_{33} \\
x_{34} \\
x_{35}
\end{pmatrix}
\]
\def\x{$\times$}
\begin{tabular}{ccccc}
\toprule
1 & 2 & 3 & 4 & 5 \\
\midrule
. & \x & \x & \x & \x \\
\x & . & \x & \x & \x \\
\x & \x & . & \x & \x \\
\x & \x & \x & . & \x \\
\x & \x & \x & \x & . \\
\bottomrule
\end{tabular}
\end{example}
For each candidate $\lambda$, we fit the model on each dataset with one observation left out, and evaluate the prediction error on the left-out observation.
\subsection{K-fold cross-validation}
We will have as many tables as subsets (folds).
We choose $\lambda$ such that the generalization error is the smallest.
\section{Lasso regression}
The difference with the Ridge regression lies in the penalty:
\[
\hat{\beta}_{\lambda}^{\text{lasso}}= \argmin \norm{Y-X\beta}^{2} + \lambda \sum_{j=1}^{p} \abs{\beta_{j}}
\]
$\sum_{j=1}^{p} \abs{\beta_j} = \norm{\beta}_1$
Instead of all parameters increasing smoothly, parameters enter the model one at a time along the regularization path, and some can be set exactly to 0.
Lasso regression can be used to perform variable selection.
We can use the same methods (K-fold and Leave-one-out) to select the $\lambda$ value.
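A sketch with the glmnet package, which implements both penalties and selects $\lambda$ by K-fold cross-validation (alpha = 1 gives the Lasso penalty, alpha = 0 the Ridge penalty; X is a numeric predictor matrix and y the response):

library(glmnet)
cv <- cv.glmnet(X, y, alpha = 1, nfolds = 10)  # K-fold cross-validation over a grid of lambda
cv$lambda.min                                  # lambda with the smallest cross-validated error
coef(cv, s = "lambda.min")                     # with the Lasso, some coefficients are exactly 0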
\section{Elastic Net}
Combination of the Ridge and Lasso regression:
\[
\hat{\beta}_\lambda^{en} = \argmin \norm{Y-X\beta}^{2} + \lambda_{1} \norm{\beta}_{1} + \lambda_{2} \norm{\beta}_{2}^{2}
\]
\begin{remark}
In the case of Lasso, Elastic Net or Ridge regression, we can no longer perform statistical tests on the parameters.
\end{remark}

2
content/chapters/part2/0.tex Executable file
View File

@ -0,0 +1,2 @@
\part{Linear Algebra}

220
content/chapters/part2/1.tex Executable file
View File

@ -0,0 +1,220 @@
\chapter{Elements of Linear Algebra}
\label{ch:elements-of-linear-algebra}
\begin{remark}[vector]
Let $u$ be a vector; we will use interchangeably the following notations: $u$ and $\vec{u}$.
\end{remark}
Let $u = \begin{pmatrix}
u_1 \\
\vdots \\
u_n
\end{pmatrix}$ and $v = \begin{pmatrix}
v_1 \\
\vdots \\
v_n
\end{pmatrix}$
\begin{definition}[Scalar Product (Dot Product)]
\begin{align*}
\scalar{u, v} & = \begin{pmatrix}
u_1, \ldots, u_n
\end{pmatrix}
\begin{pmatrix}
v_1 \\
\vdots \\
v_n
\end{pmatrix} \\
& = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n
\end{align*}
We may use $\scalar{u, v}$ or $u \cdot v$ notations.
\end{definition}
\paragraph{Dot product properties}
\begin{description}
\item[Commutative] $\scalar{u, v} = \scalar{v, u}$
\item[Distributive] $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$
\item $\scalar{u, v} = \norm{u} \times \norm{v} \times \cos(\widehat{u, v})$
\item $\scalar{a, a} = \norm{a}^2$
\end{description}
\begin{definition}[Norm]
Length of the vector.
\[
\norm{u} = \sqrt{\scalar{u, u}}
\]
$\norm{u} \geq 0$
\end{definition}
\begin{definition}[Distance]
\[
dist(u, v) = \norm{u-v}
\]
\end{definition}
\begin{definition}[Orthogonality]
\end{definition}
\begin{remark}
\[
(dist(u, v))^2 = \norm{u - v}^2 = \scalar{v-u, v-u}.
\]
\end{remark}
\begin{figure}
\centering
\includegraphics{figures/schemes/vector_orthogonality.pdf}
\caption{Scalar product of two orthogonal vectors.}
\label{fig:scheme-orthogonal-scalar-product}
\end{figure}
\begin{align*}
\scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\
& = \norm{v}^2 + \norm{u}^2 - 2 \scalar{u, v}
\end{align*}
\begin{align*}
\norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\
\norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v}
\end{align*}
\begin{proposition}[Scalar product of orthogonal vectors]
\[
u \perp v \Leftrightarrow \scalar{u, v} = 0
\]
\end{proposition}
\begin{proof}[Indeed]
$\norm{u-v}^2 = \norm{u+v}^2$, as illustrated in \autoref{fig:scheme-orthogonal-scalar-product}.
\begin{align*}
\Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\
\Leftrightarrow & 4 \scalar{u, v} = 0 \\
\Leftrightarrow & \scalar{u, v} = 0
\end{align*}
\end{proof}
\begin{theorem}[Pythagorean theorem]
If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$ .
\end{theorem}
\begin{definition}[Orthogonal Projection]
\end{definition}
Let $y = \begin{pmatrix}
y_1 \\
\vdots \\
y_n
\end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
$\mathcal{Y}$ can be written as the orthogonal projection of $y$ on $w$:
\[
\mathcal{Y} = proj^w(y) + z,
\]
where
\[
\begin{cases}
z \in w^\perp \\
proj^w(y) \in w
\end{cases}
\]
This decomposition is unique.
The scalar product between $z$ and any vector of $w$ is zero.
\begin{property}
$proj^w(y)$ is the closest vector to $y$ that belongs to $w$.
\end{property}
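A small numerical sketch of this projection in R, taking $w$ as the column space of a matrix X (illustrative values):

set.seed(1)
X <- cbind(1, rnorm(5))                          # columns spanning the subspace w
y <- rnorm(5)
p <- X %*% solve(crossprod(X), crossprod(X, y))  # proj^w(y)
z <- y - p                                       # component in the orthogonal complement
crossprod(X, z)                                  # numerically zero: z is orthogonal to w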
\begin{definition}[Matrix]
A matrix represents an application, that is, a function that transforms one vector into another; it is a linear map.
\end{definition}
\begin{example}[Matrix application]
Let $A$ be a matrix:
\[
A = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\] and
\[
x = \begin{pmatrix}
x_1 \\
x_2
\end{pmatrix}
\]
Then,
\begin{align*}
Ax & = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\begin{pmatrix}
x_1 \\
x_2
\end{pmatrix} \\
& = \begin{pmatrix}
a x_1 + b x_2 \\
c x_1 + d x_2
\end{pmatrix}
\end{align*}
Similarly,
\begin{align*}
\begin{pmatrix}
a & b & c & d \\
e & f & g & h \\
i & j & k & l
\end{pmatrix}
\begin{pmatrix}
x_1 \\
x_2 \\
x_3 \\
x_4
\end{pmatrix}
=
\begin{pmatrix}
\luadirect{
local matrix_product = require("scripts.matrix_product")
local m1 = {
{"a", "b", "c", "d"},
{"e", "f", "g", "h"},
{"i", "j", "k", "l"}
}
local m2 = {
{"x_1"},
{"x_2"},
{"x_3"},
{"x_4"}
}
local product_matrix = matrix_product.matrix_product_repr(m1,m2)
local matrix_dump = matrix_product.dump_matrix(product_matrix)
tex.print(matrix_dump)
}
\end{pmatrix}
\end{align*}
\end{example}
The number of columns has to be the same as the dimension of the vector to which the matrix is applied.
\begin{definition}[Transpose of a Matrix]
Let $A = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}$, then $A^T = \begin{pmatrix}
a & c \\
b & d
\end{pmatrix}$
\end{definition}
\begin{figure}
\centering
\includegraphics{figures/schemes/coordinates_systems.pdf}
\caption{Coordinate systems}
\end{figure}

0
content/conclusion.tex Executable file
View File

35
content/introduction.tex Executable file
View File

@ -0,0 +1,35 @@
\chapter{Introduction}
\begin{definition}[Long Term Nonprocessor (LTNP)]
Patient who will remain a long time in good health condition, even with a large viral load (cf. HIV).
\end{definition}
\begin{example}[Genotype: Qualitative or Quantitative?]
\[
\text{SNP}:
\begin{cases}
\text{AA} \\
\text{AB} \\
\text{BB}
\end{cases}
\rightarrow
\begin{pmatrix}
0 \\
1 \\
2
\end{pmatrix},
\]
thus we might consider genotype either as a qualitative variable or quantitative variable.
\end{example}
When the variable are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance.
\begin{figure}
\begin{subfigure}{0.45\columnwidth}
\includegraphics[width=\columnwidth]{figures/plots/linear_regression_linear.pdf}
\end{subfigure}
\begin{subfigure}{0.45\columnwidth}
\includegraphics[width=\columnwidth]{figures/plots/linear_regression_non_linear.pdf}
\end{subfigure}
\caption{Illustration of two models fitting observed values}
\end{figure}

12
definitions.tex Executable file
View File

@ -0,0 +1,12 @@
\DeclareMathOperator{\VVar}{\mathbb{V}} % variance
\DeclareMathOperator{\One}{\mathbf{1}}
\DeclareMathOperator{\Cor}{\mathrm{Cor}}
\DeclareMathOperator{\St}{\mathscr{St}}
\newcommand{\M}[1][]{\ensuremath{\ifstrempty{#1}{\mathcal{M}}{\mathbb{M}_{#1}}}}
\newcommand{\X}{\ensuremath{\mathbf{X}}}
\newcommand{\Y}{\ensuremath{\mathbf{Y}}}
\newcommand{\Z}{\ensuremath{\mathbf{Z}}}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\usepackage{unicode-math}

View File

@ -0,0 +1,26 @@
# Plot an affine model
n <- 250
sd <- 0.05
epsilon <- rnorm(n, mean = 0, sd = 2)
beta0 <- 1.25
beta1 <- 4
linear_model <- function(x) {
return(beta0 + beta1*x)
}
x <- runif(n, min=0, max=1)
y <- linear_model(x) + epsilon
pdf("figures/plots/linear_regression_linear.pdf")
plot(x, y, col="#5654fa", type="p", pch=20, xlab="x", ylab="y")
abline(a = beta0, b = beta1, col="red")
dev.off()
non_linear_model <- function(x) {
return(beta0 + beta1 * exp(2*x))
}
non_linear_y <- non_linear_model(x) + epsilon
pdf("figures/plots/linear_regression_non_linear.pdf")
plot(x, non_linear_y, col="#5654fa", type="p", pch=20, xlab="x", ylab="z")
curve(non_linear_model, from=0, to=1, add=T, col="red")
dev.off()

BIN
figures/plots/linear_regression_linear.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
figures/plots/linear_regression_non_linear.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
figures/plots/logistic_curve.pdf (Stored with Git LFS) Executable file

Binary file not shown.

View File

@ -0,0 +1,23 @@
\documentclass[margin=0.5cm]{standalone}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}
\begin{document}
\begin{tikzpicture}
\begin{axis}[
title={Logit function},
xlabel={$x$},
ylabel={$y$},
domain=-5:5,
samples=200,
legend style={at={(0.95,0.05)},anchor=south east}
]
\newcommand{\Lvar}{1}
\newcommand{\kvar}{1}
\newcommand{\xvar}{0}
\addplot [blue] {\Lvar / (1 + exp(-\kvar*(x-\xvar)))};
\addlegendentry{$L = \Lvar, k=\kvar, x_0=\xvar$};
\end{axis}
\end{tikzpicture}
\end{document}

3
figures/schemes/.gitattributes vendored Executable file
View File

@ -0,0 +1,3 @@
covariance.pdf filter=lfs diff=lfs merge=lfs -text
../plots/linear_regression_linear.pdf filter=lfs diff=lfs merge=lfs -text
../plots/linear_regression_non_linear.pdf filter=lfs diff=lfs merge=lfs -text

BIN
figures/schemes/base_plan.pdf (Stored with Git LFS) Executable file

Binary file not shown.

16
figures/schemes/base_plan.tex Executable file
View File

@ -0,0 +1,16 @@
\documentclass[margin=0.5cm]{standalone}
\usepackage{tikz}
\usepackage{tkz-euclide}
\begin{document}
\usetikzlibrary{3d}
\begin{tikzpicture}
\tkzDefPoint(-2,-2){A}
\tkzDefPoint(10:3){B}
\tkzDefShiftPointCoord[B](1:5){C}
\tkzDefShiftPointCoord[A](1:5){D}
\tkzDrawPolygon(A,...,D)
\tkzDrawPoints(A,...,D)
\node at (A) {A};
\end{tikzpicture}
\end{document}

BIN
figures/schemes/coordinates_systems.pdf (Stored with Git LFS) Executable file

Binary file not shown.

View File

@ -0,0 +1,23 @@
\documentclass[tikz]{standalone}
\usepackage{tikz}
\begin{document}
\usetikzlibrary{3d}
% 1D axis
\begin{tikzpicture}[->]
\begin{scope}[xshift=0]
\draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$};
\end{scope}
% 2D coordinate system
\begin{scope}[xshift=50]
\draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$};
\draw (0, 0, 0) -- (xyz cylindrical cs:radius=1,angle=90) node[above] {$y$};
\end{scope}
% 3D coordinate systems
\begin{scope}[xshift=100]
\draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$};
\draw (0, 0, 0) -- (xyz cylindrical cs:radius=1,angle=90) node[above] {$y$};
\draw (0, 0, 0) -- (xyz cylindrical cs:z=1) node[below left] {$z$};
\end{scope}
\end{tikzpicture}
\end{document}

BIN
figures/schemes/covariance.pdf (Stored with Git LFS) Executable file

Binary file not shown.

35
figures/schemes/covariance.tex Executable file
View File

@ -0,0 +1,35 @@
% Scheme of Covariance
\documentclass[margin=0.5cm]{standalone}
\usepackage{tikz}
\usepackage{amssymb}
\begin{document}
\begin{tikzpicture}
\usetikzlibrary{positioning}
\tikzset{
point/.style = {circle, inner sep={.75\pgflinewidth}, opacity=1, draw, black, fill=black},
point name/.style = {insert path={coordinate (#1)}},
}
\begin{scope}[yshift=0]
\draw (-4, 0.5) -- (4,0.5) node[right] {$Y_i$};
\draw (-4, -0.5) -- (4,-0.5) node[right] {$Y_j$};
\node at (6, 0) {$\mathrm{Cov}(Y_i, Y_j) > 0$};
\node (EYipoint) at (0,0.5) {$\times$};
\node at (0, 1) {$\mathbb{E}(Y_i)$};
\node (EYipoint) at (0,-0.5) {$\times$};
\node at (0, -1) {$\mathbb{E}(Y_j)$};
\foreach \x in {-3, 0.5, 2.75} {
\node[point] at (\x, 0.5) {};
}
\foreach \x in {-2, -1, 3} {
\node[point] at (\x, -0.5) {};
}
\end{scope}
\begin{scope}[yshift=-100]
\draw (-4,0.5) -- (4,0.5) node[right] {$Y_i$};
\draw (-4,-0.5) -- (4,-0.5) node[right] {$Y_j$};
\node at (6, 0) {$\mathrm{Cov}(Y_i, Y_j) \approx 0$};
\end{scope}
\end{tikzpicture}
\end{document}

BIN
figures/schemes/ordinary_least_squares.pdf (Stored with Git LFS) Executable file

Binary file not shown.

Binary file not shown.


View File

@ -0,0 +1,988 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
width="175.798"
height="170.477"
viewBox="0 0 175.798 170.477"
version="1.1"
id="svg88"
sodipodi:docname="ordinary_least_squares.svg"
inkscape:export-filename="ordinary_least_squares.png"
inkscape:export-xdpi="300"
inkscape:export-ydpi="300"
inkscape:version="1.3 (0e150ed6c4, 2023-07-21)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview88"
pagecolor="#ffffff"
bordercolor="#000000"
borderopacity="0.25"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:export-bgcolor="#ffffffff"
inkscape:zoom="3.7372438"
inkscape:cx="87.899002"
inkscape:cy="85.223233"
inkscape:window-width="1920"
inkscape:window-height="1011"
inkscape:window-x="0"
inkscape:window-y="32"
inkscape:window-maximized="1"
inkscape:current-layer="svg88" />
<defs
id="defs1">
<g
id="g1">
<g
id="glyph-0-0" />
<g
id="glyph-0-1">
<path
d="M 1.90625 -0.53125 C 1.90625 -0.8125 1.671875 -1.0625 1.390625 -1.0625 C 1.09375 -1.0625 0.859375 -0.8125 0.859375 -0.53125 C 0.859375 -0.234375 1.09375 0 1.390625 0 C 1.671875 0 1.90625 -0.234375 1.90625 -0.53125 Z M 1.90625 -0.53125 "
id="path1" />
</g>
</g>
</defs>
<path
fill="none"
stroke-width="0.3985"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(0%, 0%, 0%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 0.00165625 -0.00046875 L 141.275094 -0.00046875 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path2" />
<path
fill="none"
stroke-width="0.31879"
stroke-linecap="round"
stroke-linejoin="round"
stroke="rgb(0%, 0%, 0%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M -1.195979 1.593281 C -1.094416 0.995625 -0.00066625 0.101094 0.300115 -0.00046875 C -0.00066625 -0.098125 -1.094416 -0.996563 -1.195979 -1.594219 "
transform="matrix(1, 0, 0, -1, 156.62176, 156.105)"
id="path3" />
<path
fill="none"
stroke-width="0.3985"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(0%, 0%, 0%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 0.00165625 -0.00046875 L 0.00165625 141.276875 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path4" />
<path
fill="none"
stroke-width="0.31879"
stroke-linecap="round"
stroke-linejoin="round"
stroke="rgb(0%, 0%, 0%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M -1.194197 1.592094 C -1.096541 0.994438 0.001115 0.0999063 0.29799 -0.00165625 C 0.001115 -0.0993125 -1.096541 -0.99775 -1.194197 -1.595406 "
transform="matrix(0, -1, -1, 0, 15.346, 14.82924)"
id="path5" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 46.767281 66.046406 L 46.767281 76.855 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path6" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 59.880562 68.671406 L 59.880562 52.015156 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path7" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 35.450875 63.784687 L 35.450875 71.526875 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path8" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 122.404 81.175312 L 122.404 69.929219 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path9" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 3.532906 57.401875 L 3.532906 49.745625 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path10" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 108.482125 78.390156 L 108.482125 68.062031 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path11" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 19.236031 60.5425 L 19.236031 38.245625 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path12" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 107.814156 78.257344 L 107.814156 54.538594 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path13" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 78.087594 72.312031 L 78.087594 75.976094 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path14" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 105.345406 77.76125 L 105.345406 105.07375 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path15" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 31.017281 62.897969 L 31.017281 60.312031 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path16" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 73.509469 71.394062 L 73.509469 75.581562 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path17" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 78.696969 72.433125 L 78.696969 85.390156 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path18" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 89.134469 74.519062 L 89.134469 91.308125 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path19" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 82.732125 73.241719 L 82.732125 60.472187 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path20" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 117.583687 80.210469 L 117.583687 103.663594 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path21" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 136.829781 84.058125 L 136.829781 70.003437 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path22" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 17.001656 60.093281 L 17.001656 43.964375 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path23" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 125.950875 81.882344 L 125.950875 109.300312 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path24" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 73.302437 71.355 L 73.302437 94.800312 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path25" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 49.404 66.57375 L 49.404 54.245625 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path26" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 32.802437 63.253437 L 32.802437 62.362812 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path27" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 55.177437 67.73 L 55.177437 95.628437 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path28" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 80.2165 72.737812 L 80.2165 97.69875 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path29" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 78.911812 72.476094 L 78.911812 61.659687 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path30" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 130.681344 82.831562 L 130.681344 98.456562 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path31" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 108.232125 78.339375 L 108.232125 74.976094 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path32" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 49.529 66.601094 L 49.529 56.327656 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path33" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 23.986031 61.491719 L 23.986031 88.608906 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path34" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 16.298531 59.952656 L 16.298531 74.2925 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path35" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 35.868844 63.866719 L 35.868844 89.07375 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path36" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 94.482125 75.589375 L 94.482125 59.628437 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path37" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 27.876656 62.269062 L 27.876656 78.515156 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path38" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 61.513375 68.995625 L 61.513375 45.487812 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path39" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 109.154 78.522969 L 109.154 104.105 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path40" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 141.575875 85.007344 L 141.575875 63.390156 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path41" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 33.134469 63.319844 L 33.134469 48.030781 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path42" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 85.462594 73.784687 L 85.462594 80.765156 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path43" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 31.521187 62.999531 L 31.521187 44.23 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path44" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 79.845406 72.663594 L 79.845406 88.487812 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path45" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 67.775094 70.249531 L 67.775094 69.421406 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path46" />
<path
fill="none"
stroke-width="0.19925"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(75%, 75%, 75%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 78.626656 72.4175 L 78.626656 57.780781 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path47" />
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g47">
<use
xlink:href="#glyph-0-1"
x="60.728"
y="79.778"
id="use47" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g48">
<use
xlink:href="#glyph-0-1"
x="73.841"
y="104.616"
id="use48" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g49">
<use
xlink:href="#glyph-0-1"
x="49.412"
y="85.108"
id="use49" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g50">
<use
xlink:href="#glyph-0-1"
x="136.364"
y="86.704"
id="use50" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g51">
<use
xlink:href="#glyph-0-1"
x="17.494"
y="106.887"
id="use51" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g52">
<use
xlink:href="#glyph-0-1"
x="122.44"
y="88.57"
id="use52" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g53">
<use
xlink:href="#glyph-0-1"
x="33.198"
y="118.387"
id="use53" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g54">
<use
xlink:href="#glyph-0-1"
x="121.773"
y="102.094"
id="use54" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g55">
<use
xlink:href="#glyph-0-1"
x="92.049"
y="80.656"
id="use55" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g56">
<use
xlink:href="#glyph-0-1"
x="119.307"
y="51.558"
id="use56" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g57">
<use
xlink:href="#glyph-0-1"
x="44.979"
y="96.322"
id="use57" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g58">
<use
xlink:href="#glyph-0-1"
x="87.471"
y="81.051"
id="use58" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g59">
<use
xlink:href="#glyph-0-1"
x="92.655"
y="71.244"
id="use59" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g60">
<use
xlink:href="#glyph-0-1"
x="103.093"
y="65.325"
id="use60" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g61">
<use
xlink:href="#glyph-0-1"
x="96.693"
y="96.162"
id="use61" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g62">
<use
xlink:href="#glyph-0-1"
x="131.542"
y="52.971"
id="use62" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g63">
<use
xlink:href="#glyph-0-1"
x="150.79"
y="86.629"
id="use63" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g64">
<use
xlink:href="#glyph-0-1"
x="30.961"
y="112.667"
id="use64" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g65">
<use
xlink:href="#glyph-0-1"
x="139.911"
y="47.335"
id="use65" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g66">
<use
xlink:href="#glyph-0-1"
x="87.263"
y="61.833"
id="use66" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g67">
<use
xlink:href="#glyph-0-1"
x="63.363"
y="102.386"
id="use67" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g68">
<use
xlink:href="#glyph-0-1"
x="46.762"
y="94.27"
id="use68" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g69">
<use
xlink:href="#glyph-0-1"
x="69.139"
y="61.006"
id="use69" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g70">
<use
xlink:href="#glyph-0-1"
x="94.176"
y="58.936"
id="use70" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g71">
<use
xlink:href="#glyph-0-1"
x="92.871"
y="94.973"
id="use71" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g72">
<use
xlink:href="#glyph-0-1"
x="144.642"
y="58.179"
id="use72" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g73">
<use
xlink:href="#glyph-0-1"
x="122.192"
y="81.657"
id="use73" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g74">
<use
xlink:href="#glyph-0-1"
x="63.488"
y="100.304"
id="use74" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g75">
<use
xlink:href="#glyph-0-1"
x="37.946"
y="68.026"
id="use75" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g76">
<use
xlink:href="#glyph-0-1"
x="30.259"
y="82.34"
id="use76" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g77">
<use
xlink:href="#glyph-0-1"
x="49.831"
y="67.561"
id="use77" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g78">
<use
xlink:href="#glyph-0-1"
x="108.444"
y="97.003"
id="use78" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g79">
<use
xlink:href="#glyph-0-1"
x="41.836"
y="78.118"
id="use79" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g80">
<use
xlink:href="#glyph-0-1"
x="75.475"
y="111.147"
id="use80" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g81">
<use
xlink:href="#glyph-0-1"
x="123.115"
y="52.529"
id="use81" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g82">
<use
xlink:href="#glyph-0-1"
x="155.535"
y="93.242"
id="use82" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g83">
<use
xlink:href="#glyph-0-1"
x="47.094"
y="108.603"
id="use83" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g84">
<use
xlink:href="#glyph-0-1"
x="99.424"
y="75.869"
id="use84" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g85">
<use
xlink:href="#glyph-0-1"
x="45.483"
y="112.404"
id="use85" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g86">
<use
xlink:href="#glyph-0-1"
x="93.806"
y="68.145"
id="use86" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g87">
<use
xlink:href="#glyph-0-1"
x="81.735"
y="87.214"
id="use87" />
</g>
<g
fill="rgb(0%, 0%, 0%)"
fill-opacity="1"
id="g88">
<use
xlink:href="#glyph-0-1"
x="92.588"
y="98.852"
id="use88" />
</g>
<path
fill="none"
stroke-width="0.79701"
stroke-linecap="butt"
stroke-linejoin="miter"
stroke="rgb(0%, 0%, 100%)"
stroke-opacity="1"
stroke-miterlimit="10"
d="M 0.00165625 56.694844 L 141.732125 85.038594 "
transform="matrix(1, 0, 0, -1, 15.346, 156.105)"
id="path88" />
</svg>


View File

@ -0,0 +1,45 @@
\documentclass[margin=0.5cm]{standalone}
\usepackage{tikz}
\usepackage{luacode}
\begin{document}
\begin{tikzpicture}
% Draw axes
\draw[->] (0,0) -- (5,0);
\draw[->] (0,0) -- (0,5);
\directlua{
function runif(min, max)
return min + (max - min) * math.random()
end
math.randomseed(42)
x_min = 0
x_max = 5
error_min = -1
error_max = 1
beta0 = 2
beta1 = 1/5
x_values = {}
y_values = {}
for i=1,42 do
x = runif(x_min, x_max)
epsilon = runif(error_min, error_max)
y_hat = beta0 + beta1 * x
y = y_hat + epsilon
tex.print("\\draw[-,very thin, lightgray] ("..x..","..y_hat..") -- ("..x..","..y..") ;")
x_values[i] = x
y_values[i] = y
end
for i=1,42 do
x = x_values[i]
y = y_values[i]
tex.print("\\node[black] at ("..x..","..y..") {.};")
end
}
% Draw the regression line y = beta0 + beta1 * x
\draw[-,blue,thick] (0,\directlua{tex.print(beta0)}) -- (5,\directlua{tex.print(5*beta1+beta0)});
% Draw square norm
\end{tikzpicture}
\end{document}
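
Note: the blue line above is drawn from the true coefficients beta0 and beta1, not re-estimated from the simulated points. If a fitted least-squares line were wanted instead, the estimates could be computed from the x_values and y_values tables already filled in the \directlua block; the following Lua sketch (not part of the committed file, all names hypothetical) applies the textbook formulas beta1_hat = Sxy / Sxx and beta0_hat = ybar - beta1_hat * xbar:

-- Hypothetical helper: ordinary least-squares estimates from paired tables.
local function ols(x_values, y_values)
    local n = #x_values
    local x_bar, y_bar = 0, 0
    for i = 1, n do
        x_bar = x_bar + x_values[i] / n
        y_bar = y_bar + y_values[i] / n
    end
    local s_xy, s_xx = 0, 0
    for i = 1, n do
        s_xy = s_xy + (x_values[i] - x_bar) * (y_values[i] - y_bar)
        s_xx = s_xx + (x_values[i] - x_bar) ^ 2
    end
    local beta1_hat = s_xy / s_xx
    return y_bar - beta1_hat * x_bar, beta1_hat
end

local b0, b1 = ols({0, 1, 2}, {2.0, 2.2, 2.4})
print(b0, b1) -- approximately 2 and 0.2

The returned intercept and slope would then replace beta0 and beta1 in the final \draw command. Inside \directlua, however, end-of-line Lua comments cannot be used as written, since TeX turns line ends into spaces and a -- comment would swallow the rest of the chunk; the luacode environment from the already-loaded luacode package preserves line breaks and avoids this.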

BIN
figures/schemes/orthogonal_projection.pdf (Stored with Git LFS) Executable file

Binary file not shown.

View File

@ -0,0 +1,42 @@
% ref. https://tex.stackexchange.com/a/523362/235607
\documentclass[tikz]{standalone}
\usepackage{tikz-3dplot}
\usepackage{tkz-euclide}
\usepackage{mathtools}
\begin{document}
\tdplotsetmaincoords{50}{0}
\begin{tikzpicture}[tdplot_main_coords,bullet/.style={circle,inner
sep=1pt,fill=black,fill opacity=1}]
\begin{scope}[canvas is xy plane at z=0]
\tkzDefPoints{-2/-1/A,3/-1/B,4/2/C}
\tkzDefParallelogram(A,B,C)
\tkzGetPoint{D}
\tkzDrawPolygon[fill=gray!25!white](A,B,C,D)
\end{scope}
% Draw the right-triangle scheme
\begin{scope}[canvas is xz plane at y=1]
\draw[thick,fill=white,fill opacity=0.7,nodes={opacity=1}]
(2,0) node[bullet,label=right:{$\bar{\mathbf{Y}}$}] (Y_bar) {}
-- (0,-0.5) node (B) {}
-- (0,3) node[label=above:{$\mathbf{Y}$}] (Y) {} -- cycle;
% Right angle annotation
\tkzPicRightAngle[draw,
angle eccentricity=.5,angle radius=2mm](Y,B,Y_bar)
% epsilon: Y - X \hat{\beta} curly brackets annotations
\draw[decorate,decoration={brace,
amplitude=8pt},xshift=0pt,very thin,gray] (B) -- (Y) node [black,midway,xshift=-1.25em,yshift=0em] {\color{blue}$b$};
% X\hat{\beta} - \hat{Y}
\draw[decorate,decoration={brace,
amplitude=8pt},xshift=0pt,very thin,gray] (Y_bar) -- (B) node [black,midway,xshift=0.5em,yshift=-1em] {\color{blue}$a$};
%
\draw[decorate,decoration={brace,
amplitude=8pt},xshift=0pt,very thin,gray] (Y) -- (Y_bar) node [black,midway,xshift=1em,yshift=1em] {\color{blue}$c$};
\end{scope}
% Coordinate system
\begin{scope}[canvas is xy plane at z=0]
\draw[->] (2,1) -- node [above] {$\mathbf{1}$} ++(-1,0) ;
\draw[->] (2,1) -- ++(-0.45,-1) node [right] {$X_1$};
\end{scope}
\end{tikzpicture}
\end{document}

BIN
figures/schemes/regression_plan_3D.pdf (Stored with Git LFS) Executable file

Binary file not shown.

View File

@ -0,0 +1,26 @@
\documentclass[tikz,border=3.14mm]{standalone}
\usepackage{tikz-3dplot}
\begin{document}
\tdplotsetmaincoords{105}{-30}
\usetikzlibrary{patterns}
\begin{tikzpicture}[tdplot_main_coords,font=\sffamily]
\tdplotsetrotatedcoords{00}{30}{0}
\begin{scope}[tdplot_rotated_coords]
\begin{scope}[canvas is xy plane at z=0]
\draw[fill opacity=0,pattern=north west lines,pattern color=gray] (-2,-3) rectangle (2,3);
\draw[gray,fill=lightgray,fill opacity=0.75] (-2,-3) rectangle (2,3);
\draw[very thick] (-2,0) -- (2,0);
\path (-150:2) coordinate (H) (-1.5,0) coordinate(X);
\pgflowlevelsynccm
\draw[very thick,-stealth,gray] (0,0) -- (-30:1.5);
\end{scope}
\draw[stealth-] (H) -- ++ (-1,0,0.2) node[pos=1.3]{$H$};
\draw[stealth-] (X) -- ++ (0,1,0.2) node[pos=1.3]{$X$};
\draw[very thick,-stealth] (0,0,0) coordinate (O) -- (0,0,3) node[right]{$p$};
\end{scope}
\pgfmathsetmacro{\Radius}{1.5}
\draw[-stealth] (O)-- (2.5*\Radius,0,0) node[pos=1.15] {$x$};
\draw[-stealth] (O) -- (0,3.5*\Radius,0) node[pos=1.15] {$z$};
\draw[-stealth] (O) -- (0,0,2.5*\Radius) node[pos=1.05] {$y$};
\end{tikzpicture}
\end{document}

BIN
figures/schemes/vector_orthogonality.pdf (Stored with Git LFS) Executable file

Binary file not shown.

View File

@ -0,0 +1,27 @@
\documentclass[margin=0.5cm]{standalone}
\usepackage{tikz}
\usepackage{tkz-euclide}
\usepackage{mathtools}
\begin{document}
\begin{tikzpicture}
\coordinate (A) at (0.5, 1) {};
\coordinate (B) at (-0.5, -1) {};
\coordinate (C) at (1.25, -0.70) {};
\coordinate (0) at (0, 0) {};
% Right-angle mark between u and v
\tkzMarkRightAngle[draw=black,size=0.1](A,0,C);
\draw[lightgray,very thin] (A) -- (C);
% Curly brace annotation for ||u-v||
\draw[decorate,decoration={brace,
amplitude=10pt},xshift=0pt,yshift=4pt,very thin] (A) -- (C) node [black,midway,xshift=27pt,yshift=0.5em] {$\lVert u-v \rVert$};
\draw[lightgray,very thin] (B) -- (C);
% Vectors u, -u and v
\draw[->] (0) -- (A) node[above] {$u$};
\draw[->] (0) -- (B) node[below] {$-u$};
\draw[->] (0) -- (C) node[right] {$v$};
\end{tikzpicture}
\end{document}

0
glossary.tex Executable file
View File

BIN
main.pdf (Stored with Git LFS) Normal file

Binary file not shown.

68
main.tex Executable file
View File

@ -0,0 +1,68 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Course -- Multivariate Statistics --- GENIOMHE --- M1 - S1
%
% Author: Samuel Ortion <samuel@ortion.fr>
% Version: 0.1.0
% Date: 2023
% License: CC-By-SA 4.0+ International
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[
a4paper,
fontsize=10pt,
fleqn,
oneside
]{scrbook}
\usepackage{mus}
\titlehead{GENIOMHE}
\title{Multivariate\newline{}Statistics}
\author{Samuel Ortion}
\teacher{Cyril Dalmasso}
\cursus{GENIOMHE}
\university{Université Paris-Saclay, Université d'Évry val d'Essonne}
\semester{M1 - S1}
\date{Fall 2023}
\definecolor{myblue}{HTML}{5654fa}
\colorlet{primary}{myblue}
\hypersetup{
pdftitle={Course - Multivariate Statistics},
pdfauthor={Samuel Ortion},
pdfsubject={},
pdfkeywords={},
pdfcreator={LaTeX}
}
\addbibresource{references.bib}
\usepackage[
type={CC},
modifier={by-sa},
version={4.0},
]{doclicense}
\input{preamble}
\input{glossary}
\input{definitions}
\makeindex%
\makeglossary%
\begin{document}
\maketitlefullpage
\tableofcontents
\doclicenseThis%
\input{content/introduction}
\input{content/chapters/include}
\input{content/conclusion}
\end{document}

7
preamble.tex Executable file
View File

@ -0,0 +1,7 @@
\usepackage{pgffor}
\usetikzlibrary{math}
\usepackage{standalone}
\usepackage{tikz-3dplot}
\usepackage{tkz-euclide}
\usepackage{nicematrix}
\usepackage{luacode}

0
references.bib Executable file
View File

57
scripts/matrix_product.lua Executable file
View File

@ -0,0 +1,57 @@
-- Build a symbolic representation of the matrix product m1 * m2:
-- each entry of the result is a string such as "a x_1 + b x_2 + ...".
local function matrix_product_repr(m1, m2)
    if #m1[1] ~= #m2 then -- inner matrix dimensions must agree
        return nil
    end
    local res = {}
    for i = 1, #m1 do
        res[i] = {}
        for j = 1, #m2[1] do
            res[i][j] = " "
            for k = 1, #m2 do
                if k ~= 1 then
                    res[i][j] = res[i][j] .. " + "
                end
                res[i][j] = res[i][j] .. m1[i][k] .. " " .. m2[k][j]
            end
        end
    end
    return res
end

-- Serialize a matrix of strings as the body of a LaTeX array-like
-- environment: cells separated by "&", rows separated by "\\".
local function dump_matrix(matrix)
    local repr = ""
    for i, row in ipairs(matrix) do
        for j, cell in ipairs(row) do
            repr = repr .. " " .. cell
            if j ~= #row then
                repr = repr .. " & "
            end
        end
        if i ~= #matrix then
            repr = repr .. [[ \\ ]]
        end
        repr = repr .. "\n"
    end
    return repr
end

-- Example: symbolic product of a 3x4 matrix with a 4x1 column vector.
local m1 = {
    {"a", "b", "c", "d"},
    {"e", "f", "g", "h"},
    {"i", "j", "k", "l"}
}
local m2 = {
    {"x_1"},
    {"x_2"},
    {"x_3"},
    {"x_4"}
}
print(dump_matrix(matrix_product_repr(m1, m2)))

return {
    matrix_product_repr = matrix_product_repr,
    dump_matrix = dump_matrix
}
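
As a usage sketch (not part of the commit), the module table returned at the end of the script could be loaded from another Lua chunk or from the document; the scratch-file idea, matrices and output below are illustrative only:

-- Hypothetical usage: save as a scratch file next to scripts/ and run it
-- with texlua from the repository root. Note that dofile also triggers the
-- demo print() at the end of matrix_product.lua.
local mp = dofile("scripts/matrix_product.lua")
local X = {
    {"x_{11}", "x_{12}"},
    {"x_{21}", "x_{22}"}
}
local beta = {
    {"\\beta_1"},
    {"\\beta_2"}
}
-- matrix_product_repr returns nil if the inner dimensions disagree.
local body = mp.dump_matrix(mp.matrix_product_repr(X, beta))
print(body) -- LaTeX array body: cells joined by "&", rows by "\\"

From LuaLaTeX, the same string could be printed back into the document with tex.print to fill, for instance, a pmatrix or NiceMatrix body.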