commit 9af6993a2889aa5bae43c014ca2693a50b026fc4 Author: Samuel Ortion Date: Thu Nov 16 16:47:14 2023 +0100 feat: Reinitialize commit diff --git a/.gitattributes b/.gitattributes new file mode 100755 index 0000000..59f2077 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +main.pdf filter=lfs diff=lfs merge=lfs -text +**/*.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..76f44d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,304 @@ +build/ +.auctex-auto + +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs +*.slg +*.slo +*.sls + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) 
+# *.ist + +# gnuplot +*.gnuplot +*.table + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.glog +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files +# *.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# newpax +*.newpax + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# svg +svg-inkscape/ + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# titletoc +*.ptc + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# xwatermark package +*.xwm + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. +#*Notes.bib diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..a84950a --- /dev/null +++ b/Makefile @@ -0,0 +1,6 @@ +options=-shell-escape -file-line-error + +all: main.pdf + +%.pdf: %.tex + lualatex $(options) $< diff --git a/content/chapters/include.tex b/content/chapters/include.tex new file mode 100755 index 0000000..53c3f72 --- /dev/null +++ b/content/chapters/include.tex @@ -0,0 +1,18 @@ +%---------------------------------------- +% CHAPTERS +%---------------------------------------- + +\newcommand{\includechapters}[2]{% + \foreach \i in {0, ..., #2} {% + \edef\FileName{content/chapters/#1/\i}% + \IfFileExists{\FileName}{% + \input{\FileName}% + } + } +} + +\includechapters{part1}{4} + +\includechapters{part2}{2} + +% \includechapters{part3}{1} diff --git a/content/chapters/part1/0.tex b/content/chapters/part1/0.tex new file mode 100755 index 0000000..ebca180 --- /dev/null +++ b/content/chapters/part1/0.tex @@ -0,0 +1 @@ +\part{} \ No newline at end of file diff --git a/content/chapters/part1/1.tex b/content/chapters/part1/1.tex new file mode 100755 index 0000000..910c7e9 --- /dev/null +++ b/content/chapters/part1/1.tex @@ -0,0 +1,653 @@ +\chapter{Linear Model} + +\section{Simple Linear Regression} + +\[ + Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i +\] +\[ + \Y = \X \beta + \varepsilon. 
+\] +\[ + \begin{pmatrix} + Y_1 \\ + Y_2 \\ + \vdots \\ + Y_n + \end{pmatrix} + = + \begin{pmatrix} + 1 & X_1 \\ + 1 & X_2 \\ + \vdots & \vdots \\ + 1 & X_n + \end{pmatrix} + \begin{pmatrix} + \beta_0 \\ + \beta_1 + \end{pmatrix} + + + \begin{pmatrix} + \varepsilon_1 \\ + \varepsilon_2 \\ + \vdots + \varepsilon_n + \end{pmatrix} +\] + +\paragraph*{Assumptions} +\begin{enumerate}[label={\color{primary}{($A_\arabic*$)}}] + \item $\varepsilon_i$ are independent; + \item $\varepsilon_i$ are identically distributed; + \item $\varepsilon_i$ are i.i.d $\sim \Norm(0, \sigma^2)$ (homoscedasticity). +\end{enumerate} + +\section{Generalized Linear Model} + +\[ + g(\EE(Y)) = X \beta +\] +with $g$ being +\begin{itemize} + \item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for boolean values, + \item Poisson regression: $g(v) = \log(v)$, for instance for discrete variables. +\end{itemize} + +\subsection{Penalized Regression} + +When the number of variables is large, e.g, when the number of explanatory variable is above the number of observations, if $p >> n$ ($p$: the number of explanatory variable, $n$ is the number of observations), we cannot estimate the parameters. +In order to estimate the parameters, we can use penalties (additional terms). + +Lasso regression, Elastic Net, etc. + +\[ + Y = X \beta + \varepsilon, +\] +is noted equivalently as +\[ + \begin{pmatrix} + y_1 \\ + y_2 \\ + y_3 \\ + y_4 + \end{pmatrix} + = \begin{pmatrix} + 1 & x_{11} & x_{12} \\ + 1 & x_{21} & x_{22} \\ + 1 & x_{31} & x_{32} \\ + 1 & x_{41} & x_{42} + \end{pmatrix} + \begin{pmatrix} + \beta_0 \\ + \beta_1 \\ + \beta_2 + \end{pmatrix} + + \begin{pmatrix} + \varepsilon_1 \\ + \varepsilon_2 \\ + \varepsilon_3 \\ + \varepsilon_4 + \end{pmatrix}. +\] +\section{Parameter Estimation} + +\subsection{Simple Linear Regression} + +\subsection{General Case} + +If $\X^T\X$ is invertible, the OLS estimator is: +\begin{equation} +\hat{\beta} = (\X^T\X)^{-1} \X^T \Y +\end{equation} + +\subsection{Ordinary Least Square Algorithm} + +We want to minimize the distance between $\X\beta$ and $\Y$: +\[ + \min \norm{\Y - \X\beta}^2 +\] +(See \autoref{ch:elements-of-linear-algebra}). +\begin{align*} + \Rightarrow& \X \beta = proj^{(1, \X)} \Y\\ + \Rightarrow& \forall v \in w,\, vy = v proj^w(y)\\ + \Rightarrow& \forall i: \\ + & \X_i \Y = \X_i \X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\ + \Rightarrow& \X^T \Y = \X^T \X \hat{\beta} \\ + \Rightarrow& {\color{gray}(\X^T \X)^{-1}} \X^T \Y = {\color{gray}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\ + \Rightarrow& \hat{\beta} = (\X^T\X)^{-1} \X^T \Y +\end{align*} + +This formula comes from the orthogonal projection of $\Y$ on the vector subspace defined by the explanatory variables $\X$ + +$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$. + +If $H$ is the projection matrix of the subspace generated by $\X$, $\X\Y$ is the projection on $\Y$ on this subspace, that corresponds to $\X\hat{\beta}$. 
+
+\section{Sum of squares}
+
+$\Y - \X \hat{\beta} \perp \X \hat{\beta} - \bar{\Y} \One$ if $\One \in V$, so
+\[
+    \underbrace{\norm{\Y - \bar{\Y}\One}^2}_{\text{Total SS}} = \underbrace{\norm{\Y - \X \hat{\beta}}^2}_{\text{Residual SS}} + \underbrace{\norm{\X \hat{\beta} - \bar{\Y} \One}^2}_{\text{Explained SS}}
+\]
+
+\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}}
+\begin{definition}[$R^2$]
+    \[
+        0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
+    \]
+    is the proportion of the variation of $\Y$ explained by the model.
+\end{definition}
+
+\begin{figure}
+    \centering
+    \includegraphics{figures/schemes/orthogonal_projection.pdf}
+    \caption{Orthogonal projection of $\Y$ onto the plane generated by the basis described by $\X$. $\color{blue}a$ corresponds to $\norm{\X\hat{\beta} - \bar{\Y}\One}^2$, $\color{blue}b$ corresponds to $\norm{\hat{\varepsilon}}^2 = \norm{\Y - \X\hat{\beta}}^2$ and $\color{blue}c$ corresponds to $\norm{\Y - \bar{\Y}\One}^2$.}
+    \label{fig:scheme-orthogonal-projection}
+\end{figure}
+
+\begin{figure}
+    \centering
+    \includegraphics{figures/schemes/ordinary_least_squares.pdf}
+    \caption{Ordinary least squares and regression line with simulated data.}
+    \label{fig:ordinary-least-squares}
+\end{figure}
+
+\begin{definition}[Model dimension]
+    Let $\M$ be a model.
+    The dimension of $\M$ is the dimension of the subspace generated by $\X$, that is, the number of parameters in the $\beta$ vector.
+
+    \textit{Nb.} The dimension of the model is not the total number of parameters of the model, as $\sigma^2$ is also a model parameter.
+\end{definition}
+
+\section{Gaussian vectors}
+
+\begin{definition}[Normal distribution]
+    $X \sim \Norm(\mu, \sigma^{2})$, with density function $f$
+    \[
+        f(x) = \frac{1}{\sigma \sqrt{2\pi}}e^{-\frac{1}{2}\left(\frac{x-\mu}{\sigma}\right)^{2}}
+    \]
+\end{definition}
+
+\begin{definition}[Gaussian vector]
+    A random vector $\Y \in \RR[n]$ is a gaussian vector if every linear combination of its components is a gaussian random variable.
+\end{definition}
+
+\begin{property}
+    $m = \EE(\Y) = (m_1, \ldots, m_n)^T$, where $m_i = \EE(Y_i)$, and
+    \[
+        \Y \sim \Norm_n(m, \Sigma)
+    \]
+    where $\Sigma$ is the variance-covariance matrix
+    \[
+        \Sigma = \EE\left[(\Y -m)(\Y - m)^T\right].
+    \]
+\end{property}
+
+\begin{remark}
+    \[
+        \Cov(Y_i, Y_i) = \Var(Y_i)
+    \]
+\end{remark}
+
+\begin{definition}[Covariance]
+    \[
+        \Cov(Y_i, Y_j) = \EE\left((Y_i-\EE(Y_i))(Y_j-\EE(Y_j))\right)
+    \]
+\end{definition}
+
+When two variables are strongly linearly related, the covariance is large in absolute value.
+
+If two variables $X, Y$ are independent, $\Cov(X, Y) = 0$ (the converse does not hold in general).
+
+\begin{definition}[Correlation coefficient]
+    \[
+        \Cor(Y_i, Y_j) = \frac{\EE\left((Y_i-\EE(Y_i))(Y_j-\EE(Y_j))\right)}{\sqrt{\EE\left((Y_i - \EE(Y_i))^2\right) \cdot \EE\left((Y_j - \EE(Y_j))^2\right)}} = \frac{\Cov(Y_i, Y_j)}{\sqrt{\Var(Y_i)\Var(Y_j)}}
+    \]
+\end{definition}
+
+Covariance is very sensitive to the scale of the variables: if a distance is measured in millimetres, the covariance is larger than when the same distance is expressed in metres. The correlation coefficient, which is a normalized covariance, is therefore useful for comparing values across scales.
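+
+This scale effect can be checked numerically; the short R sketch below (simulated data, not from the course) converts the same measurement from metres to millimetres: the covariance is multiplied by 1000 while the correlation is unchanged.
+
+\begin{verbatim}
+# Covariance depends on the measurement units, correlation does not
+set.seed(1)
+x_m  <- runif(50, 0, 2)               # a distance measured in metres
+y    <- 3 * x_m + rnorm(50, sd = 0.5)
+x_mm <- 1000 * x_m                    # the same distance in millimetres
+
+cov(x_m, y); cov(x_mm, y)             # covariance is scaled by 1000
+cor(x_m, y); cor(x_mm, y)             # correlation is identical
+\end{verbatim}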
+
+\begin{remark}
+    \begin{align*}
+        \Cov(Y_i, Y_i) &= \EE((Y_i - \EE(Y_i)) (Y_i - \EE(Y_i))) \\
+        &= \EE((Y_i - \EE(Y_i))^2) \\
+        &= \Var(Y_i)
+    \end{align*}
+\end{remark}
+
+\begin{equation}
+    \Sigma = \begin{pNiceMatrix}
+        \VVar(Y_1) & & & &\\
+        & \Ddots & & & \\
+        & \Cov(Y_i, Y_j) & \VVar(Y_i) & & \\
+        & & & \Ddots & \\
+        & & & & \VVar(Y_n)
+    \end{pNiceMatrix}
+\end{equation}
+
+\begin{definition}[Identity matrix]
+    \[
+        \mathcal{I}_n = \begin{pNiceMatrix}
+            1 & 0 & 0 \\
+            0 & \Ddots & 0\\
+            0 & 0 & 1
+        \end{pNiceMatrix}
+    \]
+\end{definition}
+
+\begin{theorem}[Cochran Theorem (Consequence)]
+    \label{thm:cochran}
+    Let $\mathbf{Z}$ be a gaussian vector: $\mathbf{Z} \sim \Norm_n(0_n, I_n)$.
+
+    \begin{itemize}
+        \item If $V_1, V_2$ are orthogonal subspaces of $\RR[n]$ with dimensions $n_1, n_2$ such that
+              \[
+                  \RR[n] = V_1 \overset{\perp}{\oplus} V_2,
+              \]
+        \item and if $Z_1, Z_2$ are the orthogonal projections of $\mathbf{Z}$ on $V_1$ and $V_2$, i.e. $Z_1 = \Pi_{V_1}(\mathbf{Z}) = \Pi_1 \mathbf{Z}$ and $Z_2 = \Pi_{V_2} (\mathbf{Z}) = \Pi_2 \mathbf{Z}$ ($\Pi_{1}$ and $\Pi_{2}$ being projection matrices),
+        \item then $Z_{1}$, $Z_{2}$ are independent gaussian vectors, $Z_{1} \sim \Norm_{n} (0_{n}, \Pi_{1})$ and $Z_{2} \sim \Norm_{n}(0_{n}, \Pi_{2})$.
+
+              In particular $\norm{Z_{1}}^{2} \sim \chi^{2}(n_{1})$ and $\norm{Z_{2}}^{2} \sim \chi^{2}(n_{2})$.
+    \end{itemize}
+
+    $Z_{i} = \Pi_{V_{i}}(\Z)$ is the projection of $\Z$ on the subspace $V_{i}$.
+
+    \dots
+\end{theorem}
+
+\begin{property}[Estimators properties in the linear model]
+    According to \autoref{thm:cochran},
+    \[
+        \hat{m} \text{ is independent from } \hat{\sigma}^2
+    \]
+    \[
+        \norm{\Y - \Pi_V(\Y)}^2 = \norm{\varepsilon - \Pi_{V}(\varepsilon)}^{2} = \norm{\Pi_{V}^{\perp} (\varepsilon)}^{2}
+    \]
+
+    where $\hat{m} = \X \hat{\beta}$ is the estimator of the mean $m = \EE(\Y) = \X\beta$.
+\end{property}
+
+\begin{definition}[Chi 2 distribution]
+    If $X_1, \ldots, X_n$ are i.i.d. $\sim \Norm(0, 1)$, then
+    \[
+        X_1^2 + \ldots + X_n^2 \sim \chi_n^2
+    \]
+\end{definition}
+
+\subsection{Estimator's properties}
+
+\[
+    \Pi_V = \X(\X^T\X)^{-1} \X^T
+\]
+
+\begin{align*}
+    \hat{m} &= \X \hat{\beta} = \X(\X^T\X)^{-1} \X^T \Y \\
+    \intertext{so}
+    &= \Pi_V \Y
+\end{align*}
+
+According to the Cochran theorem, we can deduce that the estimator of the predicted value $\hat{m}$ is independent of $\hat{\sigma}^2$.
+
+Each sum of squares, once divided by $\sigma^2$, follows a $\chi^2$ distribution.
+
+\subsection{Bias of the estimators}
+
+\begin{itemize}
+    \item $\hat{m}$ is an unbiased estimator of $m$;
+    \item $\EE(\hat{\sigma}^{2}) = \sigma^{2}(n-q)/n$, so $\hat{\sigma}^{2}$ is a biased estimator of $\sigma^{2}$;
+          \[
+              S^{2} = \frac{1}{n-q} \norm{\Y - \Pi_{V}(\Y)}^{2}
+          \]
+          is an unbiased estimator of $\sigma^2$.
+\end{itemize}
+
+We can derive statistical tests from these properties.
+
+\section{Statistical tests}
+
+\subsection{Student $t$-test}
+
+\[
+    \frac{\hat{\theta}-\theta}{\sqrt{\frac{\widehat{\VVar}(\hat{\theta})}{n}}} \underset{H_0}{\sim} t_{n-q}
+\]
+
+where $\widehat{\VVar}(\hat{\theta})$ relies on an estimator of $\sigma^2$.
+
+\paragraph{Estimation of $\sigma^2$}
+
+A biased estimator of $\sigma^2$ is:
+\[
+    \hat{\sigma}^2 = \frac{1}{n} \norm{\Y - \Pi_V(\Y)}^2
+\] + +$S^2$ is the unbiased estimator of $\sigma^2$ +\begin{align*} + S^2 &= \frac{1}{n-q} \norm{\Y - \Pi_V(\Y)}^2 \\ + &= \frac{1}{n-q} \sum_{i=1}^n (Y_i - (\X\hat{\beta})_i)^2 +\end{align*} + +\begin{remark}[On $\hat{m}$] + \begin{align*} + &\Y = \X \beta + \varepsilon + \Leftrightarrow& \EE(\Y) = \X \beta + \end{align*} +\end{remark} + +\section{Student test of nullity of a parameter} + +Let $\beta_j$ be a parameter, the tested hypotheses are as follows: +\[ + \begin{cases} + (H_0): \beta_j = 0 \\ + (H_1): \beta_j \neq 0 + \end{cases} +\] + +Under the null hypothesis: +\[ + \frac{\hat{\beta}_j - \beta_j}{S \sqrt{(\X^T \X)^1_{j,j}}} \sim \St(n-q). +\] +The test statistic is: +\[ + W_n = \frac{\hat{\beta}_j}{S \sqrt{(\X^T\X)^{-1}_{j,j}}} \underset{H_0}{\sim} \St(n-q). +\] + +$\hat{\beta}$ is a multinormal vector. + +Let's consider a vector of 4 values: +\begin{align*} + \begin{pmatrix} + \hat{\beta}_0 \\ + \hat{\beta}_1 \\ + \hat{\beta}_2 \\ + \hat{\beta}_3 + \end{pmatrix} + \sim \Norm_4 \left( \begin{pmatrix} + \beta_0 \\ + \beta_1 \\ + \beta_2 \\ + \beta_3 + \end{pmatrix} ; + \sigma^2 \left(\X^T \X\right)^{-1} + \right) +\end{align*} + +Let $\M$ be the following model +\begin{align*} + Y_i &= \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + \beta_3 X_{3i} + \varepsilon_i +\end{align*} + +Why can't we use the following model to test each of the parameters values (here for $X_2$)? +\[ + Y_i = \theta_0 + \theta_1 X_{2i} + \varepsilon_i +\] +We can't use such a model, we would probably meet a confounding factor: even if we are only interested in relationship $X_2$ with $Y$, we have to fit the whole model. + +\begin{example}[Confounding parameter] + Let $Y$ be a variable related to the lung cancer. Let $X_1$ be the smoking status, and $X_2$ the variable `alcohol' (for instance the quantity of alcohol drunk per week). + + If we only fit the model $\M: Y_i = \theta_0 + \theta_1 X_{2i} + \varepsilon_i$, we could conclude for a relationship between alcohol and lung cancer, because alcohol consumption and smoking is strongly related. If we had fit the model $\M = Y_i = \theta_0 + \theta_1 X_{1i} + \theta_2 X_{2i} + \varepsilon_i$, we could indeed have found no significant relationship between $X_2$ and $Y$. +\end{example} + +\begin{definition}[Student law] + Let $X$ and $Y$ be two random variables such as $X \indep Y$, and such that $X \sim \Norm(0, 1)$ and $Y \sim \chi_n^2$, then + \[ + \frac{X}{\sqrt{Y}} \sim \St(n) + \] +\end{definition} + +\subsection{Model comparison} + +\begin{definition}[Nested models] + +\end{definition} + +Let $\M_2$ and $\M_4$ be two models: + +$\M_2: Y_i = \beta_0 + \beta_3 X_{3_i} + \varepsilon_i$ + +$\M_4: Y_i = \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + \beta_3 X_{3i} + \varepsilon_i$ + +$\M_2$ is nested in $\M_4$. + +\paragraph*{Principle} We compare the residual variances of the two models, that is, the variance that is not explained by the model. + +The better the model is, the smallest the variance would be. + +If everything is explained by the model, the residual variance would be null. + + +Here $\M_4$ holds all the information found in $\M_2$ plus other informations. In the worst case It would be at least as good as $\M_2$. + +\subsection{Fisher $F$-test of model comparison} + +Let $\M_q$ and $\M_{q'}$ be two models such as $\dim(\M_q) = q$, $\dim(\M_{q'}) = q'$, $q > q'$ and $\M_{q'}$ is nested in $\M_q$. 
+ +\paragraph{Tested hypotheses} +\[ +\begin{cases} + (H_0): \M_{q'} \text{ is the proper model} \\ + (H_1): \M_q \text{ is a better model} +\end{cases} +\] + +\begin{description} + \item[ESS] Estimated Sum of Squares + \item[RSS] Residual Sum of Squares + \item[EMS] Estimates Mean Square + \item[RMS] Residual Mean Square +\end{description} + +\[ + ESS = RSS(\M_{q'}) - RSS(\M_q) +\] +\[ + RSS(\M) = \norm{\Y - \X\hat{\beta}} = \sum_{i=1}^n \hat{\varepsilon}_i^2 +\] +\[ + EMS = \frac{ESS}{q - q'} +\] +\[ + RMS = \frac{RSS(\M_q)}{n-q} +\] + +Under the null hypotheses: +\[ + F = \frac{EMS}{RMS} \underset{H_0}{\sim} \Fish(q-q'; n-q) +\] + +\section{Model validity} + +Assumptions: +\begin{itemize} + \item $\X$ is a full rank matrix; + \item Residuals are i.i.d. $\varepsilon \sim \Norm(0_n, \sigma^2 \mathcal{I}_n)$; +\end{itemize} + +We have also to look for influential variables. + + +\subsection{$\X$ is full rank} + +To check that the rank of the matrix is $p+1$, we can calculate the eigen value of the correlation value of the matrix. If there is a perfect relationship between two variables (two columns of $\X$), one of the eigen value would be null. In practice, we never get a null eigen value. We consider the condition index as the ratio between the largest and the smallest eigenvalues, if the condition index $\kappa = \frac{\lambda_1}{\lambda_p}$, with $\lambda_1 \geq \lambda_2 \geq \ldots \geq \lambda_p$ the eigenvalues. + + +If all eigenvalues is different from 0, $\X^T \X$ can be inverted, but the estimated parameter variance would be large, thus the estimation of the parameters would be not relevant (not good enough). + +\paragraph{Variance Inflation Factor} + +Perform a regression of each of the predictors against the other predictors. + +If there is a strong linear relationship between a parameter and the others, it would reflect that the coefficient of determination $R^2$ (the amount of variance explained by the model) for this model, which would mean that there is a strong relationship between the parameters. + +We do this for all parameters, and for parameter $j = 1, \ldots, p$, the variance inflation factor would be: +\[ + VIF_j = \frac{1}{1-R^2_j}. +\] + +\subparagraph*{Rule} +If $VIF > 10$ or $VIF > 100$\dots + + +In case of multicollinearity, we have to remove the variable one by one until there is no longer multicollinearity. +Variables have to be removed based on statistical results and through discussion with experimenters. + + +\subsection{Residuals analysis} + +\paragraph*{Assumption} +\[ + \varepsilon \sim \Norm_n(0_n, \sigma^2 I_n) +\] + +\paragraph{Normality of the residuals} If $\varepsilon_i$ ($i=1, \ldots, n$) could be observed we could build a QQ-plot of $\varepsilon_i / \sigma$ against quantiles of $\Norm(0, 1)$. + +Only the residual errors $\hat{e}_i$ can be observed: + +Let $e_i^*$ be the studentized residual, considered as estimators of $\varepsilon_i$ + +\[ + e_i^* = \frac{\hat{e}_i}{\sqrt{\sigma^2_{(i)(1-H_{ii})}}} +\] + +\begin{align*} + \hat{Y} &= X \hat{\beta} \\ + &= X \left( (X^TX)^{-1} X^T Y\right) \\ + &= \underbrace{X (X^TX)^{-1} X^T}_{H} Y +\end{align*} + +\paragraph{Centered residuals} If $(1, \ldots, 1)^T$ belongs to $\X$ $\EE(\varepsilon) = 0$, by construction. + +\paragraph{Independence} We do not have a statistical test for independence in R, we would plot the residuals $e$ against $\X \hat{\beta}$. + +\paragraph{Homoscedastiscity} Plot the $\sqrt{e^*}$ against $\X \hat{\beta}$. 
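+
+These residual checks are usually read off a few diagnostic plots. The sketch below is a minimal illustration on simulated data (variable names are illustrative, not from the course); the optional last line assumes the \texttt{car} package is installed.
+
+\begin{verbatim}
+# Minimal sketch of residual diagnostics for a fitted linear model
+set.seed(3)
+n  <- 200
+x1 <- runif(n); x2 <- runif(n)
+y  <- 1 + 2 * x1 - x2 + rnorm(n, sd = 0.3)
+fit <- lm(y ~ x1 + x2)
+
+e_star <- rstudent(fit)               # studentized residuals
+qqnorm(e_star); qqline(e_star)        # normality of the residuals
+plot(fitted(fit), e_star)             # structure against fitted values X beta_hat
+plot(fitted(fit), sqrt(abs(e_star)))  # homoscedasticity (scale-location plot)
+# car::vif(fit)                       # variance inflation factors, if car is installed
+\end{verbatim}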
+ + +\paragraph{Influential observations} + +We make the distinction between observations: +\begin{itemize} + \item With too large residual + $\rightarrow$ Influence on the estimation of $\sigma^2$ + \item Which are too isolated + $\rightarrow$ Influence on the estimation of $\beta$ +\end{itemize} + +\[ + e_i^* \sim \St(n-p-1) +\] +\subparagraph*{Rule} We consider an observation to be aberrant if: +\[ + e_i^* > \F^{-1}_{\St(n-p-1)}(1-\alpha) +\] +quantile of order $1-\alpha$, $\alpha$ being often set as $1/n$, or we set the threshold to 2. + +\paragraph{Leverage} Leverage is the diagonal term of the orthogonal projection matrix(?) $H_{ii}$. + +\begin{property} + \begin{itemize} + \item $0 \leq H_{ii} \leq 1$ + \item $\sum_i H_ii = p$ + \end{itemize} +\end{property} + +\subparagraph*{Rule} We consider that the observation is aberrant if the leverage is ??. + + +\paragraph{Non-linearity} + + +\section{Model Selection} + +We want to select the best model with the smallest number of predictors. + +When models have too many explicative variables, the power of statistical tests decreases. + +Different methods: +\begin{itemize} + \item Comparison of nested models; + \item Information criteria; + \item Method based on the prediction error. +\end{itemize} + +\subsection{Information criteria} + +\subsubsection{Likelihood} + +\begin{definition}[Likelihood] + Probability to observe what we observed for a particular model. + \[ + L_n (\M(k)) + \] +\end{definition} + + +\begin{definition}[Akaike Information Criterion] + \[ + AIC(\M(k)) = -2 \log L_n (\M(k)) + 2k. + \] + + $2k$ is a penalty, leading to privilege the smallest model. +\end{definition} + +\begin{definition}[Bayesian Information Criterion] + \[ + BIC(\M(k)) = -2 \log L_n (\M(k)) + \log(n) k. + \] + $\log(n) k$ is a penalty. +\end{definition} + +Usually $AIC$ have smaller penalty than $BIC$, thus $AIC$ criterion tends to select models with more variables than $BIC$ criterion. + +\subsection{Stepwise} + +\begin{description} + \item[forward] Add new predictor iteratively, beginning with the most contributing predictors. + \item[backward] Remove predictors iteratively. + \item[stepwise] Combination of forward and backward selection. We start by no predictors. We add predictor. Before adding the predictor, we check whether all previously predictors remain meaningful. +\end{description} + +The problem with this iterative regression, is that at each step we make a test. We have to reduce the confidence level for multiple test. + +In practice, the multiple testing problem is not taken into account in these approaches. + +We can use information criteria or model comparison in these methods. + +\section{Predictions} + +Let $X_i$ the $i$-th row of the matrix $\X$. The observed value $Y_i$ can be estimated by: +\[ + \hat{Y}_i = (\X \hat{\beta})_i = X_i \hat{\beta} +\] + +\begin{align*} + \EE (\hat{Y}_i) &= (\X \beta)_i = X_i \beta \\ + \sigma^{-1} (\X \hat{\beta} - \X \beta) \sim \Norm (0_{p+1}, (\X^T \X)^{-1}), \qquad \text{and} \\ + \Var(\hat{Y}_i) = ... 
\\
+    S^2 = \norm{...}
+\end{align*}
+
+
+\paragraph{Prediction Confidence Interval}
+We can build confidence intervals for the predicted values $(\X \hat{\beta})_i$.
+
+\dots
+
+\paragraph{Prediction error of $Y$}
+
+
+\paragraph{Prediction interval for a new observation $Y_{n+1}$}
+
+
+
diff --git a/content/chapters/part1/2.tex b/content/chapters/part1/2.tex
new file mode 100755
index 0000000..83fa3fd
--- /dev/null
+++ b/content/chapters/part1/2.tex
@@ -0,0 +1,186 @@
+\chapter{Generalized Linear Model}
+
+\begin{example}
+    \begin{description}
+        \item[Ex. 1 - Credit Card Default]
+              Let $Y_i$ be a boolean random variable following a Bernoulli distribution.
+        \item[Ex. 2 - Horseshoe Crabs]
+              Let $Y_i$ be the number of satellite males.
+
+              $Y_i$ can be described as following a Poisson distribution.
+    \end{description}
+\end{example}
+
+\begin{remark}
+    A Poisson distribution can be viewed as an approximation of the binomial distribution when $n$ is large and $p$ is small.
+\end{remark}
+
+We will consider the following relation:
+\[
+    \EE(Y_i) = g^{-1}(X_i \beta),
+\]
+or equivalently:
+\[
+    g(\EE(Y_i)) = X_i \beta.
+\]
+
+\begin{itemize}
+    \item $\beta$ is estimated by maximum likelihood;
+    \item $g$ is called the link function.
+\end{itemize}
+
+\begin{remark}
+    In the standard linear model, the OLS estimator is also the maximum likelihood estimator.
+\end{remark}
+
+\section{Logistic Regression}
+
+\begin{align*}
+                    & \log\left(\frac{\Pi}{1 - \Pi}\right) = \X \beta \\
+    \Leftrightarrow & e^{\ln \frac{\Pi}{1 - \Pi}} = e^{\X \beta} \\
+    \Leftrightarrow & \frac{\Pi}{1 - \Pi} = e^{\X \beta} \\
+    \Leftrightarrow & \Pi = (1 - \Pi) e^{\X\beta} \\
+    \Leftrightarrow & \Pi = e^{\X \beta} - \Pi e^{\X\beta} \\
+    \Leftrightarrow & \Pi + \Pi e^{\X\beta} = e^{\X \beta} \\
+    \Leftrightarrow & \Pi (1 + e^{\X\beta}) = e^{\X \beta} \\
+    \Leftrightarrow & \Pi = \frac{e^{\X\beta}}{1 + e^{\X \beta}}
+\end{align*}
+
+\section{Maximum Likelihood estimator}
+
+Likelihood: the probability of observing the data actually observed, seen as a function of the parameters.
+
+Estimate $\beta$ by $\hat{\beta}$ such that $\forall \beta \in \RR[p+1]$:
+\[
+    L_n (\hat{\beta}) \geq L_n (\beta)
+\]
+
+These estimators are consistent, but not necessarily unbiased.
+
+\section{Test for each single coordinate}
+
+For each coordinate $\beta_j$, a Wald-type test of $H_0: \beta_j = 0$ can be performed, as in the linear model.
+
+\begin{example}[Payment Default]
+    Let $Y_i$ be the default value for individual $i$.
+
+    \[
+        \log \left(\frac{\Pi (X)}{1 - \Pi (X)}\right) = \beta_0 + \beta_1 \text{student} + \beta_2 \text{balance} + \beta_3 \text{income}
+    \]
+
+    In this example, only $\beta_0$ and $\beta_2$ are significantly different from 0.
+\end{example}
+
+\begin{remark}
+    We do not add $\varepsilon_i$, because $\log\left(\frac{\Pi (X)}{1 - \Pi (X)}\right)$ corresponds to the expectation.
+\end{remark}
+
+\subsection{Comparison of nested models}
+
+To test $H_0:\: \beta_1 = \ldots = \beta_p = 0$ (the null model containing only the intercept), we use the likelihood ratio test:
+\[
+    T_n = -2 \log (\mathcal{L}^{\texttt{null}}) + 2 \log (\mathcal{L}(\hat{\beta})) \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2(p).
+\]
+
+\begin{remark}[Family of Tests]
+    \begin{itemize}
+        \item Comparison of the estimated values with the values under the null hypothesis (Wald test);
+        \item Likelihood ratio test;
+        \item Test based on the slope (derivative) of the log-likelihood (score test).
+    \end{itemize}
+\end{remark}
+
+\section{Relative risk}
+
+$RR(j)$ is the probability of having the disease conditional on the predictor values $X_{i_1}$, over the probability of having the disease conditional on the predictor values $X_{i_2}$.
+ +\[ + RR(j) = \frac{\Prob(Y_{i_1} = 1 \: | \: X_{i_1})}{\Prob(Y_{i_2} = 1) \: | \: X_{i_2}} = \frac{\EE(Y_{i_1})}{\EE(Y_{i_2})}. +\] + +$\pi(X_i)$ is the probability of having the disease, according to $X_i$. + +The relative risk can be written as\dots + +\section{Odds} + +Quantity providing a measure of the likelihood of a particular outcome: +\[ + odd = \frac{\pi(X_i)}{1 - \pi(X_i)} +\] + +\[ + odds = \exp(X_i \beta) +\] +odds is the ratio of people having the disease, if Y represent the disease, over the people not having the disease. + +\section{Odds Ratio} + +\begin{align*} + OR(j) =\frac{odds(X_{i_1})}{odds(X_{i_2})} & = \frac{\frac{\pi{X_{i_1}}}{1 - \pi(X_{i_1})}}{\frac{\pi{X_{i_2}}}{1 - \pi(X_{i_2})}} +\end{align*} + +The OR can be written as: +\[ + OR(j) = \exp(\beta_j) +\] + +\begin{exercise} + Show that $OR(j) = \exp(\beta_j)$. +\end{exercise} + +\begin{align*} + OR(j) & = \frac{odds(X_{i_1})}{odds(X_{i_2})} \\ + & = \frac{\exp(X_{i_1} \beta)}{\exp(X_{i_2} \beta)} \\ +\end{align*} + + \[ + \log \left( + \frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})}\right) + = \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)} +\] + Similarly +\[ + \log \left( + \frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right) + = \beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)} +\] + We substract both equations: + + \begin{align*} + &\log \left( + \frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})} \right) - \log \left(\frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right) \\ + & = \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)} - \beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)} \\ + & = \log OR(j) \\ + & = \cancel{(\beta_0 - \beta_0)} + \beta_1 \cancel{(X_1^{(1)} - X_1^{(2)})} + \beta_2 \cancel{(X_2^{(1)} - X_2^{(2)})} + \ldots + \beta_j \cancelto{1}{(X_j^{(1)} - X_j^{(2)})} + \ldots + \beta_p \cancel{(X_p^{(1)} - X_p^{(2)})} \\ + &\Leftrightarrow \log (OR_j) = \beta_j \\ + &\Leftrightarrow OR(j) = \exp(\beta_j) + \end{align*} + +OR is not equal to RR, except in the particular case of probability (?) + +If OR is significantly different from 1, the $\exp(\beta_j)$ is significantly different from 1, thus $\beta_j$ is significantly different from 0. + +If we have more than two classes, we do not know what means $X_{i_1} - X_{i_2} = 0$. We will have to take a reference class, and compare successively each class with the reference class. + +$\hat{\pi}(X_{+}) = \hat{\Prob(X=1 \: | X_{i1})}$ for a new individual. + + +\section{Poisson model} + +Let $Y_{i} \sim \mathcal{P}(\lambda_{i})$, corresponding to a counting. + +\begin{align*} + \EE(Y_{i}) & = g^{-1}(X_{i} \beta) \\ + \Leftrightarrow g(\EE(Y_{i})) = X_{i} \beta +\end{align*} + +where $g(x) = \ln(x)$, and $g^{-1}(x) = e^{x}$. + +\[ + \lambda_{i} = \EE(Y_{i}) = \Var(Y_{i}) +\] diff --git a/content/chapters/part1/3.tex b/content/chapters/part1/3.tex new file mode 100755 index 0000000..cb5be51 --- /dev/null +++ b/content/chapters/part1/3.tex @@ -0,0 +1,26 @@ +\chapter{Tests Reminders} + +\section{\texorpdfstring{$\chi^2$}{chi2} test of independence} + +[...] + +\section{\texorpdfstring{$\chi^2$}{chi2} test of goodness of fit} + +Check if the observations is in adequation with a particular distribution. + +\begin{example}[Mendel experiments] + Let $AB$, $Ab$, $aB$, $ab$ be the four possible genotypes of peas: colors and grain shape. 
+ \begin{tabular}{cccc} + \toprule + AB & Ab & aB & ab \\ + \midrule + 315 & 108 & 101 & 32 \\ + \bottomrule + \end{tabular} +\end{example} + +The test statistics is: +\[ + D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2_{(n-1)(q-1)??} +\] + diff --git a/content/chapters/part1/4.tex b/content/chapters/part1/4.tex new file mode 100755 index 0000000..6881527 --- /dev/null +++ b/content/chapters/part1/4.tex @@ -0,0 +1,125 @@ +\chapter{Regularized regressions} + + +Let $\Y$ be a vector of observations and $\X$ a matrix of dimension $n \times (p+1)$. +Suppose the real model is: +\[ + \Y = \X^{m^{*}} \beta^{m^{*}} + \varepsilon^{m^{*}} = \X^{*} \beta^{*} + \varepsilon^{*}. +\] +if $p$ is large compared to $n$: +\begin{itemize} + \item $\hat{\beta} = (\X^{T}\X)^{-1} \X^{T} \Y$ is not defined as $\X^{T}\X$ is not invertible. + + $m^{*}$ is the number of true predictors, that is, the number of predictor with non-zero values. + + \item + + \item +\end{itemize} + +\section{Ridge regression} + +Instead of minimizing the mean square error, we want to minimize the following regularize expression: +\[ + \hat{\beta}^{\text{ridge}}_{\lambda} = \argmin_{\beta \in \RR[p]} \norm{Y - X \beta}^{2} \lambda \sum_{j=1}^{p} \beta_{j}^{2} +\] +it is a way to favor the solution with small values for parameters. +where $\lambda$ is used to callibrate the regularization. +\[ + \sum_{j=1}^{p} \beta_{j}^{2} = \norm{\beta_{j}}^{2} +\] +is the classical square norm of the vector. + + +\section{Cross validation} + +\subsection{Leave-one-out \textit{jackknife}} + +\begin{example} + Let $\M_{0}$ be the model $Y_{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2}X_{2i} + \beta_{3} X_{3i}$ + + The model will be: + \[ + \begin{pmatrix} + y_{1} \\ + y_{2} \\ + y_{3} \\ + y_{4} \\ + y_{5} + \end{pmatrix} = + \beta_{0} + \beta_{1} \begin{pmatrix} + x_{11} \\ + x_{12} \\ + x_{13} \\ + x_{14} \\ + x_{15} + \end{pmatrix} + + \beta_{2} \begin{pmatrix} + x_{21} \\ + x_{22} \\ + x_{23} \\ + x_{24} \\ + x_{25} + \end{pmatrix} + + + \beta_{3} \begin{pmatrix} + x_{31} \\ + x_{32} \\ + x_{33} \\ + x_{34} \\ + x_{35} + \end{pmatrix} + \] + \def\x{$\times$} + \begin{tabular}{ccccc} + \toprule + 1 & 2 & 3 & 4 & 5 \\ + \midrule + . & \x & \x & \x & \x \\ + \x & . & \x & \x & \x \\ + \x & \x & . & \x & \x \\ + \x & \x & \x & . & \x \\ + \x & \x & \x & \x & . \\ + \bottomrule + \end{tabular} +\end{example} + +We perform computation of $\lambda$ for each dataset without one observation. + + +\subsection{K-fold cross-validation} + +We will have as many tables as subsets. + + +We chose lambda such that the generalization error is the smallest. + +\section{Lasso regression} + +The difference with the Ridge regression lies in the penalty: + +\[ + \hat{\beta}_{\lambda}^{\text{lasso}}= \argmin \norm{Y-X\beta}^{2} + \sum_{j=1}^{p} \abs{\beta_{j}} +\] + +$\sum_{j=1}^{p} \abs{\beta_j} = \norm{\beta}_1$ + +Instead of having a smooth increasing for each parameters, each parameters will enter iteratively in the model. Some parameters can be set to 0. + +Lasso regression can be used to perform variable selection. + + +We can use the same methods (K-fold and Leave-one-out) to select the $\lambda$ value. 
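+
+In practice, both penalties and the cross-validated choice of $\lambda$ are commonly computed with the \texttt{glmnet} package; the sketch below assumes that package is installed and uses simulated data with only three true predictors.
+
+\begin{verbatim}
+# Minimal sketch: ridge and lasso with cross-validated lambda (requires glmnet)
+library(glmnet)
+set.seed(7)
+n <- 100; p <- 50
+X <- matrix(rnorm(n * p), n, p)
+beta_true <- c(3, -2, 1.5, rep(0, p - 3))  # only 3 non-zero coefficients
+y <- X %*% beta_true + rnorm(n)
+
+cv_ridge <- cv.glmnet(X, y, alpha = 0)     # alpha = 0: ridge penalty
+cv_lasso <- cv.glmnet(X, y, alpha = 1)     # alpha = 1: lasso penalty
+cv_lasso$lambda.min                        # lambda with the smallest CV error
+coef(cv_lasso, s = "lambda.min")           # many coefficients are exactly 0
+\end{verbatim}
+
+With an intermediate value $0 < \texttt{alpha} < 1$, the same function fits an elastic-net-type compromise between the two penalties.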
+ +\section{Elastic Net} + +Combination of the Ridge and Lasso regression: + +\[ + \hat{\beta}_\lambda^{en} = \argmin \norm{Y-X\beta}^{2} + \lambda_{1} \norm{\beta}_{1} + \lambda_{2} \norm{\beta}_{2}^{2} +\] + + +\begin{remark} + In the case of Lasso, Elastic net or Ridge regression, we can no longer perform statistical test on the parameters. +\end{remark} diff --git a/content/chapters/part2/0.tex b/content/chapters/part2/0.tex new file mode 100755 index 0000000..fbfa3b9 --- /dev/null +++ b/content/chapters/part2/0.tex @@ -0,0 +1,2 @@ +\part{Linear Algebra} + diff --git a/content/chapters/part2/1.tex b/content/chapters/part2/1.tex new file mode 100755 index 0000000..5e168cd --- /dev/null +++ b/content/chapters/part2/1.tex @@ -0,0 +1,220 @@ +\chapter{Elements of Linear Algebra} +\label{ch:elements-of-linear-algebra} + +\begin{remark}[vector] + Let $u$ a vector, we will use interchangeably the following notations: $u$ and $\vec{u}$ +\end{remark} + +Let $u = \begin{pmatrix} + u_1 \\ + \vdots \\ + u_n + \end{pmatrix}$ and $v = \begin{pmatrix} + v_1 \\ + \vdots \\ + v_n + \end{pmatrix}$ + +\begin{definition}[Scalar Product (Dot Product)] + \begin{align*} + \scalar{u, v} & = \begin{pmatrix} + u_1, \ldots, u_v + \end{pmatrix} + \begin{pmatrix} + v_1 \\ + \vdots \\ + v_n + \end{pmatrix} \\ + & = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n + \end{align*} + + We may use $\scalar{u, v}$ or $u \cdot v$ notations. +\end{definition} +\paragraph{Dot product properties} +\begin{description} + \item[Commutative] $\scalar{u, v} = \scalar{v, u}$ + \item[Distributive] $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$ + \item $\scalar{u, v} = \norm{u} \times \norm{v} \times \cos(\widehat{u, v})$ + \item $\scalar{a, a} = \norm{a}^2$ +\end{description} + +\begin{definition}[Norm] + Length of the vector. + \[ + \norm{u} = \sqrt{\scalar{u, v}} + \] + + $\norm{u, v} > 0$ +\end{definition} + +\begin{definition}[Distance] + \[ + dist(u, v) = \norm{u-v} + \] +\end{definition} + +\begin{definition}[Orthogonality] + +\end{definition} + +\begin{remark} + \[ + (dist(u, v))^2 = \norm{u - v}^2, + \] and + \[ + \scalar{v-u, v-u} + \] +\end{remark} + +\begin{figure} + \centering + \includegraphics{figures/schemes/vector_orthogonality.pdf} + \caption{Scalar product of two orthogonal vectors.} + \label{fig:scheme-orthogonal-scalar-product} +\end{figure} + +\begin{align*} + \scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\ + & = \norm{v}^2 + \norm{u}^2 \\ + & = -2 \scalar{u, v} +\end{align*} + +\begin{align*} + \norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\ + \norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v} +\end{align*} + +\begin{proposition}[Scalar product of orthogonal vectors] +\[ + u \perp v \Leftrightarrow \scalar{u, v} = 0 +\] +\end{proposition} + +\begin{proof}[Indeed] + $\norm{u-v}^2 = \norm{u+v}^2$, as illustrated in \autoref{fig:scheme-orthogonal-scalar-product}. + \begin{align*} + \Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\ + \Leftrightarrow & 4 \scalar{u, v} = 0 \\ + \Leftrightarrow & \scalar{u, v} = 0 + \end{align*} +\end{proof} + +\begin{theorem}[Pythagorean theorem] + If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$ . +\end{theorem} + +\begin{definition}[Orthogonal Projection] + +\end{definition} +Let $y = \begin{pmatrix} + y_1 \\ + . \\ + y_n + \end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$. 
+$\mathcal{Y}$ can be written as the orthogonal projection of $y$ on $w$: +\[ + \mathcal{Y} = proj^w(y) + z, +\] +where +\[ + \begin{cases} + z \in w^\perp \\ + proj^w(y) \in w + \end{cases} +\] +There is only one vector $\mathcal{Y}$ that ? + +The scalar product between $z$ and (?) is zero. + +\begin{property} + $proj^w(y)$ is the closest vector to $y$ that belongs to $w$. +\end{property} + +\begin{definition}[Matrix] + A matrix is an application, that is, a function that transform a thing into another, it is a linear function. +\end{definition} + +\begin{example}[Matrix application] + + Let $A$ be a matrix: + \[ + A = \begin{pmatrix} + a & b \\ + c & d + \end{pmatrix} + \] and + \[ + x = \begin{pmatrix} + x_1 \\ + x_2 + \end{pmatrix} + \] + Then, + \begin{align*} + Ax & = \begin{pmatrix} + a & b \\ + c & d + \end{pmatrix} + \begin{pmatrix} + x_1 \\ + x_2 + \end{pmatrix} \\ + & = \begin{pmatrix} + a x_1 + b x_2 \\ + c x_1 + d x_2 + \end{pmatrix} + \end{align*} + + Similarly, + \begin{align*} + \begin{pmatrix} + a & b & c & d \\ + e & f & g & h \\ + i & j & k & l + \end{pmatrix} + \begin{pmatrix} + x_1 \\ + x_2 \\ + x_3 \\ + x_4 + \end{pmatrix} + = + \begin{pmatrix} + \luadirect{ + local matrix_product = require("scripts.matrix_product") + local m1 = { + {"a", "b", "c", "d"}, + {"e", "f", "g", "h"}, + {"i", "j", "k", "l"} + } + local m2 = { + {"x_1"}, + {"x_2"}, + {"x_3"}, + {"x_4"} + } + local product_matrix = matrix_product.matrix_product_repr(m1,m2) + local matrix_dump = matrix_product.dump_matrix(product_matrix) + tex.print(matrix_dump) + } + \end{pmatrix} + \end{align*} +\end{example} + +The number of columns has to be the same as the dimension of the vector to which the matrix is applied. + +\begin{definition}[Tranpose of a Matrix] + Let $A = \begin{pmatrix} + a & b \\ + c & d + \end{pmatrix}$, then $A^T = \begin{pmatrix} + a & c \\ + b & d + \end{pmatrix}$ +\end{definition} + +\begin{figure} + \centering + \includegraphics{figures/schemes/coordinates_systems.pdf} + \caption{Coordinate systems} +\end{figure} diff --git a/content/conclusion.tex b/content/conclusion.tex new file mode 100755 index 0000000..e69de29 diff --git a/content/introduction.tex b/content/introduction.tex new file mode 100755 index 0000000..12a8352 --- /dev/null +++ b/content/introduction.tex @@ -0,0 +1,35 @@ +\chapter{Introduction} + +\begin{definition}[Long Term Nonprocessor (LTNP)] + Patient who will remain a long time in good health condition, even with a large viral load (cf. HIV). +\end{definition} + +\begin{example}[Genotype: Qualitative or Quantitative?] + \[ + \text{SNP}: + \begin{cases} + \text{AA} \\ + \text{AB} \\ + \text{BB} + \end{cases} + \rightarrow + \begin{pmatrix} + 0 \\ + 1 \\ + 2 + \end{pmatrix}, + \] + thus we might consider genotype either as a qualitative variable or quantitative variable. +\end{example} + +When the variable are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance. 
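+
+As a small illustration of this choice, the R sketch below (hypothetical SNP data, not from the course) fits the same phenotype once with the genotype coded as a number of B alleles (simple regression, a single slope) and once as a factor (one-way analysis of variance, one mean per genotype).
+
+\begin{verbatim}
+# Genotype as a quantitative (0/1/2) versus qualitative (factor) predictor
+set.seed(5)
+geno  <- sample(0:2, 120, replace = TRUE)   # AA = 0, AB = 1, BB = 2
+pheno <- 1 + 0.8 * geno + rnorm(120)
+
+fit_reg   <- lm(pheno ~ geno)               # simple linear regression
+fit_anova <- lm(pheno ~ factor(geno))       # one-way analysis of variance
+summary(fit_reg)$coefficients               # a single additive slope
+anova(fit_anova)                            # F-test between genotype groups
+\end{verbatim}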
+ +\begin{figure} + \begin{subfigure}{0.45\columnwidth} + \includegraphics[width=\columnwidth]{figures/plots/linear_regression_linear.pdf} + \end{subfigure} + \begin{subfigure}{0.45\columnwidth} + \includegraphics[width=\columnwidth]{figures/plots/linear_regression_non_linear.pdf} + \end{subfigure} + \caption{Illustration of two models fitting observed values} +\end{figure} \ No newline at end of file diff --git a/definitions.tex b/definitions.tex new file mode 100755 index 0000000..866ecb6 --- /dev/null +++ b/definitions.tex @@ -0,0 +1,12 @@ +\DeclareMathOperator{\VVar}{\mathbb{V}} % variance +\DeclareMathOperator{\One}{\mathbf{1}} +\DeclareMathOperator{\Cor}{\mathrm{Cor}} +\DeclareMathOperator{\St}{\mathscr{St}} +\newcommand{\M}[1][]{\ensuremath{\ifstrempty{#1}{\mathcal{M}}{\mathbb{M}_{#1}}}} +\newcommand{\X}{\ensuremath{\mathbf{X}}} +\newcommand{\Y}{\ensuremath{\mathbf{Y}}} +\newcommand{\Z}{\ensuremath{\mathbf{Z}}} +\DeclareMathOperator*{\argmax}{arg\,max} +\DeclareMathOperator*{\argmin}{arg\,min} +\usepackage{unicode-math} + diff --git a/figures/plots/linear_regression.R b/figures/plots/linear_regression.R new file mode 100755 index 0000000..1e3e902 --- /dev/null +++ b/figures/plots/linear_regression.R @@ -0,0 +1,26 @@ +# Plot an affine model +n <- 250 +sd <- 0.05 +epsilon <- rnorm(n, mean = 0, sd = 2) +beta0 <- 1.25 +beta1 <- 4 +linear_model <- function(x) { + return(beta0 + beta1*x) +} +x <- runif(n, min=0, max=1) +y <- linear_model(x) + epsilon + +pdf("figures/plots/linear_regression_linear.pdf") +plot(x, y, col="#5654fa", type="p", pch=20, xlab="x", ylab="y") +abline(a = beta0, b = beta1, col="red") +dev.off() + + +non_linear_model <- function(x) { + return(beta0 + beta1 * exp(2*x)) +} +non_linear_y <- non_linear_model(x) + epsilon +pdf("figures/plots/linear_regression_non_linear.pdf") +plot(x, non_linear_y, col="#5654fa", type="p", pch=20, xlab="x", ylab="z") +curve(non_linear_model, from=0, to=1, add=T, col="red") +dev.off() diff --git a/figures/plots/linear_regression_linear.pdf b/figures/plots/linear_regression_linear.pdf new file mode 100755 index 0000000..0be2ed9 --- /dev/null +++ b/figures/plots/linear_regression_linear.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25059b14c85b0700f41d52bfb08536a101f5ab0ee0b9580aadaae3faeefcd1ae +size 19542 diff --git a/figures/plots/linear_regression_non_linear.pdf b/figures/plots/linear_regression_non_linear.pdf new file mode 100755 index 0000000..20b5677 --- /dev/null +++ b/figures/plots/linear_regression_non_linear.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fef791e9ba93e0c8eac221ee47f79aecd5a7744945575a793ec3ebfe673c3e +size 20288 diff --git a/figures/plots/logistic_curve.pdf b/figures/plots/logistic_curve.pdf new file mode 100755 index 0000000..197e00a --- /dev/null +++ b/figures/plots/logistic_curve.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d9081426391efec66ccd3d61fab8c4520d5524cf8f763f1728f39cfd1beb35 +size 37982 diff --git a/figures/plots/logistic_curve.tex b/figures/plots/logistic_curve.tex new file mode 100755 index 0000000..8b1e1a4 --- /dev/null +++ b/figures/plots/logistic_curve.tex @@ -0,0 +1,23 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{pgfplots} +\pgfplotsset{compat=1.18} + +\begin{document} +\begin{tikzpicture} + \begin{axis}[ + title={Logit function}, + xlabel={$x$}, + ylabel={$y$}, + domain=-5:5, + samples=200, + legend style={at={(0.95,0.05)},anchor=south east} +] +\newcommand{\Lvar}{1} 
+\newcommand{\kvar}{1} +\newcommand{\xvar}{0} +\addplot [blue] {\Lvar / (1 + exp(-\kvar*(x-\xvar)))}; +\addlegendentry{$L = \Lvar, k=\kvar, x_0=\xvar$}; +\end{axis} +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/.gitattributes b/figures/schemes/.gitattributes new file mode 100755 index 0000000..0799fdf --- /dev/null +++ b/figures/schemes/.gitattributes @@ -0,0 +1,3 @@ +covariance.pdf filter=lfs diff=lfs merge=lfs -text +../plots/linear_regression_linear.pdf filter=lfs diff=lfs merge=lfs -text +../plots/linear_regression_non_linear.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/figures/schemes/base_plan.pdf b/figures/schemes/base_plan.pdf new file mode 100755 index 0000000..b5c74c0 --- /dev/null +++ b/figures/schemes/base_plan.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fa22e3c82fc6eccf37ef1c220543178c13b7f4d6b3c2b6cec84caffce27fcf8 +size 3108 diff --git a/figures/schemes/base_plan.tex b/figures/schemes/base_plan.tex new file mode 100755 index 0000000..52dacda --- /dev/null +++ b/figures/schemes/base_plan.tex @@ -0,0 +1,16 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{tkz-euclide} + +\begin{document} +\usetikzlibrary{3d} +\begin{tikzpicture} + \tkzDefPoint(-2,-2){A} + \tkzDefPoint(10:3){B} + \tkzDefShiftPointCoord[B](1:5){C} + \tkzDefShiftPointCoord[A](1:5){D} + \tkzDrawPolygon(A,...,D) + \tkzDrawPoints(A,...,D) + \node at (A) {A}; +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/coordinates_systems.pdf b/figures/schemes/coordinates_systems.pdf new file mode 100755 index 0000000..c9def7d --- /dev/null +++ b/figures/schemes/coordinates_systems.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b624043eca04af9f807d9d24bee0588b8c091bdc817018b93c2fabc7bb134e +size 9339 diff --git a/figures/schemes/coordinates_systems.tex b/figures/schemes/coordinates_systems.tex new file mode 100755 index 0000000..4b59680 --- /dev/null +++ b/figures/schemes/coordinates_systems.tex @@ -0,0 +1,23 @@ +\documentclass[tikz]{standalone} +\usepackage{tikz} + +\begin{document} +\usetikzlibrary{3d} +% 1D axis +\begin{tikzpicture}[->] + \begin{scope}[xshift=0] + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$}; + \end{scope} +% 2D coordinate system + \begin{scope}[xshift=50] + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$}; + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1,angle=90) node[above] {$y$}; + \end{scope} +% 3D coordinate systems +\begin{scope}[xshift=100] + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$}; + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1,angle=90) node[above] {$y$}; + \draw (0, 0, 0) -- (xyz cylindrical cs:z=1) node[below left] {$z$}; +\end{scope} +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/covariance.pdf b/figures/schemes/covariance.pdf new file mode 100755 index 0000000..773a8ca --- /dev/null +++ b/figures/schemes/covariance.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32e65df88f221afec82d7d549ac15078d482112e8693a58e21eaaf1c8958785 +size 36303 diff --git a/figures/schemes/covariance.tex b/figures/schemes/covariance.tex new file mode 100755 index 0000000..aa487ca --- /dev/null +++ b/figures/schemes/covariance.tex @@ -0,0 +1,35 @@ +% Scheme of Covariance +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{amssymb} +\begin{document} +\begin{tikzpicture} + 
\usetikzlibrary{positioning} + \tikzset{ + point/.style = {circle, inner sep={.75\pgflinewidth}, opacity=1, draw, black, fill=black}, + point name/.style = {insert path={coordinate (#1)}}, + } + \begin{scope}[yshift=0] + \draw (-4, 0.5) -- (4,0.5) node[right] {$Y_i$}; + \draw (-4, -0.5) -- (4,-0.5) node[right] {$Y_j$}; + \node at (6, 0) {$\mathrm{Cov}(Y_i, Y_j) > 0$}; + \node (EYipoint) at (0,0.5) {$\times$}; + \node at (0, 1) {$\mathbb{E}(Y_i)$}; + \node (EYipoint) at (0,-0.5) {$\times$}; + \node at (0, -1) {$\mathbb{E}(Y_j)$}; + + \foreach \x in {-3, 0.5, 2.75} { + \node[point] at (\x, 0.5) {}; + } + \foreach \x in {-2, -1, 3} { + \node[point] at (\x, -0.5) {}; + } + \end{scope} + \begin{scope}[yshift=-100] + \draw (-4,0.5) -- (4,0.5) node[right] {$Y_i$}; + \draw (-4,-0.5) -- (4,-0.5) node[right] {$Y_j$}; + \node at (6, 0) {$\mathrm{Cov}(Y_i, Y_j) \approx 0$}; + \end{scope} + +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/ordinary_least_squares.pdf b/figures/schemes/ordinary_least_squares.pdf new file mode 100755 index 0000000..db4a55a --- /dev/null +++ b/figures/schemes/ordinary_least_squares.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b2d61135c31ecd7ed1863fce7b1dfea45dea33e886cb07895f2927a74e85993 +size 5095 diff --git a/figures/schemes/ordinary_least_squares.png b/figures/schemes/ordinary_least_squares.png new file mode 100755 index 0000000..1dfc9d6 Binary files /dev/null and b/figures/schemes/ordinary_least_squares.png differ diff --git a/figures/schemes/ordinary_least_squares.svg b/figures/schemes/ordinary_least_squares.svg new file mode 100755 index 0000000..b60649e --- /dev/null +++ b/figures/schemes/ordinary_least_squares.svg @@ -0,0 +1,988 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/figures/schemes/ordinary_least_squares.tex b/figures/schemes/ordinary_least_squares.tex new file mode 100755 index 0000000..39190b2 --- /dev/null +++ b/figures/schemes/ordinary_least_squares.tex @@ -0,0 +1,45 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{luacode} + + +\begin{document} + +\begin{tikzpicture} + % Draw axes + \draw[->] (0,0) -- (5,0); + \draw[->] (0,0) -- (0,5); + + \directlua{ + function runif(min, max) + return min + (max - min) * math.random() + end + math.randomseed(42) + x_min = 0 + x_max = 5 + error_min = -1 + error_max = 1 + beta0 = 2 + beta1 = 1/5 + x_values = {} + y_values = {} + for i=1,42 do + x = runif(x_min, x_max) + epsilon = runif(error_min, error_max) + y_hat = beta0 + beta1 * x + y = y_hat + epsilon + tex.print("\\draw[-,very thin, lightgray] ("..x..","..y_hat..") -- ("..x..","..y..") ;") + x_values[i] = x + y_values[i] = y + end + for i=1,42 do + x = x_values[i] + y = y_values[i] + tex.print("\\node[black] at ("..x..","..y..") {.};") + end + } + % Draw least square line + \draw[-,blue,thick] (0,2) -- (5,\directlua{tex.print(5*beta1+beta0)}); + % Draw square norm +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/orthogonal_projection.pdf b/figures/schemes/orthogonal_projection.pdf new file mode 100755 index 0000000..7ca63e0 --- /dev/null +++ b/figures/schemes/orthogonal_projection.pdf 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb63544022b5cd3c4061c7dbb8355fe805e24598591233c8623c00339d0336fc +size 25097 diff --git a/figures/schemes/orthogonal_projection.tex b/figures/schemes/orthogonal_projection.tex new file mode 100755 index 0000000..6135b94 --- /dev/null +++ b/figures/schemes/orthogonal_projection.tex @@ -0,0 +1,42 @@ +% ref. https://tex.stackexchange.com/a/523362/235607 +\documentclass[tikz]{standalone} +\usepackage{tikz-3dplot} +\usepackage{tkz-euclide} +\usepackage{mathtools} +\begin{document} +\tdplotsetmaincoords{50}{0} +\begin{tikzpicture}[tdplot_main_coords,bullet/.style={circle,inner + sep=1pt,fill=black,fill opacity=1}] + \begin{scope}[canvas is xy plane at z=0] + \tkzDefPoints{-2/-1/A,3/-1/B,4/2/C} + \tkzDefParallelogram(A,B,C) + \tkzGetPoint{D} + \tkzDrawPolygon[fill=gray!25!white](A,B,C,D) + + \end{scope} + % Draw the rectangle triangle scheme + \begin{scope}[canvas is xz plane at y=1] + \draw[thick,fill=white,fill opacity=0.7,nodes={opacity=1}] + (2,0) node[bullet,label=right:{$\bar{\mathbf{Y}}$}] (Y_bar) {} + -- (0,-0.5) node (B) {} + -- (0,3) node[label=above:{$\mathbf{Y}$}] (Y) {} -- cycle; + % Right angle annotation + \tkzPicRightAngle[draw, + angle eccentricity=.5,angle radius=2mm](Y,B,Y_bar) + % epsilon: Y - X \hat{\beta} curly brackets annotations + \draw[decorate,decoration={brace, + amplitude=8pt},xshift=0pt,very thin,gray] (B) -- (Y) node [black,midway,xshift=-1.25em,yshift=0em] {\color{blue}$b$}; + % X\hat{\beta} - \hat{Y} + \draw[decorate,decoration={brace, + amplitude=8pt},xshift=0pt,very thin,gray] (Y_bar) -- (B) node [black,midway,xshift=0.5em,yshift=-1em] {\color{blue}$a$}; + % + \draw[decorate,decoration={brace, + amplitude=8pt},xshift=0pt,very thin,gray] (Y) -- (Y_bar) node [black,midway,xshift=1em,yshift=1em] {\color{blue}$c$}; + \end{scope} + % Coordinate system + \begin{scope}[canvas is xy plane at z=0] + \draw[->] (2,1) -- node [above] {$\mathbf{1}$} ++(-1,0) ; + \draw[->] (2,1) -- ++(-0.45,-1) node [right] {$X_1$}; + \end{scope} +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/regression_plan_3D.pdf b/figures/schemes/regression_plan_3D.pdf new file mode 100755 index 0000000..07bffa0 --- /dev/null +++ b/figures/schemes/regression_plan_3D.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec0202c19e356883e57666de7d78083f4d021f51db8a51756f60381063cc01a +size 11106 diff --git a/figures/schemes/regression_plan_3D.tex b/figures/schemes/regression_plan_3D.tex new file mode 100755 index 0000000..0ef61bf --- /dev/null +++ b/figures/schemes/regression_plan_3D.tex @@ -0,0 +1,26 @@ +\documentclass[tikz,border=3.14mm]{standalone} +\usepackage{tikz-3dplot} +\begin{document} +\tdplotsetmaincoords{105}{-30} +\usetikzlibrary{patterns} +\begin{tikzpicture}[tdplot_main_coords,font=\sffamily] + \tdplotsetrotatedcoords{00}{30}{0} + \begin{scope}[tdplot_rotated_coords] + \begin{scope}[canvas is xy plane at z=0] + \draw[fill opacity=0,pattern=north west lines,pattern color=gray] (-2,-3) rectangle (2,3); + \draw[gray,fill=lightgray,fill opacity=0.75] (-2,-3) rectangle (2,3); + \draw[very thick] (-2,0) -- (2,0); + \path (-150:2) coordinate (H) (-1.5,0) coordinate(X); + \pgflowlevelsynccm + \draw[very thick,-stealth,gray] (0,0) -- (-30:1.5); + \end{scope} + \draw[stealth-] (H) -- ++ (-1,0,0.2) node[pos=1.3]{$H$}; + \draw[stealth-] (X) -- ++ (0,1,0.2) node[pos=1.3]{$X$}; + \draw[very thick,-stealth] (0,0,0) coordinate (O) -- (0,0,3) node[right]{$p$}; + 
\end{scope} + \pgfmathsetmacro{\Radius}{1.5} + \draw[-stealth] (O)-- (2.5*\Radius,0,0) node[pos=1.15] {$x$}; + \draw[-stealth] (O) -- (0,3.5*\Radius,0) node[pos=1.15] {$z$}; + \draw[-stealth] (O) -- (0,0,2.5*\Radius) node[pos=1.05] {$y$}; +\end{tikzpicture} +\end{document} diff --git a/figures/schemes/vector_orthogonality.pdf b/figures/schemes/vector_orthogonality.pdf new file mode 100755 index 0000000..71ef526 --- /dev/null +++ b/figures/schemes/vector_orthogonality.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c560745fa56d903129700c3ddcec8ad338fe915ca05a23d1da773ce26fcc6eda +size 16486 diff --git a/figures/schemes/vector_orthogonality.tex b/figures/schemes/vector_orthogonality.tex new file mode 100755 index 0000000..bd96481 --- /dev/null +++ b/figures/schemes/vector_orthogonality.tex @@ -0,0 +1,27 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{tkz-euclide} +\usepackage{mathtools} + +\begin{document} +\begin{tikzpicture} + \coordinate (A) at (0.5, 1) {}; + \coordinate (B) at (-0.5, -1) {}; + \coordinate (C) at (1.25, -0.70) {}; + \coordinate (0) at (0, 0) {}; + + % left angle + \tkzMarkRightAngle[draw=black,size=0.1](A,0,C); + \draw[lightgray,very thin] (A) -- (C); + % Curly brace annotation for ||u-v|| + \draw[decorate,decoration={brace, + amplitude=10pt},xshift=0pt,yshift=4pt,very thin] (A) -- (C) node [black,midway,xshift=27pt,yshift=0.5em] {$\lVert u-v \rVert$}; + \draw[lightgray,very thin] (B) -- (C); + + % axis lines + \draw[->] (0) -- (A) node[above] {$u$}; + \draw[->] (0) -- (B) node[below] {$-u$}; + \draw[->] (0) -- (C) node[right] {$v$}; + +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/glossary.tex b/glossary.tex new file mode 100755 index 0000000..e69de29 diff --git a/main.pdf b/main.pdf new file mode 100644 index 0000000..59d9113 --- /dev/null +++ b/main.pdf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94ce17f320ebc6b8d42c8b6137f8f9180990d2dd2a66b973b743f9f3419d183d +size 391515 diff --git a/main.tex b/main.tex new file mode 100755 index 0000000..5069408 --- /dev/null +++ b/main.tex @@ -0,0 +1,68 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Course -- Multivariate Statistics --- GENIOMHE --- M1 - S1 +% +% Author: Samuel Ortion +% Version: 0.1.0 +% Date: 2023 +% License: CC-By-SA 4.0+ International +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\documentclass[ + a4paper, + fontsize=10pt, + fleqn, + oneside +]{scrbook} + +\usepackage{mus} + +\titlehead{GENIOMHE} +\title{Multivariate\newline{}Statistics} +\author{Samuel Ortion} +\teacher{Cyril Dalmasso} +\cursus{GENIOMHE} +\university{Université Paris-Saclay, Université d'Évry val d'Essonne} +\semester{M1 - S1} +\date{Fall 2023} + +\definecolor{myblue}{HTML}{5654fa} +\colorlet{primary}{myblue} + +\hypersetup{ + pdftitle={Course - Multivariate Statistics}, + pdfauthor={Samuel Ortion}, + pdfsubject={}, + pdfkeywords={}, + pdfcreator={LaTeX} +} + +\addbibresource{references} + +\usepackage[ + type={CC}, + modifier={by-sa}, + version={4.0}, +]{doclicense} + +\input{preamble} +\input{glossary} +\input{definitions} + + +\makeindex% +\makeglossary% +\begin{document} + +\maketitlefullpage + +\tableofcontents + +\doclicenseThis% + +\input{content/introduction} + +\input{content/chapters/include} + +\input{content/conclusion} + +\end{document} diff --git a/preamble.tex b/preamble.tex new file mode 100755 index 0000000..a29dcd9 --- /dev/null +++ 
b/preamble.tex @@ -0,0 +1,7 @@ +\usepackage{pgffor} +\usetikzlibrary{math} +\usepackage{standalone} +\usepackage{tikz-3dplot} +\usepackage{tkz-euclide} +\usepackage{nicematrix} +\usepackage{luacode} diff --git a/references.bib b/references.bib new file mode 100755 index 0000000..e69de29 diff --git a/scripts/matrix_product.lua b/scripts/matrix_product.lua new file mode 100755 index 0000000..21ba5ed --- /dev/null +++ b/scripts/matrix_product.lua @@ -0,0 +1,57 @@ +local function matrix_product_repr(m1, m2) + if #m1[1] ~= #m2 then -- inner matrix-dimensions must agree + return nil + end + + local res = {} + + for i = 1, #m1 do + res[i] = {} + for j = 1, #m2[1] do + res[i][j] = " " + for k = 1, #m2 do + if k ~= 1 then + res[i][j] = res[i][j] .. " + " + end + res[i][j] = res[i][j] .. m1[i][k] .. " " .. m2[k][j] + end + end + end + return res +end + +local function dump_matrix(matrix) + local repr = "" + for i, row in ipairs(matrix) do + for j, cell in ipairs(row) do + repr = repr .. " " .. cell + if j ~= #row then + repr = repr .. " & " + end + end + if i ~= #matrix then + repr = repr .. [[ \\ ]] + end + repr = repr .. "\n" + end + return repr +end + +local m1 = { + {"a", "b", "c", "d"}, + {"e", "f", "g", "h"}, + {"i", "j", "k", "l"} +} +local m2 = { + {"x_1"}, + {"x_2"}, + {"x_3"}, + {"x_4"} +} + +print(dump_matrix(matrix_product_repr(m1, m2))) + +return { + matrix_product_repr = matrix_product_repr, + dump_matrix = dump_matrix +} \ No newline at end of file