feat: Addendum on cross validation

parent c552aa24f4
commit 7bcd3f9289
@@ -11,7 +11,7 @@
 }
 }
 
-\includechapters{part1}{1}
+\includechapters{part1}{4}
 
 \includechapters{part2}{2}
 
@@ -1,15 +1,15 @@
 \chapter{Tests Reminders}
 
-\section{$\chi^2$ test of independence}
+\section{\texorpdfstring{$\chi^2$}{chi2} test of independence}
 
 
-\section{$\chi^2$ test of goodness of fit}
+\section{\texorpdfstring{$\chi^2$}{chi2} test of goodness of fit}
 
 Check whether the observations are consistent with a given distribution.
 
 \begin{example}[Mendel experiments]
 Let $AB$, $Ab$, $aB$, $ab$ be the four possible genotypes of peas: color and grain shape.
-\begin{tabular}
+\begin{tabular}{cccc}
 \toprule
 AB & Ab & aB & ab \\
 \midrule
@@ -20,6 +20,6 @@ Check whether the observations are consistent with a given distribution.
 
 The test statistic is:
 \[
-D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \underoverset{H_0}{\mathcal{L}} \chi^2_{(n-1)(q-1)??}
+D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \xrightarrow[n \to \infty]{\mathcal{L}} \chi^2_{k-1} \quad \text{under } H_0
 \]
 
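As a quick numeric check of this statistic (a minimal Python sketch, assuming numpy and scipy are available; the counts are Mendel's classical round/wrinkled x yellow/green pea data, whose theoretical ratio is 9:3:3:1):

import numpy as np
from scipy import stats

# Mendel's dihybrid pea counts for the four genotypes AB, Ab, aB, ab
observed = np.array([315, 101, 108, 32])
n = observed.sum()
p = np.array([9, 3, 3, 1]) / 16        # theoretical 9:3:3:1 probabilities
expected = n * p

# D = sum_i (N_i - n p_i)^2 / (n p_i), compared to chi^2 with k-1 df
D = ((observed - expected) ** 2 / expected).sum()
pvalue = stats.chi2.sf(D, df=len(observed) - 1)
print(D, pvalue)                       # D ~ 0.47: no evidence against 9:3:3:1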
@@ -0,0 +1,125 @@
+\chapter{Regularized regressions}
+
+Let $\Y$ be a vector of observations and $\X$ a matrix of dimension $n \times (p+1)$.
+Suppose the true model is:
+\[
+\Y = \X^{m^{*}} \beta^{m^{*}} + \varepsilon^{m^{*}} = \X^{*} \beta^{*} + \varepsilon^{*}.
+\]
+If $p$ is large compared to $n$:
+\begin{itemize}
+\item $\hat{\beta} = (\X^{T}\X)^{-1} \X^{T} \Y$ is not defined, as $\X^{T}\X$ is not invertible (see the sketch after this list).
+
+$m^{*}$ is the number of true predictors, that is, the number of predictors with non-zero coefficients.
+
+\item
+\item
+\end{itemize}
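A minimal numeric illustration of the first item, on synthetic Gaussian data (all names are illustrative):

import numpy as np

# When p > n, the Gram matrix X^T X has rank at most n, hence is singular.
rng = np.random.default_rng(0)
n, p = 20, 50
X = rng.standard_normal((n, p))
print(np.linalg.matrix_rank(X.T @ X))  # 20 < 50: X^T X is not invertible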
+
+\section{Ridge regression}
+
+Instead of minimizing the mean squared error alone, we minimize the following regularized expression:
+\[
+\hat{\beta}^{\text{ridge}}_{\lambda} = \argmin_{\beta \in \RR[p]} \norm{Y - X \beta}^{2} + \lambda \sum_{j=1}^{p} \beta_{j}^{2},
+\]
+where $\lambda$ calibrates the regularization; it is a way to favor solutions with small parameter values. Here
+\[
+\sum_{j=1}^{p} \beta_{j}^{2} = \norm{\beta}^{2}
+\]
+is the classical squared norm of the vector.
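For completeness, a standard result not stated in the notes: ignoring the unpenalized intercept, the ridge criterion has the closed-form minimizer
\[
\hat{\beta}^{\text{ridge}}_{\lambda} = (\X^{T}\X + \lambda I_{p})^{-1} \X^{T} \Y,
\]
and $\X^{T}\X + \lambda I_{p}$ is positive definite, hence invertible, for every $\lambda > 0$ even when $p > n$, which is exactly what fails for ordinary least squares.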
+
+\section{Cross validation}
+
+\subsection{Leave-one-out \textit{jackknife}}
+
+\begin{example}
+Let $\M_{0}$ be the model $Y_{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2} X_{2i} + \beta_{3} X_{3i}$.
+
+The model will be:
+\[
+\begin{pmatrix} y_{1} \\ y_{2} \\ y_{3} \\ y_{4} \\ y_{5} \end{pmatrix}
+= \beta_{0}
++ \beta_{1} \begin{pmatrix} x_{11} \\ x_{12} \\ x_{13} \\ x_{14} \\ x_{15} \end{pmatrix}
++ \beta_{2} \begin{pmatrix} x_{21} \\ x_{22} \\ x_{23} \\ x_{24} \\ x_{25} \end{pmatrix}
++ \beta_{3} \begin{pmatrix} x_{31} \\ x_{32} \\ x_{33} \\ x_{34} \\ x_{35} \end{pmatrix}
+\]
+Each row of the following table is one fold: the observation marked with a dot is left out, and the model is fitted on the observations marked with a cross.
+\def\x{$\times$}
+\begin{tabular}{ccccc}
+\toprule
+1 & 2 & 3 & 4 & 5 \\
+\midrule
+. & \x & \x & \x & \x \\
+\x & . & \x & \x & \x \\
+\x & \x & . & \x & \x \\
+\x & \x & \x & . & \x \\
+\x & \x & \x & \x & . \\
+\bottomrule
+\end{tabular}
+\end{example}
+
+We carry out the computation of $\lambda$ on each dataset with one observation left out.
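A sketch of this selection in Python (synthetic data, ridge fitted in closed form; all names are illustrative):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((30, 5))
y = X @ np.array([1.0, -2.0, 0.0, 0.0, 3.0]) + rng.standard_normal(30)

def loo_error(lam):
    """Mean squared prediction error over the n leave-one-out folds."""
    n, p = X.shape
    errors = []
    for i in range(n):
        keep = np.arange(n) != i                     # leave observation i out
        beta = np.linalg.solve(X[keep].T @ X[keep] + lam * np.eye(p),
                               X[keep].T @ y[keep])  # closed-form ridge fit
        errors.append((y[i] - X[i] @ beta) ** 2)     # predict the held-out point
    return np.mean(errors)

lambdas = np.logspace(-3, 3, 25)
best = min(lambdas, key=loo_error)                   # smallest estimated error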
+
+\subsection{K-fold cross-validation}
+
+The observations are split into $K$ subsets, so we will have as many tables as subsets.
+
+We choose $\lambda$ such that the estimated generalization error is the smallest.
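The K-fold variant of the same selection, as a sketch assuming scikit-learn is available (sklearn calls the penalty alpha rather than $\lambda$):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold

rng = np.random.default_rng(0)
X = rng.standard_normal((60, 8))
y = X[:, 0] - 2 * X[:, 1] + rng.standard_normal(60)

def kfold_error(alpha, K=5):
    errs = []
    for train, test in KFold(n_splits=K, shuffle=True, random_state=0).split(X):
        model = Ridge(alpha=alpha).fit(X[train], y[train])
        errs.append(np.mean((y[test] - model.predict(X[test])) ** 2))
    return np.mean(errs)                 # estimated generalization error

alphas = np.logspace(-3, 3, 25)
best = min(alphas, key=kfold_error)      # penalty with the smallest error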
+
+\section{Lasso regression}
+
+The difference with the ridge regression lies in the penalty:
+\[
+\hat{\beta}_{\lambda}^{\text{lasso}} = \argmin_{\beta \in \RR[p]} \norm{Y-X\beta}^{2} + \lambda \sum_{j=1}^{p} \abs{\beta_{j}},
+\]
+where $\sum_{j=1}^{p} \abs{\beta_j} = \norm{\beta}_1$.
+
+Instead of shrinking smoothly, the parameters enter the model one at a time as the penalty decreases, and some of them can be set exactly to 0.
+
+Lasso regression can therefore be used to perform variable selection.
+
+We can use the same methods (K-fold and leave-one-out) to select the value of $\lambda$.
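A small sketch of the variable-selection effect, again assuming scikit-learn:

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 6))
y = 3 * X[:, 0] - 2 * X[:, 1] + rng.standard_normal(100)  # 2 true predictors

# As alpha (sklearn's lambda) grows, more coefficients become exactly 0.
for alpha in (0.01, 0.1, 1.0):
    print(alpha, np.round(Lasso(alpha=alpha).fit(X, y).coef_, 2))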
+
+\section{Elastic Net}
+
+Combination of the ridge and lasso regressions:
+\[
+\hat{\beta}_{\lambda_{1},\lambda_{2}}^{\text{en}} = \argmin_{\beta \in \RR[p]} \norm{Y-X\beta}^{2} + \lambda_{1} \norm{\beta}_{1} + \lambda_{2} \norm{\beta}_{2}^{2}
+\]
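A corresponding sketch, assuming scikit-learn, whose ElasticNet reparametrizes $(\lambda_{1}, \lambda_{2})$ as an overall strength alpha and a mixing weight l1_ratio:

import numpy as np
from sklearn.linear_model import ElasticNet

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 6))
y = 3 * X[:, 0] - 2 * X[:, 1] + rng.standard_normal(100)

# l1_ratio balances the two penalties: 1.0 is pure lasso, 0.0 is pure ridge.
model = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X, y)
print(model.coef_)   # intermediate behavior: shrinkage plus some exact zeros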
+
+\begin{remark}
+In the case of lasso, elastic net, or ridge regression, we can no longer perform statistical tests on the parameters.
+\end{remark}
@@ -6,5 +6,7 @@
 \newcommand{\X}{\ensuremath{\mathbf{X}}}
 \newcommand{\Y}{\ensuremath{\mathbf{Y}}}
 \newcommand{\Z}{\ensuremath{\mathbf{Z}}}
+\DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\argmin}{arg\,min}
 \usepackage{unicode-math}
 