feat: Addendum on cross validation
parent c552aa24f4
commit 7bcd3f9289
@ -11,8 +11,8 @@
}
}
\includechapters{part1}{1}
\includechapters{part1}{4}
\includechapters{part2}{2}
% \includechapters{part3}{1}
% \includechapters{part3}{1}

@ -1,15 +1,15 @@
\chapter{Tests Reminders}

\section{$\chi^2$ test of independence}
\section{\texorpdfstring{$\chi^2$}{chi2} test of independence}

\section{$\chi^2$ test of goodness of fit}
\section{\texorpdfstring{$\chi^2$}{chi2} test of goodness of fit}

Check whether the observations are consistent with a given distribution.

\begin{example}[Mendel experiments]
Let $AB$, $Ab$, $aB$, $ab$ be the four possible genotypes of peas, combining color and grain shape.
\begin{tabular}
\begin{tabular}{cccc}
\toprule
AB & Ab & aB & ab \\
\midrule

@ -20,6 +20,6 @@ Check whether the observations are consistent with a given distribution.

The test statistic is:
\[
D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \underoverset{H_0}{\mathcal{L}} \chi^2_{(n-1)(q-1)??}
D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \overunderset{\mathcal{L}}{H_0}{n \longrightarrow \infty} \chi^2_{k-1}
\]
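As a quick illustration (the counts below are Mendel's classic published values, supplied here only as an example; they may differ from the table used in these notes), the dihybrid cross predicts proportions $p = (9/16, 3/16, 3/16, 1/16)$ and the observed counts are $(315, 101, 108, 32)$ with $n = 556$:
\[
D_{4,556} = \frac{(315 - 312.75)^2}{312.75} + \frac{(101 - 104.25)^2}{104.25} + \frac{(108 - 104.25)^2}{104.25} + \frac{(32 - 34.75)^2}{34.75} \approx 0.47,
\]
which is far below the $95\%$ quantile of the $\chi^2_{3}$ distribution ($\approx 7.81$), so $H_0$ is not rejected.
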
@ -0,0 +1,125 @@
\chapter{Regularized regressions}

Let $\Y$ be a vector of observations and $\X$ a matrix of dimension $n \times (p+1)$.
Suppose the true model is:
\[
\Y = \X^{m^{*}} \beta^{m^{*}} + \varepsilon^{m^{*}} = \X^{*} \beta^{*} + \varepsilon^{*}.
\]
If $p$ is large compared to $n$:
\begin{itemize}
\item $\hat{\beta} = (\X^{T}\X)^{-1} \X^{T} \Y$ is not defined, since $\X^{T}\X$ is not invertible.

$m^{*}$ is the number of true predictors, that is, the number of predictors with non-zero coefficients.

\item
\item
\end{itemize}

\section{Ridge regression}

Instead of minimizing the mean squared error alone, we minimize the following regularized expression:
\[
\hat{\beta}^{\text{ridge}}_{\lambda} = \argmin_{\beta \in \RR[p]} \norm{Y - X \beta}^{2} + \lambda \sum_{j=1}^{p} \beta_{j}^{2}
\]
where $\lambda \geq 0$ is used to calibrate the regularization; it is a way to favor solutions with small parameter values.
The penalty
\[
\sum_{j=1}^{p} \beta_{j}^{2} = \norm{\beta}_{2}^{2}
\]
is the classical squared Euclidean norm of the parameter vector.
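
A standard complement, not written out in these notes: assuming the intercept is handled separately (for instance by centering the columns of $\X$), the ridge criterion above has an explicit minimizer, and adding $\lambda I_{p}$ makes the matrix $\X^{T}\X + \lambda I_{p}$ invertible even when $p > n$, which addresses the problem mentioned at the beginning of the chapter:
\[
\hat{\beta}^{\text{ridge}}_{\lambda} = (\X^{T}\X + \lambda I_{p})^{-1} \X^{T} \Y, \qquad \lambda > 0.
\]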

\section{Cross validation}

\subsection{Leave-one-out \textit{jackknife}}

\begin{example}
Let $\M_{0}$ be the model $Y_{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2}X_{2i} + \beta_{3} X_{3i} + \varepsilon_{i}$, with $n = 5$ observations.

In matrix form, the model is:
\[
\begin{pmatrix} y_{1} \\ y_{2} \\ y_{3} \\ y_{4} \\ y_{5} \end{pmatrix} =
\beta_{0} \begin{pmatrix} 1 \\ 1 \\ 1 \\ 1 \\ 1 \end{pmatrix}
+ \beta_{1} \begin{pmatrix} x_{11} \\ x_{12} \\ x_{13} \\ x_{14} \\ x_{15} \end{pmatrix}
+ \beta_{2} \begin{pmatrix} x_{21} \\ x_{22} \\ x_{23} \\ x_{24} \\ x_{25} \end{pmatrix}
+ \beta_{3} \begin{pmatrix} x_{31} \\ x_{32} \\ x_{33} \\ x_{34} \\ x_{35} \end{pmatrix}
+ \begin{pmatrix} \varepsilon_{1} \\ \varepsilon_{2} \\ \varepsilon_{3} \\ \varepsilon_{4} \\ \varepsilon_{5} \end{pmatrix}
\]
\def\x{$\times$}
Each row of the table below corresponds to one leave-one-out fit: the left-out observation is marked with a dot, and the observations used for fitting with \x.

\begin{tabular}{ccccc}
\toprule
1 & 2 & 3 & 4 & 5 \\
\midrule
. & \x & \x & \x & \x \\
\x & . & \x & \x & \x \\
\x & \x & . & \x & \x \\
\x & \x & \x & . & \x \\
\x & \x & \x & \x & . \\
\bottomrule
\end{tabular}
\end{example}

For each candidate value of $\lambda$, we fit the model on each dataset obtained by removing one observation, and evaluate the prediction error on the left-out observation.
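
Written as a criterion (a standard formulation, not stated explicitly in these notes), with $x_{i}$ the $i$-th row of $\X$ and $\hat{\beta}^{(-i)}_{\lambda}$ the estimate obtained without observation $i$:
\[
\mathrm{CV}_{\mathrm{loo}}(\lambda) = \frac{1}{n} \sum_{i=1}^{n} \left( y_{i} - x_{i}^{T} \hat{\beta}^{(-i)}_{\lambda} \right)^{2},
\]
and we keep the value of $\lambda$ that minimizes $\mathrm{CV}_{\mathrm{loo}}(\lambda)$.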

\subsection{K-fold cross-validation}

The observations are split into $K$ subsets, each subset serving once as the validation set: we will have as many tables as subsets.

We choose $\lambda$ such that the generalization error is the smallest.
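
A common way to write the corresponding criterion (a standard formulation, not taken from these notes), with $I_{1}, \dots, I_{K}$ the $K$ validation subsets and $\hat{\beta}^{(-I_{k})}_{\lambda}$ the estimate obtained without the observations of $I_{k}$:
\[
\mathrm{CV}_{K}(\lambda) = \frac{1}{K} \sum_{k=1}^{K} \frac{1}{\abs{I_{k}}} \sum_{i \in I_{k}} \left( y_{i} - x_{i}^{T} \hat{\beta}^{(-I_{k})}_{\lambda} \right)^{2}.
\]
Leave-one-out is the special case $K = n$.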

\section{Lasso regression}

The difference with the Ridge regression lies in the penalty:
\[
\hat{\beta}_{\lambda}^{\text{lasso}} = \argmin_{\beta \in \RR[p]} \norm{Y-X\beta}^{2} + \lambda \sum_{j=1}^{p} \abs{\beta_{j}}
\]
where
\[
\sum_{j=1}^{p} \abs{\beta_j} = \norm{\beta}_1
\]
is the $\ell_1$ norm of the parameter vector.

Instead of shrinking every parameter smoothly, the parameters enter the model one by one as the penalty decreases, and some parameters can be set exactly to $0$.

Lasso regression can therefore be used to perform variable selection.
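
To see why some coefficients are set exactly to zero, a standard special case (an extra assumption, not made in these notes) is the orthonormal design $\X^{T}\X = I_{p}$, for which the lasso solution is the soft-thresholded least squares estimate:
\[
\hat{\beta}^{\text{lasso}}_{\lambda, j} = \operatorname{sign}\bigl(\hat{\beta}^{\text{ols}}_{j}\bigr) \left( \abs{\hat{\beta}^{\text{ols}}_{j}} - \frac{\lambda}{2} \right)_{+},
\]
so every least squares coefficient smaller than $\lambda/2$ in absolute value is set to $0$, whereas ridge regression only rescales it by $1/(1+\lambda)$.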

We can use the same methods (K-fold and Leave-one-out) to select the $\lambda$ value.

\section{Elastic Net}

The elastic net combines the Ridge and Lasso penalties:
\[
\hat{\beta}_{\lambda_{1},\lambda_{2}}^{\text{en}} = \argmin_{\beta \in \RR[p]} \norm{Y-X\beta}^{2} + \lambda_{1} \norm{\beta}_{1} + \lambda_{2} \norm{\beta}_{2}^{2}
\]
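
As a sanity check (added here, but it follows directly from the definitions above), each penalty is recovered by switching the other off:
\[
\hat{\beta}^{\text{en}}_{0, \lambda_{2}} = \hat{\beta}^{\text{ridge}}_{\lambda_{2}}, \qquad \hat{\beta}^{\text{en}}_{\lambda_{1}, 0} = \hat{\beta}^{\text{lasso}}_{\lambda_{1}}.
\]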

\begin{remark}
In the case of Lasso, Elastic Net, or Ridge regression, we can no longer perform statistical tests on the parameters.
\end{remark}

@ -6,5 +6,7 @@
\newcommand{\X}{\ensuremath{\mathbf{X}}}
\newcommand{\Y}{\ensuremath{\mathbf{Y}}}
\newcommand{\Z}{\ensuremath{\mathbf{Z}}}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\usepackage{unicode-math}

@ -4,4 +4,4 @@
\usepackage{tikz-3dplot}
\usepackage{tkz-euclide}
\usepackage{nicematrix}
\usepackage{luacode}
\usepackage{luacode}