feat: Addendum on cross validation

parent c552aa24f4
commit 7bcd3f9289
@@ -11,7 +11,7 @@
 }
 }
 
-\includechapters{part1}{1}
+\includechapters{part1}{4}
 
 \includechapters{part2}{2}
 
@@ -1,15 +1,15 @@
 \chapter{Tests Reminders}
 
-\section{$\chi^2$ test of independence}
+\section{\texorpdfstring{$\chi^2$}{chi2} test of independence}
 
 
-\section{$\chi^2$ test of goodness of fit}
+\section{\texorpdfstring{$\chi^2$}{chi2} test of goodness of fit}
 
 Check whether the observations are consistent with a given distribution.
 
 \begin{example}[Mendel experiments]
 Let $AB$, $Ab$, $aB$, $ab$ be the four possible genotypes of peas: color and grain shape.
-\begin{tabular}
+\begin{tabular}{cccc}
 \toprule
 AB & Ab & aB & ab \\
 \midrule
@@ -20,6 +20,6 @@ Check whether the observations are consistent with a given distribution.
 
 The test statistic is:
 \[
-D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \underoverset{H_0}{\mathcal{L}} \chi^2_{(n-1)(q-1)??}
+D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \xrightarrow[n \to \infty]{\mathcal{L}} \chi^2_{k-1} \quad \text{under } H_0
 \]
 
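As a quick numeric check of this statistic (a minimal Python sketch, assuming numpy and scipy are available; the counts are Mendel's classical round/wrinkled x yellow/green pea data, whose theoretical ratio is 9:3:3:1):

import numpy as np
from scipy import stats

# Mendel's dihybrid pea counts for the four genotypes AB, Ab, aB, ab
observed = np.array([315, 101, 108, 32])
n = observed.sum()
p = np.array([9, 3, 3, 1]) / 16        # theoretical 9:3:3:1 probabilities
expected = n * p

# D = sum_i (N_i - n p_i)^2 / (n p_i), compared to chi^2 with k-1 df
D = ((observed - expected) ** 2 / expected).sum()
pvalue = stats.chi2.sf(D, df=len(observed) - 1)
print(D, pvalue)                       # D ~ 0.47: no evidence against 9:3:3:1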
@@ -0,0 +1,125 @@
+\chapter{Regularized regressions}
+
+Let $\Y$ be a vector of observations and $\X$ a matrix of dimension $n \times (p+1)$.
+Suppose the true model is:
+\[
+\Y = \X^{m^{*}} \beta^{m^{*}} + \varepsilon^{m^{*}} = \X^{*} \beta^{*} + \varepsilon^{*}.
+\]
+If $p$ is large compared to $n$:
+\begin{itemize}
+\item $\hat{\beta} = (\X^{T}\X)^{-1} \X^{T} \Y$ is not defined, as $\X^{T}\X$ is not invertible (see the sketch after this list).
+
+$m^{*}$ is the number of true predictors, that is, the number of predictors with non-zero coefficients.
+
+\item
+\item
+\end{itemize}
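A minimal numeric illustration of the first item, on synthetic Gaussian data (all names are illustrative):

import numpy as np

# When p > n, the Gram matrix X^T X has rank at most n, hence is singular.
rng = np.random.default_rng(0)
n, p = 20, 50
X = rng.standard_normal((n, p))
print(np.linalg.matrix_rank(X.T @ X))  # 20 < 50: X^T X is not invertible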
+
+\section{Ridge regression}
+
+Instead of minimizing the mean squared error alone, we minimize the following regularized expression:
+\[
+\hat{\beta}^{\text{ridge}}_{\lambda} = \argmin_{\beta \in \RR[p]} \norm{Y - X \beta}^{2} + \lambda \sum_{j=1}^{p} \beta_{j}^{2},
+\]
+where $\lambda$ calibrates the regularization; it is a way to favor solutions with small parameter values. Here
+\[
+\sum_{j=1}^{p} \beta_{j}^{2} = \norm{\beta}^{2}
+\]
+is the classical squared norm of the vector.
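For completeness, a standard result not stated in the notes: ignoring the unpenalized intercept, the ridge criterion has the closed-form minimizer
\[
\hat{\beta}^{\text{ridge}}_{\lambda} = (\X^{T}\X + \lambda I_{p})^{-1} \X^{T} \Y,
\]
and $\X^{T}\X + \lambda I_{p}$ is positive definite, hence invertible, for every $\lambda > 0$ even when $p > n$, which is exactly what fails for ordinary least squares.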
+
+\section{Cross validation}
+
+\subsection{Leave-one-out \textit{jackknife}}
+
+\begin{example}
+Let $\M_{0}$ be the model $Y_{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2} X_{2i} + \beta_{3} X_{3i}$.
+
+The model will be:
+\[
+\begin{pmatrix} y_{1} \\ y_{2} \\ y_{3} \\ y_{4} \\ y_{5} \end{pmatrix}
+= \beta_{0}
++ \beta_{1} \begin{pmatrix} x_{11} \\ x_{12} \\ x_{13} \\ x_{14} \\ x_{15} \end{pmatrix}
++ \beta_{2} \begin{pmatrix} x_{21} \\ x_{22} \\ x_{23} \\ x_{24} \\ x_{25} \end{pmatrix}
++ \beta_{3} \begin{pmatrix} x_{31} \\ x_{32} \\ x_{33} \\ x_{34} \\ x_{35} \end{pmatrix}
+\]
+Each row of the following table is one fold: the observation marked with a dot is left out, and the model is fitted on the observations marked with a cross.
+\def\x{$\times$}
+\begin{tabular}{ccccc}
+\toprule
+1 & 2 & 3 & 4 & 5 \\
+\midrule
+. & \x & \x & \x & \x \\
+\x & . & \x & \x & \x \\
+\x & \x & . & \x & \x \\
+\x & \x & \x & . & \x \\
+\x & \x & \x & \x & . \\
+\bottomrule
+\end{tabular}
+\end{example}
+
+We carry out the computation of $\lambda$ on each dataset with one observation left out.
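A sketch of this selection in Python (synthetic data, ridge fitted in closed form; all names are illustrative):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((30, 5))
y = X @ np.array([1.0, -2.0, 0.0, 0.0, 3.0]) + rng.standard_normal(30)

def loo_error(lam):
    """Mean squared prediction error over the n leave-one-out folds."""
    n, p = X.shape
    errors = []
    for i in range(n):
        keep = np.arange(n) != i                     # leave observation i out
        beta = np.linalg.solve(X[keep].T @ X[keep] + lam * np.eye(p),
                               X[keep].T @ y[keep])  # closed-form ridge fit
        errors.append((y[i] - X[i] @ beta) ** 2)     # predict the held-out point
    return np.mean(errors)

lambdas = np.logspace(-3, 3, 25)
best = min(lambdas, key=loo_error)                   # smallest estimated error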
+
+\subsection{K-fold cross-validation}
+
+The observations are split into $K$ subsets, so we will have as many tables as subsets.
+
+We choose $\lambda$ such that the estimated generalization error is the smallest.
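The K-fold variant of the same selection, as a sketch assuming scikit-learn is available (sklearn calls the penalty alpha rather than $\lambda$):

import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold

rng = np.random.default_rng(0)
X = rng.standard_normal((60, 8))
y = X[:, 0] - 2 * X[:, 1] + rng.standard_normal(60)

def kfold_error(alpha, K=5):
    errs = []
    for train, test in KFold(n_splits=K, shuffle=True, random_state=0).split(X):
        model = Ridge(alpha=alpha).fit(X[train], y[train])
        errs.append(np.mean((y[test] - model.predict(X[test])) ** 2))
    return np.mean(errs)                 # estimated generalization error

alphas = np.logspace(-3, 3, 25)
best = min(alphas, key=kfold_error)      # penalty with the smallest error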
+
+\section{Lasso regression}
+
+The difference with the ridge regression lies in the penalty:
+\[
+\hat{\beta}_{\lambda}^{\text{lasso}} = \argmin_{\beta \in \RR[p]} \norm{Y-X\beta}^{2} + \lambda \sum_{j=1}^{p} \abs{\beta_{j}},
+\]
+where $\sum_{j=1}^{p} \abs{\beta_j} = \norm{\beta}_1$.
+
+Instead of shrinking smoothly, the parameters enter the model one at a time as the penalty decreases, and some of them can be set exactly to 0.
+
+Lasso regression can therefore be used to perform variable selection.
+
+We can use the same methods (K-fold and leave-one-out) to select the value of $\lambda$.
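A small sketch of the variable-selection effect, again assuming scikit-learn:

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 6))
y = 3 * X[:, 0] - 2 * X[:, 1] + rng.standard_normal(100)  # 2 true predictors

# As alpha (sklearn's lambda) grows, more coefficients become exactly 0.
for alpha in (0.01, 0.1, 1.0):
    print(alpha, np.round(Lasso(alpha=alpha).fit(X, y).coef_, 2))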
+
+\section{Elastic Net}
+
+Combination of the ridge and lasso regressions:
+\[
+\hat{\beta}_{\lambda_{1},\lambda_{2}}^{\text{en}} = \argmin_{\beta \in \RR[p]} \norm{Y-X\beta}^{2} + \lambda_{1} \norm{\beta}_{1} + \lambda_{2} \norm{\beta}_{2}^{2}
+\]
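A corresponding sketch, assuming scikit-learn, whose ElasticNet reparametrizes $(\lambda_{1}, \lambda_{2})$ as an overall strength alpha and a mixing weight l1_ratio:

import numpy as np
from sklearn.linear_model import ElasticNet

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 6))
y = 3 * X[:, 0] - 2 * X[:, 1] + rng.standard_normal(100)

# l1_ratio balances the two penalties: 1.0 is pure lasso, 0.0 is pure ridge.
model = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X, y)
print(model.coef_)   # intermediate behavior: shrinkage plus some exact zeros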
+
+\begin{remark}
+In the case of lasso, elastic net, or ridge regression, we can no longer perform statistical tests on the parameters.
+\end{remark}
@@ -6,5 +6,7 @@
 \newcommand{\X}{\ensuremath{\mathbf{X}}}
 \newcommand{\Y}{\ensuremath{\mathbf{Y}}}
 \newcommand{\Z}{\ensuremath{\mathbf{Z}}}
+\DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\argmin}{arg\,min}
 \usepackage{unicode-math}
 