diff --git a/content/chapters/include.tex b/content/chapters/include.tex
index 07f5699..53c3f72 100755
--- a/content/chapters/include.tex
+++ b/content/chapters/include.tex
@@ -11,8 +11,8 @@
     }
 }
 
-\includechapters{part1}{1}
+\includechapters{part1}{4}
 
 \includechapters{part2}{2}
 
-% \includechapters{part3}{1}
\ No newline at end of file
+% \includechapters{part3}{1}
diff --git a/content/chapters/part1/3.tex b/content/chapters/part1/3.tex
index cc758aa..a780778 100755
--- a/content/chapters/part1/3.tex
+++ b/content/chapters/part1/3.tex
@@ -1,15 +1,15 @@
 \chapter{Tests Reminders}
 
-\section{$\chi^2$ test of independence}
+\section{\texorpdfstring{$\chi^2$}{chi2} test of independence}
 
-\section{$\chi^2$ test of goodness of fit}
+\section{\texorpdfstring{$\chi^2$}{chi2} test of goodness of fit}
 
 Check whether the observations are consistent with a given distribution.
 
 \begin{example}[Mendel experiments]
     Let $AB$, $Ab$, $aB$, $ab$ be the four possible genotypes of peas: color and grain shape.
 
-    \begin{tabular}
+    \begin{tabular}{cccc}
         \toprule
         AB & Ab & aB & ab \\
         \midrule
@@ -20,6 +20,6 @@ Check whether the observations are consistent with a given distribution.
 
 The test statistic is:
 \[
-    D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \underoverset{H_0}{\mathcal{L}} \chi^2_{(n-1)(q-1)??}
+    D_{k,n} = \sum_{i=1}^{k} \frac{(N_i - np_i)^2}{np_i} \overunderset{\mathcal{L}}{H_0,\, n \to \infty}{\longrightarrow} \chi^2_{k-1}
 \]
 
diff --git a/content/chapters/part1/4.tex b/content/chapters/part1/4.tex
index e69de29..6881527 100644
--- a/content/chapters/part1/4.tex
+++ b/content/chapters/part1/4.tex
@@ -0,0 +1,125 @@
+\chapter{Regularized regressions}
+
+
+Let $\Y$ be a vector of $n$ observations and $\X$ a matrix of dimension $n \times (p+1)$.
+Suppose the true model is:
+\[
+    \Y = \X^{m^{*}} \beta^{m^{*}} + \varepsilon^{m^{*}} = \X^{*} \beta^{*} + \varepsilon^{*},
+\]
+where $m^{*}$ is the number of true predictors, that is, the number of predictors with non-zero coefficients.
+If $p$ is large compared to $n$:
+\begin{itemize}
+    \item $\hat{\beta} = (\X^{T}\X)^{-1} \X^{T} \Y$ is not defined, since $\X^{T}\X$ is not invertible.
+\end{itemize}
+
+\section{Ridge regression}
+
+Instead of minimizing the mean square error alone, we minimize the following regularized expression:
+\[
+    \hat{\beta}^{\text{ridge}}_{\lambda} = \argmin_{\beta \in \RR[p]} \norm{\Y - \X \beta}^{2} + \lambda \sum_{j=1}^{p} \beta_{j}^{2},
+\]
+where $\lambda$ calibrates the regularization; it favors solutions with small parameter values.
+Here
+\[
+    \sum_{j=1}^{p} \beta_{j}^{2} = \norm{\beta}_{2}^{2}
+\]
+is the classical squared norm of the vector $\beta$.
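+
+\begin{remark}
+    A short sketch of why the penalty fixes the invertibility problem (assuming, for simplicity, that the variables are centered so that the intercept can be omitted and all $p$ coefficients are penalized): the ridge criterion admits the explicit minimizer
+    \[
+        \hat{\beta}^{\text{ridge}}_{\lambda} = (\X^{T}\X + \lambda I_{p})^{-1} \X^{T} \Y,
+    \]
+    which is well defined even when $\X^{T}\X$ is not invertible, since $\X^{T}\X + \lambda I_{p}$ is positive definite for every $\lambda > 0$.
+\end{remark}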
+
+
+\section{Cross validation}
+
+\subsection{Leave-one-out (\textit{jackknife})}
+
+\begin{example}
+    Let $\M_{0}$ be the model $Y_{i} = \beta_{0} + \beta_{1} X_{1i} + \beta_{2} X_{2i} + \beta_{3} X_{3i}$, observed for $i = 1, \dots, 5$.
+
+    In matrix form, the model is:
+    \[
+        \begin{pmatrix}
+            y_{1} \\
+            y_{2} \\
+            y_{3} \\
+            y_{4} \\
+            y_{5}
+        \end{pmatrix} =
+        \beta_{0} + \beta_{1} \begin{pmatrix}
+            x_{11} \\
+            x_{12} \\
+            x_{13} \\
+            x_{14} \\
+            x_{15}
+        \end{pmatrix} +
+        \beta_{2} \begin{pmatrix}
+            x_{21} \\
+            x_{22} \\
+            x_{23} \\
+            x_{24} \\
+            x_{25}
+        \end{pmatrix} +
+        \beta_{3} \begin{pmatrix}
+            x_{31} \\
+            x_{32} \\
+            x_{33} \\
+            x_{34} \\
+            x_{35}
+        \end{pmatrix}
+    \]
+    \def\x{$\times$}
+    \begin{tabular}{ccccc}
+        \toprule
+        1 & 2 & 3 & 4 & 5 \\
+        \midrule
+        . & \x & \x & \x & \x \\
+        \x & . & \x & \x & \x \\
+        \x & \x & . & \x & \x \\
+        \x & \x & \x & . & \x \\
+        \x & \x & \x & \x & . \\
+        \bottomrule
+    \end{tabular}
+\end{example}
+
+Each row of the table is one training set: the observation marked ``.'' is left out, the model is fitted for each candidate $\lambda$ on the remaining observations (marked $\times$), and the prediction error is evaluated on the left-out observation.
+
+
+\subsection{K-fold cross-validation}
+
+We will have as many training sets as folds: each fold is left out in turn and the model is fitted on the remaining folds.
+
+
+We choose $\lambda$ such that the estimated generalization error is the smallest.
+
+\section{Lasso regression}
+
+The difference with the Ridge regression lies in the penalty:
+
+\[
+    \hat{\beta}_{\lambda}^{\text{lasso}} = \argmin_{\beta \in \RR[p]} \norm{\Y - \X\beta}^{2} + \lambda \sum_{j=1}^{p} \abs{\beta_{j}},
+\]
+
+where $\sum_{j=1}^{p} \abs{\beta_{j}} = \norm{\beta}_{1}$ is the $\ell_{1}$ norm of $\beta$.
+
+Instead of all parameters shrinking smoothly as $\lambda$ varies, the parameters enter the model one after another along the regularization path, and some of them are set exactly to $0$.
+
+Lasso regression can therefore be used to perform variable selection.
+
+
+We can use the same methods (K-fold and leave-one-out) to select the value of $\lambda$.
+
+\section{Elastic Net}
+
+A combination of the Ridge and Lasso penalties:
+
+\[
+    \hat{\beta}_{\lambda_{1},\lambda_{2}}^{\text{en}} = \argmin_{\beta \in \RR[p]} \norm{\Y - \X\beta}^{2} + \lambda_{1} \norm{\beta}_{1} + \lambda_{2} \norm{\beta}_{2}^{2}
+\]
+
+
+\begin{remark}
+    In the case of Lasso, Elastic Net or Ridge regression, we can no longer perform statistical tests on the parameters.
+\end{remark}
diff --git a/definitions.tex b/definitions.tex
index b69346b..866ecb6 100755
--- a/definitions.tex
+++ b/definitions.tex
@@ -6,5 +6,7 @@
 \newcommand{\X}{\ensuremath{\mathbf{X}}}
 \newcommand{\Y}{\ensuremath{\mathbf{Y}}}
 \newcommand{\Z}{\ensuremath{\mathbf{Z}}}
+\DeclareMathOperator*{\argmax}{arg\,max}
+\DeclareMathOperator*{\argmin}{arg\,min}
 
 \usepackage{unicode-math}
diff --git a/main.pdf b/main.pdf
index f6e5d5f..9015570 100644
Binary files a/main.pdf and b/main.pdf differ
diff --git a/preamble.tex b/preamble.tex
index 4aed155..a29dcd9 100755
--- a/preamble.tex
+++ b/preamble.tex
@@ -4,4 +4,4 @@
 \usepackage{tikz-3dplot}
 \usepackage{tkz-euclide}
 \usepackage{nicematrix}
-\usepackage{luacode}
\ No newline at end of file
+\usepackage{luacode}
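A sketch of why the Lasso penalty in part1/4.tex sets some coefficients exactly to zero while the Ridge penalty only shrinks them, assuming for illustration an orthonormal design ($\X^{T}\X = I_{p}$, a simplification not made above): writing $\hat{\beta}^{\text{ols}} = \X^{T}\Y$ for the least-squares solution, both criteria separate coordinate by coordinate and are minimized by
\[
    \hat{\beta}^{\text{ridge}}_{\lambda,j} = \frac{\hat{\beta}^{\text{ols}}_{j}}{1 + \lambda},
    \qquad
    \hat{\beta}^{\text{lasso}}_{\lambda,j} = \operatorname{sign}\!\left(\hat{\beta}^{\text{ols}}_{j}\right) \left(\abs{\hat{\beta}^{\text{ols}}_{j}} - \frac{\lambda}{2}\right)_{+},
\]
where $(x)_{+} = \max(x, 0)$. Ridge rescales every coefficient, whereas Lasso sets to $0$ every coefficient with $\abs{\hat{\beta}^{\text{ols}}_{j}} \le \lambda/2$.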