\chapter{Linear Model}

\section{Simple Linear Regression}
\[
Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i
\]
\[
\Y = \X \beta + \varepsilon.
\]
\[
\begin{pmatrix}
Y_1 \\
Y_2 \\
\vdots \\
Y_n
\end{pmatrix}
=
\begin{pmatrix}
1 & X_1 \\
1 & X_2 \\
\vdots & \vdots \\
1 & X_n
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\vdots \\
\varepsilon_n
\end{pmatrix}
\]
\paragraph*{Assumptions}
\begin{enumerate}[label={\color{primary}{($A_\arabic*$)}}]
\item $\varepsilon_i$ are independent;
\item $\varepsilon_i$ are identically distributed;
\item $\varepsilon_i \sim \Norm(0, \sigma^2)$, with the same variance $\sigma^2$ for all $i$ (homoscedasticity).
\end{enumerate}
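As an illustration, here is a minimal sketch (in Python with NumPy, which is not part of the course material; the numerical values are hypothetical) that simulates data satisfying assumptions $(A_1)$--$(A_3)$.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical parameter values, chosen only for this illustration
beta0, beta1, sigma = 1.0, 2.0, 0.5
n = 100

X = rng.uniform(0, 10, size=n)         # explanatory variable X_i
eps = rng.normal(0.0, sigma, size=n)   # (A1)-(A3): i.i.d. N(0, sigma^2) errors
Y = beta0 + beta1 * X + eps            # Y_i = beta_0 + beta_1 X_i + eps_i
\end{verbatim}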
\section{Generalized Linear Model}
\[
g(\EE(Y)) = \X \beta
\]
with the link function $g$ being, for instance:
\begin{itemize}
\item Logistic regression: $g(v) = \log\left(\frac{v}{1 - v}\right)$, e.g.\ for Boolean (binary) responses;
\item Poisson regression: $g(v) = \log(v)$, e.g.\ for count (discrete) variables.
\end{itemize}
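To make the role of the link function concrete, here is a minimal sketch (Python with NumPy assumed; the function names are ours) of the two links above and of their inverses, which map the linear predictor $\X \beta$ back to the scale of $\EE(Y)$.
\begin{verbatim}
import numpy as np

def logit(v):
    """Logistic link g(v) = log(v / (1 - v)), for v in (0, 1)."""
    return np.log(v / (1.0 - v))

def inv_logit(eta):
    """Inverse logistic link: maps the linear predictor to a probability."""
    return 1.0 / (1.0 + np.exp(-eta))

def log_link(v):
    """Poisson link g(v) = log(v), for v > 0."""
    return np.log(v)

def inv_log(eta):
    """Inverse Poisson link: maps the linear predictor to a positive mean."""
    return np.exp(eta)

# g(E(Y)) = X beta  <=>  E(Y) = g^{-1}(X beta)
eta = np.array([-1.0, 0.0, 2.0])   # example values of the linear predictor
print(inv_logit(eta))              # probabilities in (0, 1)
print(inv_log(eta))                # positive Poisson means
\end{verbatim}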
\subsection{Penalized Regression}
When the number of explanatory variables is large, in particular when $p \gg n$ (where $p$ is the number of explanatory variables and $n$ the number of observations), the parameters cannot be estimated by ordinary least squares, since $\X^T \X$ is not invertible.
In order to estimate the parameters, we can add penalty terms to the least squares criterion: Lasso regression, Elastic Net, etc.
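For instance, under a common parameterization (given here only as an illustration, not as the course's notation), the Lasso and Elastic Net estimators minimize, over $\beta$, penalized criteria of the form
\[
\norm{\Y - \X \beta}^2 + \lambda \sum_{j=1}^{p} |\beta_j|
\quad \text{(Lasso)},
\qquad
\norm{\Y - \X \beta}^2 + \lambda \left( \alpha \sum_{j=1}^{p} |\beta_j| + (1 - \alpha) \sum_{j=1}^{p} \beta_j^2 \right)
\quad \text{(Elastic Net)},
\]
with tuning parameters $\lambda \geq 0$ and $\alpha \in [0, 1]$; the penalty shrinks the coefficients and keeps the problem well posed even when $p > n$.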
\subsection{Statistical Analysis Workflow}
\begin{enumerate}[label={\bfseries \color{primary} Step \arabic*.}]
\item Graphical representation;
\item ...
\end{enumerate}
The model
\[
\Y = \X \beta + \varepsilon,
\]
can equivalently be written (here with $n = 4$ observations and two explanatory variables) as
\[
\begin{pmatrix}
y_1 \\
y_2 \\
y_3 \\
y_4
\end{pmatrix}
=
\begin{pmatrix}
1 & x_{11} & x_{12} \\
1 & x_{21} & x_{22} \\
1 & x_{31} & x_{32} \\
1 & x_{41} & x_{42}
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1 \\
\beta_2
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\varepsilon_3 \\
\varepsilon_4
\end{pmatrix}.
\]
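As a short sketch (Python with NumPy assumed; the numerical values are hypothetical), such a design matrix, with its leading column of ones for the intercept, can be built as follows.
\begin{verbatim}
import numpy as np

# Hypothetical values of the two explanatory variables for the n = 4 observations
x1 = np.array([0.5, 1.2, 2.3, 3.1])
x2 = np.array([1.0, 0.7, 0.4, 0.9])

# Design matrix: a column of ones (intercept), then the two covariates
X = np.column_stack([np.ones(4), x1, x2])
print(X.shape)   # (4, 3): n = 4 rows, one column per coefficient beta_0, beta_1, beta_2
\end{verbatim}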
\section{Parameter Estimation}
\subsection{Simple Linear Regression}
\subsection{General Case}
If $\X^T \X$ is invertible, the OLS estimator is
\begin{equation}
\hat{\beta} = (\X^T \X)^{-1} \X^T \Y.
\end{equation}
\subsection{Ordinary Least Squares Algorithm}
We want to minimize the distance between $\X \beta$ and $\Y$:
\[
\min_{\beta} \norm{\Y - \X \beta}^2
\]
(See \autoref{ch:elements-of-linear-algebra}).
Let $V$ denote the subspace of $\RR[n]$ spanned by the columns of $\X$.
\begin{align*}
\Rightarrow & \X \hat{\beta} = \mathrm{proj}^{V} \Y \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\
\Rightarrow & \forall v \in V,\, v^T \Y = v^T \mathrm{proj}^{V}(\Y) \\
\Rightarrow & \text{for each column $\X_i$ of $\X$:} \quad \X_i^T \Y = \X_i^T \X \hat{\beta} \\
\Rightarrow & \X^T \Y = \X^T \X \hat{\beta} \\
\Rightarrow & {\color{gray} (\X^T \X)^{-1}} \X^T \Y = {\color{gray} (\X^T \X)^{-1}} (\X^T \X) \hat{\beta} \\
\Rightarrow & \hat{\beta} = (\X^T \X)^{-1} \X^T \Y
\end{align*}
This formula comes from the orthogonal projection of $\Y$ onto the subspace $V$ spanned by the explanatory variables $\X$:
$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
If $H$ is the projection matrix onto the subspace generated by $\X$, then $H \Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X \hat{\beta}$.
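A minimal numerical sketch of this formula (Python with NumPy assumed; the data are simulated only for the illustration):
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)
n = 50
# Simulated design matrix (intercept + two covariates) and response
X = np.column_stack([np.ones(n), rng.normal(size=(n, 2))])
beta = np.array([1.0, 2.0, -0.5])                 # hypothetical true coefficients
Y = X @ beta + rng.normal(scale=0.3, size=n)

# OLS estimator: beta_hat = (X^T X)^{-1} X^T Y, via the normal equations
beta_hat = np.linalg.solve(X.T @ X, X.T @ Y)
print(beta_hat)                                   # close to (1.0, 2.0, -0.5)

# Numerically preferable equivalent
beta_hat_lstsq, *_ = np.linalg.lstsq(X, Y, rcond=None)
\end{verbatim}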
\section{Sum of squares}
$\Y - \X \hat{\beta} \perp \X \hat{\beta} - \bar{\Y} \One$ if $\One \in V$, so
\[
\underbrace{\norm{\Y - \bar{\Y} \One}^2}_{\text{Total SS}} = \underbrace{\norm{\Y - \X \hat{\beta}}^2}_{\text{Residual SS}} + \underbrace{\norm{\X \hat{\beta} - \bar{\Y} \One}^2}_{\text{Explained SS}}
\]
\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}}
\begin{definition}[$R^2$]
\[
0 \leq R^2 = \frac{\norm{\X \hat{\beta} - \bar{\Y} \One}^2}{\norm{\Y - \bar{\Y} \One}^2} = 1 - \frac{\norm{\Y - \X \hat{\beta}}^2}{\norm{\Y - \bar{\Y} \One}^2} \leq 1
\]
$R^2$ is the proportion of the variation of $\Y$ explained by the model.
\end{definition}
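A short numerical check of the sum-of-squares decomposition and of the two expressions of $R^2$ (Python with NumPy assumed; simulated data):
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(2)
n = 50
X = np.column_stack([np.ones(n), rng.normal(size=n)])   # model with an intercept (1 in V)
Y = X @ np.array([1.0, 2.0]) + rng.normal(scale=0.5, size=n)

beta_hat = np.linalg.solve(X.T @ X, X.T @ Y)
Y_hat = X @ beta_hat
Y_bar = Y.mean()

tss = np.sum((Y - Y_bar) ** 2)        # total sum of squares
rss = np.sum((Y - Y_hat) ** 2)        # residual sum of squares
ess = np.sum((Y_hat - Y_bar) ** 2)    # explained sum of squares

print(np.isclose(tss, rss + ess))     # True: the decomposition holds
print(ess / tss, 1 - rss / tss)       # the two expressions of R^2 agree
\end{verbatim}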
\begin{figure}
\centering
\includestandalone{figures/schemes/orthogonal_projection}
\caption{Orthogonal projection of $\Y$ onto the plane spanned by $\X$. $\color{blue} a$ corresponds to $\norm{\X \hat{\beta} - \bar{\Y} \One}^2$, $\color{blue} b$ corresponds to $\norm{\hat{\varepsilon}}^2 = \norm{\Y - \X \hat{\beta}}^2$ and $\color{blue} c$ corresponds to $\norm{\Y - \bar{\Y} \One}^2$.}
\label{fig:scheme-orthogonal-projection}
\end{figure}
\begin{figure}
\centering
\includestandalone{figures/schemes/ordinary_least_squares}
\caption{Ordinary least squares and regression line with simulated data.}
\label{fig:ordinary-least-squares}
\end{figure}
\begin{definition}[Model dimension]
Let $\M$ be a model.
The dimension of $\M$ is the dimension of the subspace generated by $\X$, that is, the number of coefficients in the $\beta$ vector.
\textit{Nb.} The dimension of the model is not the total number of its parameters, as $\sigma^2$ is also a model parameter but is not counted in the dimension.
\end{definition}
\section{Gaussian vectors}
\begin{definition}[Normal distribution]
A real random variable $X$ follows a normal distribution $\Norm(m, \sigma^2)$ if it admits the density
\[
f(x) = \frac{1}{\sigma \sqrt{2\pi}} \exp\left(-\frac{(x - m)^2}{2\sigma^2}\right).
\]
\end{definition}
\begin{definition}[Gaussian vector]
A random vector $\Y \in \RR[n]$ is a Gaussian vector if every linear combination of its components follows a (univariate) normal distribution.
\end{definition}
\begin{property}
A Gaussian vector $\Y$ is characterized by its mean vector $m = \EE(\Y) = (m_1, \ldots, m_n)^T$, where $m_i = \EE(Y_i)$, and its variance-covariance matrix $\Sigma$; we write
\[
\Y \sim \Norm_n(m, \Sigma),
\]
where
\[
\Sigma = \EE\left[(\Y - m)(\Y - m)^T\right].
\]
\end{property}
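A quick numerical illustration (Python with NumPy assumed; the mean and covariance values are arbitrary): sampling a Gaussian vector and recovering $m$ and $\Sigma$ empirically.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(3)

m = np.array([0.0, 1.0])                      # mean vector
Sigma = np.array([[1.0, 0.8],
                  [0.8, 2.0]])                # variance-covariance matrix

# Rows of Y are independent draws of a Gaussian vector N_2(m, Sigma)
Y = rng.multivariate_normal(m, Sigma, size=100_000)

print(Y.mean(axis=0))                         # close to m
print(np.cov(Y, rowvar=False))                # close to Sigma
\end{verbatim}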
\begin{remark}
\[
\Cov(Y_i, Y_i) = \Var(Y_i)
\]
\end{remark}
\begin{definition}[Covariance]
\[
\Cov(Y_i, Y_j) = \EE\left((Y_i - \EE(Y_i))(Y_j - \EE(Y_j))\right)
\]
\end{definition}
When two variables are strongly related, the covariance is large in absolute value.
If two variables $X, Y$ are independent, then $\Cov(X, Y) = 0$ (the converse does not hold in general).
\begin{definition}[Correlation coefficient]
\[
\Cor(Y_i, Y_j) = \frac{\Cov(Y_i, Y_j)}{\sqrt{\Var(Y_i) \Var(Y_j)}} = \frac{\EE\left((Y_i - \EE(Y_i))(Y_j - \EE(Y_j))\right)}{\sqrt{\EE\left((Y_i - \EE(Y_i))^2\right) \EE\left((Y_j - \EE(Y_j))^2\right)}}
\]
\end{definition}
The covariance is very sensitive to the scale of the variables: if we measure a distance in millimetres, the covariance is larger than if the same distance is expressed in metres. The correlation coefficient, which is a normalized covariance, is therefore useful to compare values across scales.
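A small numerical check of this scale sensitivity (Python with NumPy assumed; simulated data): converting metres to millimetres inflates the covariance but leaves the correlation unchanged.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(4)
n = 1_000

x_m = rng.normal(size=n)                  # a distance measured in metres
y = 2.0 * x_m + rng.normal(size=n)        # a variable related to x
x_mm = 1_000.0 * x_m                      # the same distance in millimetres

print(np.cov(x_m, y)[0, 1], np.cov(x_mm, y)[0, 1])            # covariance scales by 1000
print(np.corrcoef(x_m, y)[0, 1], np.corrcoef(x_mm, y)[0, 1])  # correlation is unchanged
\end{verbatim}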
\begin{remark}
\begin{align*}
\Cov(Y_i, Y_i) & = \EE((Y_i - \EE(Y_i)) (Y_i - \EE(Y_i))) \\
& = \EE((Y_i - \EE(Y_i))^2) \\
& = \Var(Y_i)
\end{align*}
\end{remark}
\begin{equation}
\Sigma = \begin{pNiceMatrix}
\VVar(Y_1) & & & & \\
& \Ddots & & & \\
& \Cov(Y_i, Y_j) & \VVar(Y_i) & & \\
& & & \Ddots & \\
& & & & \VVar(Y_n)
\end{pNiceMatrix}
\end{equation}
\begin{definition}[Identity matrix]
\[
\mathcal{I}_n = \begin{pNiceMatrix}
1 & 0 & 0 \\
0 & \Ddots & 0 \\
0 & 0 & 1
\end{pNiceMatrix}
\]
\end{definition}
\begin{theorem}[Cochran Theorem (Consequence)]
Let $\mathbf{Z}$ be a Gaussian vector: $\mathbf{Z} \sim \Norm_n(0_n, I_n)$.
\begin{itemize}
\item If $V_1, V_2$ are orthogonal subspaces of $\RR[n]$ with dimensions $n_1, n_2$ such that
\[
\RR[n] = V_1 \overset{\perp}{\oplus} V_2,
\]
\item and if $Z_1, Z_2$ are the orthogonal projections of $\mathbf{Z}$ on $V_1$ and $V_2$, i.e.\ $Z_1 = \Pi_{V_1}(\mathbf{Z}) = \Pi_1 \mathbf{Z}$ and $Z_2 = \Pi_{V_2}(\mathbf{Z}) = \Pi_2 \mathbf{Z}$,
\item then $Z_1$ and $Z_2$ are independent, with $\norm{Z_1}^2 \sim \chi^2_{n_1}$ and $\norm{Z_2}^2 \sim \chi^2_{n_2}$
(\textcolor{red}{see the slides}).
\end{itemize}
\end{theorem}
\begin{definition}[$\chi^2$ distribution]
If $X_1, \ldots, X_n$ are i.i.d.\ $\sim \Norm(0, 1)$, then
\[
X_1^2 + \ldots + X_n^2 \sim \chi_n^2
\]
\end{definition}
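A quick empirical check (Python with NumPy assumed): the sum of $n$ squared independent $\Norm(0, 1)$ variables has mean $n$ and variance $2n$, as expected for a $\chi^2_n$ distribution.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(5)
n = 5

Z = rng.normal(size=(100_000, n))    # many draws of (X_1, ..., X_n), i.i.d. N(0, 1)
S = (Z ** 2).sum(axis=1)             # X_1^2 + ... + X_n^2 for each draw

print(S.mean(), S.var())             # close to n = 5 and 2n = 10, the chi^2_n moments
\end{verbatim}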
\subsection{Estimator's properties}
The orthogonal projection matrix onto $V$, the subspace spanned by the columns of $\X$, is
\[
\Pi_V = \X (\X^T \X)^{-1} \X^T,
\]
so
\begin{align*}
\hat{m} = \X \hat{\beta} & = \X (\X^T \X)^{-1} \X^T \Y \\
& = \Pi_V \Y.
\end{align*}
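As a sketch (Python with NumPy assumed; simulated data), $\Pi_V$ can be computed explicitly and checked to be symmetric and idempotent, with $\Pi_V \Y = \X \hat{\beta}$.
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(6)
n = 30
X = np.column_stack([np.ones(n), rng.normal(size=(n, 2))])
Y = X @ np.array([1.0, 2.0, -0.5]) + rng.normal(size=n)

Pi_V = X @ np.linalg.inv(X.T @ X) @ X.T        # projection matrix onto V = span(X)
beta_hat = np.linalg.solve(X.T @ X, X.T @ Y)

print(np.allclose(Pi_V, Pi_V.T))               # symmetric
print(np.allclose(Pi_V @ Pi_V, Pi_V))          # idempotent: projecting twice changes nothing
print(np.allclose(Pi_V @ Y, X @ beta_hat))     # Pi_V Y equals the fitted values m_hat
\end{verbatim}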
According to the Cochran theorem, we can deduce that the estimator of the predicted value, $\hat{m}$, is independent of $\hat{\sigma}^2$.
All the sums of squares follow $\chi^2$ distributions:
\[
...
\]
\begin{property}
\end{property}
\subsection{Estimators' consistency}
If $q < n$,
\begin{itemize}
\item $\hat{\sigma}^2 \overunderset{\PP}{n \to \infty}{\longrightarrow} \sigma^{*2}$;
\item if $(\X^T \X)^{-1}$ ...
\item ...
\end{itemize}
We can derive statistical tests from these properties.
\section{Statistical tests}
\subsection{Student $t$-test}
\[
\frac{\hat{\theta} - \theta}{\sqrt{\frac{\widehat{\VVar}(\hat{\theta})}{n}}} \underset{H_0}{\sim} t
\]
where