\section { Generalized Linear Model}
\[
g(\EE (Y)) = X \beta
\]
with $ g $ the link function, for instance:
\begin { itemize}
\item Logistic regression: $ g(v) = \log \left( \frac{v}{1 - v} \right) $, for instance for binary (boolean) responses,
\item Poisson regression: $ g(v) = \log(v) $, for instance for count (discrete) variables.
\end { itemize}
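For example (an illustration not detailed in these notes), with the logistic link and a single explanatory variable the model reads
\[
\log \left( \frac{\EE(Y)}{1 - \EE(Y)} \right) = \beta_0 + \beta_1 X
\qquad \Longleftrightarrow \qquad
\EE(Y) = \frac{e^{\beta_0 + \beta_1 X}}{1 + e^{\beta_0 + \beta_1 X}} ,
\]
so the linear predictor is mapped back to a probability in $ (0, 1) $.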
\subsection { Penalized Regression}
When the number of variables is large, e.g.\ when the number of explanatory variables $ p $ exceeds the number of observations $ n $ ($ p \gg n $), we cannot estimate the parameters: in the linear model, $ \X^\T \X $ is then not invertible.
In order to estimate the parameters, we can add penalties (additional terms) to the estimation criterion: Lasso regression, Elastic Net, etc.
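For illustration (this standard criterion is not spelled out in the notes), the Lasso estimator minimizes the least-squares criterion with an added $ \ell_1 $ penalty,
\[
\norm{\Y - \X \beta}^2 + \lambda \sum_{j=1}^{p} \lvert \beta_j \rvert , \qquad \lambda > 0,
\]
where $ \lambda $ tunes the strength of the penalty; Elastic Net combines this $ \ell_1 $ penalty with an additional squared $ \ell_2 $ penalty on $ \beta $.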
\subsection { Simple Linear Model}
\begin { align*}
\Y & = \X \beta + \varepsilon \\
\begin { pmatrix}
Y_ 1 \\
Y_ 2 \\
\vdots \\
Y_ n
\end { pmatrix}
& = \begin { pmatrix}
1 & X_ 1 \\
1 & X_ 2 \\
\vdots & \vdots \\
1 & X_ n
\end { pmatrix}
\begin { pmatrix}
\beta _ 0 \\
\beta _ 1
\end { pmatrix}
+
\begin { pmatrix}
\varepsilon _ 1 \\
\varepsilon _ 2 \\
\vdots \\
\varepsilon _ n
\end { pmatrix}
\end { align*}
\subsection { Assumptions}
\begin { itemize}
\item
\end { itemize}
\subsection { Statistical Analysis Workflow}
\begin { enumerate} [label={ \bfseries \color { primary} Step \arabic * .} ]
\item Graphical representation;
\item ...
\end { enumerate}
For example, with $ n = 4 $ observations and $ p = 2 $ explanatory variables, the model
\[
\Y = \X \beta + \varepsilon ,
\]
can be written equivalently as
\[
\begin { pmatrix}
y_ 1 \\
y_ 2 \\
y_ 3 \\
y_ 4
\end { pmatrix}
= \begin { pmatrix}
1 & x_ { 11} & x_ { 12} \\
1 & x_ { 21} & x_ { 22} \\
1 & x_ { 31} & x_ { 32} \\
1 & x_ { 41} & x_ { 42}
\end { pmatrix}
\begin { pmatrix}
\beta _ 0 \\
\beta _ 1 \\
\beta _ 2
\end { pmatrix} +
\begin { pmatrix}
\varepsilon _ 1 \\
\varepsilon _ 2 \\
\varepsilon _ 3 \\
\varepsilon _ 4
\end { pmatrix} .
\]
\section { Parameter Estimation}
\subsection { Simple Linear Regression}
\subsection { General Case}
If $ \X ^ \T \X $ is invertible, the OLS estimator is:
\begin { equation}
\hat { \beta } = (\X ^ \T \X )^ { -1} \X ^ \T \Y
\end { equation}
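A minimal numerical sketch of this formula (not part of the notes; it assumes NumPy and uses hypothetical simulated values):
\begin{verbatim}
import numpy as np

# Simulated example: y = 1 + 2*x + noise (hypothetical values)
rng = np.random.default_rng(0)
n = 50
x = rng.normal(size=n)
X = np.column_stack([np.ones(n), x])   # design matrix with an intercept column
y = X @ np.array([1.0, 2.0]) + rng.normal(scale=0.5, size=n)

# OLS estimator: beta_hat = (X^T X)^{-1} X^T y,
# computed by solving the normal equations instead of inverting X^T X
beta_hat = np.linalg.solve(X.T @ X, X.T @ y)
print(beta_hat)  # approximately [1, 2]
\end{verbatim}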
\subsection{Ordinary Least Squares Algorithm}
We want to minimize the distance between $ \X \beta $ and $ \Y $ :
\[
\min_{\beta} \norm{\Y - \X \beta}^2
\]
(See \autoref { ch:elements-of-linear-algebra} ).
\begin{align*}
\Rightarrow & \X \hat{\beta} = \operatorname{proj}^{W} \Y \qquad \text{where $ W $ is the subspace spanned by the columns of $ \X $ (intercept included)} \\
\Rightarrow & \forall v \in W : \, v^\T \Y = v^\T \operatorname{proj}^{W}(\Y) \\
\Rightarrow & \forall i : \, \X_i^\T \Y = \X_i^\T \X \hat{\beta} \qquad \text{where $ \X_i $ is the $ i $-th column of $ \X $ and $ \hat{\beta} $ is the estimator of $ \beta $} \\
\Rightarrow & \X^\T \Y = \X^\T \X \hat{\beta} \\
\Rightarrow & {\color{gray} (\X^\T \X)^{-1}} \X^\T \Y = {\color{gray} (\X^\T \X)^{-1}} (\X^\T \X) \hat{\beta} \\
\Rightarrow & \hat{\beta} = (\X^\T \X)^{-1} \X^\T \Y
\end{align*}
This formula comes from the orthogonal projection of $ \Y $ onto the subspace defined by the explanatory variables $ \X $:
$ \X \hat{\beta} $ is the closest point to $ \Y $ in the subspace generated by $ \X $.
If $ H $ is the projection matrix onto the subspace generated by $ \X $, then $ H \Y $ is the projection of $ \Y $ onto this subspace, which corresponds to $ \X \hat{\beta} $.
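Explicitly, combining this with the formula for $ \hat{\beta} $ above gives
\[
H = \X (\X^\T \X)^{-1} \X^\T , \qquad H \Y = \X (\X^\T \X)^{-1} \X^\T \Y = \X \hat{\beta} .
\]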
\section { Coefficient of Determination: \texorpdfstring { $ R ^ 2 $ } { R\textsuperscript { 2} } }
\begin { definition} [$ R ^ 2 $ ]
\[
0 \leq R^ 2 = \frac { \norm { \X \hat { \beta } - \bar { \Y } \One } ^ 2} { \norm { \Y - \bar { \Y } \One } ^ 2} = 1 - \frac { \norm { \Y - \X \hat { \beta } } ^ 2} { \norm { \Y - \bar { \Y } \One } ^ 2} \leq 1
\]
It is the proportion of the variation of $ \Y $ explained by the model.
\end { definition}
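The two expressions for $ R^2 $ coincide thanks to the orthogonal decomposition (Pythagoras' theorem applied to the projection, valid here since the model contains an intercept):
\[
\norm{\Y - \bar{\Y} \One}^2 = \norm{\X \hat{\beta} - \bar{\Y} \One}^2 + \norm{\Y - \X \hat{\beta}}^2 ,
\]
which also shows that $ 0 \leq R^2 \leq 1 $.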
\begin { figure}
\centering
\includestandalone{figures/schemes/orthogonal_projection}
\caption{Orthogonal projection of $ \Y $ onto the plane generated by the basis described by $ \X $. $ \color{blue} a $ corresponds to $ \norm{\X \hat{\beta} - \bar{\Y} \One}^2 $ and $ \color{blue} b $ corresponds to $ \norm{\Y - \X \hat{\beta}}^2 $.}
\label { fig:scheme-orthogonal-projection}
\end { figure}