\section{Generalized Linear Model}
\[
g(\EE(\Y)) = \X \beta
\]
where the link function $g$ can be, for instance,
\begin{itemize}
\item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, e.g.\ for binary (Boolean) responses (worked out below);
\item Poisson regression: $g(v) = \log(v)$, e.g.\ for count (discrete) variables.
\end{itemize}
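As a worked example for the logistic case, writing $p = \EE(Y)$ for a binary response and inverting the link gives the usual sigmoid form:
\[
\log\left(\frac{p}{1-p}\right) = \X\beta
\quad\Longleftrightarrow\quad
p = \frac{\exp(\X\beta)}{1 + \exp(\X\beta)}.
\]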
\subsection{Penalized Regression}
When the number of variables is large, e.g.\ when the number of explanatory variables exceeds the number of observations ($p \gg n$, with $p$ the number of explanatory variables and $n$ the number of observations), the parameters cannot be estimated by ordinary least squares.
In order to estimate them, we add a penalty (an additional term) to the least squares criterion: Lasso regression, Elastic Net, etc.\ (see the criteria below).
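As an illustration (the notation $\lambda$ and $\alpha$ below is not fixed in these notes), these methods add a penalty to the least squares criterion; with a tuning parameter $\lambda > 0$ and a mixing weight $\alpha \in [0, 1]$, the Lasso and Elastic Net estimators can be written as
\begin{align*}
\hat{\beta}^{\text{lasso}} &= \arg\min_{\beta} \norm{\Y - \X\beta}^2 + \lambda \norm{\beta}_1,\\
\hat{\beta}^{\text{EN}} &= \arg\min_{\beta} \norm{\Y - \X\beta}^2 + \lambda \left( \alpha \norm{\beta}_1 + (1 - \alpha) \norm{\beta}_2^2 \right).
\end{align*}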
\subsection{Simple Linear Model}
\begin{align*}
\underbrace{\Y}_{n \times 1} &= \underbrace{\X}_{n \times 2} \,\underbrace{\beta}_{2 \times 1} + \underbrace{\varepsilon}_{n \times 1}\\
\begin{pmatrix}
Y_1 \\
Y_2 \\
\vdots \\
Y_n
\end{pmatrix}
&= \begin{pmatrix}
1 & X_1 \\
1 & X_2 \\
\vdots & \vdots \\
1 & X_n
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\vdots \\
\varepsilon_n
\end{pmatrix}.
\end{align*}
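Row by row, this matrix form is equivalent to writing, for each observation $i$,
\[
Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i, \qquad i = 1, \dots, n.
\]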
\subsection{Assumptions}
The classical assumptions on the error terms of the linear model are:
\begin{itemize}
\item centred errors: $\EE(\varepsilon_i) = 0$ for all $i$;
\item homoscedasticity: $\operatorname{Var}(\varepsilon_i) = \sigma^2$ for all $i$;
\item independent (or at least uncorrelated) errors;
\item for inference, Gaussian errors: $\varepsilon_i \sim \mathcal{N}(0, \sigma^2)$.
\end{itemize}
\subsection{Statistical Analysis Workflow}
\begin{enumerate}[label={\bfseries\color{primary}Step \arabic*.}]
\item Graphical representation;
\item ...
\end{enumerate}
\section{Parameter Estimation}
\subsection{Simple Linear Regression}
\subsection{General Case}
If $\X^\T\X$ is invertible, the OLS estimator is:
\begin{equation}
\hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
\end{equation}
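As a quick check (assuming a fixed design and centred errors, i.e.\ $\Y = \X\beta + \varepsilon$ with $\EE(\varepsilon) = 0$), this estimator is unbiased:
\begin{align*}
\hat{\beta} &= (\X^\T\X)^{-1}\X^\T(\X\beta + \varepsilon) = \beta + (\X^\T\X)^{-1}\X^\T\varepsilon,\\
\EE(\hat{\beta}) &= \beta + (\X^\T\X)^{-1}\X^\T\, \EE(\varepsilon) = \beta.
\end{align*}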
\subsection{Ordinary Least Squares Algorithm}
We want to minimize the distance between $\X\beta$ and $\Y$:
\[
\min_{\beta} \norm{\Y - \X\beta}^2
\]
(See \autoref{ch:elements-of-linear-algebra}).
\begin{align*}
\Rightarrow\;& \X \hat{\beta} = \mathrm{proj}^{W}(\Y), \qquad \text{where $W$ is the subspace spanned by the columns of $\X$,}\\
\Rightarrow\;& \forall v \in W,\ v^\T \Y = v^\T \mathrm{proj}^{W}(\Y),\\
\Rightarrow\;& \forall i:\ \X_i^\T \Y = \X_i^\T \X\hat{\beta}, \qquad \text{where $\X_i$ is the $i$-th column of $\X$ and $\hat{\beta}$ is the estimator of $\beta$,}\\
\Rightarrow\;& \X^\T \Y = \X^\T \X \hat{\beta},\\
\Rightarrow\;& {\color{red}(\X^\T \X)^{-1}} \X^\T \Y = {\color{red}(\X^\T \X)^{-1}} (\X^\T\X) \hat{\beta},\\
\Rightarrow\;& \hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y.
\end{align*}
This formula comes from the orthogonal projection of $\Y$ onto the subspace defined by the explanatory variables $\X$:
$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
If $H$ is the projection matrix onto the subspace generated by $\X$, then $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
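Explicitly, combining the two previous results, the projection (hat) matrix is
\[
H = \X(\X^\T\X)^{-1}\X^\T, \qquad \hat{\Y} = H\Y = \X\hat{\beta},
\]
and $H$ is symmetric and idempotent ($H^2 = H$), as any orthogonal projection matrix.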
\section{Coefficient of Determination: $R^2$}
\begin{definition}[$R^2$]
\[
0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
\]
The $R^2$ is the proportion of the variation of $\Y$ explained by the model.
\end{definition}
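The two expressions in the definition coincide because, when the model contains an intercept, the residual $\Y - \X\hat{\beta}$ is orthogonal to the subspace spanned by the columns of $\X$ (which contains $\bar{\Y}\One$), so Pythagoras' theorem gives
\[
\norm{\Y - \bar{\Y}\One}^2 = \norm{\X\hat{\beta} - \bar{\Y}\One}^2 + \norm{\Y - \X\hat{\beta}}^2.
\]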