\chapter{Linear Model}
\section{Simple Linear Regression}

For $i = 1, \dots, n$,
\[
Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i,
\]
or, in matrix notation,
\[
\Y = \X \beta + \varepsilon,
\]
that is,
\[
\begin{pmatrix}
Y_1 \\
Y_2 \\
\vdots \\
Y_n
\end{pmatrix}
=
\begin{pmatrix}
1 & X_1 \\
1 & X_2 \\
\vdots & \vdots \\
1 & X_n
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\vdots \\
\varepsilon_n
\end{pmatrix}.
\]
\paragraph*{Assumptions}

\begin{enumerate}[label={\color{primary}{($A_\arabic*$)}}]
\item $\varepsilon_i$ are independent;
\item $\varepsilon_i$ are identically distributed;
\item $\varepsilon_i$ are i.i.d.\ $\sim \Norm(0, \sigma^2)$; in particular, the variance $\sigma^2$ is constant (homoscedasticity).
\end{enumerate}
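As an illustration, here is a minimal Python/NumPy sketch that simulates data satisfying $(A_1)$--$(A_3)$ and fits the model by least squares; the sample size, the true coefficients, and the noise level are arbitrary values chosen for the example.

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
n = 100                               # number of observations (arbitrary)
beta0, beta1, sigma = 1.0, 2.0, 0.5   # true parameters (arbitrary)

x = rng.uniform(0.0, 10.0, size=n)    # explanatory variable
eps = rng.normal(0.0, sigma, size=n)  # i.i.d. N(0, sigma^2) errors
y = beta0 + beta1 * x + eps           # response generated by the model

X = np.column_stack([np.ones(n), x])  # design matrix with intercept column
beta_hat, *_ = np.linalg.lstsq(X, y, rcond=None)
print(beta_hat)                       # estimates of (beta0, beta1)
\end{verbatim}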
\section{Generalized Linear Model}

\[
g(\EE(Y)) = X \beta,
\]
with $g$ the link function, for instance:
\begin{itemize}
\item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for a binary response;
\item Poisson regression: $g(v) = \log(v)$, for instance for count data.
\end{itemize}
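For instance, in the logistic case, writing $p = \EE(Y)$ for a binary response, the link can be inverted to recover the mean from the linear predictor:
\[
\log\left(\frac{p}{1-p}\right) = X\beta
\quad \Longleftrightarrow \quad
p = \frac{1}{1 + e^{-X\beta}}.
\]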
\subsection{Penalized Regression}

When the number of explanatory variables is large, in particular when it exceeds the number of observations ($p \gg n$, with $p$ the number of explanatory variables and $n$ the number of observations), the parameters cannot be estimated by ordinary least squares.

To estimate the parameters nonetheless, we add a penalty (an additional term) to the least-squares criterion.

Common penalized estimators include Lasso regression and the Elastic Net; their criteria are sketched below.
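As an illustration (the tuning parameter $\lambda \geq 0$ and the mixing weight $\alpha \in [0, 1]$ are notation introduced here; other parameterizations exist), one common way to write these criteria is
\[
\min_{\beta} \; \norm{\Y - \X\beta}^2 + \lambda \norm{\beta}_1
\qquad \text{(Lasso)},
\]
\[
\min_{\beta} \; \norm{\Y - \X\beta}^2 + \lambda \left( \alpha \norm{\beta}_1 + (1 - \alpha) \norm{\beta}_2^2 \right)
\qquad \text{(Elastic Net)},
\]
where $\norm{\beta}_1 = \sum_j |\beta_j|$ is the $\ell_1$ norm.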
\subsection{Statistical Analysis Workflow}

\begin{enumerate}[label={\bfseries\color{primary}Step \arabic*.}]
\item Graphical representation;
\item ...
\end{enumerate}
For instance, with $n = 4$ observations and two explanatory variables, the model
\[
\Y = \X \beta + \varepsilon
\]
is written equivalently as
\[
\begin{pmatrix}
y_1 \\
y_2 \\
y_3 \\
y_4
\end{pmatrix}
=
\begin{pmatrix}
1 & x_{11} & x_{12} \\
1 & x_{21} & x_{22} \\
1 & x_{31} & x_{32} \\
1 & x_{41} & x_{42}
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1 \\
\beta_2
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\varepsilon_3 \\
\varepsilon_4
\end{pmatrix}.
\]
\section{Parameter Estimation}

\subsection{Simple Linear Regression}
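For the simple model $Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i$, minimizing the least-squares criterion over $(\beta_0, \beta_1)$ gives the classical estimators (a special case of the general formula below):
\[
\hat{\beta}_1 = \frac{\sum_{i=1}^n (X_i - \bar{X})(Y_i - \bar{Y})}{\sum_{i=1}^n (X_i - \bar{X})^2},
\qquad
\hat{\beta}_0 = \bar{Y} - \hat{\beta}_1 \bar{X}.
\]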
\subsection{General Case}

If $\X^T\X$ is invertible, the OLS estimator is:
\begin{equation}
\hat{\beta} = (\X^T\X)^{-1} \X^T \Y
\end{equation}
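A minimal NumPy sketch of this computation (the data are simulated here purely for illustration) solves the normal equations $\X^T\X \hat{\beta} = \X^T \Y$ rather than inverting $\X^T\X$ explicitly, which is numerically preferable:

\begin{verbatim}
import numpy as np

rng = np.random.default_rng(1)
n, p = 50, 3                             # arbitrary sizes for the example
X = np.column_stack([np.ones(n), rng.normal(size=(n, p - 1))])
beta_true = np.array([1.0, -2.0, 0.5])   # arbitrary true coefficients
y = X @ beta_true + rng.normal(scale=0.3, size=n)

# Solve (X^T X) beta_hat = X^T y, assuming X^T X is invertible
beta_hat = np.linalg.solve(X.T @ X, X.T @ y)
print(beta_hat)
\end{verbatim}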
\subsection{Ordinary Least Squares Algorithm}

We want to minimize the distance between $\X\beta$ and $\Y$:
\[
\min_{\beta} \norm{\Y - \X\beta}^2.
\]
Writing $W$ for the subspace generated by the columns of $\X$ (see \autoref{ch:elements-of-linear-algebra}):
\begin{align*}
\Rightarrow& \X \hat{\beta} = \mathrm{proj}^{W}(\Y)\\
\Rightarrow& \forall v \in W,\ v^T \Y = v^T\, \mathrm{proj}^{W}(\Y)\\
\Rightarrow& \forall i: \\
& \X_i^T \Y = \X_i^T \X\hat{\beta} \qquad \text{where $\X_i$ is the $i$-th column of $\X$ and $\hat{\beta}$ is the estimator of $\beta$} \\
\Rightarrow& \X^T \Y = \X^T \X \hat{\beta} \\
\Rightarrow& {\color{gray}(\X^T \X)^{-1}} \X^T \Y = {\color{gray}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\
\Rightarrow& \hat{\beta} = (\X^T\X)^{-1} \X^T \Y
\end{align*}
This formula comes from the orthogonal projection of $\Y$ onto the vector subspace generated by the explanatory variables $\X$: $\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.

If $H$ is the projection matrix onto the subspace generated by $\X$, then $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
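Under the invertibility assumption above, this projection matrix has the standard explicit form
\[
H = \X (\X^T \X)^{-1} \X^T,
\qquad
H \Y = \X \hat{\beta},
\]
which is why $H$ is often called the hat matrix.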
\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}}

\begin{definition}[$R^2$]
\[
0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
\]
It is the proportion of the variation of $\Y$ explained by the model.
\end{definition}
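The two expressions coincide because, when the model contains an intercept, the orthogonality of the projection gives the decomposition (Pythagoras)
\[
\norm{\Y - \bar{\Y}\One}^2
= \norm{\X\hat{\beta} - \bar{\Y}\One}^2
+ \norm{\Y - \X\hat{\beta}}^2.
\]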
\begin{figure}
\centering
\includestandalone{figures/schemes/orthogonal_projection}
\caption{Orthogonal projection of $\Y$ onto the plane generated by the basis described by $\X$. ${\color{blue}a}$ corresponds to $\norm{\X\hat{\beta} - \bar{\Y}\One}^2$ and ${\color{blue}b}$ corresponds to $\norm{\Y - \X\hat{\beta}}^2$.}
\label{fig:scheme-orthogonal-projection}
\end{figure}
\begin{figure}
\centering
\includestandalone{figures/schemes/ordinary_least_squares}
\caption{Ordinary least squares and regression line with simulated data.}
\label{fig:ordinary-least-squares}
\end{figure}