fix: Amend the orthogonal projection scheme
This commit is contained in:
parent
b7f323419d
commit
43acae64f3
|
@ -1 +1,7 @@
|
||||||
main.pdf filter=lfs diff=lfs merge=lfs -text
|
main.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
figures/schemes/regression_plan_3D.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
figures/schemes/vector_orthogonality.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
figures/schemes/base_plan.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
figures/schemes/coordinates_systems.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
figures/schemes/ordinary_least_squares.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
figures/schemes/orthogonal_projection.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
|
|
@ -129,6 +129,13 @@ $\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
|
||||||
|
|
||||||
If $H$ is the projection matrix onto the subspace generated by $\X$, $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
|
If $H$ is the projection matrix onto the subspace generated by $\X$, $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
|
||||||
|
|
||||||
|
\section{Sum of squares}
|
||||||
|
|
||||||
|
$\Y - \X \hat{\beta} \perp \X \hat{\beta} - \bar{\Y} \One$ if $\One \in V$, so
|
||||||
|
\[
|
||||||
|
\underbrace{\norm{\Y - \bar{\Y}\One}^2}_{\text{Total SS}} = \underbrace{\norm{\Y - \X \hat{\beta}}^2}_{\text{Residual SS}} + \underbrace{\norm{\X \hat{\beta} - \bar{\Y} \One}^2}_{\text{Explained SS}}
|
||||||
|
\]
|
||||||
|
|
||||||
\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}}
|
\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}}
|
||||||
\begin{definition}[$R^2$]
|
\begin{definition}[$R^2$]
|
||||||
\[
|
\[
|
||||||
|
@ -139,7 +146,7 @@ If $H$ is the projection matrix of the subspace generated by $\X$, $X\Y$ is the
|
||||||
\begin{figure}
|
\begin{figure}
|
||||||
\centering
|
\centering
|
||||||
\includestandalone{figures/schemes/orthogonal_projection}
|
\includestandalone{figures/schemes/orthogonal_projection}
|
||||||
\caption{Orthogonal projection of $\Y$ on plan generated by the base described by $\X$. $\color{blue}a$ corresponds to $\norm{\X\hat{\beta} - \bar{\Y}}^2$ and $\color{blue}b$ corresponds to $\norm{\Y - \hat{\beta}\X}^2$}
|
\caption{Orthogonal projection of $\Y$ on the plane generated by the basis described by $\X$. $\color{blue}a$ corresponds to $\norm{\X\hat{\beta} - \bar{\Y}\One}^2$, $\color{blue}b$ corresponds to $\hat{\varepsilon} = \norm{\Y - \X\hat{\beta}}^2$ and $\color{blue}c$ corresponds to $\norm{\Y - \bar{\Y}\One}^2$.}
|
||||||
\label{fig:scheme-orthogonal-projection}
|
\label{fig:scheme-orthogonal-projection}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
@ -149,3 +156,165 @@ If $H$ is the projection matrix of the subspace generated by $\X$, $X\Y$ is the
|
||||||
\caption{Ordinary least squares and regression line with simulated data.}
|
\caption{Ordinary least squares and regression line with simulated data.}
|
||||||
\label{fig:ordinary-least-squares}
|
\label{fig:ordinary-least-squares}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{definition}[Model dimension]
|
||||||
|
Let $\M$ be a model.
|
||||||
|
The dimension of $\M$ is the dimension of the subspace generated by $\X$, that is the number of parameters in the $\beta$ vector.
|
||||||
|
|
||||||
|
\textit{Nb.} The dimension of the model is not the total number of parameters, as $\sigma^2$ is also one of the model parameters.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\section{Gaussian vectors}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{definition}[Normal distribution]
|
||||||
|
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{definition}[Gaussian vector]
|
||||||
|
A random vector $\Y \in \RR[n]$ is a gaussian vector if every linear combination of its components follows a (univariate) normal distribution.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{property}
|
||||||
|
$m = \EE(Y) = (m_1, \ldots, m_n)^T$, where $m_i = \EE(Y_i)$
|
||||||
|
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
\[
|
||||||
|
\Y \sim \Norm_n(m, \Sigma)
|
||||||
|
\]
|
||||||
|
where $\Sigma$ is the variance-covariance matrix.
|
||||||
|
\[
|
||||||
|
\Sigma = \EE\left[(\Y -m)(\Y - m)^T\right].
|
||||||
|
\]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\end{property}
|
||||||
|
|
||||||
|
\begin{remark}
|
||||||
|
\[
|
||||||
|
\Cov(Y_i, Y_i) = \Var(Y_i)
|
||||||
|
\]
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
\begin{definition}[Covariance]
|
||||||
|
\[
|
||||||
|
\Cov(Y_i, Y_j) = \EE\left((Y_i-\EE(Y_i))(Y_j-\EE(Y_j))\right)
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
|
||||||
|
When two variables are linked, the covariance is large.
|
||||||
|
|
||||||
|
If two variables $X, Y$ are independent, then $\Cov(X, Y) = 0$ (the converse does not hold in general).
|
||||||
|
|
||||||
|
\begin{definition}[Correlation coefficient]
|
||||||
|
\[
|
||||||
|
\Cor(Y_i, Y_j) = \frac{\EE\left((Y_i-\EE(Y_i))(Y_j-\EE(Y_j))\right)}{\sqrt{\EE\left((Y_i - \EE(Y_i))^2\right) \cdot \EE\left((Y_j - \EE(Y_j))^2\right)}}
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
Covariance is very sensitive to the scale of the variables. For instance, if we measure a distance in millimeters, the covariance will be larger than if the same measure were expressed in meters. The correlation coefficient, which is a normalized covariance, is therefore useful for comparing values across scales.
|
||||||
|
|
||||||
|
\begin{remark}
|
||||||
|
\begin{align*}
|
||||||
|
\Cov(Y_i, Y_i) &= \EE((Y_i - \EE(Y_i)) (Y_i - \EE(Y_i))) \\
|
||||||
|
&= \EE((Y_i - \EE(Y_i))^2) \\
|
||||||
|
&= \Var(Y_i)
|
||||||
|
\end{align*}
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
\begin{equation}
|
||||||
|
\Sigma = \begin{pNiceMatrix}
|
||||||
|
\VVar(Y_1) & & & &\\
|
||||||
|
& \Ddots & & & \\
|
||||||
|
& \Cov(Y_i, Y_j) & \VVar(Y_i) & & \\
|
||||||
|
& & & \Ddots & \\
|
||||||
|
& & & & \VVar(Y_n)
|
||||||
|
\end{pNiceMatrix}
|
||||||
|
\end{equation}
|
||||||
|
|
||||||
|
\begin{definition}[Identity matrix]
|
||||||
|
\[
|
||||||
|
\mathcal{I}_n = \begin{pNiceMatrix}
|
||||||
|
1 & 0 & 0 \\
|
||||||
|
0 & \Ddots & 0\\
|
||||||
|
0 & 0 & 1
|
||||||
|
\end{pNiceMatrix}
|
||||||
|
\]
|
||||||
|
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{theorem}[Cochran Theorem (Consequence)]
|
||||||
|
Let $\mathbf{Z}$ be a gaussian vector: $\mathbf{Z} \sim \Norm_n(0_n, I_n)$.
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item If $V_1, V_2$ are orthogonal subspaces of $\RR[n]$ with dimensions $n_1, n_2$ such that
|
||||||
|
\[
|
||||||
|
\RR[n] = V_1 \overset{\perp}{\oplus} V_2.
|
||||||
|
\]
|
||||||
|
\item If $Z_1, Z_2$ are the orthogonal projections of $\mathbf{Z}$ on $V_1$ and $V_2$, i.e. $Z_1 = \Pi_{V_1}(\mathbf{Z}) = \Pi_1 \mathbf{Z}$ and $Z_2 = \Pi_{V_2} (\mathbf{Z}) = \Pi_2 \mathbf{Z}$...
|
||||||
|
(\textcolor{red}{look to the slides})
|
||||||
|
\end{itemize}
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\begin{definition}[Chi 2 distribution]
|
||||||
|
If $X_1, \ldots, X_n$ are i.i.d. $\sim \Norm(0, 1)$, then
|
||||||
|
\[
|
||||||
|
X_1^2 + \dots + X_n^2 \sim \chi_n^2
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\subsection{Estimator's properties}
|
||||||
|
|
||||||
|
|
||||||
|
\[
|
||||||
|
\Pi_V = \X(\X^T\X)^{-1} \X^T
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\hat{m} &= \X \hat{\beta} = \X(\X^T\X)^{-1} \X^T \Y \\
|
||||||
|
\text{so} \\
|
||||||
|
&= \Pi_V \Y
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
According to the Cochran theorem, we can deduce that the estimator of the predicted value $\hat{m}$ is independent of $\hat{\sigma}^2$.
|
||||||
|
|
||||||
|
All the sums of squares follow a $\chi^2$ distribution:
|
||||||
|
\[
|
||||||
|
...
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{property}
|
||||||
|
|
||||||
|
\end{property}
|
||||||
|
|
||||||
|
\subsection{Estimators consistency}
|
||||||
|
|
||||||
|
If $q < n$,
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\hat{\sigma}^2 \overunderset{\PP}{n\to\infty}{\longrightarrow} \sigma^{*2}$.
|
||||||
|
\item If $(\X^T\X)^{-1}$...
|
||||||
|
\item ...
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
We can derive statistical test from these properties.
|
||||||
|
|
||||||
|
|
||||||
|
\section{Statistical tests}
|
||||||
|
|
||||||
|
\subsection{Student $t$-test}
|
||||||
|
|
||||||
|
|
||||||
|
\[
|
||||||
|
\frac{\hat{\theta}-\theta}{\sqrt{\frac{\widehat{\VVar}(\hat{\theta})}{n}}} \underset{H_0}{\sim} t
|
||||||
|
\]
|
||||||
|
|
||||||
|
where
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
\DeclareMathOperator{\VVar}{\mathbb{V}} % variance
|
||||||
|
\DeclareMathOperator{\One}{\mathbf{1}}
|
||||||
|
\DeclareMathOperator{\Cor}{\mathrm{Cor}}
|
||||||
|
\newcommand{\M}[1][]{\ensuremath{\ifstrempty{#1}{\mathcal{M}}{\mathbb{M}_{#1}}}}
|
||||||
|
\newcommand{\X}{\ensuremath{\mathbf{X}}}
|
||||||
|
\newcommand{\Y}{\ensuremath{\mathbf{Y}}}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -12,21 +12,31 @@
|
||||||
\tkzDefParallelogram(A,B,C)
|
\tkzDefParallelogram(A,B,C)
|
||||||
\tkzGetPoint{D}
|
\tkzGetPoint{D}
|
||||||
\tkzDrawPolygon[fill=gray!25!white](A,B,C,D)
|
\tkzDrawPolygon[fill=gray!25!white](A,B,C,D)
|
||||||
\draw[decorate,decoration={brace,
|
|
||||||
amplitude=8pt},xshift=0pt,very thin,gray] (2,0) -- ++(-1,-0.5) node [black,midway,xshift=0.5em,yshift=-1em] {\color{blue}$a$};
|
|
||||||
\end{scope}
|
\end{scope}
|
||||||
\begin{scope}[canvas is xz plane at y=0]
|
% Draw the rectangle triangle scheme
|
||||||
|
\begin{scope}[canvas is xz plane at y=1]
|
||||||
\draw[thick,fill=white,fill opacity=0.7,nodes={opacity=1}]
|
\draw[thick,fill=white,fill opacity=0.7,nodes={opacity=1}]
|
||||||
(2,0) node[bullet,label=below right:{$\mathbf{X}$}] {}
|
(2,0) node[bullet,label=right:{$\bar{\mathbf{Y}}$}] (Y_bar) {}
|
||||||
-- (0,0) node[bullet] {}
|
-- (0,-0.5) node (B) {}
|
||||||
-- (0,3) node[bullet,label=above:{$\mathbf{Y}$}] {} -- cycle;
|
-- (0,3) node[label=above:{$\mathbf{Y}$}] (Y) {} -- cycle;
|
||||||
\draw (0.25,0) -- (0.25,0.25) -- (0,0.25);
|
% Right angle annotation
|
||||||
|
\tkzPicRightAngle[draw,
|
||||||
|
angle eccentricity=.5,angle radius=2mm](Y,B,Y_bar)
|
||||||
|
% epsilon: Y - X \hat{\beta} curly brackets annotations
|
||||||
\draw[decorate,decoration={brace,
|
\draw[decorate,decoration={brace,
|
||||||
amplitude=8pt},xshift=0pt,very thin,gray] (0,0) -- (0,3) node [black,midway,xshift=-1.25em,yshift=0em] {\color{blue}$b$};
|
amplitude=8pt},xshift=0pt,very thin,gray] (B) -- (Y) node [black,midway,xshift=-1.25em,yshift=0em] {\color{blue}$b$};
|
||||||
|
% X\hat{\beta} - \hat{Y}
|
||||||
|
\draw[decorate,decoration={brace,
|
||||||
|
amplitude=8pt},xshift=0pt,very thin,gray] (Y_bar) -- (B) node [black,midway,xshift=0.5em,yshift=-1em] {\color{blue}$a$};
|
||||||
|
%
|
||||||
|
\draw[decorate,decoration={brace,
|
||||||
|
amplitude=8pt},xshift=0pt,very thin,gray] (Y) -- (Y_bar) node [black,midway,xshift=1em,yshift=1em] {\color{blue}$c$};
|
||||||
\end{scope}
|
\end{scope}
|
||||||
|
% Coordinate system
|
||||||
\begin{scope}[canvas is xy plane at z=0]
|
\begin{scope}[canvas is xy plane at z=0]
|
||||||
\draw[->] (2,0) -- ++(-0.75,0.75) node [left] {$\mathbf{1}$};
|
\draw[->] (2,1) -- node [above] {$\mathbf{1}$} ++(-1,0) ;
|
||||||
\draw[->] (2,0) -- ++(-1,-0.5);
|
\draw[->] (2,1) -- ++(-0.45,-1) node [right] {$X_1$};
|
||||||
\end{scope}
|
\end{scope}
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
\end{document}
|
\end{document}
|
Binary file not shown.
Binary file not shown.
4
main.tex
4
main.tex
|
@ -28,9 +28,6 @@
|
||||||
\definecolor{myblue}{HTML}{5654fa}
|
\definecolor{myblue}{HTML}{5654fa}
|
||||||
\colorlet{primary}{myblue}
|
\colorlet{primary}{myblue}
|
||||||
|
|
||||||
\input{definitions}
|
|
||||||
\input{preamble}
|
|
||||||
|
|
||||||
\hypersetup{
|
\hypersetup{
|
||||||
pdftitle={Course - Multivariate Statistics},
|
pdftitle={Course - Multivariate Statistics},
|
||||||
pdfauthor={Samuel Ortion},
|
pdfauthor={Samuel Ortion},
|
||||||
|
@ -51,6 +48,7 @@
|
||||||
\input{glossary}
|
\input{glossary}
|
||||||
\input{definitions}
|
\input{definitions}
|
||||||
|
|
||||||
|
|
||||||
\makeindex%
|
\makeindex%
|
||||||
\makeglossary%
|
\makeglossary%
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
Loading…
Reference in New Issue