parent
43acae64f3
commit
29dad16dfb
|
@ -1,7 +1,2 @@
|
||||||
main.pdf filter=lfs diff=lfs merge=lfs -text
|
main.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
figures/schemes/regression_plan_3D.pdf filter=lfs diff=lfs merge=lfs -text
|
**/*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
figures/schemes/vector_orthogonality.pdf filter=lfs diff=lfs merge=lfs -text
|
|
||||||
figures/schemes/base_plan.pdf filter=lfs diff=lfs merge=lfs -text
|
|
||||||
figures/schemes/coordinates_systems.pdf filter=lfs diff=lfs merge=lfs -text
|
|
||||||
figures/schemes/ordinary_least_squares.pdf filter=lfs diff=lfs merge=lfs -text
|
|
||||||
figures/schemes/orthogonal_projection.pdf filter=lfs diff=lfs merge=lfs -text
|
|
||||||
|
|
|
@ -13,6 +13,6 @@
|
||||||
|
|
||||||
\includechapters{part1}{2}
|
\includechapters{part1}{2}
|
||||||
|
|
||||||
% \includechapters{part2}{2}
|
\includechapters{part2}{2}
|
||||||
|
|
||||||
% \includechapters{part3}{1}
|
% \includechapters{part3}{1}
|
|
@ -117,7 +117,7 @@ We want to minimize the distance between $\X\beta$ and $\Y$:
|
||||||
\Rightarrow& \X \beta = proj^{(1, \X)} \Y\\
|
\Rightarrow& \X \beta = proj^{(1, \X)} \Y\\
|
||||||
\Rightarrow& \forall v \in w,\, vy = v proj^w(y)\\
|
\Rightarrow& \forall v \in w,\, vy = v proj^w(y)\\
|
||||||
\Rightarrow& \forall i: \\
|
\Rightarrow& \forall i: \\
|
||||||
& \X_i \Y = \X_i X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\
|
& \X_i \Y = \X_i \X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\
|
||||||
\Rightarrow& \X^T \Y = \X^T \X \hat{\beta} \\
|
\Rightarrow& \X^T \Y = \X^T \X \hat{\beta} \\
|
||||||
\Rightarrow& {\color{gray}(\X^T \X)^{-1}} \X^T \Y = {\color{gray}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\
|
\Rightarrow& {\color{gray}(\X^T \X)^{-1}} \X^T \Y = {\color{gray}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\
|
||||||
\Rightarrow& \hat{\beta} = (\X^T\X)^{-1} \X^T \Y
|
\Rightarrow& \hat{\beta} = (\X^T\X)^{-1} \X^T \Y
|
||||||
|
@ -127,7 +127,7 @@ This formula comes from the orthogonal projection of $\Y$ on the vector subspace
|
||||||
|
|
||||||
$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
|
$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
|
||||||
|
|
||||||
If $H$ is the projection matrix of the subspace generated by $\X$, $X\Y$ is the projection on $\Y$ on this subspace, that corresponds to $\X\hat{\beta}$.
|
If $H$ is the projection matrix onto the subspace generated by $\X$, $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
|
||||||
|
|
||||||
\section{Sum of squares}
|
\section{Sum of squares}
|
||||||
|
|
||||||
|
@ -253,6 +253,7 @@ Covariance is really sensitive to scale of variables. For instance, if we measur
|
||||||
|
|
||||||
|
|
||||||
\begin{theorem}[Cochran Theorem (Consequence)]
|
\begin{theorem}[Cochran Theorem (Consequence)]
|
||||||
|
\label{thm:cochran}
|
||||||
Let $\mathbf{Z}$ be a gaussian vector: $\mathbf{Z} \sim \Norm_n(0_n, I_n)$.
|
Let $\mathbf{Z}$ be a gaussian vector: $\mathbf{Z} \sim \Norm_n(0_n, I_n)$.
|
||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
|
@ -263,8 +264,29 @@ Covariance is really sensitive to scale of variables. For instance, if we measur
|
||||||
\item If $Z_1, Z_2$ are orthogonal of $\mathbf{Z}$ on $V_1$ and $V_2$ i.e. $Z_1 = \Pi_{V_1}(\mathbf{Z}) = \Pi_1 \Y$ and $Z_2 = \Pi_{V_2} (\mathbf{Z}) = \Pi_2 \Y$...
|
\item If $Z_1, Z_2$ are the orthogonal projections of $\mathbf{Z}$ on $V_1$ and $V_2$, i.e. $Z_1 = \Pi_{V_1}(\mathbf{Z}) = \Pi_1 \mathbf{Z}$ and $Z_2 = \Pi_{V_2} (\mathbf{Z}) = \Pi_2 \mathbf{Z}$\dots
|
||||||
(\textcolor{red}{look to the slides})
|
(\textcolor{red}{look to the slides})
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
|
$Z_1 = \Pi_{V_1}(\Z)$ is the projection of $\Z$ on subspace $V_1$.
|
||||||
|
|
||||||
|
\dots
|
||||||
|
|
||||||
|
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
|
|
||||||
|
\begin{property}[Estimators properties in the linear model]
|
||||||
|
According to \autoref{thm:cochran},
|
||||||
|
\[
|
||||||
|
\hat{m} \text{ is independent from $\hat{\sigma}^2$}
|
||||||
|
\]\dots
|
||||||
|
\[
|
||||||
|
\frac{\norm{\Y - \Pi_V(\Y)}^2}{\sigma^2} \sim \chi^2_{n-q}
|
||||||
|
\]
|
||||||
|
|
||||||
|
$\hat{m} = \X \hat{\beta}$
|
||||||
|
|
||||||
|
$\hat{m}$ is the estimation of the mean.
|
||||||
|
\end{property}
|
||||||
|
|
||||||
|
|
||||||
\begin{definition}[Chi 2 distribution]
|
\begin{definition}[Chi 2 distribution]
|
||||||
If $X_1, \ldots, X_n$ i.i.d. $\sim \Norm(0, 1)$, then;,
|
If $X_1, \ldots, X_n$ are i.i.d. $\sim \Norm(0, 1)$, then
|
||||||
\[
|
\[
|
||||||
|
@ -318,3 +340,290 @@ We can derive statistical test from these properties.
|
||||||
\]
|
\]
|
||||||
|
|
||||||
where
|
where
|
||||||
|
|
||||||
|
\paragraph{Estimation of $\sigma^2$}
|
||||||
|
|
||||||
|
A biased estimator of $\sigma^2$ is:
|
||||||
|
\[
|
||||||
|
\hat{\sigma}^2 = \frac{1}{n} \norm{\Y - \Pi_V(\Y)}^2
|
||||||
|
\]
|
||||||
|
|
||||||
|
$S^2$ is the unbiased estimator of $\sigma^2$
|
||||||
|
\begin{align*}
|
||||||
|
S^2 &= \frac{1}{n-q} \norm{\Y - \Pi_V(\Y)}^2 \\
|
||||||
|
&= \frac{1}{n-q} \sum_{i=1}^n (Y_i - (\X\hat{\beta})_i)^2
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
\begin{remark}[On $\hat{m}$]
|
||||||
|
\begin{align*}
|
||||||
|
&\Y = \X \beta + \varepsilon \\
|
||||||
|
\Leftrightarrow& \EE(\Y) = \X \beta
|
||||||
|
\end{align*}
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
\section{Student test of nullity of a parameter}
|
||||||
|
|
||||||
|
Let $\beta_j$ be a parameter, the tested hypotheses are as follows:
|
||||||
|
\[
|
||||||
|
\begin{cases}
|
||||||
|
(H_0): \beta_j = 0 \\
|
||||||
|
(H_1): \beta_j \neq 0
|
||||||
|
\end{cases}
|
||||||
|
\]
|
||||||
|
|
||||||
|
Under the null hypothesis:
|
||||||
|
\[
|
||||||
|
\frac{\hat{\beta}_j - \beta_j}{S \sqrt{(\X^T \X)^{-1}_{j,j}}} \sim \St(n-q).
|
||||||
|
\]
|
||||||
|
The test statistic is:
|
||||||
|
\[
|
||||||
|
W_n = \frac{\hat{\beta}_j}{S \sqrt{(\X^T\X)^{-1}_{j,j}}} \underset{H_0}{\sim} \St(n-q).
|
||||||
|
\]
|
||||||
|
|
||||||
|
$\hat{\beta}$ is a multinormal vector.
|
||||||
|
|
||||||
|
Let's consider a vector of 4 values:
|
||||||
|
\begin{align*}
|
||||||
|
\begin{pmatrix}
|
||||||
|
\hat{\beta}_0 \\
|
||||||
|
\hat{\beta}_1 \\
|
||||||
|
\hat{\beta}_2 \\
|
||||||
|
\hat{\beta}_3
|
||||||
|
\end{pmatrix}
|
||||||
|
\sim \Norm_4 \left( \begin{pmatrix}
|
||||||
|
\beta_0 \\
|
||||||
|
\beta_1 \\
|
||||||
|
\beta_2 \\
|
||||||
|
\beta_3
|
||||||
|
\end{pmatrix} ;
|
||||||
|
\sigma^2 \left(\X^T \X\right)^{-1}
|
||||||
|
\right)
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Let $\M$ be the following model
|
||||||
|
\begin{align*}
|
||||||
|
Y_i &= \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + \beta_3 X_{3i} + \varepsilon_i
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Why can't we use the following model to test each of the parameters values (here for $X_2$)?
|
||||||
|
\[
|
||||||
|
Y_i = \theta_0 + \theta_1 X_{2i} + \varepsilon_i
|
||||||
|
\]
|
||||||
|
We can't use such a model, we would probably meet a confounding factor: even if we are only interested in relationship $X_2$ with $Y$, we have to fit the whole model.
|
||||||
|
|
||||||
|
\begin{example}[Confounding parameter]
|
||||||
|
Let $Y$ be a variable related to the lung cancer. Let $X_1$ be the smoking status, and $X_2$ the variable `alcohol' (for instance the quantity of alcohol drunk per week).
|
||||||
|
|
||||||
|
If we only fit the model $\M: Y_i = \theta_0 + \theta_1 X_{2i} + \varepsilon_i$, we could conclude that there is a relationship between alcohol and lung cancer, because alcohol consumption and smoking are strongly related. If we had fitted the model $\M: Y_i = \theta_0 + \theta_1 X_{1i} + \theta_2 X_{2i} + \varepsilon_i$, we might have found no significant relationship between $X_2$ and $Y$.
|
||||||
|
\end{example}
|
||||||
|
|
||||||
|
\begin{definition}[Student law]
|
||||||
|
Let $X$ and $Y$ be two random variables such that $X \indep Y$, $X \sim \Norm(0, 1)$ and $Y \sim \chi_n^2$, then
|
||||||
|
\[
|
||||||
|
\frac{X}{\sqrt{Y/n}} \sim \St(n)
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\subsection{Model comparison}
|
||||||
|
|
||||||
|
\begin{definition}[Nested models]
|
||||||
|
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
Let $\M_2$ and $\M_4$ be two models:
|
||||||
|
|
||||||
|
$\M_2: Y_i = \beta_0 + \beta_3 X_{3i} + \varepsilon_i$
|
||||||
|
|
||||||
|
$\M_4: Y_i = \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + \beta_3 X_{3i} + \varepsilon_i$
|
||||||
|
|
||||||
|
$\M_2$ is nested in $\M_4$.
|
||||||
|
|
||||||
|
\paragraph*{Principle} We compare the residual variances of the two models, that is, the variance that is not explained by the model.
|
||||||
|
|
||||||
|
The better the model is, the smaller the residual variance will be.
|
||||||
|
|
||||||
|
If everything is explained by the model, the residual variance would be null.
|
||||||
|
|
||||||
|
|
||||||
|
Here $\M_4$ holds all the information found in $\M_2$ plus additional information. In the worst case, it would be at least as good as $\M_2$.
|
||||||
|
|
||||||
|
\subsection{Fisher $F$-test of model comparison}
|
||||||
|
|
||||||
|
Let $\M_q$ and $\M_{q'}$ be two models such that $\dim(\M_q) = q$, $\dim(\M_{q'}) = q'$, $q > q'$ and $\M_{q'}$ is nested in $\M_q$.
|
||||||
|
|
||||||
|
\paragraph{Tested hypotheses}
|
||||||
|
\[
|
||||||
|
\begin{cases}
|
||||||
|
(H_0): \M_{q'} \text{ is the proper model} \\
|
||||||
|
(H_1): \M_q \text{ is a better model}
|
||||||
|
\end{cases}
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{description}
|
||||||
|
\item[ESS] Estimated Sum of Squares
|
||||||
|
\item[RSS] Residual Sum of Squares
|
||||||
|
\item[EMS] Estimated Mean Square
|
||||||
|
\item[RMS] Residual Mean Square
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
\[
|
||||||
|
ESS = RSS(\M_{q'}) - RSS(\M_q)
|
||||||
|
\]
|
||||||
|
\[
|
||||||
|
RSS(\M) = \norm{\Y - \X\hat{\beta}}^2 = \sum_{i=1}^n \hat{\varepsilon}_i^2
|
||||||
|
\]
|
||||||
|
\[
|
||||||
|
EMS = \frac{ESS}{q - q'}
|
||||||
|
\]
|
||||||
|
\[
|
||||||
|
RMS = \frac{RSS(\M_q)}{n-q}
|
||||||
|
\]
|
||||||
|
|
||||||
|
Under the null hypotheses:
|
||||||
|
\[
|
||||||
|
F = \frac{EMS}{RMS} \underset{H_0}{\sim} \Fish(q-q'; n-q)
|
||||||
|
\]
|
||||||
|
|
||||||
|
\section{Model validity}
|
||||||
|
|
||||||
|
Assumptions:
|
||||||
|
\begin{itemize}
|
||||||
|
\item $\X$ is a full rank matrix;
|
||||||
|
\item Residuals are i.i.d. $\varepsilon \sim \Norm_n(0_n, \sigma^2 I_n)$.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
We also have to look for influential observations.
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{$\X$ is full rank}
|
||||||
|
|
||||||
|
To check that the rank of the matrix is $p+1$, we can compute the eigenvalues of the correlation matrix of the predictors. If there is a perfect linear relationship between two variables (two columns of $\X$), one of the eigenvalues would be null. In practice, we never get an exactly null eigenvalue. We therefore consider the condition index, defined as the ratio between the largest and the smallest eigenvalues: $\kappa = \frac{\lambda_1}{\lambda_p}$, with $\lambda_1 \geq \lambda_2 \geq \ldots \geq \lambda_p$ the eigenvalues. A large condition index indicates that $\X$ is close to being rank-deficient.
|
||||||
|
|
||||||
|
|
||||||
|
If all eigenvalues are different from 0, $\X^T \X$ can be inverted, but when some of them are close to 0 the variance of the estimated parameters would be large, thus the estimation of the parameters would not be reliable (not good enough).
|
||||||
|
|
||||||
|
\paragraph{Variance Inflation Factor}
|
||||||
|
|
||||||
|
Perform a regression of each of the predictors against the other predictors.
|
||||||
|
|
||||||
|
If there is a strong linear relationship between a predictor and the others, the coefficient of determination $R^2$ (the proportion of variance explained by the model) of this regression would be close to 1.
|
||||||
|
|
||||||
|
We do this for all parameters, and for parameter $j = 1, \ldots, p$, the variance inflation factor would be:
|
||||||
|
\[
|
||||||
|
VIF_j = \frac{1}{1-R^2_j}.
|
||||||
|
\]
|
||||||
|
|
||||||
|
\subparagraph*{Rule}
|
||||||
|
If $VIF > 10$ or $VIF > 100$\dots
|
||||||
|
|
||||||
|
|
||||||
|
In case of multicollinearity, we have to remove variables one by one until there is no longer any multicollinearity.
|
||||||
|
Variables have to be removed based on statistical results and through discussion with experimenters.
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Residuals analysis}
|
||||||
|
|
||||||
|
\paragraph*{Assumption}
|
||||||
|
\[
|
||||||
|
\varepsilon \sim \Norm_n(0_n, \sigma^2 I_n)
|
||||||
|
\]
|
||||||
|
|
||||||
|
\paragraph{Normality of the residuals} If $\varepsilon_i$ ($i=1, \ldots, n$) could be observed we could build a QQ-plot of $\varepsilon_i / \sigma$ against quantiles of $\Norm(0, 1)$.
|
||||||
|
|
||||||
|
Only the residual errors $\hat{e}_i$ can be observed:
|
||||||
|
|
||||||
|
Let $e_i^*$ be the studentized residual, considered as estimators of $\varepsilon_i$
|
||||||
|
|
||||||
|
\[
|
||||||
|
e_i^* = \frac{\hat{e}_i}{\sqrt{\hat{\sigma}^2_{(i)} (1-H_{ii})}}
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\hat{\Y} &= \X \hat{\beta} \\
|
||||||
|
&= \X \left( (\X^T\X)^{-1} \X^T \Y\right) \\
|
||||||
|
&= \underbrace{\X (\X^T\X)^{-1} \X^T}_{H} \Y
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
\paragraph{Centered residuals} If $(1, \ldots, 1)^T$ belongs to the subspace generated by $\X$, then $\EE(\varepsilon) = 0$ by construction.
|
||||||
|
|
||||||
|
\paragraph{Independence} We do not have a statistical test for independence in R, we would plot the residuals $e$ against $\X \hat{\beta}$.
|
||||||
|
|
||||||
|
\paragraph{Homoscedasticity} Plot $\sqrt{\lvert e^* \rvert}$ against $\X \hat{\beta}$.
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Influential observations}
|
||||||
|
|
||||||
|
We make the distinction between observations:
|
||||||
|
\begin{itemize}
|
||||||
|
\item With too large residual
|
||||||
|
$\rightarrow$ Influence on the estimation of $\sigma^2$
|
||||||
|
\item Which are too isolated
|
||||||
|
$\rightarrow$ Influence on the estimation of $\beta$
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\[
|
||||||
|
e_i^* \sim \St(n-p-1)
|
||||||
|
\]
|
||||||
|
\subparagraph*{Rule} We consider an observation to be aberrant if:
|
||||||
|
\[
|
||||||
|
e_i^* > \F^{-1}_{\St(n-p-1)}(1-\alpha)
|
||||||
|
\]
|
||||||
|
quantile of order $1-\alpha$, $\alpha$ being often set as $1/n$, or we set the threshold to 2.
|
||||||
|
|
||||||
|
\paragraph{Leverage} Leverage is the diagonal term of the orthogonal projection matrix(?) $H_{ii}$.
|
||||||
|
|
||||||
|
\begin{property}
|
||||||
|
\begin{itemize}
|
||||||
|
\item $0 \leq H_{ii} \leq 1$
|
||||||
|
\item $\sum_i H_{ii} = p$
|
||||||
|
\end{itemize}
|
||||||
|
\end{property}
|
||||||
|
|
||||||
|
\subparagraph*{Rule} We consider that the observation is aberrant if the leverage is ??.
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Non-linearity}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Model Selection}
|
||||||
|
|
||||||
|
We want to select the best model with the smallest number of predictors.
|
||||||
|
|
||||||
|
When models have too many explicative variables, the power of statistical tests decreases.
|
||||||
|
|
||||||
|
Different methods:
|
||||||
|
\begin{itemize}
|
||||||
|
\item Comparison of nested models;
|
||||||
|
\item Information criteria;
|
||||||
|
\item Method based on the prediction error.
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\subsection{Information criteria}
|
||||||
|
|
||||||
|
\subsubsection{Likelihood}
|
||||||
|
|
||||||
|
\begin{definition}[Likelihood]
|
||||||
|
Probability to observe what we observed for a particular model.
|
||||||
|
\[
|
||||||
|
L_n (\M(k))
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{definition}[Akaike Information Criterion]
|
||||||
|
\[
|
||||||
|
AIC(\M(k)) = -2 \log L_n (\M(k)) + 2k.
|
||||||
|
\]
|
||||||
|
|
||||||
|
$2k$ is a penalty, leading to privilege the smallest model.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Bayesian Information Criterion]
|
||||||
|
\[
|
||||||
|
BIC(\M(k)) = -2 \log L_n (\M(k)) + \log(n) k.
|
||||||
|
\]
|
||||||
|
$\log(n) k$ is a penalty.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
Usually $AIC$ has a smaller penalty than $BIC$, thus the $AIC$ criterion tends to select models with more variables than the $BIC$ criterion.
|
||||||
|
|
||||||
|
|
|
@ -1,220 +1,4 @@
|
||||||
\chapter{Elements of Linear Algebra}
|
\chapter{Generalized Linear Model}
|
||||||
\label{ch:elements-of-linear-algebra}
|
|
||||||
|
|
||||||
\begin{remark}[vector]
|
\section{Logistic Regression}
|
||||||
Let $u$ a vector, we will use interchangeably the following notations: $u$ and $\vec{u}$
|
|
||||||
\end{remark}
|
|
||||||
|
|
||||||
Let $u = \begin{pmatrix}
|
|
||||||
u_1 \\
|
|
||||||
\vdots \\
|
|
||||||
u_n
|
|
||||||
\end{pmatrix}$ and $v = \begin{pmatrix}
|
|
||||||
v_1 \\
|
|
||||||
\vdots \\
|
|
||||||
v_n
|
|
||||||
\end{pmatrix}$
|
|
||||||
|
|
||||||
\begin{definition}[Scalar Product (Dot Product)]
|
|
||||||
\begin{align*}
|
|
||||||
\scalar{u, v} & = \begin{pmatrix}
|
|
||||||
u_1, \ldots, u_v
|
|
||||||
\end{pmatrix}
|
|
||||||
\begin{pmatrix}
|
|
||||||
v_1 \\
|
|
||||||
\vdots \\
|
|
||||||
v_n
|
|
||||||
\end{pmatrix} \\
|
|
||||||
& = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n
|
|
||||||
\end{align*}
|
|
||||||
|
|
||||||
We may use $\scalar{u, v}$ or $u \cdot v$ notations.
|
|
||||||
\end{definition}
|
|
||||||
\paragraph{Dot product properties}
|
|
||||||
\begin{description}
|
|
||||||
\item[Commutative] $\scalar{u, v} = \scalar{v, u}$
|
|
||||||
\item[Distributive] $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$
|
|
||||||
\item $\scalar{u, v} = \norm{u} \times \norm{v} \times \cos(\widehat{u, v})$
|
|
||||||
\item $\scalar{a, a} = \norm{a}^2$
|
|
||||||
\end{description}
|
|
||||||
|
|
||||||
\begin{definition}[Norm]
|
|
||||||
Length of the vector.
|
|
||||||
\[
|
|
||||||
\norm{u} = \sqrt{\scalar{u, v}}
|
|
||||||
\]
|
|
||||||
|
|
||||||
$\norm{u, v} > 0$
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{definition}[Distance]
|
|
||||||
\[
|
|
||||||
dist(u, v) = \norm{u-v}
|
|
||||||
\]
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{definition}[Orthogonality]
|
|
||||||
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{remark}
|
|
||||||
\[
|
|
||||||
(dist(u, v))^2 = \norm{u - v}^2,
|
|
||||||
\] and
|
|
||||||
\[
|
|
||||||
\scalar{v-u, v-u}
|
|
||||||
\]
|
|
||||||
\end{remark}
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includestandalone{figures/schemes/vector_orthogonality}
|
|
||||||
\caption{Scalar product of two orthogonal vectors.}
|
|
||||||
\label{fig:scheme-orthogonal-scalar-product}
|
|
||||||
\end{figure}
|
|
||||||
|
|
||||||
\begin{align*}
|
|
||||||
\scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\
|
|
||||||
& = \norm{v}^2 + \norm{u}^2 \\
|
|
||||||
& = -2 \scalar{u, v}
|
|
||||||
\end{align*}
|
|
||||||
|
|
||||||
\begin{align*}
|
|
||||||
\norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\
|
|
||||||
\norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v}
|
|
||||||
\end{align*}
|
|
||||||
|
|
||||||
\begin{proposition}[Scalar product of orthogonal vectors]
|
|
||||||
\[
|
|
||||||
u \perp v \Leftrightarrow \scalar{u, v} = 0
|
|
||||||
\]
|
|
||||||
\end{proposition}
|
|
||||||
|
|
||||||
\begin{proof}[Indeed]
|
|
||||||
$\norm{u-v}^2 = \norm{u+v}^2$, as illustrated in \autoref{fig:scheme-orthogonal-scalar-product}.
|
|
||||||
\begin{align*}
|
|
||||||
\Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\
|
|
||||||
\Leftrightarrow & 4 \scalar{u, v} = 0 \\
|
|
||||||
\Leftrightarrow & \scalar{u, v} = 0
|
|
||||||
\end{align*}
|
|
||||||
\end{proof}
|
|
||||||
|
|
||||||
\begin{theorem}[Pythagorean theorem]
|
|
||||||
If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$ .
|
|
||||||
\end{theorem}
|
|
||||||
|
|
||||||
\begin{definition}[Orthogonal Projection]
|
|
||||||
|
|
||||||
\end{definition}
|
|
||||||
Let $y = \begin{pmatrix}
|
|
||||||
y_1 \\
|
|
||||||
. \\
|
|
||||||
y_n
|
|
||||||
\end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
|
|
||||||
$\mathcal{Y}$ can be written as the orthogonal projection of $y$ on $w$:
|
|
||||||
\[
|
|
||||||
\mathcal{Y} = proj^w(y) + z,
|
|
||||||
\]
|
|
||||||
where
|
|
||||||
\[
|
|
||||||
\begin{cases}
|
|
||||||
z \in w^\perp \\
|
|
||||||
proj^w(y) \in w
|
|
||||||
\end{cases}
|
|
||||||
\]
|
|
||||||
There is only one vector $\mathcal{Y}$ that ?
|
|
||||||
|
|
||||||
The scalar product between $z$ and (?) is zero.
|
|
||||||
|
|
||||||
\begin{property}
|
|
||||||
$proj^w(y)$ is the closest vector to $y$ that belongs to $w$.
|
|
||||||
\end{property}
|
|
||||||
|
|
||||||
\begin{definition}[Matrix]
|
|
||||||
A matrix is an application, that is, a function that transform a thing into another, it is a linear function.
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{example}[Matrix application]
|
|
||||||
|
|
||||||
Let $A$ be a matrix:
|
|
||||||
\[
|
|
||||||
A = \begin{pmatrix}
|
|
||||||
a & b \\
|
|
||||||
c & d
|
|
||||||
\end{pmatrix}
|
|
||||||
\] and
|
|
||||||
\[
|
|
||||||
x = \begin{pmatrix}
|
|
||||||
x_1 \\
|
|
||||||
x_2
|
|
||||||
\end{pmatrix}
|
|
||||||
\]
|
|
||||||
Then,
|
|
||||||
\begin{align*}
|
|
||||||
Ax & = \begin{pmatrix}
|
|
||||||
a & b \\
|
|
||||||
c & d
|
|
||||||
\end{pmatrix}
|
|
||||||
\begin{pmatrix}
|
|
||||||
x_1 \\
|
|
||||||
x_2
|
|
||||||
\end{pmatrix} \\
|
|
||||||
& = \begin{pmatrix}
|
|
||||||
a x_1 + b x_2 \\
|
|
||||||
c x_1 + d x_2
|
|
||||||
\end{pmatrix}
|
|
||||||
\end{align*}
|
|
||||||
|
|
||||||
Similarly,
|
|
||||||
\begin{align*}
|
|
||||||
\begin{pmatrix}
|
|
||||||
a & b & c & d \\
|
|
||||||
e & f & g & h \\
|
|
||||||
i & j & k & l
|
|
||||||
\end{pmatrix}
|
|
||||||
\begin{pmatrix}
|
|
||||||
x_1 \\
|
|
||||||
x_2 \\
|
|
||||||
x_3 \\
|
|
||||||
x_4
|
|
||||||
\end{pmatrix}
|
|
||||||
=
|
|
||||||
\begin{pmatrix}
|
|
||||||
\luadirect{
|
|
||||||
local matrix_product = require("scripts.matrix_product")
|
|
||||||
local m1 = {
|
|
||||||
{"a", "b", "c", "d"},
|
|
||||||
{"e", "f", "g", "h"},
|
|
||||||
{"i", "j", "k", "l"}
|
|
||||||
}
|
|
||||||
local m2 = {
|
|
||||||
{"x_1"},
|
|
||||||
{"x_2"},
|
|
||||||
{"x_3"},
|
|
||||||
{"x_4"}
|
|
||||||
}
|
|
||||||
local product_matrix = matrix_product.matrix_product_repr(m1,m2)
|
|
||||||
local matrix_dump = matrix_product.dump_matrix(product_matrix)
|
|
||||||
tex.print(matrix_dump)
|
|
||||||
}
|
|
||||||
\end{pmatrix}
|
|
||||||
\end{align*}
|
|
||||||
\end{example}
|
|
||||||
|
|
||||||
The number of columns has to be the same as the dimension of the vector to which the matrix is applied.
|
|
||||||
|
|
||||||
\begin{definition}[Tranpose of a Matrix]
|
|
||||||
Let $A = \begin{pmatrix}
|
|
||||||
a & b \\
|
|
||||||
c & d
|
|
||||||
\end{pmatrix}$, then $A^T = \begin{pmatrix}
|
|
||||||
a & c \\
|
|
||||||
b & d
|
|
||||||
\end{pmatrix}$
|
|
||||||
\end{definition}
|
|
||||||
|
|
||||||
\begin{figure}
|
|
||||||
\centering
|
|
||||||
\includestandalone{figures/schemes/coordinates_systems}
|
|
||||||
\caption{Coordinate systems}
|
|
||||||
\end{figure}
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
\part{Linear Algebra}
|
||||||
|
|
|
@ -0,0 +1,220 @@
|
||||||
|
\chapter{Elements of Linear Algebra}
|
||||||
|
\label{ch:elements-of-linear-algebra}
|
||||||
|
|
||||||
|
\begin{remark}[vector]
|
||||||
|
Let $u$ a vector, we will use interchangeably the following notations: $u$ and $\vec{u}$
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
Let $u = \begin{pmatrix}
|
||||||
|
u_1 \\
|
||||||
|
\vdots \\
|
||||||
|
u_n
|
||||||
|
\end{pmatrix}$ and $v = \begin{pmatrix}
|
||||||
|
v_1 \\
|
||||||
|
\vdots \\
|
||||||
|
v_n
|
||||||
|
\end{pmatrix}$
|
||||||
|
|
||||||
|
\begin{definition}[Scalar Product (Dot Product)]
|
||||||
|
\begin{align*}
|
||||||
|
\scalar{u, v} & = \begin{pmatrix}
|
||||||
|
u_1, \ldots, u_n
|
||||||
|
\end{pmatrix}
|
||||||
|
\begin{pmatrix}
|
||||||
|
v_1 \\
|
||||||
|
\vdots \\
|
||||||
|
v_n
|
||||||
|
\end{pmatrix} \\
|
||||||
|
& = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
We may use $\scalar{u, v}$ or $u \cdot v$ notations.
|
||||||
|
\end{definition}
|
||||||
|
\paragraph{Dot product properties}
|
||||||
|
\begin{description}
|
||||||
|
\item[Commutative] $\scalar{u, v} = \scalar{v, u}$
|
||||||
|
\item[Distributive] $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$
|
||||||
|
\item $\scalar{u, v} = \norm{u} \times \norm{v} \times \cos(\widehat{u, v})$
|
||||||
|
\item $\scalar{a, a} = \norm{a}^2$
|
||||||
|
\end{description}
|
||||||
|
|
||||||
|
\begin{definition}[Norm]
|
||||||
|
Length of the vector.
|
||||||
|
\[
|
||||||
|
\norm{u} = \sqrt{\scalar{u, u}}
|
||||||
|
\]
|
||||||
|
|
||||||
|
$\norm{u} \geq 0$
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Distance]
|
||||||
|
\[
|
||||||
|
dist(u, v) = \norm{u-v}
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Orthogonality]
|
||||||
|
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{remark}
|
||||||
|
\[
|
||||||
|
(dist(u, v))^2 = \norm{u - v}^2,
|
||||||
|
\] and
|
||||||
|
\[
|
||||||
|
\norm{u - v}^2 = \scalar{v-u, v-u}
|
||||||
|
\]
|
||||||
|
\end{remark}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includestandalone{figures/schemes/vector_orthogonality}
|
||||||
|
\caption{Scalar product of two orthogonal vectors.}
|
||||||
|
\label{fig:scheme-orthogonal-scalar-product}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\
|
||||||
|
& = \norm{v}^2 + \norm{u}^2 - 2 \scalar{u, v}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
\begin{align*}
|
||||||
|
\norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\
|
||||||
|
\norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
\begin{proposition}[Scalar product of orthogonal vectors]
|
||||||
|
\[
|
||||||
|
u \perp v \Leftrightarrow \scalar{u, v} = 0
|
||||||
|
\]
|
||||||
|
\end{proposition}
|
||||||
|
|
||||||
|
\begin{proof}[Indeed]
|
||||||
|
$\norm{u-v}^2 = \norm{u+v}^2$, as illustrated in \autoref{fig:scheme-orthogonal-scalar-product}.
|
||||||
|
\begin{align*}
|
||||||
|
\Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\
|
||||||
|
\Leftrightarrow & 4 \scalar{u, v} = 0 \\
|
||||||
|
\Leftrightarrow & \scalar{u, v} = 0
|
||||||
|
\end{align*}
|
||||||
|
\end{proof}
|
||||||
|
|
||||||
|
\begin{theorem}[Pythagorean theorem]
|
||||||
|
If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$ .
|
||||||
|
\end{theorem}
|
||||||
|
|
||||||
|
\begin{definition}[Orthogonal Projection]
|
||||||
|
|
||||||
|
\end{definition}
|
||||||
|
Let $y = \begin{pmatrix}
|
||||||
|
y_1 \\
|
||||||
|
\vdots \\
|
||||||
|
y_n
|
||||||
|
\end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
|
||||||
|
$y$ can be written as the sum of its orthogonal projection on $w$ and a vector $z$ orthogonal to $w$:
|
||||||
|
\[
|
||||||
|
y = proj^w(y) + z,
|
||||||
|
\]
|
||||||
|
where
|
||||||
|
\[
|
||||||
|
\begin{cases}
|
||||||
|
z \in w^\perp \\
|
||||||
|
proj^w(y) \in w
|
||||||
|
\end{cases}
|
||||||
|
\]
|
||||||
|
There is only one vector $proj^w(y) \in w$ such that $y - proj^w(y) \in w^\perp$.
|
||||||
|
|
||||||
|
The scalar product between $z$ and any vector of $w$ is zero.
|
||||||
|
|
||||||
|
\begin{property}
|
||||||
|
$proj^w(y)$ is the closest vector to $y$ that belongs to $w$.
|
||||||
|
\end{property}
|
||||||
|
|
||||||
|
\begin{definition}[Matrix]
|
||||||
|
A matrix represents a linear map, that is, a linear function that transforms a vector into another vector.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{example}[Matrix application]
|
||||||
|
|
||||||
|
Let $A$ be a matrix:
|
||||||
|
\[
|
||||||
|
A = \begin{pmatrix}
|
||||||
|
a & b \\
|
||||||
|
c & d
|
||||||
|
\end{pmatrix}
|
||||||
|
\] and
|
||||||
|
\[
|
||||||
|
x = \begin{pmatrix}
|
||||||
|
x_1 \\
|
||||||
|
x_2
|
||||||
|
\end{pmatrix}
|
||||||
|
\]
|
||||||
|
Then,
|
||||||
|
\begin{align*}
|
||||||
|
Ax & = \begin{pmatrix}
|
||||||
|
a & b \\
|
||||||
|
c & d
|
||||||
|
\end{pmatrix}
|
||||||
|
\begin{pmatrix}
|
||||||
|
x_1 \\
|
||||||
|
x_2
|
||||||
|
\end{pmatrix} \\
|
||||||
|
& = \begin{pmatrix}
|
||||||
|
a x_1 + b x_2 \\
|
||||||
|
c x_1 + d x_2
|
||||||
|
\end{pmatrix}
|
||||||
|
\end{align*}
|
||||||
|
|
||||||
|
Similarly,
|
||||||
|
\begin{align*}
|
||||||
|
\begin{pmatrix}
|
||||||
|
a & b & c & d \\
|
||||||
|
e & f & g & h \\
|
||||||
|
i & j & k & l
|
||||||
|
\end{pmatrix}
|
||||||
|
\begin{pmatrix}
|
||||||
|
x_1 \\
|
||||||
|
x_2 \\
|
||||||
|
x_3 \\
|
||||||
|
x_4
|
||||||
|
\end{pmatrix}
|
||||||
|
=
|
||||||
|
\begin{pmatrix}
|
||||||
|
\luadirect{
|
||||||
|
local matrix_product = require("scripts.matrix_product")
|
||||||
|
local m1 = {
|
||||||
|
{"a", "b", "c", "d"},
|
||||||
|
{"e", "f", "g", "h"},
|
||||||
|
{"i", "j", "k", "l"}
|
||||||
|
}
|
||||||
|
local m2 = {
|
||||||
|
{"x_1"},
|
||||||
|
{"x_2"},
|
||||||
|
{"x_3"},
|
||||||
|
{"x_4"}
|
||||||
|
}
|
||||||
|
local product_matrix = matrix_product.matrix_product_repr(m1,m2)
|
||||||
|
local matrix_dump = matrix_product.dump_matrix(product_matrix)
|
||||||
|
tex.print(matrix_dump)
|
||||||
|
}
|
||||||
|
\end{pmatrix}
|
||||||
|
\end{align*}
|
||||||
|
\end{example}
|
||||||
|
|
||||||
|
The number of columns has to be the same as the dimension of the vector to which the matrix is applied.
|
||||||
|
|
||||||
|
\begin{definition}[Transpose of a Matrix]
|
||||||
|
Let $A = \begin{pmatrix}
|
||||||
|
a & b \\
|
||||||
|
c & d
|
||||||
|
\end{pmatrix}$, then $A^T = \begin{pmatrix}
|
||||||
|
a & c \\
|
||||||
|
b & d
|
||||||
|
\end{pmatrix}$
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includestandalone{figures/schemes/coordinates_systems}
|
||||||
|
\caption{Coordinate systems}
|
||||||
|
\end{figure}
|
|
@ -23,3 +23,13 @@
|
||||||
\end{example}
|
\end{example}
|
||||||
|
|
||||||
When the variable are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance.
|
When the variables are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance.
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\begin{subfigure}{0.45\columnwidth}
|
||||||
|
\includegraphics[width=\columnwidth]{figures/plots/linear_regression_linear.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\begin{subfigure}{0.45\columnwidth}
|
||||||
|
\includegraphics[width=\columnwidth]{figures/plots/linear_regression_non_linear.pdf}
|
||||||
|
\end{subfigure}
|
||||||
|
\caption{Illustration of two models fitting observed values}
|
||||||
|
\end{figure}
|
|
@ -1,6 +1,10 @@
|
||||||
\DeclareMathOperator{\VVar}{\mathbb{V}} % variance
|
\DeclareMathOperator{\VVar}{\mathbb{V}} % variance
|
||||||
\DeclareMathOperator{\One}{\mathbf{1}}
|
\DeclareMathOperator{\One}{\mathbf{1}}
|
||||||
\DeclareMathOperator{\Cor}{\mathrm{Cor}}
|
\DeclareMathOperator{\Cor}{\mathrm{Cor}}
|
||||||
|
\DeclareMathOperator{\St}{\mathscr{St}}
|
||||||
\newcommand{\M}[1][]{\ensuremath{\ifstrempty{#1}{\mathcal{M}}{\mathbb{M}_{#1}}}}
|
\newcommand{\M}[1][]{\ensuremath{\ifstrempty{#1}{\mathcal{M}}{\mathbb{M}_{#1}}}}
|
||||||
\newcommand{\X}{\ensuremath{\mathbf{X}}}
|
\newcommand{\X}{\ensuremath{\mathbf{X}}}
|
||||||
\newcommand{\Y}{\ensuremath{\mathbf{Y}}}
|
\newcommand{\Y}{\ensuremath{\mathbf{Y}}}
|
||||||
|
\newcommand{\Z}{\ensuremath{\mathbf{Z}}}
|
||||||
|
\usepackage{unicode-math}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
# Plot an affine model
# Simulation settings shared by the two plots produced by this script.
|
||||||
|
# Number of simulated observations (passed to rnorm() and runif()).
n <- 250
|
||||||
|
# NOTE(review): `sd` is never referenced in the visible script; the noise
# below is drawn with a hard-coded sd = 2 -- confirm which value is intended.
sd <- 0.05
|
||||||
|
# Gaussian noise vector, added to both the linear and the non-linear response.
epsilon <- rnorm(n, mean = 0, sd = 2)
|
||||||
|
# Intercept used by linear_model(), non_linear_model() and abline().
beta0 <- 1.25
|
||||||
|
# Coefficient of x in linear_model() and of exp(2*x) in non_linear_model().
beta1 <- 4
|
||||||
|
# Affine mean function: returns beta0 + beta1 * x.
# beta0 and beta1 are not arguments; they are read from the script-level variables.
linear_model <- function(x) {
|
||||||
|
return(beta0 + beta1*x)
|
||||||
|
}
|
||||||
|
# Draw n predictor values uniformly on [0, 1].
x <- runif(n, min=0, max=1)
|
||||||
|
# Simulated response: affine mean plus the noise vector epsilon.
y <- linear_model(x) + epsilon
|
||||||
|
|
||||||
|
# Write the scatter plot to a PDF file (path is relative to the working directory).
pdf("figures/plots/linear_regression_linear.pdf")
|
||||||
|
plot(x, y, col="#5654fa", type="p", pch=20, xlab="x", ylab="y")
|
||||||
|
# Overlay the line with intercept beta0 and slope beta1 (the simulation values, not a fitted model).
abline(a = beta0, b = beta1, col="red")
|
||||||
|
# Close the PDF device opened above.
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Non-linear mean function: returns beta0 + beta1 * exp(2 * x).
# beta0 and beta1 are not arguments; they are read from the script-level variables.
non_linear_model <- function(x) {
|
||||||
|
return(beta0 + beta1 * exp(2*x))
|
||||||
|
}
|
||||||
|
# Simulated response for the non-linear model; reuses the same x and the same noise vector epsilon.
non_linear_y <- non_linear_model(x) + epsilon
|
||||||
|
# Write the scatter plot to a second PDF file (path is relative to the working directory).
pdf("figures/plots/linear_regression_non_linear.pdf")
|
||||||
|
# Note: the y-axis is labelled "z" here although the plotted variable is non_linear_y.
plot(x, non_linear_y, col="#5654fa", type="p", pch=20, xlab="x", ylab="z")
|
||||||
|
# Overlay the non_linear_model curve on [0, 1] on top of the existing plot.
curve(non_linear_model, from=0, to=1, add=T, col="red")
|
||||||
|
# Close the PDF device opened above.
dev.off()
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,3 @@
|
||||||
|
covariance.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
../plots/linear_regression_linear.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
../plots/linear_regression_non_linear.pdf filter=lfs diff=lfs merge=lfs -text
|
Binary file not shown.
|
@ -0,0 +1,35 @@
|
||||||
|
% Scheme of Covariance
|
||||||
|
\documentclass[margin=0.5cm]{standalone}
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage{amssymb}
|
||||||
|
\begin{document}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\usetikzlibrary{positioning}
|
||||||
|
\tikzset{
|
||||||
|
point/.style = {circle, inner sep={.75\pgflinewidth}, opacity=1, draw, black, fill=black},
|
||||||
|
point name/.style = {insert path={coordinate (#1)}},
|
||||||
|
}
|
||||||
|
\begin{scope}[yshift=0]
|
||||||
|
\draw (-4, 0.5) -- (4,0.5) node[right] {$Y_i$};
|
||||||
|
\draw (-4, -0.5) -- (4,-0.5) node[right] {$Y_j$};
|
||||||
|
\node at (6, 0) {$\mathrm{Cov}(Y_i, Y_j) > 0$};
|
||||||
|
\node (EYipoint) at (0,0.5) {$\times$};
|
||||||
|
\node at (0, 1) {$\mathbb{E}(Y_i)$};
|
||||||
|
\node (EYjpoint) at (0,-0.5) {$\times$};
|
||||||
|
\node at (0, -1) {$\mathbb{E}(Y_j)$};
|
||||||
|
|
||||||
|
\foreach \x in {-3, 0.5, 2.75} {
|
||||||
|
\node[point] at (\x, 0.5) {};
|
||||||
|
}
|
||||||
|
\foreach \x in {-2, -1, 3} {
|
||||||
|
\node[point] at (\x, -0.5) {};
|
||||||
|
}
|
||||||
|
\end{scope}
|
||||||
|
\begin{scope}[yshift=-100]
|
||||||
|
\draw (-4,0.5) -- (4,0.5) node[right] {$Y_i$};
|
||||||
|
\draw (-4,-0.5) -- (4,-0.5) node[right] {$Y_j$};
|
||||||
|
\node at (6, 0) {$\mathrm{Cov}(Y_i, Y_j) \approx 0$};
|
||||||
|
\end{scope}
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{document}
|
Loading…
Reference in New Issue