diff --git a/content/chapters/part1/1.tex b/content/chapters/part1/1.tex index 8ac4a3f..a83f2c5 100644 --- a/content/chapters/part1/1.tex +++ b/content/chapters/part1/1.tex @@ -13,7 +13,7 @@ with $g$ being \subsection{Penalized Regression} -When the number of variables is large, e.g, when the number of explicative variable is above the number of observations, if $p >> n$ ($p$: the number of explicative variable, $n$ is the number of observations), we cannot estimate the parameters. +When the number of variables is large, e.g, when the number of explanatory variable is above the number of observations, if $p >> n$ ($p$: the number of explanatory variable, $n$ is the number of observations), we cannot estimate the parameters. In order to estimate the parameters, we can use penalties (additional terms). Lasso regression, Elastic Net, etc. @@ -21,8 +21,7 @@ Lasso regression, Elastic Net, etc. \subsection{Simple Linear Model} \begin{align*} - \Y &= \X & \beta & + & \varepsilon.\\ - n \times 1 & n \times 2 & 2 \times 1 & + & n \times 1 \\ + \Y &= \X \beta + \varepsilon \\ \begin{pmatrix} Y_1 \\ Y_2 \\ @@ -35,11 +34,11 @@ Lasso regression, Elastic Net, etc. \vdots & \vdots \\ 1 & X_n \end{pmatrix} - & \begin{pmatrix} + \begin{pmatrix} \beta_0 \\ \beta_1 \end{pmatrix} - & + & + + \begin{pmatrix} \varepsilon_1 \\ \varepsilon_2 \\ @@ -61,8 +60,35 @@ Lasso regression, Elastic Net, etc. \item Graphical representation; \item ... \end{enumerate} - - +\[ + Y = X \beta + \varepsilon, +\] +is noted equivalently as +\[ + \begin{pmatrix} + y_1 \\ + y_2 \\ + y_3 \\ + y_4 + \end{pmatrix} + = \begin{pmatrix} + 1 & x_{11} & x_{12} \\ + 1 & x_{21} & x_{22} \\ + 1 & x_{31} & x_{32} \\ + 1 & x_{41} & x_{42} + \end{pmatrix} + \begin{pmatrix} + \beta_0 \\ + \beta_1 \\ + \beta_2 + \end{pmatrix} + + \begin{pmatrix} + \varepsilon_1 \\ + \varepsilon_2 \\ + \varepsilon_3 \\ + \varepsilon_4 + \end{pmatrix}. +\] \section{Parameter Estimation} \subsection{Simple Linear Regression} @@ -87,26 +113,26 @@ We want to minimize the distance between $\X\beta$ and $\Y$: \Rightarrow& \forall i: \\ & \X_i \Y = \X_i X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\ \Rightarrow& \X^\T \Y = \X^\T \X \hat{\beta} \\ - \Rightarrow& {\color{red}(\X^T \X)^{-1}} \X^\T \Y = {\color{red}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\ + \Rightarrow& {\color{gray}(\X^\T \X)^{-1}} \X^\T \Y = {\color{gray}(\X^\T \X)^{-1}} (\X^\T\X) \hat{\beta} \\ \Rightarrow& \hat{\beta} = (X^\T\X)^{-1} \X^\T \Y \end{align*} - -This formula comes from the orthogonal projection of $\Y$ on the subspace define by the explicative variables $\X$ - - - +This formula comes from the orthogonal projection of $\Y$ on the subspace define by the explanatory variables $\X$ $\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$. - - If $H$ is the projection matrix of the subspace generated by $\X$, $X\Y$ is the projection on $\Y$ on this subspace, that corresponds to $\X\hat{\beta}$. - -\section{Coefficient of Determination: $R^2$} +\section{Coefficient of Determination: \texorpdfstring{$R^2$}{R\textsuperscript{2}}} \begin{definition}[$R^2$] \[ 0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1 - \] proportion of variation of $\Y$ explicated by the model. + \] proportion of variation of $\Y$ explained by the model. \end{definition} + +\begin{figure} + \centering + \includestandalone{figures/schemes/orthogonal_projection} + \caption{Orthogonal projection of $\Y$ on plan generated by the base described by $\X$. $\color{blue}a$ corresponds to $\norm{\X\hat{\beta} - \bar{\Y}}^2$ and $\color{blue}b$ corresponds to $\norm{\Y - \hat{\beta}\X}^2$} + \label{fig:scheme-orthogonal-projection} +\end{figure} \ No newline at end of file diff --git a/content/chapters/part1/2.tex b/content/chapters/part1/2.tex index eb391fe..f614b6a 100644 --- a/content/chapters/part1/2.tex +++ b/content/chapters/part1/2.tex @@ -15,15 +15,28 @@ Let $u = \begin{pmatrix} v_n \end{pmatrix}$ -\begin{align*} - \langle u, v\rangle & = \left(u_1, \ldots, u_v\right) \begin{pmatrix} - v_1 \\ - \vdots \\ - v_n - \end{pmatrix} \\ - & = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n -\end{align*} +\begin{definition}[Scalar Product (Dot Product)] + \begin{align*} + \scalar{u, v} & = \begin{pmatrix} + u_1, \ldots, u_v + \end{pmatrix} + \begin{pmatrix} + v_1 \\ + \vdots \\ + v_n + \end{pmatrix} \\ + & = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n + \end{align*} + We may use $\scalar{u, v}$ or $u \cdot v$ notations. +\end{definition} +\paragraph{Dot product properties} +\begin{itemize} + \item $\scalar{u, v} = \scalar{v, u}$ + \item $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$ + \item $\scalar{u, v}$ + \item $\scalar{\vec{u}, \vec{v}} = \norm{\vec{u}} \times \norm{\vec{v}} \times \cos(\widehat{\vec{u}, \vec{v}})$ +\end{itemize} \begin{definition}[Norm] Length of the vector. @@ -41,9 +54,7 @@ Let $u = \begin{pmatrix} \end{definition} \begin{definition}[Orthogonality] - \[ - u \perp v \Leftrightarrow \scalar{u, v} = 0 - \] + \end{definition} \begin{remark} @@ -55,18 +66,17 @@ Let $u = \begin{pmatrix} \] \end{remark} -Scalar product properties: -\begin{itemize} - \item $\scalar{u, v} = \scalar{v, u}$ - \item $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$ - \item $\scalar{u, v}$ - \item $\scalar{\vec{u}, \vec{v}} = \norm{\vec{u}} \times \norm{\vec{v}} \times \cos(\widehat{\vec{u}, \vec{v}})$ -\end{itemize} +\begin{figure} + \centering + \includestandalone{figures/schemes/vector_orthogonality} + \caption{Illustration for the scalar product of two orthogonal vectors.} + \label{fig:scheme-orthogonal-scalar-product} +\end{figure} \begin{align*} \scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\ - & = \norm{v}^2 + \norm{u}^2 \\ - & = -2 \scalar{u, v} + & = \norm{v}^2 + \norm{u}^2 \\ + & = -2 \scalar{u, v} \end{align*} \begin{align*} @@ -74,10 +84,14 @@ Scalar product properties: \norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v} \end{align*} +\begin{proposition}[Scalar product of orthogonal vectors] +\[ + u \perp v \Leftrightarrow \scalar{u, v} = 0 +\] +\end{proposition} -If $u \perp v$, then $\scalar{u, v} = 0$ \begin{proof}[Indeed] - $\norm{u-v}^2 = \norm{u+v}^2$, + $\norm{u-v}^2 = \norm{u+v}^2$, as illustrated in \autoref{fig:scheme-orthogonal-scalar-product}. \begin{align*} \Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\ \Leftrightarrow & 4 \scalar{u, v} = 0 \\ @@ -138,17 +152,17 @@ The scalar product between $z$ and (?) is zero. Then, \begin{align*} Ax & = \begin{pmatrix} - a & b \\ - c & d - \end{pmatrix} + a & b \\ + c & d + \end{pmatrix} \begin{pmatrix} x_1 \\ x_2 \end{pmatrix} \\ - & = \begin{pmatrix} - a x_1 + b_x2 \\ - c x_1 + d x_2 - \end{pmatrix} + & = \begin{pmatrix} + a x_1 + b x_2 \\ + c x_1 + d x_2 + \end{pmatrix} \end{align*} Similarly, @@ -164,9 +178,9 @@ The scalar product between $z$ and (?) is zero. x_3 \\ x_4 \end{pmatrix} - & = \begin{pmatrix} - a x_1 + b x_2 + c x_3 \ldots - \end{pmatrix} + & = \begin{pmatrix} + a x_1 + b x_2 + c x_3 \ldots + \end{pmatrix} \end{align*} \end{example} @@ -182,31 +196,8 @@ The number of columns has to be the same as the dimension of the vector to which \end{pmatrix}$ \end{definition} -\begin{example} - \begin{align*} - Y & = X \beta + \varepsilon \\ - \begin{pmatrix} - y_1 \\ - y_2 \\ - y_3 \\ - y_4 - \end{pmatrix} - & = \begin{pmatrix} - 1 & x_{11} & x_{12} \\ - 1 & x_{21} & x_{22} \\ - 1 & x_{31} & x_{32} \\ - 1 & x_{41} & x_{42} - \end{pmatrix} - \begin{pmatrix} - \beta_0 \\ - \beta_1 \\ - \beta_2 - \end{pmatrix} + - \begin{pmatrix} - \varepsilon_1 \\ - \varepsilon_2 \\ - \varepsilon_3 \\ - \varepsilon_4 - \end{pmatrix} - \end{align*} -\end{example} \ No newline at end of file +\begin{figure} + \centering + \includestandalone{figures/schemes/coordinates_systems} + \caption{Coordinate systems} +\end{figure} diff --git a/figures/schemes/base_plan.pdf b/figures/schemes/base_plan.pdf new file mode 100644 index 0000000..04d165f Binary files /dev/null and b/figures/schemes/base_plan.pdf differ diff --git a/figures/schemes/base_plan.tex b/figures/schemes/base_plan.tex new file mode 100644 index 0000000..52dacda --- /dev/null +++ b/figures/schemes/base_plan.tex @@ -0,0 +1,16 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{tkz-euclide} + +\begin{document} +\usetikzlibrary{3d} +\begin{tikzpicture} + \tkzDefPoint(-2,-2){A} + \tkzDefPoint(10:3){B} + \tkzDefShiftPointCoord[B](1:5){C} + \tkzDefShiftPointCoord[A](1:5){D} + \tkzDrawPolygon(A,...,D) + \tkzDrawPoints(A,...,D) + \node at (A) {A}; +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/coordinates_systems.pdf b/figures/schemes/coordinates_systems.pdf index 953e6c4..c015e8a 100644 Binary files a/figures/schemes/coordinates_systems.pdf and b/figures/schemes/coordinates_systems.pdf differ diff --git a/figures/schemes/coordinates_systems.tex b/figures/schemes/coordinates_systems.tex index 2671264..4b59680 100644 --- a/figures/schemes/coordinates_systems.tex +++ b/figures/schemes/coordinates_systems.tex @@ -1,12 +1,23 @@ \documentclass[tikz]{standalone} \usepackage{tikz} -\usepackage{tkz-euclide} \begin{document} -\begin{tikzpicture} - \tkzInit[xmax=5,ymax=5,xmin=-5,ymin=-5] - \tkzGrid - \tkzAxeXY - \draw[thick, latex-latex] (-1,4) -- (4,-6) node[anchor=south west] {$a$}; +\usetikzlibrary{3d} +% 1D axis +\begin{tikzpicture}[->] + \begin{scope}[xshift=0] + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$}; + \end{scope} +% 2D coordinate system + \begin{scope}[xshift=50] + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$}; + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1,angle=90) node[above] {$y$}; + \end{scope} +% 3D coordinate systems +\begin{scope}[xshift=100] + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1) node[right] {$x$}; + \draw (0, 0, 0) -- (xyz cylindrical cs:radius=1,angle=90) node[above] {$y$}; + \draw (0, 0, 0) -- (xyz cylindrical cs:z=1) node[below left] {$z$}; +\end{scope} \end{tikzpicture} \end{document} \ No newline at end of file diff --git a/figures/schemes/orthogonal_projection.pdf b/figures/schemes/orthogonal_projection.pdf new file mode 100644 index 0000000..1259746 Binary files /dev/null and b/figures/schemes/orthogonal_projection.pdf differ diff --git a/figures/schemes/orthogonal_projection.tex b/figures/schemes/orthogonal_projection.tex new file mode 100644 index 0000000..690cc07 --- /dev/null +++ b/figures/schemes/orthogonal_projection.tex @@ -0,0 +1,32 @@ +% ref. https://tex.stackexchange.com/a/523362/235607 +\documentclass[tikz]{standalone} +\usepackage{tikz-3dplot} +\usepackage{tkz-euclide} +\usepackage{mathtools} +\begin{document} +\tdplotsetmaincoords{50}{0} +\begin{tikzpicture}[tdplot_main_coords,bullet/.style={circle,inner + sep=1pt,fill=black,fill opacity=1}] + \begin{scope}[canvas is xy plane at z=0] + \tkzDefPoints{-2/-1/A,3/-1/B,4/2/C} + \tkzDefParallelogram(A,B,C) + \tkzGetPoint{D} + \tkzDrawPolygon[fill=gray!25!white](A,B,C,D) + \draw[decorate,decoration={brace, + amplitude=8pt},xshift=0pt,very thin,gray] (2,0) -- ++(-1,-0.5) node [black,midway,xshift=0.5em,yshift=-1em] {\color{blue}$a$}; + \end{scope} + \begin{scope}[canvas is xz plane at y=0] + \draw[thick,fill=white,fill opacity=0.7,nodes={opacity=1}] + (2,0) node[bullet,label=below right:{$\mathbf{X}$}] {} + -- (0,0) node[bullet] {} + -- (0,3) node[bullet,label=above:{$\mathbf{Y}$}] {} -- cycle; + \draw (0.25,0) -- (0.25,0.25) -- (0,0.25); + \draw[decorate,decoration={brace, + amplitude=8pt},xshift=0pt,very thin,gray] (0,0) -- (0,3) node [black,midway,xshift=-1.25em,yshift=0em] {\color{blue}$b$}; + \end{scope} + \begin{scope}[canvas is xy plane at z=0] + \draw[->] (2,0) -- ++(-0.75,0.75) node [left] {$\mathbf{1}$}; + \draw[->] (2,0) -- ++(-1,-0.5); + \end{scope} +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/figures/schemes/vector_orthogonality.pdf b/figures/schemes/vector_orthogonality.pdf new file mode 100644 index 0000000..5ff62b4 Binary files /dev/null and b/figures/schemes/vector_orthogonality.pdf differ diff --git a/figures/schemes/vector_orthogonality.tex b/figures/schemes/vector_orthogonality.tex new file mode 100644 index 0000000..bd96481 --- /dev/null +++ b/figures/schemes/vector_orthogonality.tex @@ -0,0 +1,27 @@ +\documentclass[margin=0.5cm]{standalone} +\usepackage{tikz} +\usepackage{tkz-euclide} +\usepackage{mathtools} + +\begin{document} +\begin{tikzpicture} + \coordinate (A) at (0.5, 1) {}; + \coordinate (B) at (-0.5, -1) {}; + \coordinate (C) at (1.25, -0.70) {}; + \coordinate (0) at (0, 0) {}; + + % left angle + \tkzMarkRightAngle[draw=black,size=0.1](A,0,C); + \draw[lightgray,very thin] (A) -- (C); + % Curly brace annotation for ||u-v|| + \draw[decorate,decoration={brace, + amplitude=10pt},xshift=0pt,yshift=4pt,very thin] (A) -- (C) node [black,midway,xshift=27pt,yshift=0.5em] {$\lVert u-v \rVert$}; + \draw[lightgray,very thin] (B) -- (C); + + % axis lines + \draw[->] (0) -- (A) node[above] {$u$}; + \draw[->] (0) -- (B) node[below] {$-u$}; + \draw[->] (0) -- (C) node[right] {$v$}; + +\end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/main.pdf b/main.pdf index 2a1a123..a0ebaac 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/preamble.tex b/preamble.tex index f2c2d8f..fbe8eca 100644 --- a/preamble.tex +++ b/preamble.tex @@ -1,3 +1,6 @@ \usepackage{pgffor} \usetikzlibrary{math} -\usepackage{standalone} \ No newline at end of file +\usepackage{standalone} +\usepackage{tikz-3dplot} +\usepackage{tkz-euclide} +\usepackage{mathtools} \ No newline at end of file