% multivariate-statistics/content/chapters/part1/2.tex

\chapter{Generalized Linear Model}

\begin{example}

  \begin{description}
    \item[Ex. 1 - Credit Card Default]
      Let $Y_i$ be a boolean random variable following a Bernoulli distribution.
    \item[Ex. 2 - Horseshoe Crabs]
      Let $Y_i$ be the number of satellite males.

      $Y_i$ can be described as following a Poisson distribution.
  \end{description}
\end{example}

\begin{remark}
  A Poisson distribution can be viewed as an approximation of the binomial distribution when $n$ is large and $p$ is small.
\end{remark}


We will consider the following relation:
\[
  \EE(Y_i) = g^{-1}(X_i \beta),
\]
equivalently:
\[
  g(\EE(Y_i)) = X_i \beta.
\]

\begin{itemize}
  \item $\beta$ is estimated by the maximum likelihood;
  \item $g$ is called the link function.
\end{itemize}

\begin{remark}
  In the standard linear model, the OLS estimator is the maximum likelihood estimator.
\end{remark}

\section{Logistic Regression}

\begin{align*}
                  & \log\left(\frac{\Pi}{1 - \Pi}\right) = \X \beta          \\
  \Leftrightarrow & e^{\log \frac{\Pi}{1 - \Pi}} =  e^{\X \beta}             \\
  \Leftrightarrow & \frac{\Pi}{1 - \Pi} = e^{\X \beta}                       \\
  \Leftrightarrow & \Pi = (1 - \Pi) e^{\X\beta}                              \\
  \Leftrightarrow & \Pi = e^{\X \beta} - \Pi e^{\X\beta}                     \\
  \Leftrightarrow & \Pi + \Pi e^{\X\beta} = e^{\X \beta}                     \\
  \Leftrightarrow & \Pi (1 + e^{\X\beta}) = e^{\X \beta}                     \\
  \Leftrightarrow & \Pi = \frac{e^{\X\beta}}{1 + e^{\X \beta}}
\end{align*}


\section{Maximum Likelihood estimator}

Likelihood: the probability of observing the data we actually observed, seen as a function of $\beta$; the log-likelihood is its logarithm.

Estimate $\beta$ by $\hat{\beta}$ such that $\forall \beta \in \RR[p+1]$:
\[
  L_n (\hat{\beta}) \geq L_n (\beta)
\]

These estimators are consistent, but not necessarily unbiased.


\section{Test for each single coordinate}


\begin{example}[Payment Default]
  Let $Y_i$ be the default value for individual $i$.

  \[
    \log (\frac{\Pi (X)}{1 - \Pi (X)}) = \beta_0 + \beta_1 \text{student} + \beta_2 \text{balance} + \beta_3 \text{income}
  \]

  In this example, only $\beta_0$ and $\beta_2$ are significantly different from 0.
\end{example}

\begin{remark}
  We do not add $\varepsilon_i$, because $\log(\frac{\Pi (X)}{1 - \Pi (X)})$ is a function of the expectation $\Pi(X) = \EE(Y \: | \: X)$, not of the observation $Y$ itself.
\end{remark}

\subsection{Comparison of nested models}

To test $H_0:\: \beta_1 = \ldots = \beta_p = 0$ (the null model keeps only the intercept $\beta_0$), we use the likelihood ratio test:
\[
  T_n = -2 \log (\mathcal{L}^{\texttt{null}}) + 2 \log (\mathcal{L}(\hat{\beta})) \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2(p).
\]

\begin{remark}[Family of Tests]
  \begin{itemize}
    \item Wald test: comparison of the estimated values with the values under the null hypothesis;
    \item Likelihood ratio test;
    \item Score test: based on the slope (derivative) of the log-likelihood at the value under the null hypothesis.
  \end{itemize}
\end{remark}

\section{Relative risk}

$RR(j)$ is the probability of having the disease, conditional on the predictor $X_{i_1}$, over the probability of having the disease, conditional on the predictor $X_{i_2}$.

\[
  RR(j) = \frac{\Prob(Y_{i_1} = 1 \: | \: X_{i_1})}{\Prob(Y_{i_2} = 1 \: | \: X_{i_2})} = \frac{\EE(Y_{i_1})}{\EE(Y_{i_2})}.
\]

$\pi(X_i)$ is the probability of having the disease, according to $X_i$.

The relative risk can be written as\dots

\section{Odds}

Quantity providing a measure of the likelihood of a particular outcome:
\[
  odds = \frac{\pi(X_i)}{1 - \pi(X_i)}
\]

\[
  odds = \exp(X_i \beta)
\]
The odds are the ratio of the proportion of people having the disease (if $Y$ represents the disease) to the proportion of people not having the disease.

\section{Odds Ratio}

\begin{align*}
  OR(j) =\frac{odds(X_{i_1})}{odds(X_{i_2})} & =  \frac{\frac{\pi(X_{i_1})}{1 - \pi(X_{i_1})}}{\frac{\pi(X_{i_2})}{1 - \pi(X_{i_2})}}
\end{align*}

The OR can be written as:
\[
  OR(j) = \exp(\beta_j)
\]

\begin{exercise}
  Show that $OR(j) = \exp(\beta_j)$.
\end{exercise}

\begin{align*}
  OR(j) & = \frac{odds(X_{i_1})}{odds(X_{i_2})}             \\
        & = \frac{\exp(X_{i_1} \beta)}{\exp(X_{i_2} \beta)}
\end{align*}

  \[
    \log \left(
    \frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})}\right)
    = \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)}
\]
    Similarly
\[
    \log \left(
      \frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right)
    = \beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)}
\]
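  We subtract the second equation from the first, assuming that the two individuals differ only by one unit in the $j$-th predictor: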

    \begin{align*}
      &\log \left(
        \frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})} \right) - \log \left(\frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right) \\
      & = \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)} - \left(\beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)}\right) \\
      & = \log OR(j)  \\
      & = \cancel{(\beta_0 - \beta_0)} + \beta_1 \cancel{(X_1^{(1)} - X_1^{(2)})} + \beta_2 \cancel{(X_2^{(1)} - X_2^{(2)})} + \ldots + \beta_j \cancelto{1}{(X_j^{(1)} - X_j^{(2)})} + \ldots + \beta_p \cancel{(X_p^{(1)} - X_p^{(2)})} \\
      &\Leftrightarrow \log (OR(j)) = \beta_j \\
      &\Leftrightarrow OR(j) = \exp(\beta_j)
    \end{align*}

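OR is not equal to RR in general; the two are approximately equal in the particular case where the probabilities are small (rare event), since then $1 - \pi(X_i) \approx 1$.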

If OR is significantly different from 1, then $\exp(\beta_j)$ is significantly different from 1, thus $\beta_j$ is significantly different from 0.

If a predictor has more than two classes, we do not know what $X_{i_1} - X_{i_2} = 0$ means. We then have to choose a reference class and compare each class successively with the reference class.

$\hat{\pi}(X_{+}) = \hat{\Prob}(Y=1 \: | \: X_{+})$ for a new individual.


\section{Poisson model}

Let $Y_{i} \sim \mathcal{P}(\lambda_{i})$, corresponding to count data.

\begin{align*}
	                & \EE(Y_{i}) = g^{-1}(X_{i} \beta) \\
	\Leftrightarrow & g(\EE(Y_{i})) = X_{i} \beta
\end{align*}

where $g(x) = \ln(x)$, and $g^{-1}(x) = e^{x}$.

\[
	\lambda_{i} = \EE(Y_{i}) = \Var(Y_{i})
\]
% Revision history (recovered from stray version-control blame output that
% had been pasted here together with a backtick-quoted duplicate of this chapter):
%   2023-09-22 17:32:56 +02:00  cm1: Base introduction and some elements of linear algebra
%   2023-10-13 13:19:12 +02:00  Moved part on linear algebra; Add stuff on model validity
%   2023-11-10 13:39:13 +01:00  feat: Add some stuff on generalized linear models