\chapter{Generalized Linear Model}
\begin{example}
\begin{description}
\item[Ex. 1 - Credit Card Default]
Let $Y_i$ be a binary random variable following a Bernoulli distribution.
\item[Ex. 2 - Horseshoe Crabs]
Let $Y_i$ be the number of satellite males.

$Y_i$ can be described as following a Poisson distribution.
\end{description}
\end{example}
\begin{remark}
A Poisson distribution can be viewed as an approximation of a binomial distribution when $n$ is large and $p$ is small.
\end{remark}
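
This approximation corresponds to the classical limit: if $Y_n$ follows a binomial distribution with parameters $n$ and $p_n$, with $n p_n \to \lambda > 0$, then for every fixed $k$,
\[
\Prob(Y_n = k) \xrightarrow[n \to \infty]{} e^{-\lambda} \frac{\lambda^k}{k!},
\]
which is the probability mass function of a Poisson distribution with parameter $\lambda$.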
We will consider the following relation:
\[
\EE(Y_i) = g^{-1}(X_i \beta),
\]
equivalently:
\[
g(\EE(Y_i)) = X_i \beta.
\]

\begin{itemize}
\item $\beta$ is estimated by maximum likelihood;
\item $g$ is called the link function.
\end{itemize}
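
Two standard examples of link functions, both used later in these notes:
\[
g(\pi) = \log\left(\frac{\pi}{1 - \pi}\right) \quad \text{(logit link, Bernoulli response)},
\qquad
g(\lambda) = \log(\lambda) \quad \text{(log link, Poisson response)}.
\]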

\begin{remark}
In the standard linear model with Gaussian errors, the OLS estimator coincides with the maximum likelihood estimator.
\end{remark}
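
Indeed, with i.i.d.\ errors $\varepsilon_i \sim \mathcal{N}(0, \sigma^2)$, the log-likelihood of the linear model is
\[
\log \mathcal{L}(\beta, \sigma^2) = -\frac{n}{2} \log(2 \pi \sigma^2) - \frac{1}{2 \sigma^2} \sum_{i=1}^{n} (Y_i - X_i \beta)^2,
\]
so maximizing it over $\beta$ amounts to minimizing the residual sum of squares, which is exactly the OLS criterion.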
\section{Logistic Regression}

Writing $\Pi = \Prob(Y = 1 \: | \: X)$, the logit link gives:
\begin{align*}
& \log\left(\frac{\Pi}{1 - \Pi}\right) = \X \beta \\
\Leftrightarrow & e^{\log \frac{\Pi}{1 - \Pi}} = e^{\X \beta} \\
\Leftrightarrow & \frac{\Pi}{1 - \Pi} = e^{\X \beta} \\
\Leftrightarrow & \Pi = (1 - \Pi) e^{\X\beta} \\
\Leftrightarrow & \Pi = e^{\X \beta} - \Pi e^{\X\beta} \\
\Leftrightarrow & \Pi + \Pi e^{\X\beta} = e^{\X \beta} \\
\Leftrightarrow & \Pi (1 + e^{\X\beta}) = e^{\X \beta} \\
\Leftrightarrow & \Pi = \frac{e^{\X\beta}}{1 + e^{\X \beta}}
\end{align*}
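
The inverse link obtained, $g^{-1}(t) = \frac{e^{t}}{1 + e^{t}}$, is the logistic (sigmoid) function: it is increasing and takes values in $(0, 1)$, so the modelled probability $\Pi$ always lies in $(0, 1)$.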
\section{Maximum Likelihood estimator}

The likelihood $L_n(\beta)$ is the probability of observing the data that we actually observed, viewed as a function of $\beta$; in practice we work with its logarithm, the log-likelihood.

We estimate $\beta$ by $\hat{\beta}$ such that $\forall \beta \in \RR[p+1]$:
\[
L_n (\hat{\beta}) \geq L_n (\beta).
\]
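
For instance, in the logistic regression above, a direct computation from the Bernoulli likelihood gives
\[
\log L_n(\beta) = \sum_{i=1}^{n} \left( Y_i X_i \beta - \log\left(1 + e^{X_i \beta}\right) \right),
\]
which has no closed-form maximizer and is therefore maximized numerically.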

These estimators are consistent, but not necessarily unbiased.
\section{Test for each single coordinate}
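
To test $H_0:\: \beta_j = 0$ against $H_1:\: \beta_j \neq 0$, a standard choice is the Wald-type statistic
\[
T_n = \frac{\hat{\beta}_j}{\hat{\sigma}_j} \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \mathcal{N}(0, 1),
\]
where $\hat{\sigma}_j$ denotes the estimated standard error of $\hat{\beta}_j$; $H_0$ is rejected when $|T_n|$ is too large.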
\begin{example}[Payment Default]
Let $Y_i$ be the default indicator for individual $i$.

\[
\log \left(\frac{\Pi (X)}{1 - \Pi (X)}\right) = \beta_0 + \beta_1 \, \text{student} + \beta_2 \, \text{balance} + \beta_3 \, \text{income}
\]

In this example, only $\beta_0$ and $\beta_2$ are significantly different from 0.
\end{example}

\begin{remark}
We do not add an error term $\varepsilon_i$: the model specifies $\log\left(\frac{\Pi (X)}{1 - \Pi (X)}\right)$, which is a transformation of the expectation of $Y$ given $X$, not an individual observation.
\end{remark}
\subsection{Comparison of nested models}

To test $H_0:\: \beta_1 = \ldots = \beta_p = 0$ (all coefficients except the intercept are zero), we use the likelihood ratio test:
\[
T_n = -2 \log (\mathcal{L}^{\texttt{null}}) + 2 \log (\mathcal{L}(\hat{\beta})) \underset{H_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2(p),
\]
where $\mathcal{L}^{\texttt{null}}$ is the maximized likelihood of the intercept-only model.
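
More generally, for two nested models $M_0 \subset M_1$ differing by $q$ parameters, and writing $\mathcal{L}(M_0)$ and $\mathcal{L}(M_1)$ for their maximized likelihoods,
\[
T_n = -2 \log (\mathcal{L}(M_0)) + 2 \log (\mathcal{L}(M_1)) \underset{M_0}{\overunderset{\mathcal{L}}{n \to \infty}{\longrightarrow}} \chi^2(q).
\]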

\begin{remark}[Family of Tests]
\begin{itemize}
\item Wald test: comparison of the estimated values with the values under the null hypothesis;
\item Likelihood ratio test;
\item Score test: based on the slope (derivative) of the log-likelihood at the null value.
\end{itemize}
\end{remark}
\section{Relative risk}

$RR(j)$ is the probability of having the disease conditional on the predictor $X_{i_1}$, over the probability of having the disease conditional on the predictor $X_{i_2}$:
\[
RR(j) = \frac{\Prob(Y_{i_1} = 1 \: | \: X_{i_1})}{\Prob(Y_{i_2} = 1 \: | \: X_{i_2})} = \frac{\EE(Y_{i_1})}{\EE(Y_{i_2})}.
\]

$\pi(X_i)$ is the probability of having the disease given $X_i$.

The relative risk can therefore be written as:
\[
RR(j) = \frac{\pi(X_{i_1})}{\pi(X_{i_2})}.
\]
\section{Odds}

The odds is a quantity providing a measure of the likelihood of a particular outcome:
\[
\mathrm{odds}(X_i) = \frac{\pi(X_i)}{1 - \pi(X_i)}.
\]
Under the logistic model, this gives:
\[
\mathrm{odds}(X_i) = \exp(X_i \beta).
\]
If $Y$ represents the disease, the odds is the ratio of the probability of having the disease to the probability of not having it.
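
For instance, $\pi(X_i) = 0.75$ gives $\mathrm{odds}(X_i) = 0.75 / 0.25 = 3$: the disease is three times as likely as its absence.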
\section{Odds Ratio}

\begin{align*}
OR(j) = \frac{\mathrm{odds}(X_{i_1})}{\mathrm{odds}(X_{i_2})} & = \frac{\frac{\pi(X_{i_1})}{1 - \pi(X_{i_1})}}{\frac{\pi(X_{i_2})}{1 - \pi(X_{i_2})}}
\end{align*}

When $X_{i_1}$ and $X_{i_2}$ differ only in the $j$-th coordinate, by one unit, the OR can be written as:
\[
OR(j) = \exp(\beta_j).
\]

\begin{exercise}
Show that $OR(j) = \exp(\beta_j)$.
\end{exercise}

\begin{align*}
OR(j) & = \frac{\mathrm{odds}(X_{i_1})}{\mathrm{odds}(X_{i_2})} \\
& = \frac{\exp(X_{i_1} \beta)}{\exp(X_{i_2} \beta)}
\end{align*}
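
Since the two covariate vectors differ only in coordinate $j$, by one unit, this ratio simplifies directly:
\[
\frac{\exp(X_{i_1} \beta)}{\exp(X_{i_2} \beta)} = \exp\left((X_{i_1} - X_{i_2}) \beta\right) = \exp(\beta_j).
\]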

Alternatively, one can write the model for each of the two individuals:
\[
\log \left(
\frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})}\right)
= \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)}
\]
Similarly,
\[
\log \left(
\frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right)
= \beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)}
\]
We subtract the two equations:

\begin{align*}
&\log \left(
\frac{\Prob(Y=1 \: |\: X_{i_1})}{1 - \Prob(Y=1 \: |\: X_{i_1})} \right) - \log \left(\frac{\Prob(Y=1 \: |\: X_{i_2})}{1 - \Prob(Y=1 \: |\: X_{i_2})}\right) = \log OR(j) \\
& = \beta_0 + \beta_1 X_1^{(1)} + \beta_2 X_2^{(1)} + \ldots + \beta_p X_p^{(1)} - \left( \beta_0 + \beta_1 X_1^{(2)} + \beta_2 X_2^{(2)} + \ldots + \beta_p X_p^{(2)} \right) \\
& = \cancel{(\beta_0 - \beta_0)} + \beta_1 \cancel{(X_1^{(1)} - X_1^{(2)})} + \beta_2 \cancel{(X_2^{(1)} - X_2^{(2)})} + \ldots + \beta_j \cancelto{1}{(X_j^{(1)} - X_j^{(2)})} + \ldots + \beta_p \cancel{(X_p^{(1)} - X_p^{(2)})} \\
&\Leftrightarrow \log (OR(j)) = \beta_j \\
&\Leftrightarrow OR(j) = \exp(\beta_j)
\end{align*}

The OR is not equal to the RR in general; the two are close when the probability of disease is small, since then $1 - \pi(X) \approx 1$.

If the OR is significantly different from 1, then $\exp(\beta_j)$ is significantly different from 1, and thus $\beta_j$ is significantly different from 0.

If a predictor has more than two classes, the difference $X_{i_1} - X_{i_2}$ no longer has a clear meaning. We then choose a reference class and successively compare each class with this reference class.

$\hat{\pi}(X_{+})$ is the estimated probability $\Prob(Y = 1 \: | \: X = X_{+})$ for a new individual with covariates $X_{+}$.
\section{Poisson model}

Let $Y_{i} \sim \mathcal{P}(\lambda_{i})$, corresponding to a count variable.

\begin{align*}
& \EE(Y_{i}) = g^{-1}(X_{i} \beta) \\
\Leftrightarrow & g(\EE(Y_{i})) = X_{i} \beta
\end{align*}
where $g(x) = \ln(x)$ and $g^{-1}(x) = e^{x}$.

For a Poisson distribution, the mean and the variance coincide:
\[
\lambda_{i} = \EE(Y_{i}) = \Var(Y_{i}).
\]
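
As in the logistic case, $\beta$ is estimated by maximum likelihood; writing $\lambda_i = e^{X_i \beta}$, the Poisson log-likelihood is
\[
\log L_n(\beta) = \sum_{i=1}^{n} \left( Y_i X_i \beta - e^{X_i \beta} - \log(Y_i!) \right),
\]
and it is again maximized numerically.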