---
title: "C25-26 Important Random Variables"
author: "David Housman"
format: docx
editor: source
fig-width: 6
fig-height: 2.4
---

```{r}
#| message: false
#| warning: false
#| echo: false
library(tidyverse)
```

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
knitr::opts_chunk$set(eval = FALSE)
```

| Question                 | Discrete                | Continuous              |
|--------------------------|-------------------------|-------------------------|
|                          |                         |                         |
| Number from a set        | DiscreteUniform$(a,b)$  | Uniform$(a,b)$          |
|                          |                         |                         |
| Amount of something      | Binomial$(n,p)$         | Normal$(\mu,\sigma)$    |
|                          | Poisson$(\lambda)$      | LogNormal$(\mu,\sigma)$ |
|                          |                         |                         |
| Interval until something | Geometric$(\lambda)$    | Exponential$(\lambda)$  |
|                          | NegativeBinomial$(r,p)$ | Gamma$(\alpha,\beta)$   |
|                          |                         | Weibull$(\alpha,\beta)$ |

For each random variable, there is a description, pdf formula, mean formula, standard deviation formula, histogram of 1000 randomly generated numbers on which is superimposed the theoretical pdf, and probabilities of being within one to three standard deviations of the mean (based on the simulation and theory).

```{r info_discrete}
#| eval: true
plot_discrete = function(x, pdf, mu, sigma, data, title='', xlab = 'x') {
  n - max(x) - min(x) + 1
  domain = c(min(x)-0.51, max(x)+0.51)
  dist = tibble(x = x, pdf = pdf)
  marks = tibble(
    sds = -3:3,
    x = mu + sds*sigma,
    lbl = c("µ-3σ","µ-2σ","µ-σ","µ","µ+σ","µ+2σ","µ+3σ"))
  plt = ggplot(tibble(x = data), aes(x = x, y = after_stat(count/sum(count)))) + 
    geom_histogram(binwidth = 1, color = "white") +
    geom_col(aes(x = x, y = pdf), dist, width = 0.1, fill = "red") +
    geom_label(aes(x = x, y = 0, label = lbl), marks, vjust = 0, color = "black") +
    scale_x_continuous(limits = domain, n.breaks = n) +
    ggtitle(title) +
    xlab(xlab) +
    ylab("Probability")
  suppressWarnings(print(plt))
}
coverage_discrete  = function(x, pdf, mu, sigma, data) {
  tibble(
    Interval = c("|X-µ| < σ", "|X - µ| < 2σ", "|X-µ| < 3σ"),
    Theory = map_vec(1:3, function(k) sum(pdf[mu-k*sigma <= x & x <= mu+k*sigma])),
    Data  = map_vec(1:3, function(k) sum(mu-k*sigma <= data & data <= mu+k*sigma))/length(data)
  )
}
```

```{r info_continuous}
#| eval: true
plot_continuous = function(pdf, mu, sigma, data, title='', xlab = 'x') {
  dx = 0.02*(max(data) - min(data))
  domain = c(min(data) - dx, max(data) + dx)
  marks = tibble(
    sds = -3:3,
    x = mu + sds*sigma,
    lbl = c("µ-3σ","µ-2σ","µ-σ","µ","µ+σ","µ+2σ","µ+3σ"))
  #plt = ggplot(tibble(x = data), aes(x = x, y = after_stat(density))) + 
  #  geom_histogram(bins = floor(length(data) / 30), color = "white") +
  plt = ggplot(tibble(x = data), aes(x)) +
    geom_density(fill = "gray") +
    #geom_function(aes(y = NULL), fun = pdf, linewidth = 2, color = "red") +
    geom_function(fun = pdf, size = 1.5, color = "red") +
    geom_label(aes(x = x, y = 0, label = lbl), marks, vjust = 0, color = "black") +
    scale_x_continuous(limits = domain) +
    ggtitle(title) +
    xlab(xlab) +
    ylab("Density")
  suppressWarnings(print(plt))
}
coverage_continuous  = function(cdf, mu, sigma, data) {
  tibble(
    Interval = c("|X-µ| < σ", "|X - µ| < 2σ", "|X-µ| < 3σ"),
    #Theory = map_vec(1:3, function(k) integrate(pdf, mu-k*sigma, mu+k*sigma)$value),
    Theory = map_vec(1:3, function(k) cdf(mu+k*sigma) - cdf(mu-k*sigma)),
    Data  = map_vec(1:3, function(k) sum(mu-k*sigma <= data & data <= mu+k*sigma))/length(data)
  )
}
```

## DiscreteUniform(a,b)

$\text{DiscreteUniform}(a,b)$ is the number drawn from the set $\{a,a+1,\ldots,b\}$ where each number is equally probable. The parameters $a$ and $b$ are integers satisfying $a \leq b$. The probability distribution function is $f(x) = 1/(b-a+1),\ x = a, a+1, \ldots, b$; the mean is $\mu = (a+b)/2$; and the standard deviation is $\sigma = \sqrt{(b-a+2)(b-a)/12}$.

The pdf is flat between $a$ and $b$. Increasing either parameter increases the center linearly, decreasing $a$ and increasing $b$ increases the spread linearly.

```{r discrete_uniform}
#| echo: true
a = 1; b = 6; n = b - a + 1
x = seq(a, b)
pdf = rep(1/n, n)
mu = (a+b)/2
sigma = sqrt((b-a+2)*(b-a)/12)
data = sample(x, 1000, replace=TRUE)
title = "Six-Sided Die Roll"
xlab = "Face"
plot_discrete(x, pdf, mu, sigma, data, title, xlab)
coverage_discrete(x, pdf, mu, sigma, data)
```

## Binomial(n,p)

$\text{Binomial}(n,p)$ is the number of heads when a coin is flipped $n$ times where the probability of heads on each flip is $p$. The parameter $n$ is a positive integer, and the parameter $p$ is a real number satisfying $0 \leq p \leq 1$. The probability distribution function is $f(x) = \binom{n}{x}p^x(1-p)^{n-x}, x = 0, 1, \ldots, n$; the mean is $\mu = np$; and the standard deviation is $\sigma = \sqrt{np(1-p)}$.

The pdf is unimodal on the integers between 0 and $n$. Increasing $n$ increases the distribution range linearly and the standard deviation in a square root fashion. For $p=1/2$, the distribution is symmetric with maximal standard deviation. As $p$ approaches 0, the mode approaches 0 and the distribution becomes increasingly skewed right with a decreasing standard deviation. As $p$ approaches 1, the mode approaches $n$ and the distribution becomes increasingly skewed left with a decreasing standard deviation.

```{r binomial}
n = 20; p = 0.67
data = rbinom(1000, n, p)
x = seq(min(data),max(data))
pdf = dbinom(x, n, p)
mu = n*p
sigma = sqrt(n*p*(1-p))
title = "Side Flips"
xlab = "Heads"
plot_discrete(x, pdf, mu, sigma, data, title, xlab)
coverage_discrete(x, pdf, mu, sigma, data)
```

## Poisson(lambda)

$\text{Poisson}(\lambda)$ is the number of events that occur in a unit interval if $\lambda$ events occur on average in a unit interval, in small enough intervals the probability of two or more events is negligible, and the number of events in any two non-overlapping intervals are independent. The parameter $\lambda$ is a positive real number. The probability distribution function is $f(x) = e^{-\lambda}\lambda^x/x!,\ x = 0, 1, 2, \ldots$; the mean is $\mu = \lambda$; and the standard deviation is $\sigma = \sqrt{\lambda}$.

The pdf is unimodal and skewed right on the nonnegative integers. Increasing $\lambda$ increases the center linearly and increases the spread in a square root fashion.

```{r poisson}
lambda = 3.87
data = rpois(1000, lambda)
x = seq(min(data),max(data))
pdf = dpois(x, lambda)
mu = lambda
sigma = sqrt(lambda)
title = "Radioactive Substance"
xlab = "Decays"
plot_discrete(x, pdf, mu, sigma, data, title, xlab)
coverage_discrete(x, pdf, mu, sigma, data)
```

## Geometric(p)

$\text{Geometric}(p)$ is the flip when the first heads appears, where the probability of heads on each flip is $p$. Note that R defines `geom` to be the number of tails before the first heads. The parameter $p$ is a real number satisfying $0 < p \leq 1$. The probability distribution function is $f(x) = p(1-p)^{x-1},\ x = 1, 2, 3, \ldots$; the mean is $\mu = 1/p$; and the standard deviation is $\sigma = \sqrt{1-p}/p$.

The distribution is strictly decreasing and skewed right on positive integers. Increasing $p$ increases the skewness and decreases the center and spread until the distribution is concentrated exclusively at 1. Decreasing $p$ decreases the skewness and increases the center and spread until the distribution is almost flat.

```{r geometric}
p = 0.67
data = rgeom(10000, p) + 1
x = seq(min(data),max(data))
pdf = dgeom(x-1, p)
mu = 1/p
sigma = sqrt(1-p)/p
title = "Side Flips"
xlab = "Flip First Heads Appears"
plot_discrete(x, pdf, mu, sigma, data, title, xlab)
coverage_discrete(x, pdf, mu, sigma, data)
```

## NegativeBinomial(r,p)

$\text{NegativeBinomial}(r,p)$ is the number of flips when the $r$th head appears, where the probability of heads on each flip is $p$. Note that R defines `nbinom` to be the number of flips before the $r$th heads appears. The parameter $r$ is a positive integer (positive real in R), and the parameter $p$ is a real number satisfying $0 < p \leq 1$. The probability distribution function is $f(x) = \binom{x+r-1}{r-1}p^r(1-p)^{x-1},\ x = 1, 2, 3, \ldots$; the mean is $\mu = r/p$; and the standard deviation is $\sigma = \sqrt{r(1-p)}/p$.

The pdf is unimodal and skewed right on integers starting at $r$. Increasing $p$ increases the skewness and decreases the center and spread until the distribution is concentrated exclusively at $r$. Decreasing $p$ decreases the skewness and increases the center and spread until the distribution is almost flat. Increasing $r$ increases the minimum value, mode, and mean linearly and increases the standard deviation in a square root fashion.

```{r negative binomial}
r = 4; p = 0.67
data = rnbinom(1000, r, p) + 1
x = seq(min(data),max(data))
pdf = dnbinom(x-1, r, p)
mu = r*(1-p)/p
sigma = sqrt(r*(1-p))/p
title = "Side Flips"
xlab = "Flip Fourth Heads Appears"
plot_discrete(x, pdf, mu, sigma, data, title, xlab)
coverage_discrete(x, pdf, mu, sigma, data)
```

## Uniform(a,b)

$\text{Uniform}(a,b)$ is the number drawn from the interval $[a,b)$ where each number is equally probable. The parameters $a$ and $b$ are real numbers satisfying $a < b$. The probability density function is $f(x) = 1/(b-a),\ a \leq x < b$; the mean is $\mu = (a+b)/2$; and the standard deviation is $\sigma = (b-a)/\sqrt{12}$.

```{r uniform}
#| echo: true
#| eval: true
a = 0; b = 360
data = runif(1000, a, b)
pdf = function(x) dunif(x, a, b)
cdf = function(x) punif(x, a, b)
mu = (a+b)/2
sigma = (b-a)/sqrt(12)
title = "Spinner"
xlab = "Angle (degrees)"
plot_continuous(pdf, mu, sigma, data, title, xlab)
coverage_continuous(cdf, mu, sigma, data)
```

## Normal(mu, sigma)

$\text{Normal}(\mu,\sigma)$ is the real number drawn from a bell-shaped distribution with mean $\mu$ and standard deviaion $\sigma$. It is also the sum of a large number of not too dissimilar independent random variables. The parameter $\mu$ is any real number and the parameter $\sigma$ is a positive real number. The probability density function is $f(x) = \frac{1}{\sqrt{2\pi}\sigma}e^{-(x-\mu)^2/(2\sigma^2)},\ -\infty < x < \infty$; the mean is $\mu$; and the standard deviation is $\sigma$.

```{r normal}
mu = 500; sigma = 100
data = rnorm(1000, mu, sigma)
pdf = function(x) dnorm(x, mu, sigma)
cdf = function(x) pnorm(x, mu, sigma)
title = "Math SAT"
xlab = "Score"
plot_continuous(pdf, mu, sigma, data, title, xlab)
coverage_continuous(cdf, mu, sigma, data)
```

## LogNormal(mu, sigma)

$\text{LogNormal}(\mu,\sigma) = e^{\text{Normal}(\mu,\sigma)}$. It is also the product of a large number of not too dissimilar independent random variables. The parameter $\mu$ is any real number and the parameter $\sigma$ is a positive real number. The probability density function is $f(x) = \frac{1}{\sqrt{2\pi}\sigma x}e^{-(\log(x)-\mu)^2/(2\sigma^2)},\ 0 < x < \infty$; the mean is $e^{\mu+\sigma^2/2}$; and the standard deviation is $\sqrt{e^{2\mu+\sigma^2}(e^{\sigma^2}-1)}$.

```{r lognormal}
lmu = 0; lsigma = 1
data = rlnorm(1000, lmu, lsigma)
pdf = function(x) dlnorm(x, lmu, lsigma)
cdf = function(x) plnorm(x, lmu, lsigma)
mu = exp(lmu+lsigma^2/2)
sigma = sqrt(exp(2*lmu+lsigma^2)*(exp(lsigma^2)-1))
title = "Resource"
xlab = "Amount"
plot_continuous(pdf, mu, sigma, data, title, xlab)
coverage_continuous(cdf, mu, sigma, data)
```

## Exponential(lambda)

$\text{Exponential}(\lambda)$ is the interval to an event if $\lambda$ events occur on average in a unit interval, in small enough intervals the probability of two or more events is negligible, and the number of events in any two non-overlapping intervals are independent. The parameter $\lambda$ is a positive real number. The probability density function is $f(x) = \lambda e^{-\lambda x},\ 0 \leq x < \infty$; the mean is $\mu = 1/\lambda$; and the standard deviation is $\sigma = 1/\lambda$.

```{r exponential}
lambda = 3.87/7.5
data = rexp(1000, lambda)
pdf = function(x) dexp(x, lambda)
cdf = function(x) pexp(x, lambda)
mu = 1/lambda
sigma = 1/lambda
title = "Radioactive Substance"
xlab = "Time to Decay (sec)"
plot_continuous(pdf, mu, sigma, data, title, xlab)
coverage_continuous(cdf, mu, sigma, data)
```

## Gamma(alpha, beta)

$\text{Gamma}(\alpha, \beta)$ is the interval to the $\alpha$th occurence of an event if $\lambda = 1/\beta$ events occur on average in a unit interval, in small enough intervals the probability of two or more events is negligible, and the number of events in any two non-overlapping intervals are independent. The parameters $\alpha$ and $\beta$ are positive real numbers. The probability density function is $f(x) = \frac{1}{\Gamma(\alpha)\beta^{\alpha}}x^{\alpha-1} e^{-x/\beta},\ 0 \leq x < \infty$; $\Gamma(\alpha) = \int_0^{\infty}t^{\alpha-1}e^{-t}dt$; the mean is $\mu = \alpha\beta$; and the standard deviation is $\sigma = \sqrt{\alpha}\beta$.

```{r gamma}
alpha = 2; lambda = 3.87/7.5; beta = 1/lambda
data = rgamma(1000, alpha, lambda)
pdf = function(x) dgamma(x, alpha, lambda)
cdf = function(x) pgamma(x, alpha, lambda)
mu = alpha*beta
sigma = sqrt(alpha)*beta
title = "Radioactive Substance"
xlab = "Time to Second Decay (sec)"
plot_continuous(pdf, mu, sigma, data, title, xlab)
coverage_continuous(cdf, mu, sigma, data)
```

## Weibull(alpha, beta)

$\text{Weibull}(\alpha,\beta)$ is the time to failure if the failure rate is $\lambda(x) = \alpha x^{\alpha-1}/\beta^{\alpha}$. The parameters $\alpha$ and $\beta$ are positive real numbers. The failure rate at time $x$ is the conditional density of failure at $x$ given the lifetime is at least $x$: $\lambda(x) = \frac{f(x)}{1-F(x)}$ implies (via integration) $\int_0^x\lambda(t)dt = -\log(1-F(x))$ implies $F(x) = 1 - e^{-\int_0^x\lambda(t)dt}$ implies $f(x) = e^{-\int_0^x\lambda(t)dt}\lambda(x)$. For most components, the failure rate should be increasing, possibly at an increasing rate. The Weibull distribution models the failure rate with the power function stated earlier, and in this case, the failure rate is increasing if $\alpha > 1$, is constant if $\alpha = 1$, and is decreasing if $0 < \alpha \ 1$. The probability density function is $f(x) = \frac{\alpha x^{\alpha-1}}{\beta^\alpha} e^{-(x/\beta)^{\alpha}},\ 0 \leq x < \infty$; the mean is $\mu = \beta\ \Gamma(1/\alpha+1)$; and the standard deviation is $\sigma = \beta\sqrt{\Gamma(2/\alpha+1)-\Gamma(1/\alpha+1)^2}$.

```{r weibull}
alpha = 2; beta = 5
data = rweibull(1000, alpha, beta)
pdf = function(x) dweibull(x, alpha, beta)
cdf = function(x) pweibull(x, alpha, beta)
mu = beta*gamma(1/alpha+1)
sigma = beta*sqrt(gamma(2/alpha+1)-gamma(1/alpha+1)^2)
title = "Component"
xlab = "Time to Failure (years)"
plot_continuous(pdf, mu, sigma, data, title, xlab)
coverage_continuous(cdf, mu, sigma, data)
```