library(tidyverse)
library(magrittr)
library(glue)
library(stats)
library(tictoc)
library(gridExtra)
library(latex2exp)
library(tictoc)
library(gt)
library(ggforce)
Tenemos una muestra de t multivariadas:
\[ \mathbf{x}_i \overset{\text{iid}} \sim \mathcal{T}_{p,k} \\ \mathbf{x}_i = v^{1/2} \mathbf{z}_i \quad \mathbf{z}_i \overset{\text{iid}} \sim \mathcal{N}_p(\mathbf{0}, \mathbf{I}_p) \quad \frac{k}{v} \sim \chi^2_k \] \[ \mathbb{E}[\overline{\mathbf{x}}] = \mu \\ \mathbb{E}[\mathbf{S}] = \mathrm{Var}[\mathbf{x}] = \mathbb{E}v \cdot \Sigma = \frac{k}{k - 2} \cdot \mathbf{I}_p \quad k > 2 \]
Nos piden calcular, para cada muestra:
\[\begin{align*} & D = || \overline{\mathbf{x}} − μ||^2 \overset{\mathbb{E}} \longrightarrow \mathbf{0} & k > 1 \\ & D_1 = \mathrm{Tr} ( \mathbf{S} \Sigma^{−1} ) \overset{\mathbb{E}} \longrightarrow \frac{k}{k-2} \cdot p & k > 2 \\ & D_2 = \frac{ \mathrm{Tr} ( \mathbf{S} \Sigma^{−1} ) }{ \mathbb{E}[v] } \overset{\mathbb{E}} \longrightarrow p & k > 2 \end{align*}\]
Para cada escenario \((k, n)\), tendremos una matriz \(\mathbf{X} \in \mathbb{R}^{n \mathrm{x} 3}\), con los vectores \(\mathbf{x}_i\) como filas.
Acá, ejemplo con \(n = 10\) y \(k = 2\):
set.seed(12)
X <- mvtnorm::rmvt(10, df = 2, sigma = diag(3))
X
## [,1] [,2] [,3]
## [1,] -1.4180359 1.51055785 -0.9163365
## [2,] -0.5277908 -1.14601218 -0.1562115
## [3,] -0.7222954 -1.43899700 -0.2438519
## [4,] 0.4085875 -0.74241942 -1.2351539
## [5,] -0.7308800 0.01120533 -0.1428973
## [6,] -0.7099990 1.19992315 0.3436754
## [7,] 0.9065953 -0.52450843 0.3999310
## [8,] 1.8145679 0.91485827 -0.2734319
## [9,] -5.0040948 -1.30507268 -0.9718104
## [10,] 0.1017141 0.11309954 0.2808600
Calculamos el promedio de las \(\mathbf{x}_i\) con colMeans
:
colMeans(X)
## [1] -0.5881631 -0.1407366 -0.2915227
Calculamos \(\mathbf{S}\) con cov
:
cov(X)
## [,1] [,2] [,3]
## [1,] 3.3025390 0.5819710 0.4396807
## [2,] 0.5819710 1.1429980 0.1003754
## [3,] 0.4396807 0.1003754 0.3328698
ESCENARIOS <-
cross_df(list(
n = c(10, 20, 50, seq(100, 1000, by = 50)),
k = c(1, 2, 4, 10)
))
ESCENARIOS
SIGMA <- diag(3)
MU <- rep(0, 3)
traza <- function(A) { sum(diag(A)) }
simular <- function() {
ESCENARIOS %>%
mutate(
# X = map2(n, k, ~ mvtnorm::rmvt(n = ..1, df = ..2, sigma = SIGMA)),
X = map2(n, k, mvtnorm::rmvt, sigma = SIGMA),
x_barra = map(X, colMeans),
S = map(X, cov),
D = map_dbl(x_barra, ~ sum((. - MU)^2)),
D_1 = map_dbl(S, ~ traza(. %*% solve(SIGMA))),
# Esperanza_v = map_dbl(k, ~ ifelse(. > 2, ./(. - 2), NA)),
Esperanza_v = ifelse(k > 2, k/(k - 2), NA),
D_2 = D_1/Esperanza_v
) %>%
select(-X, -x_barra, -S)
}
tic()
escenarios <- simular()
toc()
## 0.141 sec elapsed
escenarios
escenarios %<>% mutate_if(is.double, round, 3)
hacer_tabla_D <- function(.escenarios) {
.escenarios %>%
select(n, k, D) %>%
spread(n, D) %>%
gt() %>%
tab_header(
title = "D → 0; si k > 1"
) %>%
tab_spanner(
columns = matches("[0-9]"),
label = "n"
) %>%
fmt_number(
columns = matches("[0-9]"),
decimals = 3
)
}
hacer_tabla_D1 <- function(.escenarios) {
.escenarios %>%
select(n, k, D_1) %>%
spread(n, D_1) %>%
gt() %>%
tab_header(
title = "D1 → (k / (k - 2)) p; si k > 2"
) %>%
tab_spanner(
columns = matches("[0-9]"),
label = "n"
) %>%
fmt_number(
columns = matches("[0-9]"),
decimals = 3
)
}
hacer_tabla_D2 <- function(.escenarios) {
.escenarios %>%
select(n, k, D_2) %>%
spread(n, D_2) %>%
gt() %>%
tab_header(
title = "D2 → p; si k > 2"
) %>%
tab_spanner(
columns = matches("[0-9]"),
label = "n"
) %>%
fmt_number(
columns = matches("[0-9]"),
decimals = 3
) %>%
fmt_missing(
columns = everything(),
missing_text = "-"
)
}
plot_facetado_k_n <- function(.escenarios) {
.escenarios %>%
mutate(k_str = fct_reorder(glue("k = {k}"), k)) %>%
ggplot(aes(x = n)) +
facet_grid(k_str ~ ., scales = "free_y")
}
graficar_D <- function(.escenarios) {
.escenarios %>%
plot_facetado_k_n() +
aes(y = D) +
geom_hline(yintercept = 0, color = "Grey40", linetype = "dotted") +
geom_line(color = "Grey20") +
geom_point() +
labs(
title = TeX("Evolución de $D = || \\bar{x} - \\mu ||^2 \\rightarrow 0$"),
subtitle = "Converge para k > 1"
)
}
graficar_D1 <- function(.escenarios) {
plot_facetado_k_n(.escenarios) +
aes(y = D_1) +
geom_hline(
mapping = aes(yintercept = Esperanza_v * 3),
color = "ForestGreen", linetype = "dashed"
) +
geom_point() +
geom_line(color = "Grey50") +
labs(
title = TeX("Evolución de $D_1 = Tr ( S \\Sigma^{-1} ) \\rightarrow \\left( \\frac{k}{k - 2} \\right) \\cdot p$"),
subtitle = "Converge si k > 2",
y = TeX("$D_1$")
)
}
graficar_D2 <- function(.escenarios) {
.escenarios %>%
filter(!is.na(D_2)) %>%
plot_facetado_k_n() +
aes(y = D_2) +
geom_hline(yintercept = 3, color = "ForestGreen", linetype = "dashed") +
geom_point() +
geom_line(color = "Grey50") +
# geom_segment(aes(xend = n, yend = 3), color = "Red4") +
labs(
title = TeX("Evolución de $D_2 = Tr ( S \\Sigma^{-1} ) / E(v) \\rightarrow p$"),
subtitle = TeX("Sólo definido si k > 2"),
y = TeX("$D_2$")
)
}
hacer_tabla_D(escenarios)
D → 0; si k > 1 | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
k | n | |||||||||||||||||||||
10 | 20 | 50 | 100 | 150 | 200 | 250 | 300 | 350 | 400 | 450 | 500 | 550 | 600 | 650 | 700 | 750 | 800 | 850 | 900 | 950 | 1000 | |
1 | 3.366 | 7.452 | 1,305.005 | 7.187 | 19.073 | 2.594 | 0.361 | 68.846 | 1.097 | 0.804 | 3.298 | 0.696 | 138,001.372 | 0.911 | 2.692 | 0.653 | 2.109 | 54.338 | 8.149 | 1.514 | 1.270 | 3.616 |
2 | 0.843 | 0.402 | 0.221 | 0.590 | 0.046 | 0.106 | 0.022 | 0.026 | 0.035 | 0.050 | 0.038 | 0.015 | 0.010 | 0.056 | 0.005 | 0.223 | 0.036 | 0.024 | 0.002 | 0.012 | 0.078 | 0.007 |
4 | 0.257 | 0.219 | 0.037 | 0.000 | 0.056 | 0.032 | 0.021 | 0.029 | 0.019 | 0.009 | 0.022 | 0.057 | 0.003 | 0.009 | 0.010 | 0.003 | 0.004 | 0.010 | 0.003 | 0.003 | 0.010 | 0.001 |
10 | 0.939 | 0.155 | 0.030 | 0.016 | 0.014 | 0.006 | 0.005 | 0.012 | 0.005 | 0.001 | 0.000 | 0.002 | 0.006 | 0.004 | 0.017 | 0.011 | 0.011 | 0.005 | 0.006 | 0.014 | 0.002 | 0.000 |
graficar_D(escenarios)
\[ \frac{k}{k - 2} \cdot p = \frac{4}{4 - 2} \cdot 3 = 6 \\ \frac{k}{k - 2} \cdot p = \frac{10}{10 - 2} \cdot 3 = 3.75 \]
hacer_tabla_D1(escenarios)
D1 → (k / (k - 2)) p; si k > 2 | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
k | n | |||||||||||||||||||||
10 | 20 | 50 | 100 | 150 | 200 | 250 | 300 | 350 | 400 | 450 | 500 | 550 | 600 | 650 | 700 | 750 | 800 | 850 | 900 | 950 | 1000 | |
1 | 23.753 | 95.266 | 55,857.677 | 296.382 | 3,346.131 | 652.498 | 1,017.422 | 14,150.050 | 724.330 | 138.844 | 1,017.177 | 1,033.436 | 75,949,605.605 | 1,251.231 | 2,787.765 | 634.055 | 1,126.324 | 30,951.765 | 7,469.481 | 1,777.867 | 3,210.761 | 2,689.247 |
2 | 10.332 | 6.325 | 13.785 | 130.084 | 12.875 | 17.515 | 14.361 | 20.054 | 26.693 | 43.589 | 18.840 | 20.651 | 18.073 | 24.850 | 25.980 | 222.668 | 20.551 | 17.277 | 22.421 | 21.234 | 50.643 | 32.950 |
4 | 8.011 | 5.763 | 4.437 | 7.039 | 5.460 | 6.799 | 5.262 | 4.811 | 5.718 | 5.434 | 5.516 | 5.600 | 5.951 | 6.213 | 5.178 | 5.856 | 5.483 | 6.311 | 5.546 | 6.597 | 6.229 | 5.774 |
10 | 2.649 | 3.710 | 2.737 | 3.051 | 3.958 | 3.940 | 3.703 | 3.807 | 3.837 | 3.619 | 3.673 | 3.668 | 4.082 | 3.673 | 3.696 | 3.765 | 3.856 | 3.644 | 3.729 | 3.794 | 3.617 | 3.622 |
graficar_D1(escenarios)
## Warning: Removed 44 rows containing missing values (geom_hline).
hacer_tabla_D2(escenarios)
D2 → p; si k > 2 | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
k | n | |||||||||||||||||||||
10 | 20 | 50 | 100 | 150 | 200 | 250 | 300 | 350 | 400 | 450 | 500 | 550 | 600 | 650 | 700 | 750 | 800 | 850 | 900 | 950 | 1000 | |
1 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
2 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
4 | 4.006 | 2.881 | 2.219 | 3.520 | 2.730 | 3.400 | 2.631 | 2.405 | 2.859 | 2.717 | 2.758 | 2.800 | 2.976 | 3.106 | 2.589 | 2.928 | 2.742 | 3.156 | 2.773 | 3.298 | 3.114 | 2.887 |
10 | 2.119 | 2.968 | 2.190 | 2.441 | 3.166 | 3.152 | 2.963 | 3.046 | 3.070 | 2.895 | 2.938 | 2.934 | 3.265 | 2.939 | 2.957 | 3.012 | 3.085 | 2.915 | 2.983 | 3.035 | 2.894 | 2.898 |
graficar_D2(escenarios)
NITER <- 1000
tic()
simulaciones <-
tibble(
iter = 1:NITER,
simulacion = map(iter, ~ simular())
) %>%
unnest()
toc()
## 70.32 sec elapsed
promedios <-
simulaciones %>%
group_by(n, k, Esperanza_v) %>%
summarise(
count = n(),
D = mean(D),
D_1 = mean(D_1),
D_2 = mean(D_2)
) %>%
mutate_if(is.double, round, 3) %>%
ungroup()
promedios %>% arrange(k, n)
mensajito <- glue("Promedio de {NITER} iteraciones")
nota_al_pie <- function(data) {
tab_source_note(
data = data,
source = mensajito
)
}
caption <- labs(caption = mensajito)
hacer_tabla_D(promedios) %>% nota_al_pie()
D → 0; si k > 1 | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
k | n | |||||||||||||||||||||
10 | 20 | 50 | 100 | 150 | 200 | 250 | 300 | 350 | 400 | 450 | 500 | 550 | 600 | 650 | 700 | 750 | 800 | 850 | 900 | 950 | 1000 | |
1 | 491.023 | 994.800 | 6,274.841 | 19,786.703 | 7,487.349 | 1,860.939 | 165,910.073 | 3,033.474 | 1,024,892.514 | 676.114 | 4,168.928 | 4,952.103 | 1,259.951 | 74,005.663 | 3,049.628 | 74,281.261 | 64,542.197 | 119,486.916 | 4,169.782 | 5,324,136.106 | 5,715.676 | 2,254.718 |
2 | 4.949 | 1.425 | 0.778 | 0.373 | 0.227 | 0.170 | 0.165 | 0.139 | 0.102 | 0.087 | 0.087 | 0.092 | 0.080 | 0.244 | 0.064 | 0.067 | 0.052 | 0.050 | 0.051 | 0.038 | 0.516 | 0.051 |
4 | 0.565 | 0.298 | 0.120 | 0.061 | 0.039 | 0.032 | 0.024 | 0.019 | 0.017 | 0.016 | 0.013 | 0.012 | 0.011 | 0.010 | 0.009 | 0.009 | 0.008 | 0.007 | 0.007 | 0.007 | 0.006 | 0.006 |
10 | 0.364 | 0.188 | 0.078 | 0.036 | 0.025 | 0.018 | 0.015 | 0.013 | 0.011 | 0.010 | 0.008 | 0.008 | 0.007 | 0.006 | 0.006 | 0.006 | 0.005 | 0.005 | 0.005 | 0.004 | 0.004 | 0.004 |
Promedio de 1000 iteraciones |
graficar_D(promedios) + caption
hacer_tabla_D1(promedios) %>% nota_al_pie()
D1 → (k / (k - 2)) p; si k > 2 | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
k | n | |||||||||||||||||||||
10 | 20 | 50 | 100 | 150 | 200 | 250 | 300 | 350 | 400 | 450 | 500 | 550 | 600 | 650 | 700 | 750 | 800 | 850 | 900 | 950 | 1000 | |
1 | 4,960.308 | 19,808.115 | 312,083.129 | 1,980,472.995 | 1,125,722.898 | 367,397.192 | 41,554,256.610 | 907,404.915 | 358,668,330.937 | 265,723.745 | 1,873,563.732 | 2,482,409.616 | 687,073.406 | 44,447,938.034 | 1,992,532.453 | 52,001,253.436 | 48,430,252.109 | 95,599,590.979 | 3,548,918.883 | 4,791,753,639.840 | 5,392,643.344 | 2,257,874.037 |
2 | 47.353 | 29.015 | 38.891 | 37.828 | 34.737 | 34.304 | 42.112 | 41.605 | 35.762 | 34.870 | 37.750 | 46.764 | 43.456 | 148.520 | 40.429 | 47.596 | 40.057 | 39.399 | 43.741 | 35.488 | 487.165 | 50.829 |
4 | 5.948 | 6.012 | 6.052 | 6.019 | 6.025 | 6.044 | 6.009 | 5.925 | 6.009 | 6.006 | 6.005 | 5.979 | 6.014 | 5.981 | 5.974 | 5.989 | 5.999 | 6.006 | 6.002 | 6.009 | 5.984 | 5.980 |
10 | 3.717 | 3.740 | 3.733 | 3.758 | 3.762 | 3.735 | 3.741 | 3.760 | 3.749 | 3.746 | 3.749 | 3.750 | 3.751 | 3.755 | 3.755 | 3.749 | 3.751 | 3.750 | 3.753 | 3.743 | 3.747 | 3.750 |
Promedio de 1000 iteraciones |
graficar_D1(promedios) + caption
## Warning: Removed 44 rows containing missing values (geom_hline).
hacer_tabla_D2(promedios) %>% nota_al_pie()
D2 → p; si k > 2 | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
k | n | |||||||||||||||||||||
10 | 20 | 50 | 100 | 150 | 200 | 250 | 300 | 350 | 400 | 450 | 500 | 550 | 600 | 650 | 700 | 750 | 800 | 850 | 900 | 950 | 1000 | |
1 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
2 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
4 | 2.974 | 3.006 | 3.026 | 3.009 | 3.012 | 3.022 | 3.005 | 2.963 | 3.005 | 3.003 | 3.003 | 2.989 | 3.007 | 2.991 | 2.987 | 2.995 | 3.000 | 3.003 | 3.001 | 3.005 | 2.992 | 2.990 |
10 | 2.974 | 2.992 | 2.986 | 3.006 | 3.009 | 2.988 | 2.993 | 3.008 | 3.000 | 2.996 | 2.999 | 3.000 | 3.000 | 3.004 | 3.004 | 2.999 | 3.001 | 3.000 | 3.003 | 2.995 | 2.998 | 3.000 |
Promedio de 1000 iteraciones |
graficar_D2(promedios) + caption