---
title: "Simulation"
output:
html_document:
toc: true
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r, include=FALSE}
# packages used
listofpackages <- c("dygraphs")
for (j in listofpackages){
if(sum(installed.packages()[, 1] == j) == 0) {
install.packages(j)
}
library(j, character.only = T)
}
# setting the seed for replication
set.seed(77)
```
## Monte Carlo simulations and bootstrapping
To run this script, we first need to load the data that we prepared in the previous class.
```{r}
# this chunk is dedicated to the loading of the necessary data
data01 <- read.table("data/data01.csv", stringsAsFactors = F, header = T, sep = ",")
```
The first step is to see how the realized cumulative returns performed. Let us consider the arc of time from the beginning of 1950 to the end of 2015.
```{r}
TT <- 12*(2015 - 1950 + 1)
data02 <- data.frame(array(1, TT))
colnames(data02) <- c("port_c")
data02[, 2] <- subset(data01, select = ret_m_c, date >= 1950 & date < 2016) # the subset command is tricky, as it passes also the name of the column
# to calculate the cumulative return, we can a loop
for (i in 2:TT){
data02$port_c[i] <- data02$port_c[i-1] * (1+data02$ret_m_c[i])
}
dygraph(ts(data02$port_c, start = c(1950, 1), end = c(2015, 12), frequency = 12))
```
Now we will simulate the series. This permits to see how (im)plausible the assumptions of the absence of serial correlation and of return normality are.
```{r}
# parameter estimation and choice of the number of replications in the simulation
vol <- sd(data02$ret_m_c)
alpha <- mean(data02$ret_m_c)
nrep <- 10^3
# here I create the containers to be filled with the generated data.
y_bt <- y_mc <- array(1, c(TT, nrep))
x_bt <- x_mc <- array(alpha, c(TT, nrep))
# now, the loop
for (i in 1:nrep){
u <- rnorm(TT-1)
res <- sample(data02$ret_m_c[-1], replace = T) # this (re)samples from the data
x_mc[-1, i] <- alpha + vol * u # the Monte Carlo way
x_bt[-1, i] <- res # the bootstrap way
# now we simply construct and store the bootstrapped and MC cumulative returns
for (j in 2:TT){
y_mc[j, i] <- y_mc[j-1, i] * (1 + x_mc[j, i])
y_bt[j, i] <- y_bt[j-1, i] * (1 + x_bt[j, i])
}
}
# now we want to construct the series of means and quantiles of the resulting collection of drawn series
for (i in 1:TT){
# obtaining the means
data02$y_bt_mean[i] <- mean(y_bt[i, ])
data02$x_bt_mean[i] <- mean(x_bt[i, ])
data02$y_mc_mean[i] <- mean(y_mc[i, ])
data02$x_mc_mean[i] <- mean(x_mc[i, ])
# and the quantiles
data02$y_bt_q05[i] <- quantile(y_bt[i, ], 0.05)
data02$x_bt_q05[i] <- quantile(x_bt[i, ], 0.05)
data02$y_mc_q05[i] <- quantile(y_mc[i, ], 0.05)
data02$x_mc_q05[i] <- quantile(x_mc[i, ], 0.05)
data02$y_bt_q95[i] <- quantile(y_bt[i, ], 0.95)
data02$x_bt_q95[i] <- quantile(x_bt[i, ], 0.95)
data02$y_mc_q95[i] <- quantile(y_mc[i, ], 0.95)
data02$x_mc_q95[i] <- quantile(x_mc[i, ], 0.95)
}
```
Now we can finally see what we have got.
```{r}
# some default plotting
plot(data02$port_c, x = seq(from = 1950, to = 2016-1/12, length.out = TT),
main = "Cumulative return: Monte Carlo", type = "l", col = "blue",
ylab = "return", xlab = "time", ylim = c(0, 6000))
lines(data02$y_mc_mean, x = seq(from = 1950, to = 2016-1/12, length.out = TT),
col = "green", lwd = 1.6)
lines(data02$y_mc_q05, x = seq(from = 1950, to = 2016-1/12, length.out = TT),
col = "grey", lty = 2)
lines(data02$y_mc_q95, x = seq(from = 1950, to = 2016-1/12, length.out = TT),
col = "grey", lty = 2)
# for those who like to loose some time in fancy plotting
dygraph(ts(data02[, c("port_c", "y_bt_mean", "y_bt_q05", "y_bt_q95")],
start = c(1950, 1), end = c(2015, 12), frequency = 12),
main = "Cumulative return") %>%
dySeries(c("y_bt_q05", "y_bt_mean", "y_bt_q95"), label = "bootstrapped return") %>%
dySeries("port_c", label = "realised return")
dygraph(ts(data02[, c("port_c", "y_mc_mean", "y_mc_q05", "y_mc_q95")],
start = c(1950, 1), end = c(2015, 12), frequency = 12),
main = "Cumulative return") %>%
dySeries(c("y_mc_q05", "y_mc_mean", "y_mc_q95"), label = "Monte Carlo return") %>%
dySeries("port_c", label = "realised return")
# and what about the distribution of returns for the next period ??
#a) via bootstrap
s1_bt=x_bt[2,]
hist(s1_bt, breaks = seq(min(s1_bt), max(s1_bt), l = 20+1),prob=TRUE, main = "histogram of monthly returns")
curve(dnorm(x,mean=mean(s1_bt),sd=sd(s1_bt)),col='darkblue',lwd=2,add=TRUE)
VaR_bt <- quantile(s1_bt, 0.05)
# via Monte Carlo simulation
s1_mc=x_bt[2,]
hist(s1_mc, breaks = seq(min(s1_mc), max(s1_mc), l = 20+1),prob=TRUE, main = "histogram of monthly returns")
curve(dnorm(x,mean=mean(s1_mc),sd=sd(s1_mc)),col='darkblue',lwd=2,add=TRUE)
VaR_mc <- quantile(s1_mc, 0.05)
```
## CAPM, estimation and resampling methods
First we need to load the necessary data. In this exercise we will use the Fama and French database.
```{r}
ffdata <- read.table("ffdata.csv", header = T, sep = ",", stringsAsFactors = F)
# to get a glance of how the data are structured, don't forget to check the summary statistics
summary(na.omit(ffdata[, c("r_mkt", "RF", "PR15", "PR51", "MOM", "SMB", "HML")]))
cor(na.omit(ffdata[, c("r_mkt", "RF", "PR15", "PR51", "MOM", "SMB", "HML")]))
plot(x = ffdata$exret_mkt, y = ffdata$er_15, xlab = "excess return", ylab = "portfolio 15 return")
abline(lm(ffdata$exret_mkt~ffdata$er_15), col = "red") # this fits a straight line to the points on the plot
```
### Estimating CER and CAPM models
For the sake of estimation, we will use the sample from jan 2000 to june 2014. We will estimate both these models and verify whether the default regression commands obey the algebra behind them.
```{r}
ret <- as.matrix(subset(ffdata, select = er_15, date >= 2000 & date < 2014.5))
mkt <- as.matrix(subset(ffdata, select = exret_mkt, date >= 2000 & date < 2014.5)) # we define these variables separately for the reason of convenience
### CER ###
cer_mkt <- lm(mkt ~ 1) # as you can see, model specification has specific syntax
cer_ret <- lm(ret ~ 1) # obviously you could just take the sample mean to get the estimate
summary(cer_mkt)
### CAPM ###
capm_ret <- lm(ret ~ mkt) # constant included by default (no need to explicitly add it)!
summary(capm_ret)
# storing the residuals
resid_ret <- capm_ret$residuals
resid_mkt <- cer_mkt$residuals
# we can also estimate is by hands using matrix algebra!
Y <- ret
X <- cbind(1, mkt)
beta <- solve(t(X) %*% X) %*% t(X) %*% Y
beta
# CAPM in two steps
capm2_ret <- lm(ret ~ resid_mkt)
summary(capm2_ret) # notice the change in the intercept
```
### Value-at-Risk: MC and bootstraping
We want to simulate the behavior of the market in the (pseudo) future for the period from july 2014 to june 2016. The assumption is that the market returns follow **CER** and that portfolio returns follow **CAPM**. That is
$$
r_{mkt, t} = \alpha_{mkt} + \varepsilon_{mkt, t} \\
r_{pf, t} = \alpha_{pf} + \beta_{pf} r_{mkt} + \varepsilon_{pf, t}
$$
```{r}
# first, we create the arrays which will contain the sampled series
nrep <- 10^3
tT <- 12*2 # two years
fcst_mc <- fcst_bt <- array(0, c(tT, nrep)) # the containers
for (i in 1:nrep){
# generating the residuals
### bootstrapping
res_mkt_bt <- sample(resid_mkt, size = tT, replace = T)
res_ret_bt <- sample(resid_ret, size = tT, replace = T)
### Monte Carlo
res_ret_mc <- rnorm(n = tT, sd = summary(capm_ret)$sigma) # standard normal times the standard error of the residuals of the relevant regression
res_mkt_mc <- rnorm(n = tT, sd = summary(cer_mkt)$sigma)
# generating the series
### BT
mkt_bt <- cer_mkt$coefficients + res_mkt_bt
fcst_bt[, i] <- capm_ret$coefficients[1] + capm_ret$coefficients[2]*mkt_bt + res_ret_bt
### MC
mkt_mc <- cer_mkt$coefficients + res_mkt_mc
fcst_mc[, i] <- capm_ret$coefficients[1] + capm_ret$coefficients[2]*mkt_mc + res_ret_mc
}
# calculating VaR
var_bt <- var_mc <- array(0, tT)
for (j in 1:tT){
var_bt[j] <- quantile(fcst_bt[j, ], 0.05)
var_mc[j] <- quantile(fcst_mc[j, ], 0.05)
}
```
Now the simulations are done and we can check the results.
```{r}
plot(y = ret, x = seq(from = 2000, to = 2014 + 5/12, length.out = nrow(ret)),
type = "l", xlim = c(2000, 2017), ylab = "excess return", xlab = "time")
lines(y = apply(fcst_mc, 1, mean),
x = seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT),
col = "blue", lwd = 1)
lines(y = var_mc, col = "red",
x = seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT),
lwd = 2)
lines(y = as.matrix(subset(ffdata, select = er_15, date >= 2014.5 & date < 2016.5)),
x = seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT), lty = 2)
legend("bottomleft", lwd = c(1, 1, 2, 1), col = c("black", "blue", "red", "black"),
legend = c("data", "mean forecast", "Value-at-Risk: MC", "actual series"),
lty = c(1, 1, 1, 2))
plot(y = ret, x = seq(from = 2000, to = 2014 + 5/12, length.out = nrow(ret)),
type = "l", xlim = c(2000, 2017), ylab = "excess return", xlab = "time")
lines(y = apply(fcst_bt, 1, mean),
x = seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT),
col = "blue", lwd = 1)
lines(y = var_bt, col = "red",
x = seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT),
lwd = 2)
lines(y = as.matrix(subset(ffdata, select = er_15, date >= 2014.5 & date < 2016.5)),
x = seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT), lty = 2)
legend("bottomleft", lwd = c(1, 1, 2, 1), col = c("black", "blue", "red", "black"),
legend = c("data", "mean forecast", "Value-at-Risk: bootstrap", "actual series"),
lty = c(1, 1, 1, 2))
```