---
title: "Simulation"
output: 
  html_document:
    toc: true
---

```{r setup, include=FALSE}
# Chunk default for the whole document: show the R source of every chunk
# (echo = TRUE) in the rendered output.
knitr::opts_chunk$set(echo = TRUE)
```

```{r, include=FALSE}
# Packages used by this document. Each one is installed if it is missing and
# then attached.
listofpackages <- c("dygraphs")

for (pkg in listofpackages) {
  # requireNamespace() probes for this single package; installed.packages()
  # would scan the whole library on every pass through the loop.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# setting the seed for replication
set.seed(77)
```


## Monte Carlo simulations and bootstrapping

To run this script, we first need to load the data that we prepared in the previous class.

```{r}
# Load the data needed below: a comma-separated file with a header row.
# Text columns are kept as character vectors rather than factors.
data01 <- read.table(
  file = "data/data01.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

```

The first step is to see how the realized cumulative returns performed. Let us consider the period from the beginning of 1950 to the end of 2015.
```{r}
# Number of monthly observations from January 1950 to December 2015.
TT <- 12 * (2015 - 1950 + 1)

# Monthly returns inside the sample window. subset() returns a one-column
# data frame (carrying the column name), so the column is extracted as a
# plain vector before it is stored.
ret_sample <- subset(
  data01,
  select = ret_m_c,
  date >= 1950 & date < 2016
)[[1]]

# The ts() call below and the later simulations assume exactly TT months.
stopifnot(length(ret_sample) == TT)

# Cumulative return index: it starts at 1 in the first month and is compounded
# with every later return, port_c[t] = port_c[t - 1] * (1 + ret_m_c[t]).
# cumprod() evaluates this recursion without an explicit loop.
data02 <- data.frame(
  port_c = c(1, cumprod(1 + ret_sample[-1])),
  ret_m_c = ret_sample
)
dygraph(ts(data02$port_c, start = c(1950, 1), end = c(2015, 12), frequency = 12))
```


Now we will simulate the series. This lets us see how (im)plausible the assumptions of no serial correlation and of normally distributed returns are.

```{r}
# Parameter estimation (sample mean and standard deviation of the monthly
# returns) and choice of the number of replications in the simulation.
vol <- sd(data02$ret_m_c)
alpha <- mean(data02$ret_m_c)
nrep <- 10^3

# Containers to be filled with the generated data: one row per month, one
# column per replication. Row 1 is never redrawn, so every cumulative return
# path starts at 1 and the first-period return stays at alpha.
y_bt <- y_mc <- array(1, c(TT, nrep))
x_bt <- x_mc <- array(alpha, c(TT, nrep))

# Draw the returns. The draws stay inside a loop (normal shocks first, the
# resample second) so that the random-number stream is consumed in the same
# order for every replication and results remain reproducible for a given seed.
for (i in seq_len(nrep)) {
  u <- rnorm(TT - 1)
  res <- sample(data02$ret_m_c[-1], replace = TRUE) # (re)samples from the data

  x_mc[-1, i] <- alpha + vol * u # the Monte Carlo way
  x_bt[-1, i] <- res             # the bootstrap way
}

# Compound every path column by column: y[t] = y[t - 1] * (1 + x[t]), y[1] = 1.
y_mc[-1, ] <- apply(1 + x_mc[-1, , drop = FALSE], 2, cumprod)
y_bt[-1, ] <- apply(1 + x_bt[-1, , drop = FALSE], 2, cumprod)

# Quantile across replications (columns) for every month (row).
row_quantile <- function(paths, prob) {
  apply(paths, 1, quantile, probs = prob, names = FALSE)
}

# Series of means and quantiles of the resulting collection of drawn series.
# obtaining the means
data02$y_bt_mean <- rowMeans(y_bt)
data02$x_bt_mean <- rowMeans(x_bt)
data02$y_mc_mean <- rowMeans(y_mc)
data02$x_mc_mean <- rowMeans(x_mc)

# and the quantiles
data02$y_bt_q05 <- row_quantile(y_bt, 0.05)
data02$x_bt_q05 <- row_quantile(x_bt, 0.05)
data02$y_mc_q05 <- row_quantile(y_mc, 0.05)
data02$x_mc_q05 <- row_quantile(x_mc, 0.05)

data02$y_bt_q95 <- row_quantile(y_bt, 0.95)
data02$x_bt_q95 <- row_quantile(x_bt, 0.95)
data02$y_mc_q95 <- row_quantile(y_mc, 0.95)
data02$x_mc_q95 <- row_quantile(x_mc, 0.95)
```

Now we can finally see what we have got.
```{r}

# Time axis shared by the base-graphics plot: TT equally spaced points from
# 1950 to 2016 - 1/12.
time_axis <- seq(from = 1950, to = 2016 - 1 / 12, length.out = TT)

# some default plotting
plot(x = time_axis, y = data02$port_c,
     main = "Cumulative return: Monte Carlo", type = "l", col = "blue",
     ylab = "return", xlab = "time", ylim = c(0, 6000))
lines(x = time_axis, y = data02$y_mc_mean, col = "green", lwd = 1.6)
lines(x = time_axis, y = data02$y_mc_q05, col = "grey", lty = 2)
lines(x = time_axis, y = data02$y_mc_q95, col = "grey", lty = 2)


# for those who like to lose some time in fancy plotting
dygraph(ts(data02[, c("port_c", "y_bt_mean", "y_bt_q05", "y_bt_q95")],
           start = c(1950, 1), end = c(2015, 12), frequency = 12),
        main = "Cumulative return") %>%
  dySeries(c("y_bt_q05", "y_bt_mean", "y_bt_q95"), label = "bootstrapped return") %>%
  dySeries("port_c", label = "realised return")

dygraph(ts(data02[, c("port_c", "y_mc_mean", "y_mc_q05", "y_mc_q95")],
           start = c(1950, 1), end = c(2015, 12), frequency = 12),
        main = "Cumulative return") %>%
  dySeries(c("y_mc_q05", "y_mc_mean", "y_mc_q95"), label = "Monte Carlo return") %>%
  dySeries("port_c", label = "realised return")

# and what about the distribution of returns for the next period?
# Row 2 holds the first simulated month; each histogram uses 20 equal-width
# bins and is overlaid with a normal density that has the same mean and
# standard deviation. The 5% quantile is stored as the Value-at-Risk.
# a) via bootstrap
s1_bt <- x_bt[2, ]
hist(s1_bt, breaks = seq(min(s1_bt), max(s1_bt), length.out = 20 + 1),
     probability = TRUE, main = "histogram of monthly returns")
curve(dnorm(x, mean = mean(s1_bt), sd = sd(s1_bt)),
      col = "darkblue", lwd = 2, add = TRUE)
VaR_bt <- quantile(s1_bt, 0.05)
# b) via Monte Carlo simulation (reads the Monte Carlo draws in x_mc, not the
# bootstrap draws in x_bt)
s1_mc <- x_mc[2, ]
hist(s1_mc, breaks = seq(min(s1_mc), max(s1_mc), length.out = 20 + 1),
     probability = TRUE, main = "histogram of monthly returns")
curve(dnorm(x, mean = mean(s1_mc), sd = sd(s1_mc)),
      col = "darkblue", lwd = 2, add = TRUE)
VaR_mc <- quantile(s1_mc, 0.05)

```

## CAPM, estimation and resampling methods

First we need to load the necessary data. In this exercise we will use the Fama and French database.

```{r}
# Load the Fama and French data: comma-separated with a header row; text
# columns stay character.
ffdata <- read.table("ffdata.csv", header = TRUE, sep = ",",
                     stringsAsFactors = FALSE)

# to get a glance of how the data are structured, don't forget to check the
# summary statistics (rows with a missing value in any of these columns are
# dropped first)
key_cols <- c("r_mkt", "RF", "PR15", "PR51", "MOM", "SMB", "HML")

summary(na.omit(ffdata[, key_cols]))
cor(na.omit(ffdata[, key_cols]))


plot(x = ffdata$exret_mkt, y = ffdata$er_15, xlab = "excess return", ylab = "portfolio 15 return")
# Fit a straight line to the points on the plot. The regression must follow
# the plot's axes: the y variable (er_15) is regressed on the x variable
# (exret_mkt), otherwise abline() draws the inverse relationship.
abline(lm(er_15 ~ exret_mkt, data = ffdata), col = "red")
```


### Estimating CER and CAPM models
For estimation, we will use the sample from January 2000 to June 2014. We will estimate both models and verify that the built-in regression commands agree with the algebra behind them.
```{r}
# Estimation sample: rows with 2000 <= date < 2014.5. The two series are kept
# as separate one-column matrices for convenience; they are reused in the
# matrix algebra further down.
ret <- as.matrix(subset(ffdata, date >= 2000 & date < 2014.5, select = er_15))
mkt <- as.matrix(subset(ffdata, date >= 2000 & date < 2014.5, select = exret_mkt))

### CER ###
# A regression on a constant only: `~ 1` is the formula syntax for an
# intercept-only model, whose estimate is just the sample mean.
cer_ret <- lm(ret ~ 1)
cer_mkt <- lm(mkt ~ 1)

summary(cer_mkt)

### CAPM ###
# The intercept is part of the model by default; it need not be added.
capm_ret <- lm(ret ~ mkt)
summary(capm_ret)


# keep the residuals of both fitted models for later use
resid_ret <- residuals(capm_ret)
resid_mkt <- residuals(cer_mkt)


# The same CAPM estimate by hand, via the OLS formula (X'X)^{-1} X'Y, where X
# holds a column of ones and the market excess return.
X <- cbind(1, mkt)
Y <- ret

XtX_inv <- solve(t(X) %*% X)
beta <- XtX_inv %*% t(X) %*% Y
beta


# CAPM in two steps: the regressor is now the residual of the CER model for
# the market (the demeaned market return).
capm2_ret <- lm(ret ~ resid_mkt)
summary(capm2_ret) # notice the change in the intercept


```


### Value-at-Risk: MC and bootstrapping
We want to simulate the behavior of the market in the (pseudo) future, for the period from July 2014 to June 2016. The assumption is that the market returns follow the **CER** model and that the portfolio returns follow the **CAPM**. That is,
$$
\begin{aligned}
r_{mkt, t} &= \alpha_{mkt} + \varepsilon_{mkt, t} \\
r_{pf, t} &= \alpha_{pf} + \beta_{pf} r_{mkt, t} + \varepsilon_{pf, t}
\end{aligned}
$$

```{r}
# Simulation design: number of replications and forecast horizon in months.
nrep <- 10^3
tT <- 12 * 2 # two years
# Containers for the sampled series: one row per month, one column per
# replication.
fcst_mc <- fcst_bt <- array(0, c(tT, nrep))

# Quantities that do not change across replications are computed once:
# the CER intercept of the market, the CAPM intercept and slope of the
# portfolio, and the residual standard errors of both regressions.
alpha_mkt <- unname(coef(cer_mkt))
capm_coef <- unname(coef(capm_ret))
sigma_ret <- summary(capm_ret)$sigma
sigma_mkt <- summary(cer_mkt)$sigma

for (i in seq_len(nrep)) {
  # generating the residuals (the order of the four draws is kept fixed so
  # that the random-number stream is consumed identically in every run)

  ### bootstrapping: resample the estimated residuals with replacement
  res_mkt_bt <- sample(resid_mkt, size = tT, replace = TRUE)
  res_ret_bt <- sample(resid_ret, size = tT, replace = TRUE)

  ### Monte Carlo: normal draws scaled by the residual standard error of the
  ### relevant regression
  res_ret_mc <- rnorm(n = tT, sd = sigma_ret)
  res_mkt_mc <- rnorm(n = tT, sd = sigma_mkt)

  # generating the series: the market follows CER, the portfolio follows CAPM

  ### BT
  mkt_bt <- alpha_mkt + res_mkt_bt
  fcst_bt[, i] <- capm_coef[1] + capm_coef[2] * mkt_bt + res_ret_bt

  ### MC
  mkt_mc <- alpha_mkt + res_mkt_mc
  fcst_mc[, i] <- capm_coef[1] + capm_coef[2] * mkt_mc + res_ret_mc
}

# calculating VaR: the 5% quantile across replications for every month
var_bt <- apply(fcst_bt, 1, quantile, probs = 0.05, names = FALSE)
var_mc <- apply(fcst_mc, 1, quantile, probs = 0.05, names = FALSE)
```

Now the simulations are done and we can check the results.
```{r}
# Draws one panel: the estimation-sample series `ret`, followed over the
# forecast window by the mean of the simulated paths, the supplied
# Value-at-Risk path and the series observed in the data for that window.
#   fcst      matrix of simulated paths (rows = months, columns = replications)
#   var_path  Value-at-Risk series, one value per forecast month
#   var_label legend entry used for the Value-at-Risk line
draw_var_panel <- function(fcst, var_path, var_label) {
  sample_time <- seq(from = 2000, to = 2014 + 5/12, length.out = nrow(ret))
  fcst_time <- seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT)
  observed <- as.matrix(
    subset(ffdata, select = er_15, date >= 2014.5 & date < 2016.5)
  )

  plot(x = sample_time, y = ret, type = "l", xlim = c(2000, 2017),
       ylab = "excess return", xlab = "time")
  lines(x = fcst_time, y = apply(fcst, 1, mean), col = "blue", lwd = 1)
  lines(x = fcst_time, y = var_path, col = "red", lwd = 2)
  lines(x = fcst_time, y = observed, lty = 2)
  legend("bottomleft", lwd = c(1, 1, 2, 1),
         col = c("black", "blue", "red", "black"),
         legend = c("data", "mean forecast", var_label, "actual series"),
         lty = c(1, 1, 1, 2))
  invisible(NULL)
}

# Monte Carlo panel first, bootstrap panel second.
draw_var_panel(fcst_mc, var_mc, "Value-at-Risk: MC")
draw_var_panel(fcst_bt, var_bt, "Value-at-Risk: bootstrap")
```