# elaboration on the original produced by E.Zivot by C. Favero 
# author: Carlo Favero 
# created: July, 2021
#
# comments: Original Examples follow chapter 11 in Zivot and Wang (2006)


rm(list=ls()) #Removes all items in Environment!
#setwd(path)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# set output options
# options(width = 70, digits=4)
listofpackages <- c("dygraphs", "dplyr","ellipse","reshape2","ggplot2","PerformanceAnalytics","zoo")

for (j in listofpackages){
  if(sum(installed.packages()[, 1] == j) == 0) {
    install.packages(j)
  }
  library(j, character.only = T)
}
install.packages("fEcofin", repos="http://R-Forge.R-project.org")
# load required packages
library(fEcofin)                # various data sets

################################################################################
# Data Loadings and Transform Descriptive Analysis 
################################################################################

# create data frame with dates as rownames
berndt.df = berndtInvest[, -1]
berndt.df$date <- as.Date(berndtInvest[, 1])
rownames(berndt.df) = as.character(berndtInvest[, 1])
colnames(berndt.df)
dimnames(berndt.df)[[2]] #command alternative to the previous one 

# transform the data and compute cumulative returns 

t0 <- which(berndt.df$date == "1978-01-01")
t1 <- which(berndt.df$date == "1987-12-01")

series_names <- c("CITCRP","CONED","CONTIL","DATGEN","DEC","DELTA","GENMIL","GERBER","IBM","MARKET","MOBIL","PANAM","PSNH","TANDY","TEXACO","WEYER","RKFREE")

for (name in series_names) {
  P_col_name <- paste0(name,"_P")
  LP_col_name<- paste0("L",P_col_name)
  berndt.df[t0, P_col_name] <- 1
  for (i in (t0+1):(t1)) {
    berndt.df[i, P_col_name][[1]] <- berndt.df[i-1, P_col_name][[1]] * (1+berndt.df[i, name][[1]] )
  } 
  berndt.df[, LP_col_name] <- log(berndt.df[, P_col_name]) 
}
# add a trend to the database 
berndt.df$TREND<- array(data = NA, dim = nrow(berndt.df))
berndt.df[t0, c("TREND")] <- 1 # don't need to repeat the value to make the array being assigned be of the same length. be careful though as it is one of the few cases of exception
for (i in (t0+1):(t1)) {
  berndt.df[i, "TREND"][[1]] <- berndt.df[i-1, "TREND"][[1]] +1
} 

####################################
# Descriptive Analysis of prices and returns
####################################
# plot log prices
ggplot(berndt.df, aes(x = date)) +
  geom_line(aes(y = LTEXACO_P), color = "blue", size = 1, linetype = "solid") +
  geom_line(aes(y = LMARKET_P), color = "green", size = 1, linetype = "solid") +
  labs(x = "Date", y = "Portfolios TEXACO and MKT") +
  ylim(-0.5, 2) +
  theme_minimal() +
  theme(
    legend.position = "topleft",
    legend.title = element_blank(),
    legend.text = element_text(size = 8),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 12, hjust = 0.5)
  ) +
  scale_color_manual(
    values = c("blue", "green"),
    guide = guide_legend(override.aes = list(size = 2, linetype = "solid"))
  ) +
  guides(fill = guide_legend(override.aes = list(size = 2)))
# plot returns
ggplot(berndt.df, aes(x = date)) +
  geom_line(aes(y = TEXACO), color = "blue", size = 1, linetype = "solid") +
  geom_line(aes(y = MARKET), color = "green", size = 1, linetype = "solid") +
  labs(x = "Date", y = "Portfolios TEXACO and MKT") +
  ylim(-0.45, 0.45) +
  theme_minimal() +
  theme(
    legend.position = "topleft",
    legend.title = element_blank(),
    legend.text = element_text(size = 12),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, hjust = 0.5)
  ) +
  scale_color_manual(
    values = c("blue", "green"),
    guide = guide_legend(override.aes = list(size = 2, linetype = "solid"))
  ) +
  guides(fill = guide_legend(override.aes = list(size = 2)))



################################################################################
# CAPM FOR TEXACO  
################################################################################
capm_tex<-lm(TEXACO ~ MARKET, data=berndt.df)
summary(capm_tex)
berndt.df$TEXACO_fitted<-capm_tex$fitted.values 

#fitting returns 

plot(berndt.df$date[t0:t1],berndt.df$TEXACO[t0:t1], col = 'blue', type = "l", 
     ylab = " actual and fitted returns", xlab = "date",lwd = 2,)
lines(y = rep(mean(berndt.df$TEXACO[t0:t1], na.rm = T), length(berndt.df$TEXACO[t0:t1])), x = berndt.df$date[t0:t1], col = "red")
lines(y = berndt.df$TEXACO_fitted[t0:t1], x = berndt.df$date[t0:t1], col = "green",lwd = 2)
legend("topright", legend = c("TEXACO ACTUAL", "TEXACO FITTED"),
       col = c("blue", "green"), lty = 1)
grid(nx = 6, ny = 7, col = "lightgray", lty = "dotted",
     lwd = par("lwd"), equilogs = TRUE)

#fitting prices 
berndt.df$TEXACO_P_FITTED <-  array(data = NA, dim = nrow(berndt.df))
berndt.df$TEXACO_P_FITTED[t0] <- 1 
for (i in (t0+1):(t1)) {
  berndt.df$TEXACO_P_FITTED[i] <- berndt.df$TEXACO_P_FITTED[i-1] * (1 + berndt.df$TEXACO_fitted[i])}

plot(berndt.df$date[t0:t1],berndt.df$TEXACO_P[t0:t1], col = 'blue', type = "l", 
     ylab = " actual and fitted prices", xlab = "date",lwd = 2,ylim=c(0.9,5))
#lines(y = rep(mean(berndt.df$TEXACO[t0:t1], na.rm = T), length(berndt.df$TEXACO[t0:t1])), x = berndt.df$date[t0:t1], col = "red")
lines(y = berndt.df$TEXACO_P_FITTED[t0:t1], x = berndt.df$date[t0:t1], col = "green",lwd = 2)
legend("topleft", legend = c("TEXACO ACTUAL", "TEXACO FITTED"),
       col = c("blue", "green"), lty = 1)
grid(nx = 6, ny = 7, col = "lightgray", lty = "dotted",
     lwd = par("lwd"), equilogs = TRUE)
#dev.copy2pdf(width = 8.5, out.type = "pdf",file="CAPM.pdf")
#dev.off()
####################
# Optimal Portfolio weights with the CER approach 
###################

returns.df=berndt.df[, c(1:9,11:16)]
#returns.df = berndt.df[, c(-10, -17)
exreturns.df=returns.df-berndt.df$RKFREE
returns.mat = as.matrix(exreturns.df)
n.obs = nrow(returns.mat)

#Estimation of CER model parameters
cov.sample=var(returns.mat)
mu = matrix(colMeans(returns.mat), nrow = ncol(returns.mat), ncol = 1)
#
# compute global min variance portfolio
#
w.gmin.sample = solve(var(returns.mat))%*%rep(1,nrow(cov.sample))
w.gmin.sample = w.gmin.sample/sum(w.gmin.sample)
colnames(w.gmin.sample) = "sample"
barplot(t(w.gmin.sample), horiz=F, main="Weights", col="blue", cex.names = 0.75, las=2)
################################
# A Single index model: the CAPM 
################################

##
## use multivariate regression and matrix algebra
##
returnsmkt.df=berndt.df[, c(10:10,17:17)]
#returns.df = berndt.df[, c(-10, -17)
returnsmkt.df$EXRETMKT=returnsmkt.df$MARKET-returnsmkt.df$RKFREE
market.mat = as.matrix(returnsmkt.df[,3, drop=F])
n.obs = nrow(returns.mat)
X.mat = cbind(rep(1,n.obs),market.mat)
colnames(X.mat)[1] = "intercept"
XX.mat = crossprod(X.mat)

# multivariate least squares
G.hat = solve(XX.mat)%*%crossprod(X.mat,returns.mat)
# can also use solve(qr(X.mat), returns.mat)
beta.hat = G.hat[2,]
E.hat = returns.mat - X.mat%*%G.hat
#D.hat=crossprod(E.hat)
diagD.hat = diag(crossprod(E.hat)/(n.obs-2))
# compute R2 values from multivariate regression
sumSquares = apply(returns.mat, 2, function(x) {sum( (x - mean(x))^2 )})
R.square = 1 - (n.obs-2)*diagD.hat/sumSquares

# print and plot results
cbind(beta.hat, diagD.hat, R.square)

par(mfrow=c(1,2))
barplot(beta.hat, horiz=T, main="Beta values", col="blue", cex.names = 0.75, las=1)
barplot(R.square, horiz=T, main="R-square values", col="blue", cex.names = 0.75, las=1)
par(mfrow=c(1,1))

# compute single index model covariance/correlation matrices
cov.si = as.numeric(var(market.mat))*beta.hat%*%t(beta.hat) + diag(diagD.hat)
cor.si = cov2cor(cov.si)
#
# COMPARE CORRELATIONS 
#
# FACTOR MODEL BASED CORRELATION MATRIX using plotcorr() from ellipse package
#
rownames(cor.si) = colnames(cor.si)
ord <- order(cor.si[1,])
ordered.cor.si <- cor.si[ord, ord]
plotcorr(ordered.cor.si, col=cm.colors(11)[5*ordered.cor.si + 6])
plotcorr(cor.si, col=cm.colors(11)[5*cor.si + 6])
#
# SAMPLE CORRELATION MATRIX 
#
cor.sample = cor(returns.mat)
ord <- order(cor.sample[1,])
ordered.cor.sample <- cor.sample[ord, ord]
plotcorr(ordered.cor.sample, col=cm.colors(11)[5*ordered.cor.sample + 6])
plotcorr(cor.sample, col=cm.colors(11)[5*cor.sample + 6])
#
# CAPM residuals  CORRELATION MATRIX 
#
cor.resid = cor(E.hat)
ord <- order(cor.resid[1,])
ordered.cor.resid <- cor.resid[ord, ord]
plotcorr(ordered.cor.resid, col=cm.colors(11)[5*ordered.cor.resid + 6])
#
# compute global min variance portfolio
#
# use CAPM covariance (1-factor model)
w.gmin.si = solve(cov.si)%*%rep(1,nrow(cov.si))
w.gmin.si = w.gmin.si/sum(w.gmin.si)
colnames(w.gmin.si) = "single.index"


#par(mfrow=c(2,1))
#barplot(t(w.gmin.si), horiz=F, main="Single Index Weights", col="blue", cex.names = 0.75, las=2)
#barplot(t(w.gmin.sample), horiz=F, main="Sample Weights", col="blue", cex.names = 0.75, las=2)
#par(mfrow=c(1,1))


#compare weights delivered by the two alternative methods 
pdf("output.pdf", width = 10, height = 8)
par(mfrow = c(2, 1))
barplot(t(w.gmin.si), horiz=F, main="Single Index Weights", col="blue", cex.names = 0.75, las=2)
barplot(t(w.gmin.sample), horiz=F, main="Sample Weights", col="blue", cex.names = 0.75, las=2)
par(mfrow = c(1, 1))
dev.off()



# compare means and sd values on global min variance portfolios
mu.vals = colMeans(returns.mat)
mu.gmin.si = as.numeric(crossprod(w.gmin.si, mu.vals))
sd.gmin.si = as.numeric(sqrt(t(w.gmin.si)%*%cov.si%*%w.gmin.si))
mu.gmin.sample = as.numeric(crossprod(w.gmin.sample, mu.vals))
sd.gmin.sample = as.numeric(sqrt(t(w.gmin.sample)%*%var(returns.mat)%*%w.gmin.sample))
cbind(mu.gmin.si,mu.gmin.sample, sd.gmin.si, sd.gmin.sample)


########################################
## AN ALTERNATIVE APPROACH to compute parameters in CAPM:
## use lm function to compute single index model regressions for each asset
#######################################

asset.names = colnames(returns.mat)
asset.names

# initialize list object to hold regression objects

reg.list = list()
# loop over all assets and estimate time series regression
for (i in asset.names) {
 reg.df = berndt.df[, c(i, "MARKET")]
 si.formula = as.formula(paste(i,"~", "MARKET", sep=" "))
 reg.list[[i]] = lm(si.formula, data=reg.df)
}

# examine the elements of reg.list  - they are lm objects!
names(reg.list)
class(reg.list$CITCRP)
reg.list$CITCRP
summary(reg.list$CITCRP)

# plot actual vs. fitted over time
# use chart.TimeSeries() function from PerformanceAnalytics package

dataToPlot = cbind(fitted(reg.list$CITCRP), berndt.df$CITCRP)
colnames(dataToPlot) = c("Fitted","Actual")
dev.off()

# Verify the data
str(dataToPlot)
summary(dataToPlot)

# Create the time series chart
chart.TimeSeries(dataToPlot, main = "Single Index Model for CITCRP",
                 colorset = c("black", "blue"), legend.loc = "bottomleft")


# scatterplot of the single index model regression
plot(berndt.df$MARKET, berndt.df$CITCRP, main="SI model for CITCRP",
     type="p", pch=16, col="blue",
     xlab="MARKET", ylab="CITCRP")
abline(h=0, v=0)
abline(reg.list$CITCRP, lwd=2, col="red")

## extract beta values, residual sd's and R2's from list of regression objects
## brute force loop
reg.vals = matrix(0, length(asset.names), 3)
rownames(reg.vals) = asset.names
colnames(reg.vals) = c("beta", "residual.sd", "r.square")
for (i in names(reg.list)) {
    tmp.fit = reg.list[[i]]
    tmp.summary = summary(tmp.fit)
    reg.vals[i, "beta"] = coef(tmp.fit)[2]
    reg.vals[i, "residual.sd"] = tmp.summary$sigma
    reg.vals[i, "r.square"] = tmp.summary$r.squared
}
reg.vals

# alternatively use R apply function for list objects - lapply or sapply
extractRegVals = function(x) {
# x is an lm object
 beta.val = coef(x)[2]
 residual.sd.val = summary(x)$sigma
 r2.val = summary(x)$r.squared
 ret.vals = c(beta.val, residual.sd.val, r2.val)
 names(ret.vals) = c("beta", "residual.sd", "r.square")
 return(ret.vals)
}
reg.vals = sapply(reg.list, FUN=extractRegVals)
t(reg.vals)