################################################################################
###
### AUTHOR:       Carlo Favero 
### DATE:         Milan, December 2026
### DESCRIPTION:  This script gives the basics of forecasting with VAR for ex 1
### OUTPUT:           
################################################################################

## -----------------------------------------------------------------------------
rm(list=ls())                                                                   # Clear the environment 

## -----------------------------------------------------------------------------
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))                     # Set the appropriate environment to work in every machine

## -----------------------------------------------------------------------------
listofpackages <- c("tidyverse","ellipse","reshape2","xts","readxl",     # Install and load the required packages
"quantmod","vars","systemfit","stargazer","tseries", "dynlm","sandwich","urca")   
source("preparePackages.R")
preparePackages(listofpackages)

######################
### read data
######################
data<- read_excel('quarterly2023.xlsx', sheet=2, col_names=TRUE)
dates <-seq(as.Date("1978-01-01"),length=nrow(data), by="quarters")
rawdata.ts <- xts(data[,2:15], order.by=dates)
typeof(rawdata.ts$cpi)

######################
### transform data
######################

# Inflation: π (pi)
rawdata.ts$LP     <- log(rawdata.ts$cpi)
rawdata.ts$pi     <- (rawdata.ts$LP - lag.xts(rawdata.ts$LP, 4))*100
rawdata.ts$pi_gap <- rawdata.ts$pi - rawdata.ts$ptr

# Output gap: y_gap NB base year different for GDP and GDPPOT in FRED
rawdata.ts$pgdp=rawdata.ts$gdp/rawdata.ts$gdpc1
rawdata.ts$pgdppot=rawdata.ts$ngdppot/rawdata.ts$gdppot
rawdata.ts$y      <- log(rawdata.ts$gdpc1)
rawdata.ts$y_pot  <- log(rawdata.ts$gdppot*1.0865)
rawdata.ts$y_gap  <- (rawdata.ts$y - rawdata.ts$y_pot)*100
rawdata.ts$dy_pot <- (rawdata.ts$y_pot - lag.xts(rawdata.ts$y_pot, 4))*100
#plot(rawdata.ts$dy_pot)
#summary(dynlm(y_gap ~ L(y_gap)))

# Interest rates
rawdata.ts$y3m_cc <-100*log(1+0.01*rawdata.ts$tb3ms)
rawdata.ts$y1_cc <-100*log(1+0.01*rawdata.ts$gs1)
rawdata.ts$y10_cc <-100*log(1+0.01*rawdata.ts$gs10)
rawdata.ts$Dy3m_cc<-(rawdata.ts$y3m_cc - lag.xts(rawdata.ts$y3m_cc, 1))
rawdata.ts$spread<-rawdata.ts$y10_cc-rawdata.ts$y3m_cc

######################
### Estimation
######################

# 1) model for the short-term rate 
longrun<-lm(y3m_cc~my+dy_pot+ptr-1,data=rawdata.ts["1980-01-01/2016-10-01"])
summary(longrun)
stargazer(longrun, se=list(coeftest(longrun)[,2]), type='latex')
u_t1 <- ts(resid(longrun), start=c(1980,1), frequency = 4)
adf.test(u_t1,  k = 0, )
u_t1lag<-lag.xts(u_t1)
u_t1diff<-diff(u_t1)
adftest<-lm(u_t1diff ~ 0 + u_t1lag[2:148])
summary(adftest)


# 2) create trend and cycle for yields at all maturities 

rawdata.ts$dy_pot["2033-01-01/2050-10-01"]<-1.70
rawdata.ts$ptr["2024-01-01/2050-10-01"]<-2
rawdata.ts$trend_3m<-longrun$coefficients[1]*rawdata.ts$my+longrun$coefficients[2]*rawdata.ts$dy_pot+
  longrun$coefficients[3]*rawdata.ts$ptr
rawdata.ts$cycle_3m<-rawdata.ts$y3m_cc-rawdata.ts$trend_3m
data.df<-as.data.frame(rawdata.ts$trend_3m["1979-01-01/2050-01-01"])
T=nrow(data.df)
N=40
Y_trend <- matrix(NA,T,N)
Y_trend[,1]<-data.df[,1]

for (i in 2: N){
  for (j in 1:(T - N)) {  
    Y_trend[j, i] <- (Y_trend[j, (i-1)] + Y_trend[(j +i- 1), 1])
    
  }}

for (i in 1:N){Y_trend[, i]<-Y_trend[, i]/i}

dates_tr <-seq(as.Date("1979-01-01"),length=T, by="quarters")
trend_y1<- xts(Y_trend[,4], order.by=dates_tr)
trend_y10<- xts(Y_trend[,40], order.by=dates_tr)

rawdata.ts <-merge(rawdata.ts, trend_y1, join = 'outer')
rawdata.ts <-merge(rawdata.ts, trend_y10, join = 'outer')
rawdata.ts$cycle_y1<-rawdata.ts$y1_cc-rawdata.ts$trend_y1
rawdata.ts$cycle_y10<-rawdata.ts$y10_cc-rawdata.ts$trend_y10

# create lags and estimate a system for all the cyclical variables 
rawdata.ts$cycle_3m_l1<-lag.xts(rawdata.ts$cycle_3m,1)
rawdata.ts$cycle_y1_l1<-lag.xts(rawdata.ts$cycle_y1,1)
rawdata.ts$cycle_y10_l1<-lag.xts(rawdata.ts$cycle_y10,1)
rawdata.ts$y_gap_l1<-lag.xts(rawdata.ts$y_gap,1)
rawdata.ts$pi_gap_l1<-lag.xts(rawdata.ts$pi_gap,1)

# Define the system of equations
eq_trend_3m <-y3m_cc~my+dy_pot+ptr-1
eq_cycle_3m <- cycle_3m ~cycle_3m_l1+cycle_y1_l1+cycle_y10_l1+y_gap_l1+pi_gap_l1
eq_cycle_y1 <- cycle_y1 ~ cycle_3m_l1+cycle_y1_l1+cycle_y10_l1+y_gap_l1+pi_gap_l1
eq_cycle_y10 <-cycle_y10 ~ cycle_3m_l1+cycle_y1_l1+cycle_y10_l1+y_gap_l1+pi_gap_l1
eq_y_gap <- y_gap~ cycle_3m_l1+cycle_y1_l1+cycle_y10_l1+y_gap_l1+pi_gap_l1
eq_pi_gap <- pi_gap ~ cycle_3m_l1+cycle_y1_l1+cycle_y10_l1+y_gap_l1+pi_gap_l1-1

system_1 <- list(eq_cycle_3m, eq_cycle_y1,eq_cycle_y10,eq_y_gap,eq_pi_gap,eq_trend_3m)

# Estimate the system
sys<- systemfit(system_1, "OLS", data=rawdata.ts["1980-01-01/2016-10-01"])
summary(sys)

C <- coef(sys)
# Simulate the system
#create simulated variables 
rawdata.ts$cycle_3m_f <- NA
rawdata.ts$cycle_y1_f <- NA
rawdata.ts$cycle_y10_f <- NA
rawdata.ts$y_gap_f <- NA
rawdata.ts$pi_gap_f <- NA
rawdata.ts$y3m_cc_f <- NA
rawdata.ts$y1_cc_f <- NA
rawdata.ts$y10_cc_f <- NA
rawdata.ts$pi_f <- NA

#initialization

eos_date <- as.Date("2016-10-01")
position <- which(index(rawdata.ts) == eos_date)
rawdata.ts[position, "cycle_3m_f"][[1]]=rawdata.ts[position, "cycle_3m"][[1]]
rawdata.ts[position, "cycle_y1_f"][[1]]=rawdata.ts[position, "cycle_y1"][[1]]
rawdata.ts[position, "cycle_y10_f"][[1]]=rawdata.ts[position, "cycle_y10"][[1]]
rawdata.ts[position, "y_gap_f"][[1]]=rawdata.ts[position, "y_gap"][[1]]
rawdata.ts[position, "pi_gap_f"][[1]]=rawdata.ts[position, "pi_gap"][[1]]

#simulation


for (i in (position+1):(position + 64)) {
  rawdata.ts[i, "cycle_3m_f"][[1]]=C[1]+C[2]*rawdata.ts[i-1, "cycle_3m_f"][[1]]+
    +C[3]*rawdata.ts[i-1, "cycle_y1_f"][[1]]+C[4]*rawdata.ts[i-1, "cycle_y10_f"][[1]]+
    +C[5]*rawdata.ts[i-1, "y_gap_f"][[1]]+C[6]*rawdata.ts[i-1, "pi_gap_f"][[1]]
  rawdata.ts[i, "cycle_y1_f"][[1]]=C[7]+C[8]*rawdata.ts[i-1, "cycle_3m_f"][[1]]+
    +C[9]*rawdata.ts[i-1, "cycle_y1_f"][[1]]+C[10]*rawdata.ts[i-1, "cycle_y10_f"][[1]]+
    +C[11]*rawdata.ts[i-1, "y_gap_f"][[1]]+C[12]*rawdata.ts[i-1, "pi_gap_f"][[1]]
  rawdata.ts[i, "cycle_y10_f"][[1]]=C[13]+C[14]*rawdata.ts[i-1, "cycle_3m_f"][[1]]+
    +C[15]*rawdata.ts[i-1, "cycle_y1_f"][[1]]+C[16]*rawdata.ts[i-1, "cycle_y10_f"][[1]]+
    +C[17]*rawdata.ts[i-1, "y_gap_f"][[1]]+C[18]*rawdata.ts[i-1, "pi_gap_f"][[1]]
  rawdata.ts[i, "y_gap_f"][[1]]=C[19]+C[20]*rawdata.ts[i-1, "cycle_3m_f"][[1]]+
    +C[21]*rawdata.ts[i-1, "cycle_y1_f"][[1]]+C[22]*rawdata.ts[i-1, "cycle_y10_f"][[1]]+
    +C[23]*rawdata.ts[i-1, "y_gap_f"][[1]]+C[24]*rawdata.ts[i-1, "pi_gap_f"][[1]]
  rawdata.ts[i, "pi_gap_f"][[1]]=C[25]*rawdata.ts[i-1, "cycle_3m_f"][[1]]+
    +C[26]*rawdata.ts[i-1, "cycle_y1_f"][[1]]+C[27]*rawdata.ts[i-1, "cycle_y10_f"][[1]]+
    +C[28]*rawdata.ts[i-1, "y_gap_f"][[1]]+C[29]*rawdata.ts[i-1, "pi_gap_f"][[1]]
  rawdata.ts[i, "y3m_cc_f"][[1]]=C[30]*rawdata.ts[i, "my"][[1]]+
    +C[31]*rawdata.ts[i, "dy_pot"][[1]]+C[32]*rawdata.ts[i, "ptr"][[1]]+
    +rawdata.ts[i, "cycle_3m_f"][[1]]
  rawdata.ts[i, "y1_cc_f"][[1]]=rawdata.ts[i, "cycle_y1_f"][[1]]+rawdata.ts[i, "trend_y1"][[1]]
  rawdata.ts[i, "y10_cc_f"][[1]]=rawdata.ts[i, "cycle_y10_f"][[1]]+rawdata.ts[i, "trend_y10"][[1]] 
  rawdata.ts[i, "pi_f"][[1]]=rawdata.ts[i, "pi_gap_f"][[1]]+rawdata.ts[i, "ptr"][[1]]
  
  }


######################
### Graphs
######################

plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = y3m_cc, color = "Actual"), size = 1)+
  geom_line(aes(y = trend_3m, color = "Trend"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Trend", title = "3-month rates") +
  scale_color_manual(values = c("Actual" = "blue", "Trend" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_trend3m.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)

plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = y1_cc, color = "Actual"), size = 1)+
  geom_line(aes(y = trend_y1, color = "Trend"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Trend", title = "1-year yields") +
  scale_color_manual(values = c("Actual" = "blue", "Trend" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_trendy1.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)

plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = y10_cc, color = "Actual"), size = 1)+
  geom_line(aes(y = trend_y10, color = "Trend"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Trend", title = "10-year yields") +
  scale_color_manual(values = c("Actual" = "blue", "Trend" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_trendy10.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)

plot <- ggplot(rawdata.ts["1979-01-01/2030-10-01"], aes(x = index(rawdata.ts["1979-01-01/2030-10-01"]))) +
  geom_line(aes(y = cycle_3m, color = "3m"), size = 1) +
  geom_line(aes(y = cycle_y1, color = "y1"), size = 1) +
  geom_line(aes(y = cycle_y10, color = "y10"), size = 1) +
  labs(title = "cycles in Yields ", x = "Time", y = "%") +
  scale_color_manual(values = c("3m" = "lightblue", "y1" = "blue","y10" = "darkblue"), name = NULL) +
  theme_minimal() +
  theme(axis.line = element_line(color = "black")) +
  scale_x_date(date_breaks = "5 year", date_labels = "%Y") +
  theme(
    legend.position = c(1, 1),  # Specify custom coordinates (x, y)
    legend.justification = c(1, 1),  # Specify justification for the coordinates
    axis.text.x = element_text(size = 8)  # Adjust the font size here
  )
print(plot)
ggsave(filename = "Figure_cycles.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)

plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = y3m_cc, color = "Actual"), size = 1)+
  geom_line(aes(y = y3m_cc_f, color = "Forecast"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Forecast", title = "3-month rates") +
  scale_color_manual(values = c("Actual" = "blue", "Forecast" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_forc3m.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)

plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = y1_cc, color = "Actual"), size = 1)+
  geom_line(aes(y = y1_cc_f, color = "Forecast"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Forecast", title = "1-year rates") +
  scale_color_manual(values = c("Actual" = "blue", "Forecast" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_forc1y.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)


plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = y10_cc, color = "Actual"), size = 1)+
  geom_line(aes(y = y10_cc_f, color = "Forecast"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Forecast", title = "10-year rates") +
  scale_color_manual(values = c("Actual" = "blue", "Forecast" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_forc10y.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)


plot <- ggplot(rawdata.ts, aes(x = index(rawdata.ts))) +
  geom_line(aes(y = pi, color = "Actual"), size = 1)+
  geom_line(aes(y = pi_f, color = "Forecast"), linetype = "dashed", size = 1) +
  labs(x = "Date", y = "Actual and Forecast", title = "Inflation") +
  scale_color_manual(values = c("Actual" = "blue", "Forecast" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)  # Center the title
  ) +
  guides(color = guide_legend(title = ""))
plot <- plot +
  geom_vline(xintercept = as.Date("2016-10-01"), color = "black")

print(plot)
ggsave(filename = "Figure_forc_pi.pdf", plot = plot, device = "pdf",width = 5.72,height=3.12)


######################
### RMSFE over forecasting sample
######################

# Indice delle osservazioni di previsione: dopo eos_date e con forecast non NA
fcst_idx <- which(index(rawdata.ts) > eos_date & !is.na(rawdata.ts$y3m_cc_f))

# Lista delle variabili per cui hai anche la versione _f
vars_eval <- c("y3m_cc", "y1_cc", "y10_cc", "pi",
               "y_gap", "pi_gap")

# Funzione RMSFE generica
rmsfe_vec <- sapply(vars_eval, function(v) {
  actual   <- as.numeric(rawdata.ts[fcst_idx, v])
  forecast <- as.numeric(rawdata.ts[fcst_idx, paste0(v, "_f")])
  ok <- !is.na(actual) & !is.na(forecast)
  sqrt(mean((actual[ok] - forecast[ok])^2))
})

# Tabella con i risultati
rmsfe_table <- data.frame(
  Variable = vars_eval,
  RMSFE    = rmsfe_vec
)

print(rmsfe_table)

# Tabella LaTeX con stargazer
stargazer(rmsfe_table,
          type       = "latex",
          summary    = FALSE,
          rownames   = FALSE,
          title      = "Root Mean Squared Forecast Errors (forecast sample)")
