list(mean_price = mean_price, vwap = vwap)
}
# Sample statistics (arithmetic mean of the close and VWAP) for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats <- calc_means(icop_df)

# Console report: one line with the sample window, then a three-line summary
# (header, arithmetic mean, VWAP) for each security, values rounded to 4 digits.
cat("=== Campione:", format(sample_start), " - ", format(sample_end), "===\n\n")
invisible(Map(
  function(header, ticker, stats) {
    cat(header, ticker, ") ===\n", sep = "")
    cat("Media aritmetica (close): ", round(stats$mean_price, 4), "\n", sep = "")
    cat("VWAP                     : ", round(stats$vwap, 4), "\n\n", sep = "")
  },
  c("=== Palingeo (", "=== ICOP ("),
  list(ticker_palingeo, ticker_icop),
  list(palingeo_stats, icop_stats)
))
# Open the downloaded Palingeo data in the data viewer. The call used to be
# issued twice in a row; one call is enough.
View(palingeo_raw)

# Sample window used for the statistics and the plots.
sample_start <- as.Date("2025-01-13")
sample_end   <- as.Date("2025-06-13")
## =========================================================
## 2. Helper to download and clean the data
## =========================================================
# Downloads the series for `ticker` from Yahoo between `start_date` and
# `end_date` (warnings from the download are suppressed) and returns a data
# frame with columns date, open, high, low, close, volume, taken from the
# first five columns of the downloaded object in that order. Rows with a
# missing close, a missing volume, or a non-positive volume are dropped.
get_clean_ohlc <- function(ticker, start_date, end_date) {
  quotes <- suppressWarnings(
    getSymbols(
      ticker,
      src = "yahoo",
      from = start_date,
      to = end_date,
      auto.assign = FALSE
    )
  )

  # Extract the k-th column of the downloaded object as a plain numeric vector.
  numeric_column <- function(k) as.numeric(quotes[, k])

  prices <- data.frame(
    date   = index(quotes),
    open   = numeric_column(1),
    high   = numeric_column(2),
    low    = numeric_column(3),
    close  = numeric_column(4),
    volume = numeric_column(5)
  )

  # Keep only days with a usable close and a strictly positive volume.
  filter(prices, !is.na(close), !is.na(volume), volume > 0)
}
## =========================================================
## 3. Download the raw data for the whole download period
## =========================================================
palingeo_raw <- get_clean_ohlc(
  ticker = ticker_palingeo,
  start_date = download_start,
  end_date = download_end
)
icop_raw <- get_clean_ohlc(
  ticker = ticker_icop,
  start_date = download_start,
  end_date = download_end
)

## =========================================================
## 4. Restrict the data to the chosen sample
## =========================================================
# Keep the rows whose date lies inside [sample_start, sample_end].
palingeo_df <- filter(palingeo_raw, date >= sample_start, date <= sample_end)
icop_df <- filter(icop_raw, date >= sample_start, date <= sample_end)

# Abort if either subset is empty.
if (nrow(palingeo_df) < 1) {
  stop("ATTENZIONE: nessun dato per Palingeo nel campione scelto. Controlla sample_start / sample_end.")
}
if (nrow(icop_df) < 1) {
  stop("ATTENZIONE: nessun dato per ICOP nel campione scelto. Controlla sample_start / sample_end.")
}
## =========================================================
## 5. Function computing the arithmetic mean and the VWAP
## =========================================================
# Takes a data frame with `close` and `volume` columns and returns a list with
#   mean_price: plain arithmetic mean of the closing prices
#   vwap      : volume-weighted average price, sum(P_t * V_t) / sum(V_t)
calc_means <- function(df) {
  closes <- df$close
  volumes <- df$volume
  list(
    mean_price = mean(closes),
    vwap = sum(closes * volumes) / sum(volumes)
  )
}
# Sample statistics (arithmetic mean of the close and VWAP) for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats <- calc_means(icop_df)

# Console report: one line with the sample window, then a three-line summary
# (header, arithmetic mean, VWAP) for each security, values rounded to 4 digits.
cat("=== Campione:", format(sample_start), " - ", format(sample_end), "===\n\n")
invisible(Map(
  function(header, ticker, stats) {
    cat(header, ticker, ") ===\n", sep = "")
    cat("Media aritmetica (close): ", round(stats$mean_price, 4), "\n", sep = "")
    cat("VWAP                     : ", round(stats$vwap, 4), "\n\n", sep = "")
  },
  c("=== Palingeo (", "=== ICOP ("),
  list(ticker_palingeo, ticker_icop),
  list(palingeo_stats, icop_stats)
))
# Recompute the sample statistics for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats     <- calc_means(icop_df)

# ICOP statistics rescaled by the factor 12/19.
# calc_means() returns a list, so the factor has to be applied to each element:
# multiplying the list itself by a number is an error in R, and so is
# multiplying the whole data frame, because `*` is not defined for its Date
# column.
# NOTE(review): the meaning of 12/19 is not stated in the script — confirm.
icop_stats_1 <- lapply(icop_stats, function(value) (12 / 19) * value)
## Custom theme --------------------------------------------------------
# theme_minimal() at base size 13 with a top legend without title, a bold
# title, a grey subtitle, some spacing on the axis titles, horizontal centred
# x tick labels and light grey grid lines.
tema_finanza <- theme_minimal(base_size = 13) +
  theme(
    legend.position  = "top",
    legend.title     = element_blank(),
    plot.title       = element_text(size = 16, face = "bold"),
    plot.subtitle    = element_text(colour = "grey30", size = 11),
    axis.title.x     = element_text(margin = margin(t = 8)),
    axis.title.y     = element_text(margin = margin(r = 8)),
    axis.text.x      = element_text(hjust = 0.5, angle = 0),
    panel.grid.major = element_line(linewidth = 0.4, colour = "grey85"),
    panel.grid.minor = element_line(linewidth = 0.2, colour = "grey90")
  )
## --------- Palingeo plot ---------------------------------------------
# Daily closing price with a dashed horizontal reference line.
# The reference level is read from palingeo_stats$vwap through a one-row data
# frame: at this point of the script palingeo_df has no mean_close column yet
# (it is only added in the later "section 6" step), so mapping yintercept to
# that column of palingeo_df fails with "object 'mean_close' not found".
# The value is the VWAP, the same quantity the later step stores in mean_close.
ggplot(palingeo_df, aes(x = date)) +
  geom_line(aes(y = close, colour = "Prezzo di chiusura"), linewidth = 0.7) +
  geom_hline(
    data = data.frame(mean_close = palingeo_stats$vwap),
    aes(yintercept = mean_close, colour = "Media campione"),
    linetype = "dashed",
    linewidth = 0.8
  ) +
  scale_colour_manual(
    values = c(
      "Prezzo di chiusura" = "#1b9e77",
      "Media campione"     = "#d95f02"
    )
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %Y") +
  labs(
    title    = paste0("Palingeo (", ticker_palingeo, "): prezzo giornaliero e media campione"),
    subtitle = paste("Campione:", format(sample_start), "-", format(sample_end)),
    x = "Data",
    y = "Prezzo di chiusura"
  ) +
  tema_finanza
## =========================================================
## 6. Add the reference-level column used by the plots
## =========================================================
# Note: despite its name, mean_close is filled with the VWAP of the sample,
# not with the arithmetic mean of the close.
palingeo_df <- mutate(palingeo_df, mean_close = palingeo_stats$vwap)
icop_df <- mutate(icop_df, mean_close = icop_stats$vwap)
## =========================================================
## 7. Plots: daily price + reference level
## =========================================================
## Custom theme --------------------------------------------------------
# theme_minimal() at base size 13 with a top legend without title, a bold
# title, a grey subtitle, some spacing on the axis titles, horizontal centred
# x tick labels and light grey grid lines.
tema_finanza <- theme_minimal(base_size = 13) +
  theme(
    legend.position  = "top",
    legend.title     = element_blank(),
    plot.title       = element_text(size = 16, face = "bold"),
    plot.subtitle    = element_text(colour = "grey30", size = 11),
    axis.title.x     = element_text(margin = margin(t = 8)),
    axis.title.y     = element_text(margin = margin(r = 8)),
    axis.text.x      = element_text(hjust = 0.5, angle = 0),
    panel.grid.major = element_line(linewidth = 0.4, colour = "grey85"),
    panel.grid.minor = element_line(linewidth = 0.2, colour = "grey90")
  )
## --------- Palingeo plot ---------------------------------------------
# Daily closing price as a line, plus a dashed horizontal line at the level
# stored in the mean_close column; both appear in the colour legend.
ggplot(data = palingeo_df, mapping = aes(x = date)) +
  geom_line(
    mapping = aes(y = close, colour = "Prezzo di chiusura"),
    linewidth = 0.7
  ) +
  geom_hline(
    mapping = aes(yintercept = mean_close, colour = "Media campione"),
    linewidth = 0.8,
    linetype = "dashed"
  ) +
  labs(
    x = "Data",
    y = "Prezzo di chiusura",
    title = paste0("Palingeo (", ticker_palingeo, "): prezzo giornaliero e media campione"),
    subtitle = paste("Campione:", format(sample_start), "-", format(sample_end))
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  scale_colour_manual(
    values = c(
      "Prezzo di chiusura" = "#1b9e77",
      "Media campione"     = "#d95f02"
    )
  ) +
  tema_finanza
## --------- ICOP plot -------------------------------------------------
# Daily closing price as a line, plus a dashed horizontal line at the level
# stored in the mean_close column; both appear in the colour legend.
ggplot(data = icop_df, mapping = aes(x = date)) +
  geom_line(
    mapping = aes(y = close, colour = "Prezzo di chiusura"),
    linewidth = 0.7
  ) +
  geom_hline(
    mapping = aes(yintercept = mean_close, colour = "Media campione"),
    linewidth = 0.8,
    linetype = "dashed"
  ) +
  labs(
    x = "Data",
    y = "Prezzo di chiusura",
    title = paste0("ICOP (", ticker_icop, "): prezzo giornaliero e media campione"),
    subtitle = paste("Campione:", format(sample_start), "-", format(sample_end))
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  scale_colour_manual(
    values = c(
      "Prezzo di chiusura" = "#0072B2",
      "Media campione"     = "#E69F00"
    )
  ) +
  tema_finanza
## Custom theme --------------------------------------------------------
# theme_minimal() at base size 13 with a top legend without title, a bold
# title, a grey subtitle, some spacing on the axis titles and light grey grid
# lines. This variant does not override axis.text.x.
tema_finanza <- theme_minimal(base_size = 13) +
  theme(
    legend.position  = "top",
    legend.title     = element_blank(),
    plot.title       = element_text(size = 16, face = "bold"),
    plot.subtitle    = element_text(colour = "grey30", size = 11),
    axis.title.x     = element_text(margin = margin(t = 8)),
    axis.title.y     = element_text(margin = margin(r = 8)),
    panel.grid.major = element_line(linewidth = 0.4, colour = "grey85"),
    panel.grid.minor = element_line(linewidth = 0.2, colour = "grey90")
  )
## --------- Palingeo plot ---------------------------------------------
# Daily closing price, a dashed horizontal line at the mean_close level, and a
# text label showing that level (rounded to 2 digits) near the last date.
ggplot(data = palingeo_df, mapping = aes(x = date)) +
  geom_line(
    mapping = aes(y = close, colour = "Prezzo di chiusura"),
    linewidth = 0.7
  ) +
  geom_hline(
    mapping = aes(yintercept = mean_close, colour = "Media campione"),
    linewidth = 0.8,
    linetype = "dashed"
  ) +
  # Value label, anchored at the last date of the sample.
  annotate(
    "text",
    label = paste("Media:", round(unique(palingeo_df$mean_close), 2)),
    x = max(palingeo_df$date),
    y = unique(palingeo_df$mean_close),
    vjust = -0.5, hjust = 1.1,
    size = 4, colour = "#d95f02"
  ) +
  labs(
    x = "Data",
    y = "Prezzo di chiusura",
    title = paste0("Palingeo (", ticker_palingeo, "): prezzo giornaliero e media campione"),
    subtitle = paste("Campione:", format(sample_start), "-", format(sample_end))
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  scale_colour_manual(
    values = c(
      "Prezzo di chiusura" = "#1b9e77",
      "Media campione"     = "#d95f02"
    )
  ) +
  tema_finanza
## --------- ICOP plot -------------------------------------------------
# Daily closing price, a dashed horizontal line at the mean_close level, and a
# text label showing that level (rounded to 2 digits) near the last date.
ggplot(data = icop_df, mapping = aes(x = date)) +
  geom_line(
    mapping = aes(y = close, colour = "Prezzo di chiusura"),
    linewidth = 0.7
  ) +
  geom_hline(
    mapping = aes(yintercept = mean_close, colour = "Media campione"),
    linewidth = 0.8,
    linetype = "dashed"
  ) +
  # Value label, anchored at the last date of the sample.
  annotate(
    "text",
    label = paste("Media:", round(unique(icop_df$mean_close), 2)),
    x = max(icop_df$date),
    y = unique(icop_df$mean_close),
    vjust = -0.5, hjust = 1.1,
    size = 4, colour = "#E69F00"
  ) +
  labs(
    x = "Data",
    y = "Prezzo di chiusura",
    title = paste0("ICOP (", ticker_icop, "): prezzo giornaliero e media campione"),
    subtitle = paste("Campione:", format(sample_start), "-", format(sample_end))
  ) +
  scale_x_date(date_labels = "%b %Y", date_breaks = "1 month") +
  scale_colour_manual(
    values = c(
      "Prezzo di chiusura" = "#0072B2",
      "Media campione"     = "#E69F00"
    )
  ) +
  tema_finanza
###############################################
# Script R per Palingeo e ICOP da Yahoo Finance
# - Scarica prezzi e volumi giornalieri
# - Permette di scegliere il campione per medie, VWAP e grafici
# - Calcola:
#     * media aritmetica
#     * VWAP
# - Esegue test di stazionarietà:
#     * ADF (Augmented Dickey-Fuller)
#     * KPSS
#   per entrambe le serie di prezzi (livello)
###############################################
# Start from an empty workspace.
# NOTE(review): rm(list = ls()) and setwd() alter global session state; they
# are kept here to preserve the script's existing flow.
rm(list = ls())
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# Required packages: install whichever is missing, then attach it.
# "here" goes through the same install-if-missing guard as the other packages,
# so a machine without it no longer stops at library(here).
invisible(lapply(
  c("here", "quantmod", "dplyr", "ggplot2", "tseries"),
  function(pkg) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg)
    }
    library(pkg, character.only = TRUE)
  }
))

# Work from the project root as located by here::here().
setwd(here::here())
## =========================================================
## 1. Set the tickers, the download period and the sample period
## =========================================================
# >>>>>> REPLACE THESE WITH THE CORRECT YAHOO FINANCE TICKERS <<<<<<
# Example (to be verified manually on Yahoo Finance):
#   - Palingeo  : "PAL.MI"   (placeholder!)
#   - ICOP      : "ICOP.MI"  (placeholder!)
ticker_palingeo <- "PAL.MI"    # <-- check the exact ticker
ticker_icop     <- "ICOP.MI"   # <-- check the exact ticker
# Period to DOWNLOAD from Yahoo (may be wider than the sample)
download_start <- as.Date("2024-01-01")
download_end   <- Sys.Date()
# Sample window: the downloaded rows are later filtered to dates inside
# [sample_start, sample_end] before the statistics are computed.
sample_start <- as.Date("2025-05-13")
sample_end   <- as.Date("2025-06-13")
## =========================================================
## 2. Helper to download and clean the data
## =========================================================
# Downloads the series for `ticker` from Yahoo between `start_date` and
# `end_date` (warnings from the download are suppressed) and returns a data
# frame with columns date, open, high, low, close, volume, taken from the
# first five columns of the downloaded object in that order. Rows with a
# missing close, a missing volume, or a non-positive volume are dropped.
get_clean_ohlc <- function(ticker, start_date, end_date) {
  quotes <- suppressWarnings(
    getSymbols(
      ticker,
      src = "yahoo",
      from = start_date,
      to = end_date,
      auto.assign = FALSE
    )
  )

  # Extract the k-th column of the downloaded object as a plain numeric vector.
  numeric_column <- function(k) as.numeric(quotes[, k])

  prices <- data.frame(
    date   = index(quotes),
    open   = numeric_column(1),
    high   = numeric_column(2),
    low    = numeric_column(3),
    close  = numeric_column(4),
    volume = numeric_column(5)
  )

  # Keep only days with a usable close and a strictly positive volume.
  filter(prices, !is.na(close), !is.na(volume), volume > 0)
}
## =========================================================
## 3. Download the raw data for the whole download period
## =========================================================
palingeo_raw <- get_clean_ohlc(
  ticker = ticker_palingeo,
  start_date = download_start,
  end_date = download_end
)
icop_raw <- get_clean_ohlc(
  ticker = ticker_icop,
  start_date = download_start,
  end_date = download_end
)

## =========================================================
## 4. Restrict the data to the chosen sample
## =========================================================
# Keep the rows whose date lies inside [sample_start, sample_end].
palingeo_df <- filter(palingeo_raw, date >= sample_start, date <= sample_end)
icop_df <- filter(icop_raw, date >= sample_start, date <= sample_end)

# Abort if either subset is empty.
if (nrow(palingeo_df) < 1) {
  stop("ATTENZIONE: nessun dato per Palingeo nel campione scelto. Controlla sample_start / sample_end.")
}
if (nrow(icop_df) < 1) {
  stop("ATTENZIONE: nessun dato per ICOP nel campione scelto. Controlla sample_start / sample_end.")
}
## =========================================================
## 5. Function computing the arithmetic mean and the VWAP
## =========================================================
# Takes a data frame with `close` and `volume` columns and returns a list with
#   mean_price: plain arithmetic mean of the closing prices
#   vwap      : volume-weighted average price, sum(P_t * V_t) / sum(V_t)
calc_means <- function(df) {
  closes <- df$close
  volumes <- df$volume
  list(
    mean_price = mean(closes),
    vwap = sum(closes * volumes) / sum(volumes)
  )
}
# Sample statistics (arithmetic mean of the close and VWAP) for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats <- calc_means(icop_df)
# (Disabled attempt kept for reference: icop_stats_1 <- 12/19*icop_stats)

# Console report: one line with the sample window, then a three-line summary
# (header, arithmetic mean, VWAP) for each security, values rounded to 4 digits.
cat("=== Campione:", format(sample_start), " - ", format(sample_end), "===\n\n")
invisible(Map(
  function(header, ticker, stats) {
    cat(header, ticker, ") ===\n", sep = "")
    cat("Media aritmetica (close): ", round(stats$mean_price, 4), "\n", sep = "")
    cat("VWAP                     : ", round(stats$vwap, 4), "\n\n", sep = "")
  },
  c("=== Palingeo (", "=== ICOP ("),
  list(ticker_palingeo, ticker_icop),
  list(palingeo_stats, icop_stats)
))
# Ad-hoc console arithmetic on hard-coded figures; each result is just printed.
# NOTE(review): the inputs look like values read off the report printed above
# (one of them scaled by 12/19) — confirm their meaning with the author.
10.38*(12/19)
6.55/5.51
6/5.51
# Sample window: 2025-04-13 to 2025-06-13.
sample_start <- as.Date("2025-04-13")
sample_end <- as.Date("2025-06-13")

# Rebuild both subsets from the raw downloads for the new window.
palingeo_df <- filter(palingeo_raw, date >= sample_start, date <= sample_end)
icop_df <- filter(icop_raw, date >= sample_start, date <= sample_end)

# Abort if either subset is empty.
if (nrow(palingeo_df) < 1) {
  stop("ATTENZIONE: nessun dato per Palingeo nel campione scelto. Controlla sample_start / sample_end.")
}
if (nrow(icop_df) < 1) {
  stop("ATTENZIONE: nessun dato per ICOP nel campione scelto. Controlla sample_start / sample_end.")
}
## =========================================================
## 5. Function computing the arithmetic mean and the VWAP
## =========================================================
# Takes a data frame with `close` and `volume` columns and returns a list with
#   mean_price: plain arithmetic mean of the closing prices
#   vwap      : volume-weighted average price, sum(P_t * V_t) / sum(V_t)
calc_means <- function(df) {
  closes <- df$close
  volumes <- df$volume
  list(
    mean_price = mean(closes),
    vwap = sum(closes * volumes) / sum(volumes)
  )
}
# Sample statistics (arithmetic mean of the close and VWAP) for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats <- calc_means(icop_df)
# (Disabled attempt kept for reference: icop_stats_1 <- 12/19*icop_stats)

# Console report: one line with the sample window, then a three-line summary
# (header, arithmetic mean, VWAP) for each security, values rounded to 4 digits.
cat("=== Campione:", format(sample_start), " - ", format(sample_end), "===\n\n")
invisible(Map(
  function(header, ticker, stats) {
    cat(header, ticker, ") ===\n", sep = "")
    cat("Media aritmetica (close): ", round(stats$mean_price, 4), "\n", sep = "")
    cat("VWAP                     : ", round(stats$vwap, 4), "\n\n", sep = "")
  },
  c("=== Palingeo (", "=== ICOP ("),
  list(ticker_palingeo, ticker_icop),
  list(palingeo_stats, icop_stats)
))
# Ad-hoc console arithmetic on hard-coded figures; each result is just printed.
# The mistyped line `6,5/5.52` (decimal comma) was removed: it is a parse error
# in R and stops the whole file from being sourced. The line with the decimal
# point below is the intended calculation.
# NOTE(review): the inputs look like values read off the report printed above
# (one of them scaled by 12/19) — confirm their meaning with the author.
10.29*(12/19)
6.5/5.52
6/5.52
# Sample window: 2025-03-13 to 2025-06-13.
sample_start <- as.Date("2025-03-13")
sample_end <- as.Date("2025-06-13")

# Rebuild both subsets from the raw downloads for the new window.
palingeo_df <- filter(palingeo_raw, date >= sample_start, date <= sample_end)
icop_df <- filter(icop_raw, date >= sample_start, date <= sample_end)

# Abort if either subset is empty.
if (nrow(palingeo_df) < 1) {
  stop("ATTENZIONE: nessun dato per Palingeo nel campione scelto. Controlla sample_start / sample_end.")
}
if (nrow(icop_df) < 1) {
  stop("ATTENZIONE: nessun dato per ICOP nel campione scelto. Controlla sample_start / sample_end.")
}
## =========================================================
## 5. Function computing the arithmetic mean and the VWAP
## =========================================================
# Takes a data frame with `close` and `volume` columns and returns a list with
#   mean_price: plain arithmetic mean of the closing prices
#   vwap      : volume-weighted average price, sum(P_t * V_t) / sum(V_t)
calc_means <- function(df) {
  closes <- df$close
  volumes <- df$volume
  list(
    mean_price = mean(closes),
    vwap = sum(closes * volumes) / sum(volumes)
  )
}
# Sample statistics (arithmetic mean of the close and VWAP) for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats <- calc_means(icop_df)
# (Disabled attempt kept for reference: icop_stats_1 <- 12/19*icop_stats)

# Console report: one line with the sample window, then a three-line summary
# (header, arithmetic mean, VWAP) for each security, values rounded to 4 digits.
cat("=== Campione:", format(sample_start), " - ", format(sample_end), "===\n\n")
invisible(Map(
  function(header, ticker, stats) {
    cat(header, ticker, ") ===\n", sep = "")
    cat("Media aritmetica (close): ", round(stats$mean_price, 4), "\n", sep = "")
    cat("VWAP                     : ", round(stats$vwap, 4), "\n\n", sep = "")
  },
  c("=== Palingeo (", "=== ICOP ("),
  list(ticker_palingeo, ticker_icop),
  list(palingeo_stats, icop_stats)
))
# Ad-hoc console arithmetic on hard-coded figures; each result is just printed.
# NOTE(review): the inputs look like values read off the report printed above
# (one of them scaled by 12/19) — confirm their meaning with the author.
9.96*12/19
6.29/5.69
6/5.69
# Sample window: 2025-01-13 to 2025-06-13.
sample_start <- as.Date("2025-01-13")
sample_end <- as.Date("2025-06-13")

# Rebuild both subsets from the raw downloads for the new window.
palingeo_df <- filter(palingeo_raw, date >= sample_start, date <= sample_end)
icop_df <- filter(icop_raw, date >= sample_start, date <= sample_end)

# Abort if either subset is empty.
if (nrow(palingeo_df) < 1) {
  stop("ATTENZIONE: nessun dato per Palingeo nel campione scelto. Controlla sample_start / sample_end.")
}
if (nrow(icop_df) < 1) {
  stop("ATTENZIONE: nessun dato per ICOP nel campione scelto. Controlla sample_start / sample_end.")
}
## =========================================================
## 5. Function computing the arithmetic mean and the VWAP
## =========================================================
# Takes a data frame with `close` and `volume` columns and returns a list with
#   mean_price: plain arithmetic mean of the closing prices
#   vwap      : volume-weighted average price, sum(P_t * V_t) / sum(V_t)
calc_means <- function(df) {
  closes <- df$close
  volumes <- df$volume
  list(
    mean_price = mean(closes),
    vwap = sum(closes * volumes) / sum(volumes)
  )
}
# Sample statistics (arithmetic mean of the close and VWAP) for both securities.
palingeo_stats <- calc_means(palingeo_df)
icop_stats <- calc_means(icop_df)
# (Disabled attempt kept for reference: icop_stats_1 <- 12/19*icop_stats)

# Console report: one line with the sample window, then a three-line summary
# (header, arithmetic mean, VWAP) for each security, values rounded to 4 digits.
cat("=== Campione:", format(sample_start), " - ", format(sample_end), "===\n\n")
invisible(Map(
  function(header, ticker, stats) {
    cat(header, ticker, ") ===\n", sep = "")
    cat("Media aritmetica (close): ", round(stats$mean_price, 4), "\n", sep = "")
    cat("VWAP                     : ", round(stats$vwap, 4), "\n\n", sep = "")
  },
  c("=== Palingeo (", "=== ICOP ("),
  list(ticker_palingeo, ticker_icop),
  list(palingeo_stats, icop_stats)
))
# Ad-hoc console arithmetic on hard-coded figures; each result is just printed.
# NOTE(review): the inputs look like values read off the report printed above
# (one of them scaled by 12/19) — confirm their meaning with the author.
9.48*12/19
5.99/5.61
6/5.61
# Open the downloaded Palingeo data in the data viewer.
View(palingeo_raw)
###########################################################
# INTRODUCTION TO SPORT ANALYTICS (20630)
# Exam: June 13th 2025
# Allowed time: 70 minutes
# ###########################################################
# ---------------------------------------------------------
# Q1. Data Import and Preparation
# ---------------------------------------------------------
# 1.1 Clear the environment, set the working directory, import the data, install and load all relevant packages.
#setwd(path)
# Use the folder of the script currently open in RStudio as working directory.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# Clear the environment
rm(list = ls())
# Install (when missing) and load the relevant packages.
# requireNamespace() checks a single package directly, instead of rebuilding
# the full installed.packages() table on every iteration, and TRUE replaces
# the reassignable shorthand T.
listofpackages <- c("dplyr","assertthat","bindrcpp","glue","pkgconfig","utf8","cli","ellipse","reshape2","ggplot2","dygraphs","aod")
for (j in listofpackages) {
  if (!requireNamespace(j, quietly = TRUE)) {
    install.packages(j)
  }
  library(j, character.only = TRUE)
}
# Load the dataset from the working directory (replace with the actual path if
# needed) and drop every row that contains a missing value.
NBAdata <- na.omit(read.csv("Teams_overall2026.csv"))
# Inspect the structure of the cleaned data.
str(NBAdata)
# Count the distinct values of the Team column and show the result.
unique_teams <- length(unique(NBAdata$Team))
print(unique_teams)
