## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
##
## Attaching package: 'tsibble'
##
## The following object is masked from 'package:lubridate':
##
## interval
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
## Loading required package: fabletools
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma 2.5 ✔ expsmooth 2.3
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following object is masked from 'package:tsibble':
##
## index
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
##
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: Rcpp
## Loading required package: rlang
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
library(zoo) # Used for Prophet date conversion
# 1. Load the data files (UPDATE THESE PATHS)
midwest <- read_excel("/Users/urielulloa/Desktop/Time_Series_Project/Midwest.xlsx", sheet = "Monthly") %>%
rename(Midwest = CMWRUR) # Rename the series for clarity
northeast <- read_excel("/Users/urielulloa/Desktop/Time_Series_Project/Northeast.xlsx", sheet = "Monthly") %>%
rename(Northeast = CNERUR)
south <- read_excel("/Users/urielulloa/Desktop/Time_Series_Project/South.xlsx", sheet = "Monthly") %>%
rename(South = CSOUUR)
west <- read_excel("/Users/urielulloa/Desktop/Time_Series_Project/West.xlsx", sheet = "Monthly") %>%
rename(West = CWSTUR)
# 2. Combine the datasets and convert to a tsibble (UNCHANGED)
unemployment_data <- midwest %>%
left_join(northeast, by = "observation_date") %>%
left_join(south, by = "observation_date") %>%
left_join(west, by = "observation_date") %>%
mutate(observation_date = ymd(observation_date)) %>%
as_tsibble(index = observation_date)
print(unemployment_data)## # A tsibble: 596 x 5 [1D]
## observation_date Midwest Northeast South West
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 1976-01-01 6.8 9.6 6.9 8.6
## 2 1976-02-01 6.8 9.6 6.9 8.6
## 3 1976-03-01 6.8 9.6 6.9 8.6
## 4 1976-04-01 6.7 9.5 6.8 8.5
## 5 1976-05-01 6.6 9.5 6.7 8.4
## 6 1976-06-01 6.5 9.4 6.7 8.4
## 7 1976-07-01 6.5 9.4 6.7 8.5
## 8 1976-08-01 6.4 9.3 6.7 8.6
## 9 1976-09-01 6.4 9.3 6.7 8.7
## 10 1976-10-01 6.5 9.3 6.7 8.8
## # ℹ 586 more rows
# Long format is preferred for ggplot/tsibble plotting
unemployment_long <- unemployment_data %>%
pivot_longer(-observation_date, names_to = "Region", values_to = "Rate")
# Plot all four series on the same graph
unemployment_long %>%
autoplot(Rate) +
geom_smooth(method = "loess", se = FALSE, color = "black") +
labs(
title = "Unemployment Rate Trends by US Census Region (1976-Present)",
y = "Unemployment Rate (%)"
) +
facet_grid(Region ~ .) # Plot each region in its own panel## `geom_smooth()` using formula = 'y ~ x'
#### This chunk is for Exploratory Data Analysis (EDA). It converts the
data to a “long” format, making it easier to plot, and then generates a
visual chart showing the unemployment rate trend for all four regions
from 1976 to the present. This helps visually identify major peaks
(recessions) and structural changes. ## 3.
df_unemp <- unemployment_data
start_date <- c(1976, 1)
# Create full Time Series (ts) Objects (UNCHANGED)
midwest.ts <- ts(df_unemp$Midwest, start = start_date, frequency = 12)
northeast.ts <- ts(df_unemp$Northeast, start = start_date, frequency = 12)
south.ts <- ts(df_unemp$South, start = start_date, frequency = 12)
west.ts <- ts(df_unemp$West, start = start_date, frequency = 12)## 5.1 Formal Statistical Break Test (on Entire Series)
# Load the necessary library (already loaded in setup, but included for context)
library(strucchange)
# Test for breaks in the mean (level) of the series using the FULL TIME SERIES.
# h=0.15 is the minimum segment size.
bp.fit.midwest <- breakpoints(midwest.ts ~ 1, h = 0.15)
print("Midwest - Statistically Identified Break Dates (Entire Series):")## [1] "Midwest - Statistically Identified Break Dates (Entire Series):"
## [1] 1986.917 1994.333 2007.500 2014.917
bp.fit.northeast <- breakpoints(northeast.ts ~ 1, h = 0.15)
print("Northeast - Statistically Identified Break Dates (Entire Series):")## [1] "Northeast - Statistically Identified Break Dates (Entire Series):"
## [1] 1983.750 1991.167 1998.583 2008.250 2015.667
bp.fit.south <- breakpoints(south.ts ~ 1, h = 0.15)
print("South - Statistically Identified Break Dates (Entire Series):")## [1] "South - Statistically Identified Break Dates (Entire Series):"
## [1] 1987.333 1994.750 2008.333 2015.750
bp.fit.west <- breakpoints(west.ts ~ 1, h = 0.15)
print("West - Statistically Identified Break Dates (Entire Series):")## [1] "West - Statistically Identified Break Dates (Entire Series):"
## [1] 1985.583 1997.000 2008.250 2015.667
# 5.1.1 Visualization of Structural Breaks
# The plots now use the full time series objects.
plot(midwest.ts, main = "Midwest Rate with Detected Structural Breaks (Entire Series)")
lines(bp.fit.midwest, col = "red")plot(northeast.ts, main = "Northeast Rate with Detected Structural Breaks (Entire Series)")
lines(bp.fit.northeast, col = "red")plot(south.ts, main = "South Rate with Detected Structural Breaks (Entire Series)")
lines(bp.fit.south, col = "red")plot(west.ts, main = "West Rate with Detected Structural Breaks (Entire Series)")
lines(bp.fit.west, col = "red")
#### This uses the strucchange package to statistically identify points
in the historical data where the average unemployment rate level
significantly changed. The results help confirm the major structural
shifts (like recessions) that inform the three chosen forecast test
segments.
## 5. Time Series Data Preparation and Segmentation
# --- PRE-REQUISITES (Assuming data is loaded and merged into df_unemp) ---
df_unemp <- unemployment_data
start_date <- c(1976, 1) # Start date for all series
# Create full Time Series (ts) Objects (Unchanged)
midwest.ts <- ts(df_unemp$Midwest, start = start_date, frequency = 12)
northeast.ts <- ts(df_unemp$Northeast, start = start_date, frequency = 12)
south.ts <- ts(df_unemp$South, start = start_date, frequency = 12)
west.ts <- ts(df_unemp$West, start = start_date, frequency = 12)
# --- Define Fixed Breakpoints and Horizon ---
h_horizon <- 12 # 1 year test horizon (12 months)
# ====================================================
# SEGMENT 1: GREAT RECESSION (2008) TEST (Unchanged)
# Train: Start to 2007-12 | Test: 2008-01 to 2008-12
# ====================================================
recession_test_start <- c(2008, 1)
recession_train_end <- c(2007, 12)
# Training Sets S1
train.midwest.S1 <- window(midwest.ts, end = recession_train_end)
train.northeast.S1 <- window(northeast.ts, end = recession_train_end)
train.south.S1 <- window(south.ts, end = recession_train_end)
train.west.S1 <- window(west.ts, end = recession_train_end)
# Test Sets S1 (12 months)
test.midwest.S1 <- window(midwest.ts, start = recession_test_start, end = c(2008, 12))
test.northeast.S1 <- window(northeast.ts, start = recession_test_start, end = c(2008, 12))
test.south.S1 <- window(south.ts, start = recession_test_start, end = c(2008, 12))
test.west.S1 <- window(west.ts, start = recession_test_start, end = c(2008, 12))
# ====================================================
# SEGMENT 2: INTER-CRISIS STABILITY (2019) TEST (Unchanged)
# Train: Start to 2018-12 | Test: 2019-01 to 2019-12
# ====================================================
stability_test_start <- c(2019, 1)
stability_train_end <- c(2018, 12)
# Training Sets S2
train.midwest.S2 <- window(midwest.ts, end = stability_train_end)
train.northeast.S2 <- window(northeast.ts, end = stability_train_end)
train.south.S2 <- window(south.ts, end = stability_train_end)
train.west.S2 <- window(west.ts, end = stability_train_end)
# Test Sets S2 (12 months)
test.midwest.S2 <- window(midwest.ts, start = stability_test_start, end = c(2019, 12))
test.northeast.S2 <- window(northeast.ts, start = stability_test_start, end = c(2019, 12))
test.south.S2 <- window(south.ts, start = stability_test_start, end = c(2019, 12))
test.west.S2 <- window(west.ts, start = stability_test_start, end = c(2019, 12))
# ====================================================
# SEGMENT 3: COVID SHOCK & MODERN ERA TEST (Fixed Dates) ✅
# Train: 2020-01 to 2024-07 | Test: 2024-08 to 2025-08
# ====================================================
S3_train_start <- c(2020, 1)
S3_train_end <- c(2024, 7)
S3_test_start <- c(2024, 8)
S3_test_end <- c(2025, 7) # Adjusted to 12 months for h_horizon consistency (Aug 2024 to Jul 2025)
# Training Sets S3
train.midwest.S3 <- window(midwest.ts, start = S3_train_start, end = S3_train_end)
train.northeast.S3 <- window(northeast.ts, start = S3_train_start, end = S3_train_end)
train.south.S3 <- window(south.ts, start = S3_train_start, end = S3_train_end)
train.west.S3 <- window(west.ts, start = S3_train_start, end = S3_train_end)
# Test Sets S3 (12 months: Aug 2024 to Jul 2025)
test.midwest.S3 <- window(midwest.ts, start = S3_test_start, end = S3_test_end)
test.northeast.S3 <- window(northeast.ts, start = S3_test_start, end = S3_test_end)
test.south.S3 <- window(south.ts, start = S3_test_start, end = S3_test_end)
test.west.S3 <- window(west.ts, start = S3_test_start, end = S3_test_end)
# --- Verification ---
print(paste("S1 Midwest Test Length:", length(test.midwest.S1)))## [1] "S1 Midwest Test Length: 12"
## [1] "S2 Midwest Test Length: 12"
## [1] "S3 Midwest Test Length: 12"
print(paste("S3 Midwest Train Start:", start(train.midwest.S3)[1], "-", start(train.midwest.S3)[2]))## [1] "S3 Midwest Train Start: 2020 - 1"
## [1] "S3 Midwest Test Start: 2024 - 8"
## [1] "S3 Midwest Test End: 2025 - 7"
## 4.2. Stationarity and Autocorrelation Check (ndiffs)
# The ndiffs() function determines the number of non-seasonal differences required for stationarity.
# --- Segment 1 (S1) ndiffs (Pre-Recession Data) ---
print("--- Segment 1 (S1) ndiffs (d) ---")## [1] "--- Segment 1 (S1) ndiffs (d) ---"
d_order.midwest.S1 <- ndiffs(train.midwest.S1); print(paste("Midwest S1 ndiffs (d):", d_order.midwest.S1))## [1] "Midwest S1 ndiffs (d): 1"
d_order.northeast.S1 <- ndiffs(train.northeast.S1); print(paste("Northeast S1 ndiffs (d):", d_order.northeast.S1))## [1] "Northeast S1 ndiffs (d): 1"
## [1] "South S1 ndiffs (d): 1"
## [1] "West S1 ndiffs (d): 1"
# --- Segment 2 (S2) ndiffs (Inter-Crisis Stability Data) ---
print("--- Segment 2 (S2) ndiffs (d) ---")## [1] "--- Segment 2 (S2) ndiffs (d) ---"
d_order.midwest.S2 <- ndiffs(train.midwest.S2); print(paste("Midwest S2 ndiffs (d):", d_order.midwest.S2))## [1] "Midwest S2 ndiffs (d): 1"
d_order.northeast.S2 <- ndiffs(train.northeast.S2); print(paste("Northeast S2 ndiffs (d):", d_order.northeast.S2))## [1] "Northeast S2 ndiffs (d): 1"
## [1] "South S2 ndiffs (d): 1"
## [1] "West S2 ndiffs (d): 1"
## [1] "--- Segment 3 (S3) ndiffs (d) ---"
d_order.midwest.S3 <- ndiffs(train.midwest.S3); print(paste("Midwest S3 ndiffs (d):", d_order.midwest.S3))## [1] "Midwest S3 ndiffs (d): 1"
d_order.northeast.S3 <- ndiffs(train.northeast.S3); print(paste("Northeast S3 ndiffs (d):", d_order.northeast.S3))## [1] "Northeast S3 ndiffs (d): 1"
## [1] "South S3 ndiffs (d): 1"
## [1] "West S3 ndiffs (d): 1"
# Plot ACF/PACF of the DIFFERENCED series using the largest training set (S3)
# This helps confirm the p and q orders for ARIMA.
# Midwest
if (d_order.midwest.S3 > 0) {
ggAcf(diff(train.midwest.S3, lag = d_order.midwest.S3)) + ggtitle("ACF of Differenced Midwest Rate (S3)")
ggPacf(diff(train.midwest.S3, lag = d_order.midwest.S3)) + ggtitle("PACF of Differenced Midwest Rate (S3)")
}# Northeast
if (d_order.northeast.S3 > 0) {
ggAcf(diff(train.northeast.S3, lag = d_order.northeast.S3)) + ggtitle("ACF of Differenced Northeast Rate (S3)")
ggPacf(diff(train.northeast.S3, lag = d_order.northeast.S3)) + ggtitle("PACF of Differenced Northeast Rate (S3)")
}# South
if (d_order.south.S3 > 0) {
ggAcf(diff(train.south.S3, lag = d_order.south.S3)) + ggtitle("ACF of Differenced South Rate (S3)")
ggPacf(diff(train.south.S3, lag = d_order.south.S3)) + ggtitle("PACF of Differenced South Rate (S3)")
}# West
if (d_order.west.S3 > 0) {
ggAcf(diff(train.west.S3, lag = d_order.west.S3)) + ggtitle("ACF of Differenced West Rate (S3)")
ggPacf(diff(train.west.S3, lag = d_order.west.S3)) + ggtitle("PACF of Differenced West Rate (S3)")
}
#### These plots help identify the remaining structure in the data after
differencing. The ACF (Autocorrelation Function) determines the q
(Moving Average) order, and the PACF (Partial Autocorrelation Function)
determines the p (Autoregressive) order for the ARIMA model.
## 8 (Revised for COVID Impulse Train)
# 5.2 Create External Regressor (XREG) for COVID-19 Shock (2020-2021)
# Define the start time of the full unemployment series
start_time <- c(1976, 1)
# Create the full time index
full_time_index <- time(midwest.ts)
# --- Create the three impulse regressors (xreg) for the peak months ---
# April 2020 (The highest peak: 2020 + (4 - 1)/12)
impulse_apr <- ifelse(full_time_index == (2020 + 3/12), 1, 0)
# May 2020 (Second peak: 2020 + (5 - 1)/12)
impulse_may <- ifelse(full_time_index == (2020 + 4/12), 1, 0)
# June 2020 (Third peak: 2020 + (6 - 1)/12)
impulse_jun <- ifelse(full_time_index == (2020 + 5/12), 1, 0)
# Combine them into a matrix for the full series, renaming columns for clarity
covid_impulse_train <- cbind(
APR20_Impulse = ts(impulse_apr, start = start_time, frequency = 12),
MAY20_Impulse = ts(impulse_may, start = start_time, frequency = 12),
JUN20_Impulse = ts(impulse_jun, start = start_time, frequency = 12)
)
# Split the impulse matrix into training and test sets for all three segments:
# --- Segment 1 (Recession) ---
# Train: Start to 2007-12 (All Zeros)
xreg.train.S1 <- window(covid_impulse_train, end = c(2007, 12))
# Test: 2008-01 to 2008-12 (All Zeros)
xreg.test.S1 <- window(covid_impulse_train, start = c(2008, 1), end = c(2008, 12))
# --- Segment 2 (Inter-Crisis Stability) ---
# Train: Start to 2018-12 (All Zeros)
xreg.train.S2 <- window(covid_impulse_train, end = c(2018, 12))
# Test: 2019-01 to 2019-12 (All Zeros)
xreg.test.S2 <- window(covid_impulse_train, start = c(2019, 1), end = c(2019, 12))
# --- Segment 3 (COVID Shock) ---
# Train: Start to 2020-02 (All Zeros)
xreg.train.S3 <- window(covid_impulse_train, end = c(2020, 2))
# Test: 2020-03 to 2021-02 (Contains the three "1" spikes)
xreg.test.S3 <- window(covid_impulse_train, start = c(2020, 3), end = c(2021, 2))## 6. Naive Benchmark (3-Segment Evaluation)
h_horizon <- 12 # Fixed 12-month forecast horizon for all test sets
# ====================================================
# SEGMENT 1: GREAT RECESSION (2008) TEST
# ====================================================
print("--- Segment 1: Naive Model Fits (Recession Test) ---")## [1] "--- Segment 1: Naive Model Fits (Recession Test) ---"
# 1. FIT MODELS
naive.forecast.midwest.S1 <- naive(train.midwest.S1, h = h_horizon)
naive.forecast.northeast.S1 <- naive(train.northeast.S1, h = h_horizon)
naive.forecast.south.S1 <- naive(train.south.S1, h = h_horizon)
naive.forecast.west.S1 <- naive(train.west.S1, h = h_horizon)
# 2. CALCULATE ACCURACY
print("--- Segment 1: Naive Model Accuracy (Recession Test) ---")## [1] "--- Segment 1: Naive Model Accuracy (Recession Test) ---"
naive_accuracy_midwest.S1 <- accuracy(naive.forecast.midwest.S1, test.midwest.S1); print("Midwest S1:"); print(naive_accuracy_midwest.S1)## [1] "Midwest S1:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003916449 0.1177465 0.0770235 -0.0800087 1.19385 0.1064366
## Test set 0.800000000 1.0778064 0.8000000 11.9522178 11.95222 1.1054978
## ACF1 Theil's U
## Training set 0.7718728 NA
## Test set 0.7124601 4.305121
naive_accuracy_northeast.S1 <- accuracy(naive.forecast.northeast.S1, test.northeast.S1); print("Northeast S1:"); print(naive_accuracy_northeast.S1)## [1] "Northeast S1:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01305483 0.1001305 0.07101828 -0.2050718 1.154141 0.09482699
## Test set 0.80000000 1.0181683 0.80000000 13.7177558 13.717756 1.06819813
## ACF1 Theil's U
## Training set 0.7376841 NA
## Test set 0.7184874 4.887039
naive_accuracy_south.S1 <- accuracy(naive.forecast.south.S1, test.south.S1); print("South S1:"); print(naive_accuracy_south.S1)## [1] "South S1:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006266319 0.09668122 0.0616188 -0.1236303 0.9953886 0.1006684
## Test set 1.041666667 1.32066902 1.0416667 17.1345901 17.1345901 1.7018006
## ACF1 Theil's U
## Training set 0.6269134 NA
## Test set 0.7169863 4.855518
naive_accuracy_west.S1 <- accuracy(naive.forecast.west.S1, test.west.S1); print("West S1:"); print(naive_accuracy_west.S1)## [1] "West S1:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.009399478 0.1091146 0.07519582 -0.1540029 1.111053 0.09880906
## Test set 1.300000000 1.6222412 1.30000000 18.8091368 18.809137 1.70823031
## ACF1 Theil's U
## Training set 0.7724146 NA
## Test set 0.7292035 5.227458
# ====================================================
# SEGMENT 2: INTER-CRISIS STABILITY (2019) TEST
# ====================================================
print("--- Segment 2: Naive Model Fits (Stability Test) ---")## [1] "--- Segment 2: Naive Model Fits (Stability Test) ---"
# 1. FIT MODELS
naive.forecast.midwest.S2 <- naive(train.midwest.S2, h = h_horizon)
naive.forecast.northeast.S2 <- naive(train.northeast.S2, h = h_horizon)
naive.forecast.south.S2 <- naive(train.south.S2, h = h_horizon)
naive.forecast.west.S2 <- naive(train.west.S2, h = h_horizon)
# 2. CALCULATE ACCURACY
print("--- Segment 2: Naive Model Accuracy (Stability Test) ---")## [1] "--- Segment 2: Naive Model Accuracy (Stability Test) ---"
naive_accuracy_midwest.S2 <- accuracy(naive.forecast.midwest.S2, test.midwest.S2); print("Midwest S2:"); print(naive_accuracy_midwest.S2)## [1] "Midwest S2:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.005825243 0.1241671 0.08038835 -0.1292829 1.232779 0.1010367
## Test set -0.208333333 0.2291288 0.20833333 -5.8737309 5.873731 0.2618454
## ACF1 Theil's U
## Training set 0.7917288 NA
## Test set 0.5337150 3.625293
naive_accuracy_northeast.S2 <- accuracy(naive.forecast.northeast.S2, test.northeast.S2); print("Northeast S2:"); print(naive_accuracy_northeast.S2)## [1] "Northeast S2:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01126214 0.1042774 0.07262136 -0.1938483 1.173391 0.0941388
## Test set -0.12500000 0.1554563 0.12500000 -3.4670385 3.467038 0.1620370
## ACF1 Theil's U
## Training set 0.7506402 NA
## Test set 0.5548780 2.219387
naive_accuracy_south.S2 <- accuracy(naive.forecast.south.S2, test.south.S2); print("South S2:"); print(naive_accuracy_south.S2)## [1] "South S2:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.006019417 0.1060317 0.06737864 -0.1297915 1.076094 0.09552415
## Test set -0.283333333 0.3000000 0.28333333 -8.1403042 8.140304 0.40168776
## ACF1 Theil's U
## Training set 0.6950385 NA
## Test set 0.6690476 6.163555
naive_accuracy_west.S2 <- accuracy(naive.forecast.west.S2, test.west.S2); print("West S2:"); print(naive_accuracy_west.S2)## [1] "West S2:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.008543689 0.1205167 0.08271845 -0.1535701 1.194121 0.09419362
## Test set -0.266666667 0.3055050 0.26666667 -6.9289605 6.928961 0.30366019
## ACF1 Theil's U
## Training set 0.8078481 NA
## Test set 0.7708333 4.908073
# ====================================================
# SEGMENT 3: COVID SHOCK (2020) TEST
# ====================================================
print("--- Segment 3: Naive Model Fits (COVID Test) ---")## [1] "--- Segment 3: Naive Model Fits (COVID Test) ---"
# 1. FIT MODELS
naive.forecast.midwest.S3 <- naive(train.midwest.S3, h = h_horizon)
naive.forecast.northeast.S3 <- naive(train.northeast.S3, h = h_horizon)
naive.forecast.south.S3 <- naive(train.south.S3, h = h_horizon)
naive.forecast.west.S3 <- naive(train.west.S3, h = h_horizon)
# 2. CALCULATE ACCURACY
print("--- Segment 3: Naive Model Accuracy (COVID Test) ---")## [1] "--- Segment 3: Naive Model Accuracy (COVID Test) ---"
naive_accuracy_midwest.S3 <- accuracy(naive.forecast.midwest.S3, test.midwest.S3); print("Midwest S3:"); print(naive_accuracy_midwest.S3)## [1] "Midwest S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set 0.009259259 1.65445526 0.46111111 -1.067150 5.078136 0.30410702
## Test set 0.058333333 0.07637626 0.05833333 1.388889 1.388889 0.03847137
## ACF1 Theil's U
## Training set -0.08013098 NA
## Test set 0.74047619 2.59185
naive_accuracy_northeast.S3 <- accuracy(naive.forecast.northeast.S3, test.northeast.S3); print("Northeast S3:"); print(naive_accuracy_northeast.S3)## [1] "Northeast S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001851852 1.5618721 0.4388889 -1.108949 4.640912 0.21592932
## Test set 0.125000000 0.1384437 0.1250000 3.010066 3.010066 0.06149886
## ACF1 Theil's U
## Training set 0.03324985 NA
## Test set 0.51470588 3.31141
naive_accuracy_south.S3 <- accuracy(naive.forecast.south.S3, test.south.S3); print("South S3:"); print(naive_accuracy_south.S3)## [1] "South S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set 0.003703704 1.271336 0.3703704 -0.8875514 4.650857 0.26150946
## Test set 0.100000000 0.100000 0.1000000 2.6315789 2.631579 0.07060755
## ACF1 Theil's U
## Training set -0.01490336 NA
## Test set NaN Inf
naive_accuracy_west.S3 <- accuracy(naive.forecast.west.S3, test.west.S3); print("West S3:"); print(naive_accuracy_west.S3)## [1] "West S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01296296 1.4904014 0.4462963 -0.6795274 4.813203 0.21982521
## Test set 0.10833333 0.1118034 0.1083333 2.2498583 2.249858 0.05336006
## ACF1 Theil's U
## Training set 0.051208994 NA
## Test set -0.007575758 3.741657
## 7.1. Exponential Smoothing (ETS) Model (3-Segment Evaluation)
h_horizon <- 12 # Fixed 12-month forecast horizon for all test sets
# We use model="AAN" (Additive Error, Additive Trend, No Seasonality) as specified,
# relying on ETS to select the optimal parameters (alpha, beta, phi) within that structure.
# ====================================================
# 1. SEGMENT 1: GREAT RECESSION (2008) TEST
# ====================================================
print("--- Segment 1: ETS Model Fits ---")## [1] "--- Segment 1: ETS Model Fits ---"
# --- Fit Models ---
ets.fit.midwest.S1 <- ets(train.midwest.S1, model = "AAN")
ets.fit.northeast.S1 <- ets(train.northeast.S1, model = "AAN")
ets.fit.south.S1 <- ets(train.south.S1, model = "AAN")
ets.fit.west.S1 <- ets(train.west.S1, model = "AAN")
# --- Generate Forecasts ---
ets.forecast.midwest.S1 <- forecast(ets.fit.midwest.S1, h = h_horizon)
ets.forecast.northeast.S1 <- forecast(ets.fit.northeast.S1, h = h_horizon)
ets.forecast.south.S1 <- forecast(ets.fit.south.S1, h = h_horizon)
ets.forecast.west.S1 <- forecast(ets.fit.west.S1, h = h_horizon)
# ====================================================
# 2. SEGMENT 2: INTER-CRISIS STABILITY (2019) TEST
# ====================================================
print("--- Segment 2: ETS Model Fits ---")## [1] "--- Segment 2: ETS Model Fits ---"
# --- Fit Models ---
ets.fit.midwest.S2 <- ets(train.midwest.S2, model = "AAN")
ets.fit.northeast.S2 <- ets(train.northeast.S2, model = "AAN")
ets.fit.south.S2 <- ets(train.south.S2, model = "AAN")
ets.fit.west.S2 <- ets(train.west.S2, model = "AAN")
# --- Generate Forecasts ---
ets.forecast.midwest.S2 <- forecast(ets.fit.midwest.S2, h = h_horizon)
ets.forecast.northeast.S2 <- forecast(ets.fit.northeast.S2, h = h_horizon)
ets.forecast.south.S2 <- forecast(ets.fit.south.S2, h = h_horizon)
ets.forecast.west.S2 <- forecast(ets.fit.west.S2, h = h_horizon)
# ====================================================
# 3. SEGMENT 3: COVID SHOCK (2020) TEST
# ====================================================
print("--- Segment 3: ETS Model Fits ---")## [1] "--- Segment 3: ETS Model Fits ---"
# --- Fit Models ---
ets.fit.midwest.S3 <- ets(train.midwest.S3, model = "AAN")
ets.fit.northeast.S3 <- ets(train.northeast.S3, model = "AAN")
ets.fit.south.S3 <- ets(train.south.S3, model = "AAN")
ets.fit.west.S3 <- ets(train.west.S3, model = "AAN")
# --- Generate Forecasts ---
ets.forecast.midwest.S3 <- forecast(ets.fit.midwest.S3, h = h_horizon)
ets.forecast.northeast.S3 <- forecast(ets.fit.northeast.S3, h = h_horizon)
ets.forecast.south.S3 <- forecast(ets.fit.south.S3, h = h_horizon)
ets.forecast.west.S3 <- forecast(ets.fit.west.S3, h = h_horizon)
# --- Diagnostics (Check Residuals for S3, the most comprehensive fit) ---
print("--- ETS Residual Diagnostics (Using S3 Fits) ---")## [1] "--- ETS Residual Diagnostics (Using S3 Fits) ---"
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,N)
## Q* = 2.7275, df = 11, p-value = 0.9939
##
## Model df: 0. Total lags used: 11
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,N)
## Q* = 2.5184, df = 11, p-value = 0.9957
##
## Model df: 0. Total lags used: 11
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 2.1431, df = 11, p-value = 0.9979
##
## Model df: 0. Total lags used: 11
##
## Ljung-Box test
##
## data: Residuals from ETS(A,A,N)
## Q* = 2.7226, df = 11, p-value = 0.9939
##
## Model df: 0. Total lags used: 11
## 7.2. ARIMA Model Implementation (FINAL STABLE VERSION)
h_horizon <- 12 # Fixed 12-month forecast horizon for all test sets
# ====================================================
# STEP 1: Find Optimal Orders (p, d, q)
# ====================================================
print("--- Step 1: Finding Optimal ARIMA Orders (p, d, q) ---")## [1] "--- Step 1: Finding Optimal ARIMA Orders (p, d, q) ---"
# --- Segment 1 Orders (No XREG) ---
best_order_midwest.S1 <- auto.arima(train.midwest.S1, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_northeast.S1 <- auto.arima(train.northeast.S1, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_south.S1 <- auto.arima(train.south.S1, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_west.S1 <- auto.arima(train.west.S1, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
# --- Segment 2 Orders (No XREG) ---
best_order_midwest.S2 <- auto.arima(train.midwest.S2, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_northeast.S2 <- auto.arima(train.northeast.S2, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_south.S2 <- auto.arima(train.south.S2, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_west.S2 <- auto.arima(train.west.S2, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
# --- Segment 3 Orders (No XREG - Must be clean) ---
best_order_midwest.S3 <- auto.arima(train.midwest.S3, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_northeast.S3 <- auto.arima(train.northeast.S3, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_south.S3 <- auto.arima(train.south.S3, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
best_order_west.S3 <- auto.arima(train.west.S3, seasonal = FALSE, max.d = 1)$arma[c(1, 6, 2)]
# ====================================================
# STEP 2 & 3: Fit and Forecast
# ====================================================
# --- SEGMENT 1 (Recession) ---
print("--- Segment 1: Fitting and Forecasting (Recession Test) ---")## [1] "--- Segment 1: Fitting and Forecasting (Recession Test) ---"
# Midwest S1
arima.fit.midwest.S1 <- Arima(train.midwest.S1, order = best_order_midwest.S1, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.midwest.S1 <- forecast(arima.fit.midwest.S1, xreg = xreg.test.S1, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.midwest.S1, xreg = xreg.test.S1, :
## xreg not required by this model, ignoring the provided regressors
# Northeast S1
arima.fit.northeast.S1 <- Arima(train.northeast.S1, order = best_order_northeast.S1, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.northeast.S1 <- forecast(arima.fit.northeast.S1, xreg = xreg.test.S1, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.northeast.S1, xreg = xreg.test.S1,
## : xreg not required by this model, ignoring the provided regressors
# South S1
arima.fit.south.S1 <- Arima(train.south.S1, order = best_order_south.S1, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.south.S1 <- forecast(arima.fit.south.S1, xreg = xreg.test.S1, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.south.S1, xreg = xreg.test.S1, :
## xreg not required by this model, ignoring the provided regressors
# West S1
arima.fit.west.S1 <- Arima(train.west.S1, order = best_order_west.S1, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.west.S1 <- forecast(arima.fit.west.S1, xreg = xreg.test.S1, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.west.S1, xreg = xreg.test.S1, :
## xreg not required by this model, ignoring the provided regressors
# --- SEGMENT 2 (Stability) ---
print("--- Segment 2: Fitting and Forecasting (Stability Test) ---")## [1] "--- Segment 2: Fitting and Forecasting (Stability Test) ---"
# Midwest S2
arima.fit.midwest.S2 <- Arima(train.midwest.S2, order = best_order_midwest.S2, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.midwest.S2 <- forecast(arima.fit.midwest.S2, xreg = xreg.test.S2, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.midwest.S2, xreg = xreg.test.S2, :
## xreg not required by this model, ignoring the provided regressors
# Northeast S2
arima.fit.northeast.S2 <- Arima(train.northeast.S2, order = best_order_northeast.S2, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.northeast.S2 <- forecast(arima.fit.northeast.S2, xreg = xreg.test.S2, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.northeast.S2, xreg = xreg.test.S2,
## : xreg not required by this model, ignoring the provided regressors
# South S2
arima.fit.south.S2 <- Arima(train.south.S2, order = best_order_south.S2, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.south.S2 <- forecast(arima.fit.south.S2, xreg = xreg.test.S2, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.south.S2, xreg = xreg.test.S2, :
## xreg not required by this model, ignoring the provided regressors
# West S2
arima.fit.west.S2 <- Arima(train.west.S2, order = best_order_west.S2, seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.west.S2 <- forecast(arima.fit.west.S2, xreg = xreg.test.S2, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.west.S2, xreg = xreg.test.S2, :
## xreg not required by this model, ignoring the provided regressors
# --- SEGMENT 3 (COVID Shock) ---
print("--- Segment 3: Fitting and Forecasting (COVID Shock Test) ---")## [1] "--- Segment 3: Fitting and Forecasting (COVID Shock Test) ---"
# *** CRITICAL FIX: The XREG term is REMOVED from the Arima() fitting call,
# but KEPT in the forecast() call to avoid the optim() crash. ***
# Midwest S3
arima.fit.midwest.S3 <- Arima(train.midwest.S3, order = best_order_midwest.S3,
seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.midwest.S3 <- forecast(arima.fit.midwest.S3, xreg = xreg.test.S3, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.midwest.S3, xreg = xreg.test.S3, :
## xreg not required by this model, ignoring the provided regressors
# Northeast S3
arima.fit.northeast.S3 <- Arima(train.northeast.S3, order = best_order_northeast.S3,
seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.northeast.S3 <- forecast(arima.fit.northeast.S3, xreg = xreg.test.S3, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.northeast.S3, xreg = xreg.test.S3,
## : xreg not required by this model, ignoring the provided regressors
# South S3
arima.fit.south.S3 <- Arima(train.south.S3, order = best_order_south.S3,
seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.south.S3 <- forecast(arima.fit.south.S3, xreg = xreg.test.S3, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.south.S3, xreg = xreg.test.S3, :
## xreg not required by this model, ignoring the provided regressors
# West S3
arima.fit.west.S3 <- Arima(train.west.S3, order = best_order_west.S3,
seasonal = list(order = c(0, 0, 0), period = 12), method = "CSS", include.mean = FALSE)
arima.forecast.west.S3 <- forecast(arima.fit.west.S3, xreg = xreg.test.S3, h = h_horizon)## Warning in forecast.forecast_ARIMA(arima.fit.west.S3, xreg = xreg.test.S3, :
## xreg not required by this model, ignoring the provided regressors
# --- Diagnostics (Check the largest S3 fit) ---
print("--- ARIMA Residual Diagnostics (Using S3 Fits) ---")## [1] "--- ARIMA Residual Diagnostics (Using S3 Fits) ---"
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 2.0381, df = 11, p-value = 0.9984
##
## Model df: 0. Total lags used: 11
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 2.1383, df = 11, p-value = 0.9979
##
## Model df: 0. Total lags used: 11
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 1.9997, df = 11, p-value = 0.9985
##
## Model df: 0. Total lags used: 11
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 2.1022, df = 11, p-value = 0.9981
##
## Model df: 0. Total lags used: 11
## 9.2. NNETAR Model Implementation (3-Segment Evaluation)
# Non-linear Time Series Forecasting using Neural Networks
h_horizon <- 12
# The model uses the default settings for nodes (hidden layer size),
# but sets the maximum non-seasonal lag (max.p) to 12.
# Scaling inputs is generally recommended for neural networks.
# ====================================================
# --- SEGMENT 1 (Recession) ---
# ====================================================
print("--- Segment 1: NNETAR Fits and Forecasts (Recession Test) ---")## [1] "--- Segment 1: NNETAR Fits and Forecasts (Recession Test) ---"
# Midwest S1
nnetar.fit.midwest.S1 <- nnetar(train.midwest.S1, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.midwest.S1 <- forecast(nnetar.fit.midwest.S1, h = h_horizon, PI = TRUE, npaths = 100)
# Northeast S1
nnetar.fit.northeast.S1 <- nnetar(train.northeast.S1, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.northeast.S1 <- forecast(nnetar.fit.northeast.S1, h = h_horizon, PI = TRUE, npaths = 100)
# South S1
nnetar.fit.south.S1 <- nnetar(train.south.S1, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.south.S1 <- forecast(nnetar.fit.south.S1, h = h_horizon, PI = TRUE, npaths = 100)
# West S1
nnetar.fit.west.S1 <- nnetar(train.west.S1, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.west.S1 <- forecast(nnetar.fit.west.S1, h = h_horizon, PI = TRUE, npaths = 100)
# ====================================================
# --- SEGMENT 2 (Stability) ---
# ====================================================
print("--- Segment 2: NNETAR Fits and Forecasts (Stability Test) ---")## [1] "--- Segment 2: NNETAR Fits and Forecasts (Stability Test) ---"
# Midwest S2
nnetar.fit.midwest.S2 <- nnetar(train.midwest.S2, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.midwest.S2 <- forecast(nnetar.fit.midwest.S2, h = h_horizon, PI = TRUE, npaths = 100)
# Northeast S2
nnetar.fit.northeast.S2 <- nnetar(train.northeast.S2, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.northeast.S2 <- forecast(nnetar.fit.northeast.S2, h = h_horizon, PI = TRUE, npaths = 100)
# South S2
nnetar.fit.south.S2 <- nnetar(train.south.S2, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.south.S2 <- forecast(nnetar.fit.south.S2, h = h_horizon, PI = TRUE, npaths = 100)
# West S2
nnetar.fit.west.S2 <- nnetar(train.west.S2, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.west.S2 <- forecast(nnetar.fit.west.S2, h = h_horizon, PI = TRUE, npaths = 100)
# ====================================================
# --- SEGMENT 3 (COVID Shock) ---
# ====================================================
print("--- Segment 3: NNETAR Fits and Forecasts (COVID Test) ---")## [1] "--- Segment 3: NNETAR Fits and Forecasts (COVID Test) ---"
# Midwest S3
nnetar.fit.midwest.S3 <- nnetar(train.midwest.S3, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.midwest.S3 <- forecast(nnetar.fit.midwest.S3, h = h_horizon, PI = TRUE, npaths = 100)
# Northeast S3
nnetar.fit.northeast.S3 <- nnetar(train.northeast.S3, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.northeast.S3 <- forecast(nnetar.fit.northeast.S3, h = h_horizon, PI = TRUE, npaths = 100)
# South S3
nnetar.fit.south.S3 <- nnetar(train.south.S3, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.south.S3 <- forecast(nnetar.fit.south.S3, h = h_horizon, PI = TRUE, npaths = 100)
# West S3
nnetar.fit.west.S3 <- nnetar(train.west.S3, max.p = 12, max.P = 0, scale.inputs = TRUE)
nnetar.forecast.west.S3 <- forecast(nnetar.fit.west.S3, h = h_horizon, PI = TRUE, npaths = 100)
# ====================================================
# --- Accuracy (Check all S3 Test Sets) ---
# ====================================================
print("--- NNETAR Model Accuracy (Segment 3 Test Set) ---")## [1] "--- NNETAR Model Accuracy (Segment 3 Test Set) ---"
## [1] "Midwest - NNETAR Model Accuracy (S3 Test Set):"
## ME RMSE MAE MPE MAPE
## Training set -0.0006607645 0.05001029 0.03923287 -0.04186954 1.060797
## Test set -0.2929608942 0.32341043 0.29296089 -7.01509840 7.015098
## MASE ACF1 Theil's U
## Training set 0.02587444 0.0547709 NA
## Test set 0.19321041 0.7252745 10.96999
## [1] "Northeast - NNETAR Model Accuracy (S3 Test Set):"
## ME RMSE MAE MPE MAPE
## Training set 5.860714e-06 0.05851895 0.04197701 -0.01939956 0.9863066
## Test set 1.815658e-01 0.19921828 0.18156584 4.37414997 4.3741500
## MASE ACF1 Theil's U
## Training set 0.02065230 -0.02865256 NA
## Test set 0.08932873 0.59343195 4.764015
## [1] "South - NNETAR Model Accuracy (S3 Test Set):"
## ME RMSE MAE MPE MAPE MASE
## Training set 2.477209e-05 0.05043311 0.03804721 -0.01917205 1.032201 0.02686420
## Test set 8.100877e-02 0.08121271 0.08100877 2.13180966 2.131810 0.05719831
## ACF1 Theil's U
## Training set -0.1094276 NA
## Test set 0.5960063 Inf
## [1] "West - NNETAR Model Accuracy (S3 Test Set):"
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0001591521 0.06171431 0.05102366 -0.01534804 1.100611 0.02513193
## Test set 0.1768727209 0.19189117 0.17687272 3.67168226 3.671682 0.08711944
## ACF1 Theil's U
## Training set -0.1608701 NA
## Test set 0.5289102 6.576474
# Displays the error statistics for the in-sample fit of the Naive model.
summary(naive.forecast.midwest.S3)##
## Forecast method: Naive method
##
## Model Information:
## Call: naive(y = train.midwest.S3, h = h_horizon)
##
## Residual sd: 1.6545
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.009259259 1.654455 0.4611111 -1.06715 5.078136 0.304107
## ACF1
## Training set -0.08013098
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Aug 2024 4.1 1.9797303 6.220270 0.8573273 7.342673
## Sep 2024 4.1 1.1014858 7.098514 -0.4858318 8.685832
## Oct 2024 4.1 0.4275851 7.772415 -1.5164739 9.716474
## Nov 2024 4.1 -0.1405395 8.340539 -2.3853455 10.585345
## Dec 2024 4.1 -0.6410673 8.841067 -3.1508367 11.350837
## Jan 2025 4.1 -1.0935790 9.293579 -3.8428936 12.042894
## Feb 2025 4.1 -1.5097064 9.709706 -4.4793056 12.679306
## Mar 2025 4.1 -1.8970284 10.097028 -5.0716635 13.271664
## Apr 2025 4.1 -2.2608092 10.460809 -5.6280182 13.828018
## May 2025 4.1 -2.6048816 10.804882 -6.1542315 14.354232
## Jun 2025 4.1 -2.9321392 11.132139 -6.6547288 14.854729
## Jul 2025 4.1 -3.2448298 11.444830 -7.1329478 15.332948
# Displays the chosen orders, estimated coefficients (if any),
# and key fit statistics (AICc, BIC).
summary(arima.fit.midwest.S3)## Series: train.midwest.S3
## ARIMA(0,1,0)
##
## sigma^2 = 2.737: log likelihood = -103.81
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.009090909 1.639346 0.4527273 -1.047747 4.985806 0.2985778
## ACF1
## Training set -0.08013046
# Displays the chosen model (e.g., ETS(A,Ad,N)),
# the smoothing parameters, and fit statistics.
summary(ets.fit.midwest.S3)## ETS(A,Ad,N)
##
## Call:
## ets(y = train.midwest.S3, model = "AAN")
##
## Smoothing parameters:
## alpha = 0.7852
## beta = 1e-04
## phi = 0.8164
##
## Initial states:
## l = 1.8199
## b = 1.4712
##
## sigma: 1.6699
##
## AIC AICc BIC
## 283.5654 285.3154 295.6094
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.09910824 1.592189 0.546623 -3.041817 7.218987 0.3605029
## ACF1
## Training set 0.04328042
# Displays the network structure (input, hidden layer nodes)
# and the total number of weights (parameters) fitted.
summary(nnetar.fit.midwest.S3)## Length Class Mode
## x 55 ts numeric
## m 1 -none- numeric
## p 1 -none- numeric
## P 1 -none- numeric
## scalex 2 -none- list
## size 1 -none- numeric
## subset 55 -none- numeric
## model 20 nnetarmodels list
## nnetargs 2 -none- list
## fitted 55 ts numeric
## residuals 55 ts numeric
## lags 2 -none- numeric
## series 1 -none- character
## method 1 -none- character
## call 5 -none- call
## 10. Accuracy Comparison: ETS vs. ARIMA (3-Segment Evaluation)
# ====================================================
# SEGMENT 1: GREAT RECESSION (2008) TEST
# ====================================================
print("=========================================================")## [1] "========================================================="
## [1] "### Segment 1 Accuracy (Recession Test Set: 2008-01 to 2008-12) ###"
## [1] "========================================================="
## [1] "Midwest - Accuracy Comparison on Test Set S1:"
## ME RMSE MAE MPE MAPE
## Training set -0.0009039369 0.07013556 0.05597957 2.030623e-04 0.9662719
## Test set 0.8113580139 1.08843084 0.81135801 1.213455e+01 12.1345525
## MASE ACF1 Theil's U
## Training set 0.07735661 0.04464943 NA
## Test set 1.12119310 0.71274876 4.350053
## ME RMSE MAE MPE MAPE
## Training set -0.0005093873 0.06632238 0.05311237 0.00521059 0.9292757
## Test set 0.8075187887 1.07920880 0.80751879 12.08973292 12.0897329
## MASE ACF1 Theil's U
## Training set 0.0733945 -0.0007383558 NA
## Test set 1.1158878 0.7111472782 4.311862
## [1] "Northeast - Accuracy Comparison on Test Set S1:"
## ME RMSE MAE MPE MAPE
## Training set -0.002098609 0.06179039 0.04974512 -0.01202204 0.8443459
## Test set 0.804016723 1.02403158 0.80401672 13.78376077 13.7837608
## MASE ACF1 Theil's U
## Training set 0.06642205 0.01852489 NA
## Test set 1.07356145 0.71886408 4.915921
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002558949 0.0597465 0.04810515 -0.02575173 0.8250436 0.0642323
## Test set 0.847899216 1.0672669 0.84789922 14.57663084 14.5766308 1.1321554
## ACF1 Theil's U
## Training set -0.001528442 NA
## Test set 0.720545637 5.137153
## [1] "South - Accuracy Comparison on Test Set S1:"
## ME RMSE MAE MPE MAPE
## Training set -0.001355497 0.06757787 0.05071912 -0.01042702 0.8660614
## Test set 1.038671035 1.31960429 1.03867103 17.07380295 17.0738030
## MASE ACF1 Theil's U
## Training set 0.08286128 -0.005281083 NA
## Test set 1.69690657 0.717130870 4.850521
## ME RMSE MAE MPE MAPE
## Training set -0.001537181 0.06710737 0.05039632 -0.0166593 0.8599961
## Test set 1.064806781 1.34538089 1.06480678 17.5323914 17.5323914
## MASE ACF1 Theil's U
## Training set 0.08233392 -0.001002279 NA
## Test set 1.73960528 0.717555976 4.952515
## [1] "West - Accuracy Comparison on Test Set S1:"
## ME RMSE MAE MPE MAPE
## Training set -0.001601481 0.0642681 0.05253752 -0.006881176 0.8359745
## Test set 1.215060299 1.5402142 1.21506030 17.489542374 17.4895424
## MASE ACF1 Theil's U
## Training set 0.06903553 0.00193163 NA
## Test set 1.59661756 0.72832652 4.94479
## ME RMSE MAE MPE MAPE
## Training set -0.001378063 0.0624998 0.05088764 -0.007631588 0.8153877
## Test set 1.306946884 1.6383930 1.30694688 18.877536461 18.8775365
## MASE ACF1 Theil's U
## Training set 0.06686754 -0.004752455 NA
## Test set 1.71735868 0.729993402 5.279204
## [1] "---"
# ====================================================
# SEGMENT 2: INTER-CRISIS STABILITY (2019) TEST
# ====================================================
print("=========================================================")## [1] "========================================================="
## [1] "### Segment 2 Accuracy (Stability Test Set: 2019-01 to 2019-12) ###"
## [1] "========================================================="
## [1] "Midwest - Accuracy Comparison on Test Set S2:"
## ME RMSE MAE MPE MAPE
## Training set -0.0004603112 0.06967606 0.05457063 0.01518184 0.9299601
## Test set -0.3263276440 0.35370388 0.32632764 -9.18962692 9.1896269
## MASE ACF1 Theil's U
## Training set 0.06858753 0.03916583 NA
## Test set 0.41014746 0.61876640 5.596385
## ME RMSE MAE MPE MAPE
## Training set -0.0007939517 0.06657322 0.05279267 0.0009540482 0.9092413
## Test set -0.2082383013 0.22566817 0.20823830 -5.8649052299 5.8649052
## MASE ACF1 Theil's U
## Training set 0.06635288 -0.00579125 NA
## Test set 0.26172594 0.52872465 3.567636
## [1] "Northeast - Accuracy Comparison on Test Set S2:"
## ME RMSE MAE MPE MAPE
## Training set -0.001615552 0.06148874 0.04924598 -0.003664727 0.830034
## Test set -0.136892214 0.16512815 0.13689221 -3.790725100 3.790725
## MASE ACF1 Theil's U
## Training set 0.06383738 0.005672801 NA
## Test set 0.17745287 0.548220403 2.357152
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002044993 0.0606929 0.04850804 -0.01750113 0.822222 0.06288079
## Test set -0.143356901 0.1709567 0.14335690 -3.96727510 3.967275 0.18583302
## ACF1 Theil's U
## Training set -0.003642203 NA
## Test set 0.548174733 2.439672
## [1] "South - Accuracy Comparison on Test Set S2:"
## ME RMSE MAE MPE MAPE
## Training set -0.000795798 0.06825148 0.05181327 0.004575912 0.8779458
## Test set -0.451327052 0.47866230 0.45132705 -12.967480605 12.9674806
## MASE ACF1 Theil's U
## Training set 0.07345679 -0.0162409 NA
## Test set 0.63985607 0.7059701 9.847836
## ME RMSE MAE MPE MAPE
## Training set -0.001067744 0.06804836 0.05140209 -0.003700289 0.8716084
## Test set -0.441820636 0.46587755 0.44182064 -12.688157498 12.6881575
## MASE ACF1 Theil's U
## Training set 0.07287385 -0.008155497 NA
## Test set 0.62637862 0.697015488 9.572393
## [1] "West - Accuracy Comparison on Test Set S2:"
## ME RMSE MAE MPE MAPE
## Training set -0.0009642013 0.0649342 0.05217776 0.007498872 0.8246615
## Test set -0.3798739628 0.4242214 0.37987396 -9.847555096 9.8475551
## MASE ACF1 Theil's U
## Training set 0.05941615 -0.01556432 NA
## Test set 0.43257225 0.77705166 6.813405
## ME RMSE MAE MPE MAPE
## Training set -0.001116913 0.06460879 0.05198985 0.002250127 0.8223395
## Test set -0.355612745 0.39645758 0.35561274 -9.217659206 9.2176592
## MASE ACF1 Theil's U
## Training set 0.05920218 0.01388839 NA
## Test set 0.40494538 0.77447171 6.364446
## [1] "---"
# ====================================================
# SEGMENT 3: COVID SHOCK (2020) TEST
# ====================================================
print("=========================================================")## [1] "========================================================="
## [1] "### Segment 3 Accuracy (COVID Shock Test Set: 2020-03 to 2021-02) ###"
## [1] "========================================================="
## [1] "Midwest - Accuracy Comparison on Test Set S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.09910824 1.59218898 0.54662304 -3.041817 7.218987 0.36050293
## Test set 0.08087941 0.09469452 0.08087941 1.931170 1.931170 0.05334071
## ACF1 Theil's U
## Training set 0.04328042 NA
## Test set 0.74034709 3.207091
## ME RMSE MAE MPE MAPE MASE
## Training set 0.009090909 1.63934577 0.45272727 -1.047747 4.985806 0.29857780
## Test set 0.058333333 0.07637626 0.05833333 1.388889 1.388889 0.03847137
## ACF1 Theil's U
## Training set -0.08013046 NA
## Test set 0.74047619 2.59185
## [1] "Northeast - Accuracy Comparison on Test Set S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.04941207 1.5180868 0.4651402 -1.814070 5.130858 0.22884470
## Test set 0.12496278 0.1384054 0.1249642 3.009167 3.009202 0.06148123
## ACF1 Theil's U
## Training set -0.004175938 NA
## Test set 0.514663020 3.310492
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001818182 1.5476081 0.4309091 -1.088786 4.556532 0.21200333
## Test set 0.125000000 0.1384437 0.1250000 3.010066 3.010066 0.06149886
## ACF1 Theil's U
## Training set 0.03324847 NA
## Test set 0.51470588 3.31141
## [1] "South - Accuracy Comparison on Test Set S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.009961472 1.2672985 0.3825012 -1.093168 4.944555 0.27007472
## Test set 0.107626836 0.1077023 0.1076268 2.832285 2.832285 0.07599268
## ACF1 Theil's U
## Training set -0.002575164 NA
## Test set 0.750000000 Inf
## ME RMSE MAE MPE MAPE MASE
## Training set 0.003636364 1.259726 0.3636364 -0.8714141 4.566296 0.25675474
## Test set 0.100000000 0.100000 0.1000000 2.6315789 2.631579 0.07060755
## ACF1 Theil's U
## Training set -0.0149032 NA
## Test set NaN Inf
## [1] "West - Accuracy Comparison on Test Set S3:"
## ME RMSE MAE MPE MAPE MASE
## Training set -0.03563819 1.49487046 0.48527459 -1.379450 5.446475 0.23902414
## Test set 0.04986634 0.05820622 0.04986634 1.035617 1.035617 0.02456189
## ACF1 Theil's U
## Training set 0.06247128 NA
## Test set 0.41917733 1.799768
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01272727 1.4767901 0.4381818 -0.6671724 4.725691 0.21582839
## Test set 0.10833333 0.1118034 0.1083333 2.2498583 2.249858 0.05336006
## ACF1 Theil's U
## Training set 0.051199542 NA
## Test set -0.007575758 3.741657
## 10. Accuracy Summary Compilation (Final Execution Code - Prophet Removed)
# Load necessary libraries
library(forecast)
library(dplyr)
library(tibble)
library(zoo)
# --- 1. Function to safely extract metrics using tryCatch (for Naive, ETS, ARIMA, NNETAR) ---
extract_metrics <- function(forecast_object, test_set, model_name, fit_object = NULL) {
# Define a guaranteed NA result dataframe for errors
fail_df <- data.frame(Model = model_name, MAPE = NA, MASE = NA, RMSE = NA, AIC = NA)
results <- tryCatch({
# Attempt to calculate accuracy
acc <- accuracy(forecast_object, test_set)
# Determine AIC: default to NA, update if ARIMA/ETS fit object is provided
aic_val <- NA
if (!is.null(fit_object) && inherits(fit_object, "ARIMA")) {
aic_val <- fit_object$aic
} else if (!is.null(fit_object) && inherits(fit_object, "ets")) {
aic_val <- fit_object$aic
}
# Check if test set metrics (row 2) are present
if (nrow(acc) < 2) {
warning(paste("Accuracy output for", model_name, "is missing test set row!"))
return(fail_df)
}
# Return the successful data frame (using row 2 for test set metrics)
data.frame(
Model = model_name,
MAPE = acc[2, "MAPE"],
MASE = acc[2, "MASE"],
RMSE = acc[2, "RMSE"],
AIC = aic_val
)
}, error = function(e) {
warning(paste("FATAL ERROR encountered while processing", model_name, "forecast:", e$message))
return(fail_df)
})
return(results)
}
# NOTE: The extract_prophet_metrics function has been removed.
# ====================================================
# A. SEGMENT 1: GREAT RECESSION (2008) SUMMARY
# ====================================================
# --- Midwest S1 ---
midwest_S1_acc <- bind_rows(
extract_metrics(naive.forecast.midwest.S1, test.midwest.S1, "Naive"),
extract_metrics(ets.forecast.midwest.S1, test.midwest.S1, "ETS", ets.fit.midwest.S1),
extract_metrics(arima.forecast.midwest.S1, test.midwest.S1, "ARIMA", arima.fit.midwest.S1),
extract_metrics(nnetar.forecast.midwest.S1, test.midwest.S1, "NNETAR")
) %>% add_column(Region = "Midwest", .before = 1)
# --- Northeast S1 ---
northeast_S1_acc <- bind_rows(
extract_metrics(naive.forecast.northeast.S1, test.northeast.S1, "Naive"),
extract_metrics(ets.forecast.northeast.S1, test.northeast.S1, "ETS", ets.fit.northeast.S1),
extract_metrics(arima.forecast.northeast.S1, test.northeast.S1, "ARIMA", arima.fit.northeast.S1),
extract_metrics(nnetar.forecast.northeast.S1, test.northeast.S1, "NNETAR")
) %>% add_column(Region = "Northeast", .before = 1)
# --- South S1 ---
south_S1_acc <- bind_rows(
extract_metrics(naive.forecast.south.S1, test.south.S1, "Naive"),
extract_metrics(ets.forecast.south.S1, test.south.S1, "ETS", ets.fit.south.S1),
extract_metrics(arima.forecast.south.S1, test.south.S1, "ARIMA", arima.fit.south.S1),
extract_metrics(nnetar.forecast.south.S1, test.south.S1, "NNETAR")
) %>% add_column(Region = "South", .before = 1)
# --- West S1 ---
west_S1_acc <- bind_rows(
extract_metrics(naive.forecast.west.S1, test.west.S1, "Naive"),
extract_metrics(ets.forecast.west.S1, test.west.S1, "ETS", ets.fit.west.S1),
extract_metrics(arima.forecast.west.S1, test.west.S1, "ARIMA", arima.fit.west.S1),
extract_metrics(nnetar.forecast.west.S1, test.west.S1, "NNETAR")
) %>% add_column(Region = "West", .before = 1)
# Combine all S1 data
accuracy_summary_S1 <- bind_rows(midwest_S1_acc, northeast_S1_acc, south_S1_acc, west_S1_acc)
print("--- SEGMENT 1: RECESSION (2008) ACCURACY SUMMARY ---")## [1] "--- SEGMENT 1: RECESSION (2008) ACCURACY SUMMARY ---"
## Region Model MAPE MASE RMSE AIC
## 1 Midwest Naive 11.952218 1.1054978 1.0778064 NA
## 2 Midwest ETS 12.134553 1.1211931 1.0884308 256.2209
## 3 Midwest ARIMA 12.089733 1.1158878 1.0792088 NA
## 4 Midwest NNETAR 11.821324 1.0935471 1.0669522 NA
## 5 Northeast Naive 13.717756 1.0681981 1.0181683 NA
## 6 Northeast ETS 13.783761 1.0735615 1.0240316 158.9291
## 7 Northeast ARIMA 14.576631 1.1321554 1.0672669 NA
## 8 Northeast NNETAR 12.495333 0.9777453 0.9515025 NA
## 9 South Naive 17.134590 1.7018006 1.3206690 NA
## 10 South ETS 17.073803 1.6969066 1.3196043 227.6901
## 11 South ARIMA 17.532391 1.7396053 1.3453809 NA
## 12 South NNETAR 13.397580 1.3406368 1.0678933 NA
## 13 West Naive 18.809137 1.7082303 1.6222412 NA
## 14 West ETS 17.489542 1.5966176 1.5402142 189.1234
## 15 West ARIMA 18.877536 1.7173587 1.6383930 NA
## 16 West NNETAR 9.935131 0.9234501 0.9451126 NA
# ====================================================
# B. SEGMENT 2: INTER-CRISIS STABILITY (2019) SUMMARY
# ====================================================
# --- Midwest S2 ---
midwest_S2_acc <- bind_rows(
extract_metrics(naive.forecast.midwest.S2, test.midwest.S2, "Naive"),
extract_metrics(ets.forecast.midwest.S2, test.midwest.S2, "ETS", ets.fit.midwest.S2),
extract_metrics(arima.forecast.midwest.S2, test.midwest.S2, "ARIMA", arima.fit.midwest.S2),
extract_metrics(nnetar.forecast.midwest.S2, test.midwest.S2, "NNETAR")
) %>% add_column(Region = "Midwest", .before = 1)
# --- Northeast S2 ---
northeast_S2_acc <- bind_rows(
extract_metrics(naive.forecast.northeast.S2, test.northeast.S2, "Naive"),
extract_metrics(ets.forecast.northeast.S2, test.northeast.S2, "ETS", ets.fit.northeast.S2),
extract_metrics(arima.forecast.northeast.S2, test.northeast.S2, "ARIMA", arima.fit.northeast.S2),
extract_metrics(nnetar.forecast.northeast.S2, test.northeast.S2, "NNETAR")
) %>% add_column(Region = "Northeast", .before = 1)
# --- South S2 ---
south_S2_acc <- bind_rows(
extract_metrics(naive.forecast.south.S2, test.south.S2, "Naive"),
extract_metrics(ets.forecast.south.S2, test.south.S2, "ETS", ets.fit.south.S2),
extract_metrics(arima.forecast.south.S2, test.south.S2, "ARIMA", arima.fit.south.S2),
extract_metrics(nnetar.forecast.south.S2, test.south.S2, "NNETAR")
) %>% add_column(Region = "South", .before = 1)
# --- West S2 ---
west_S2_acc <- bind_rows(
extract_metrics(naive.forecast.west.S2, test.west.S2, "Naive"),
extract_metrics(ets.forecast.west.S2, test.west.S2, "ETS", ets.fit.west.S2),
extract_metrics(arima.forecast.west.S2, test.west.S2, "ARIMA", arima.fit.west.S2),
extract_metrics(nnetar.forecast.west.S2, test.west.S2, "NNETAR")
) %>% add_column(Region = "West", .before = 1)
# Combine all S2 data
accuracy_summary_S2 <- bind_rows(midwest_S2_acc, northeast_S2_acc, south_S2_acc, west_S2_acc)
print("--- SEGMENT 2: STABILITY (2019) ACCURACY SUMMARY ---")## [1] "--- SEGMENT 2: STABILITY (2019) ACCURACY SUMMARY ---"
## Region Model MAPE MASE RMSE AIC
## 1 Midwest Naive 5.873731 0.2618454 0.2291288 NA
## 2 Midwest ETS 9.189627 0.4101475 0.3537039 485.8479
## 3 Midwest ARIMA 5.864905 0.2617259 0.2256682 NA
## 4 Midwest NNETAR 11.133286 0.4965699 0.4349234 NA
## 5 Northeast Naive 3.467038 0.1620370 0.1554563 NA
## 6 Northeast ETS 3.790725 0.1774529 0.1651282 356.8451
## 7 Northeast ARIMA 3.967275 0.1858330 0.1709567 NA
## 8 Northeast NNETAR 12.105005 0.5751581 0.4883130 NA
## 9 South Naive 8.140304 0.4016878 0.3000000 NA
## 10 South ETS 12.967481 0.6398561 0.4786623 464.5291
## 11 South ARIMA 12.688157 0.6263786 0.4658775 NA
## 12 South NNETAR 16.442890 0.8090652 0.6328249 NA
## 13 West Naive 6.928961 0.3036602 0.3055050 NA
## 14 West ETS 9.847555 0.4325722 0.4242214 413.1101
## 15 West ARIMA 9.217659 0.4049454 0.3964576 NA
## 16 West NNETAR 20.563595 0.9023950 0.9196145 NA
## [1] "\n\n"
# ====================================================
# C. SEGMENT 3: COVID SHOCK (2020) SUMMARY
# ====================================================
# --- Midwest S3 ---
midwest_S3_acc <- bind_rows(
extract_metrics(naive.forecast.midwest.S3, test.midwest.S3, "Naive"),
extract_metrics(ets.forecast.midwest.S3, test.midwest.S3, "ETS", ets.fit.midwest.S3),
extract_metrics(arima.forecast.midwest.S3, test.midwest.S3, "ARIMA", arima.fit.midwest.S3),
extract_metrics(nnetar.forecast.midwest.S3, test.midwest.S3, "NNETAR")
) %>% add_column(Region = "Midwest", .before = 1)
# --- Northeast S3 ---
northeast_S3_acc <- bind_rows(
extract_metrics(naive.forecast.northeast.S3, test.northeast.S3, "Naive"),
extract_metrics(ets.forecast.northeast.S3, test.northeast.S3, "ETS", ets.fit.northeast.S3),
extract_metrics(arima.forecast.northeast.S3, test.northeast.S3, "ARIMA", arima.fit.northeast.S3),
extract_metrics(nnetar.forecast.northeast.S3, test.northeast.S3, "NNETAR")
) %>% add_column(Region = "Northeast", .before = 1)
# --- South S3 ---
south_S3_acc <- bind_rows(
extract_metrics(naive.forecast.south.S3, test.south.S3, "Naive"),
extract_metrics(ets.forecast.south.S3, test.south.S3, "ETS", ets.fit.south.S3),
extract_metrics(arima.forecast.south.S3, test.south.S3, "ARIMA", arima.fit.south.S3),
extract_metrics(nnetar.forecast.south.S3, test.south.S3, "NNETAR")
) %>% add_column(Region = "South", .before = 1)
# --- West S3 ---
west_S3_acc <- bind_rows(
extract_metrics(naive.forecast.west.S3, test.west.S3, "Naive"),
extract_metrics(ets.forecast.west.S3, test.west.S3, "ETS", ets.fit.west.S3),
extract_metrics(arima.forecast.west.S3, test.west.S3, "ARIMA", arima.fit.west.S3),
extract_metrics(nnetar.forecast.west.S3, test.west.S3, "NNETAR")
) %>% add_column(Region = "West", .before = 1)
# Combine all S3 data
accuracy_summary_S3 <- bind_rows(midwest_S3_acc, northeast_S3_acc, south_S3_acc, west_S3_acc)
print("--- SEGMENT 3: COVID SHOCK (2020) ACCURACY SUMMARY ---")## [1] "--- SEGMENT 3: COVID SHOCK (2020) ACCURACY SUMMARY ---"
## Region Model MAPE MASE RMSE AIC
## 1 Midwest Naive 1.388889 0.03847137 0.07637626 NA
## 2 Midwest ETS 1.931170 0.05334071 0.09469452 283.5654
## 3 Midwest ARIMA 1.388889 0.03847137 0.07637626 NA
## 4 Midwest NNETAR 7.015098 0.19321041 0.32341043 NA
## 5 Northeast Naive 3.010066 0.06149886 0.13844373 NA
## 6 Northeast ETS 3.009202 0.06148123 0.13840539 278.3229
## 7 Northeast ARIMA 3.010066 0.06149886 0.13844373 NA
## 8 Northeast NNETAR 4.374150 0.08932873 0.19921828 NA
## 9 South Naive 2.631579 0.07060755 0.10000000 NA
## 10 South ETS 2.832285 0.07599268 0.10770226 256.4609
## 11 South ARIMA 2.631579 0.07060755 0.10000000 NA
## 12 South NNETAR 2.131810 0.05719831 0.08121271 NA
## 13 West Naive 2.249858 0.05336006 0.11180340 NA
## 14 West ETS 1.035617 0.02456189 0.05820622 274.6277
## 15 West ARIMA 2.249858 0.05336006 0.11180340 NA
## 16 West NNETAR 3.671682 0.08711944 0.19189117 NA
## 11. Model Selection (Minimizing MASE and MAPE)
library(dplyr)
library(tibble)
# ====================================================
# A. SEGMENT 1: GREAT RECESSION (2008) WINNERS
# ====================================================
best_models_S1 <- accuracy_summary_S1 %>%
group_by(Region) %>%
# Select the model with the minimum MASE (Primary Metric)
# Arrange by MASE first, then by MAPE for tie-breaking
arrange(MASE, MAPE) %>%
slice(1) %>%
ungroup() %>%
select(Region, Model, MASE, MAPE)
print("--- SEGMENT 1 WINNERS (RECESSION) ---")## [1] "--- SEGMENT 1 WINNERS (RECESSION) ---"
## # A tibble: 4 × 4
## Region Model MASE MAPE
## <chr> <chr> <dbl> <dbl>
## 1 Midwest NNETAR 1.09 11.8
## 2 Northeast NNETAR 0.978 12.5
## 3 South NNETAR 1.34 13.4
## 4 West NNETAR 0.923 9.94
# ====================================================
# B. SEGMENT 2: INTER-CRISIS STABILITY (2019) WINNERS
# ====================================================
best_models_S2 <- accuracy_summary_S2 %>%
group_by(Region) %>%
# Select the model with the minimum MASE (Primary Metric)
arrange(MASE, MAPE) %>%
slice(1) %>%
ungroup() %>%
select(Region, Model, MASE, MAPE)
print("--- SEGMENT 2 WINNERS (STABILITY) ---")## [1] "--- SEGMENT 2 WINNERS (STABILITY) ---"
## # A tibble: 4 × 4
## Region Model MASE MAPE
## <chr> <chr> <dbl> <dbl>
## 1 Midwest ARIMA 0.262 5.86
## 2 Northeast Naive 0.162 3.47
## 3 South Naive 0.402 8.14
## 4 West Naive 0.304 6.93
# ====================================================
# C. SEGMENT 3: COVID SHOCK (2020) WINNERS
# ====================================================
best_models_S3 <- accuracy_summary_S3 %>%
group_by(Region) %>%
# Select the model with the minimum MASE (Primary Metric)
arrange(MASE, MAPE) %>%
slice(1) %>%
ungroup() %>%
select(Region, Model, MASE, MAPE)
print("--- SEGMENT 3 WINNERS (COVID SHOCK) ---")## [1] "--- SEGMENT 3 WINNERS (COVID SHOCK) ---"
## # A tibble: 4 × 4
## Region Model MASE MAPE
## <chr> <chr> <dbl> <dbl>
## 1 Midwest Naive 0.0385 1.39
## 2 Northeast ETS 0.0615 3.01
## 3 South NNETAR 0.0572 2.13
## 4 West ETS 0.0246 1.04
library(forecast)
library(patchwork)
library(ggplot2) # Ensure ggplot2 is loaded for theme modifications
# --- 1. Helper Function to Create Individual Plots ---
# Sets PI = TRUE to include Prediction Intervals.
create_plot_s3_pi <- function(train_data, test_data, forecast_obj, model_name, region_name) {
autoplot(train_data) +
# Use autolayer for the actual test data
autolayer(test_data, series = "Actual Test") +
# Use autolayer for the forecast with Prediction Intervals (PI=TRUE)
autolayer(forecast_obj, series = paste(model_name, "Forecast"), PI = TRUE) +
ggtitle(paste(region_name, ":", model_name)) +
xlab("") + ylab("") +
# Reduce title/axis text size for the dense grid
theme(legend.position = "none",
plot.title = element_text(size = 11, face = "bold"),
axis.text = element_text(size = 8),
axis.title = element_text(size = 9))
}
# --- 2. Create All 16 Plots ---
# --- Row 1: Naive Model ---
P_M_Naive <- create_plot_s3_pi(train.midwest.S3, test.midwest.S3, naive.forecast.midwest.S3, "Naive", "Midwest")
P_N_Naive <- create_plot_s3_pi(train.northeast.S3, test.northeast.S3, naive.forecast.northeast.S3, "Naive", "Northeast")
P_S_Naive <- create_plot_s3_pi(train.south.S3, test.south.S3, naive.forecast.south.S3, "Naive", "South")
P_W_Naive <- create_plot_s3_pi(train.west.S3, test.west.S3, naive.forecast.west.S3, "Naive", "West")
# --- Row 2: ETS Model ---
P_M_ETS <- create_plot_s3_pi(train.midwest.S3, test.midwest.S3, ets.forecast.midwest.S3, "ETS", "Midwest")
P_N_ETS <- create_plot_s3_pi(train.northeast.S3, test.northeast.S3, ets.forecast.northeast.S3, "ETS", "Northeast")
P_S_ETS <- create_plot_s3_pi(train.south.S3, test.south.S3, ets.forecast.south.S3, "ETS", "South")
P_W_ETS <- create_plot_s3_pi(train.west.S3, test.west.S3, ets.forecast.west.S3, "ETS", "West")
# --- Row 3: ARIMA Model ---
P_M_ARIMA <- create_plot_s3_pi(train.midwest.S3, test.midwest.S3, arima.forecast.midwest.S3, "ARIMA", "Midwest")
P_N_ARIMA <- create_plot_s3_pi(train.northeast.S3, test.northeast.S3, arima.forecast.northeast.S3, "ARIMA", "Northeast")
P_S_ARIMA <- create_plot_s3_pi(train.south.S3, test.south.S3, arima.forecast.south.S3, "ARIMA", "South")
P_W_ARIMA <- create_plot_s3_pi(train.west.S3, test.west.S3, arima.forecast.west.S3, "ARIMA", "West")
# --- Row 4: NNETAR Model ---
P_M_NNETAR <- create_plot_s3_pi(train.midwest.S3, test.midwest.S3, nnetar.forecast.midwest.S3, "NNETAR", "Midwest")
P_N_NNETAR <- create_plot_s3_pi(train.northeast.S3, test.northeast.S3, nnetar.forecast.northeast.S3, "NNETAR", "Northeast")
P_S_NNETAR <- create_plot_s3_pi(train.south.S3, test.south.S3, nnetar.forecast.south.S3, "NNETAR", "South")
P_W_NNETAR <- create_plot_s3_pi(train.west.S3, test.west.S3, nnetar.forecast.west.S3, "NNETAR", "West")
# --- 3. Arrange Plots using Patchwork ---
# Arrange the 16 plots in a 4x4 grid (Models = Rows, Regions = Columns).
(P_M_Naive + P_N_Naive + P_S_Naive + P_W_Naive) /
(P_M_ETS + P_N_ETS + P_S_ETS + P_W_ETS) /
(P_M_ARIMA + P_N_ARIMA + P_S_ARIMA + P_W_ARIMA) /
(P_M_NNETAR + P_N_NNETAR + P_S_NNETAR + P_W_NNETAR) +
plot_layout(guides = 'collect') +
plot_annotation(title = '4x4 Comparison: Model Performance Across Regions (Segment 3: COVID Shock) with Prediction Intervals',
subtitle = 'Rows = Model (Naive, ETS, ARIMA, NNETAR), Columns = Region. Shaded areas represent 80% and 95% Prediction Intervals.',
theme = theme(plot.title = element_text(size = 18, face = "bold"),
plot.subtitle = element_text(size = 12)))## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.
# Check assumptions for the fitted ARIMA model (e.g., Midwest, Segment 3)
# This function generates 4 plots and performs the Ljung-Box test.
checkresiduals(arima.fit.midwest.S3)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 2.0381, df = 11, p-value = 0.9984
##
## Model df: 0. Total lags used: 11
# 1. Extract the actual data and fitted values for all models
actual_data <- train.midwest.S3
fitted_arima <- arima.fit.midwest.S3$fitted
fitted_ets <- ets.fit.midwest.S3$fitted
fitted_nnetar <- nnetar.fit.midwest.S3$fitted
# 2. Plot the actual data (black line)
plot(actual_data,
main="Fitted Model Lines vs. Actual Data (Midwest S3)",
ylab="Unemployment Rate",
xlab="Time",
lwd=2, # thicker line for actual data
col="black")
# 3. Add the fitted lines
lines(fitted_arima, col="red", lty=2, lwd=1.5) # Dashed line for ARIMA
lines(fitted_ets, col="blue", lty=1, lwd=1.5) # Solid line for ETS
lines(fitted_nnetar, col="green4", lty=3, lwd=1.5) # Dotted line for NNETAR
# 4. Add a legend
legend("topright",
legend=c("Actual Data", "ARIMA(0,1,0) Fitted", "ETS(A,Ad,N) Fitted", "NNETAR(1,1) Fitted"),
col=c("black", "red", "blue", "green4"),
lty=c(1, 2, 1, 3), lwd=c(2, 1.5, 1.5, 1.5),
bty="n")