MA 707 - Problem 2 set¶
Bentley Widgets Inc. Automated QC Model: CNN Classification¶
1. Project Overview and Setup¶
Problem Definition: Casting Quality Control¶
In a manufacturing setting, being able to spot a defective item instantly is crucial. Manual inspection is slow, prone to human error, and expensive. Our task is simple: create an automated system that can reliably classify images of castings into two categories:
Defective (Class 0): The part has some flaw, crack, or inclusion.
Good (Class 1): The part is clean and meets quality standards.
Business Objective¶
The main objective of this analysis is to reduce the cost associated with incorrect decisions:
Cost of a False Negative (Type II Error): We classify a Defective part as Good (it gets shipped out). This leads to product failure, warranty claims, reputational damage, and major costs. This is the worst-case scenario.
Cost of a False Positive (Type I Error): We classify a Good part as Defective (it gets thrown away). This is wasted material, but it's much better than shipping a bad product.
Because the cost of shipping a bad part is so much higher than the cost of throwing away a good one, our main business goal is driven by Recall for the 'Defective' class.
Our Business Objective is a Target Recall of 98.00% for the 'Defective' Class.
This means our model must be able to correctly identify at least 98 out of every 100 actual defective parts. We are willing to accept slightly lower precision (i.e., mistakenly reject a few 'good' parts) to hit this critical safety target.
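To make the cost asymmetry concrete, here is a minimal sketch (with hypothetical counts and illustrative per-part costs, not values from our data) of how Defective-class recall and expected error cost follow directly from confusion-matrix counts:

```python
# Hypothetical inspection outcomes for 1,000 castings (illustrative only)
tp_defect = 98   # defective parts correctly flagged
fn_defect = 2    # defective parts shipped  (Type II error -- the costly one)
fp_defect = 30   # good parts wrongly scrapped (Type I error)

recall_defect = tp_defect / (tp_defect + fn_defect)

# Illustrative per-part costs: a shipped defect (warranty claims, reputation)
# is assumed ~50x as expensive as scrapping a good part
COST_FN, COST_FP = 500.0, 10.0
expected_cost = fn_defect * COST_FN + fp_defect * COST_FP

print(f"Defective-class recall: {recall_defect:.2%}")  # 98.00% -> meets target
print(f"Total error cost:       ${expected_cost:,.0f}")
```

Because each missed defect is assumed far costlier than a scrapped good part, minimizing total cost pushes the operating point toward high Defective-class recall even at the expense of extra false alarms.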
Setup and Library Imports¶
# Core Data and ML Libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import glob
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, MaxPool2D, Dropout # <-- Added Dropout
from tensorflow.keras.layers import Rescaling, RandomFlip, RandomRotation, RandomZoom
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.optimizers import Adam # <-- Added Adam Optimizer
# Ensure reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
2. Data Preparation and Preprocessing¶
The integrity of the model's validation process is a critical dependency for tuning and final evaluation. This section details the necessary preprocessing steps and addresses a major initial challenge related to data representativeness.
2.1 Configuration and Constants¶
# --- Configuration Parameters (image side length matches input_dim = 200) ---
IMAGE_SIZE = (200, 200)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
DATA_DIR = '/Users/urielulloa/Desktop/Bentley_University/Fall_2025/MA707_Machine_Learning/Problem_Set_2/castings' # MUST contain 'defective' and 'good' subfolders
MODEL_SAVE_PATH = '/Users/urielulloa/Desktop/Bentley_University/Fall_2025/MA707_Machine_Learning/Problem_Set_2/best_qc_model.keras' # Path for ModelCheckpoint
2.2 Data Loading and Train/Test Split¶
Initial attempts to load the dataset using a basic split mechanism resulted in a non-representative validation subset. Specifically, the evaluation dataset was heavily skewed, containing an insufficient number of samples from the minority (Defective) class.
This severe class imbalance in the validation set led to highly misleading performance metrics: the model could achieve deceptively high validation accuracy by simply classifying all inputs as the majority class (Good). This empirical anomaly obscured the true learning dynamics and hindered effective hyperparameter tuning.
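A minimal sketch (synthetic labels, not our data) makes this failure mode concrete: a degenerate classifier that always predicts the majority class scores exactly the majority proportion while catching zero defects:

```python
import numpy as np

# Synthetic, heavily skewed validation labels: 234 'good' (1) vs 26 'defective' (0)
labels = np.array([1] * 234 + [0] * 26)

# A degenerate model that ignores its input and always predicts 'good'
preds = np.ones_like(labels)

accuracy = (preds == labels).mean()
recall_defective = (preds[labels == 0] == 0).mean()

print(f"Accuracy: {accuracy:.1%}")                   # 90.0%
print(f"Defective recall: {recall_defective:.1%}")   # 0.0%
```

High validation accuracy alone is therefore meaningless when the validation split is skewed; the class balance must be controlled first.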
To mitigate this systematic bias, the standard loading procedure was replaced with a custom pipeline enforcing stratified sampling. This ensures the class proportions in the validation set accurately mirror those in the overall dataset.
This method utilizes sklearn.model_selection.train_test_split with the stratify parameter to maintain class balance during the $80\%/20\%$ split:
#Code generated with AI support
import tensorflow as tf
import numpy as np
import os
import glob
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.layers import Rescaling
# Ensure reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
# --- Configuration Parameters ---
IMAGE_SIZE = (200, 200)
BATCH_SIZE = 32
TEST_SIZE = 0.2 # 20% for validation/test set
DATA_DIR = '/Users/urielulloa/Desktop/Bentley_University/Fall_2025/MA707_Machine_Learning/Problem_Set_2/castings/train'
# 1. Gather all file paths and create corresponding labels
all_paths = []
all_labels = [] # 0: defective, 1: good
class_names = sorted(os.listdir(DATA_DIR))
class_names = [name for name in class_names if not name.startswith('.')] # Filter out hidden files
# Assumes the directory structure is DATA_DIR/defective and DATA_DIR/good
for i, name in enumerate(class_names):
    label = i  # 0 for first class name, 1 for second ('defective', 'good')
    class_path = os.path.join(DATA_DIR, name)
    files = glob.glob(os.path.join(class_path, '*.*'))  # all image files in the subfolder
    all_paths.extend(files)
    all_labels.extend([label] * len(files))
all_labels = np.array(all_labels)
# 2. Perform STRATIFIED Split on File Paths
# CRITICAL: stratify=all_labels ensures the 20% split maintains the same proportion of classes
train_paths, val_paths, train_labels, val_labels = train_test_split(
    all_paths, all_labels,
    test_size=TEST_SIZE,
    random_state=SEED,
    stratify=all_labels  # guarantees both splits keep the class proportions
)
# 3. Define Image Loading Function
def load_and_preprocess_image(path, label):
    """Read an image file, decode it, and resize it to IMAGE_SIZE."""
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMAGE_SIZE)
    # Rescaling to [0, 1] is applied later, inside the model architecture.
    # Cast the label to float32 as expected by binary_crossentropy.
    return img, tf.cast(label, tf.float32)
# 4. Create Datasets from Tensor Slices (Stratified Paths)
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
train_dataset = train_dataset.shuffle(buffer_size=1000).batch(BATCH_SIZE).prefetch(AUTOTUNE)
# Validation dataset (used for evaluation)
validation_dataset = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
validation_dataset = validation_dataset.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
# CRITICAL: Do NOT shuffle the validation set; .cache() also avoids re-decoding images every epoch
validation_dataset = validation_dataset.batch(BATCH_SIZE).cache().prefetch(AUTOTUNE)
print(f"Inferred Class Names (0, 1): {class_names}")
# --- DIAGNOSTIC: Check Data Balance ---
# 1. Safely extract true labels (y_true) from the validation dataset
validation_labels = np.concatenate([y.numpy() for x, y in validation_dataset], axis=0).flatten()
# 2. Convert to integers and count
counts = np.bincount(validation_labels.astype(int))
print(f"DEBUG: validation_labels shape is {validation_labels.shape}")
if len(counts) == 1:
    print("\n--- CRITICAL DATA WARNING ---")
    print(f"Validation Dataset contains only ONE class (Label: {np.unique(validation_labels)}).")
    print("This means the random split failed due to data imbalance. You must manually try a different SEED.")
elif len(counts) == 2:
    print("\n--- Validation Dataset Balance ---")
    print(f"Total Samples: {len(validation_labels)}")
    # Ensure class_names exist before printing
    c0_name = class_names[0] if len(class_names) > 0 else 'Class 0'
    c1_name = class_names[1] if len(class_names) > 1 else 'Class 1'
    print(f"Class 0 ({c0_name}): {counts[0]} samples")
    print(f"Class 1 ({c1_name}): {counts[1]} samples")
# --- END DIAGNOSTIC ---
Inferred Class Names (0, 1): ['defective', 'good']
DEBUG: validation_labels shape is (260,)

--- Validation Dataset Balance ---
Total Samples: 260
Class 0 (defective): 156 samples
Class 1 (good): 104 samples
The successful stratification confirms the validation subset is now statistically representative, establishing a reliable baseline for evaluating the model's generalization capability and ensuring the integrity of all subsequent model evaluations.
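The effect of the stratify parameter can be verified on synthetic labels that mirror the notebook's 60/40 class ratio (a sketch assuming scikit-learn is available):

```python
import numpy as np
from sklearn.model_selection import train_test_split

# Synthetic labels mirroring the notebook's ratio: 780 defective (0), 520 good (1)
labels = np.array([0] * 780 + [1] * 520)
paths = np.arange(len(labels))  # stand-ins for image file paths

_, val_paths, _, val_labels = train_test_split(
    paths, labels,
    test_size=0.2,
    random_state=42,
    stratify=labels  # preserves the 60/40 class ratio in both splits
)

counts = np.bincount(val_labels)
print(f"Validation size: {len(val_labels)}")
print(f"defective: {counts[0]}, good: {counts[1]}")
```

With divisible class sizes like these, the stratified split reproduces the class proportions exactly, matching the 156/104 balance observed above.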
2.3 Data Augmentation¶
The implemented augmentation pipeline is defined as a Sequential layer and placed directly after the input layer in the model:
#Code generated with AI support
# Ensure input_dim is set to 200, matching your data loading
input_dim = 200
# Data Augmentation Layer (from previous code)
data_augmentation = Sequential([
    RandomFlip('horizontal'),
    RandomRotation(0.1),  # rotate by up to ±10% of a full turn (±36°)
    RandomZoom(0.2)       # zoom in or out by up to 20%
])
3. Model Training¶
Key Architectural Decisions:
Filter Progression: The convolutional filters grow $32 \to 64 \to 128 \to 256$ across the four blocks, doubling channel capacity each time pooling halves the spatial resolution. This keeps the total number of trainable parameters modest while preserving representational power, balancing training time against overfitting risk.
Strategic Dropout: Two Dropout layers were strategically placed:
$0.2$ Dropout: Introduced after the first MaxPool2D to regularize the low-level feature representations immediately after the first major downsampling.
$0.25$ Dropout: Placed just before the final classification layer to regularize the high-level, flattened feature vector, preventing co-adaptation of neurons.
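A back-of-envelope parameter count for this conv stack (assuming, as in the model code below, 32→64→128→256 filters, $3\times3$ kernels, 'same' padding, a $200\times200\times3$ input, and three $2\times2$ poolings) shows where the trainable parameters concentrate:

```python
def conv_params(kh, kw, in_ch, out_ch):
    """Weights plus biases for a Conv2D layer."""
    return (kh * kw * in_ch + 1) * out_ch

# Conv stack: 3 -> 32 -> 64 -> 128 -> 256 channels, 3x3 kernels throughout
convs = [conv_params(3, 3, cin, cout)
         for cin, cout in [(3, 32), (32, 64), (64, 128), (128, 256)]]

# 'same' padding keeps 200x200; three 2x2 poolings give 200 -> 100 -> 50 -> 25
flat = 25 * 25 * 256            # Flatten output length
dense = (flat + 1) * 1          # single sigmoid output unit

total = sum(convs) + dense
print(f"Conv layers:  {sum(convs):,} parameters")
print(f"Dense layer:  {dense:,} parameters")
print(f"Total:        {total:,} trainable parameters")
```

The last conv block and the flattened Dense connection dominate the parameter budget, which is one reason the heavier 0.25 Dropout sits just before the classifier.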
3.1 Compilation and Training Convergence¶
The model was compiled using the Adam optimizer, and key callbacks were implemented to manage the training process optimally.
Hyperparameter Selection Criteria:
Learning Rate (LR) $\mathbf{0.0005}$: This is half the Keras Adam default of $0.001$. The smaller step size was selected to smooth the descent toward the loss minimum, trading a slightly slower start for more stable convergence under data augmentation.
Early Stopping (patience=5): Provides a robust termination mechanism, halting training after 5 consecutive epochs without improvement in validation loss; restore_best_weights=True then returns the model to its best checkpoint. This avoids wasted epochs while tolerating minor fluctuations in the validation curve.
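The patience logic can be sketched in plain Python as a simplified stand-in for Keras's EarlyStopping with restore_best_weights=True (an illustrative helper, not the Keras implementation):

```python
def early_stop_epoch(val_losses, patience=5):
    """Return (stop_epoch, best_epoch), 1-indexed.

    Mimics EarlyStopping(monitor='val_loss', patience=patience,
    restore_best_weights=True): training halts once `patience` epochs
    pass without a new best validation loss, and the weights from
    best_epoch are the ones kept.
    """
    best_loss, best_epoch, wait = float("inf"), 0, 0
    for epoch, loss in enumerate(val_losses, start=1):
        if loss < best_loss:
            best_loss, best_epoch, wait = loss, epoch, 0
        else:
            wait += 1
            if wait >= patience:
                return epoch, best_epoch  # stop; restore weights from best_epoch
    return len(val_losses), best_epoch

# Toy validation-loss curve: best at epoch 4, then no improvement for 5 epochs
losses = [0.60, 0.45, 0.40, 0.30, 0.35, 0.33, 0.31, 0.34, 0.32]
print(early_stop_epoch(losses, patience=5))  # -> (9, 4)
```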
#Code generated with AI support
# Final Tuned Model
model = Sequential([
    Input(shape=(input_dim, input_dim, 3)),
    data_augmentation,
    Rescaling(1./255),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.2),   # regularize early features after the first pooling stage
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(filters=256, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dropout(0.25),  # heavier dropout before the final classification layer
    Dense(1, activation='sigmoid')
])
# Compilation (tuned learning rate)
custom_adam = Adam(learning_rate=0.0005)  # half the Keras default of 0.001
model.compile(loss='binary_crossentropy',
              optimizer=custom_adam,
              metrics=['accuracy'])
# Callbacks
MODEL_SAVE_PATH = './final_qc_model_200.keras'
early_stop = EarlyStopping(monitor='val_loss', patience=5, mode='min', restore_best_weights=True)
save_best = ModelCheckpoint(MODEL_SAVE_PATH, save_best_only=True, monitor='val_loss', mode='min')
# Training Step
print("Starting FINAL training run with Stratified Data and Improved Hyperparameters...")
# Use 100 epochs; the EarlyStopping will manage the actual run time
history = model.fit(train_dataset,
                    epochs=100,
                    validation_data=validation_dataset,
                    callbacks=[early_stop, save_best],
                    verbose=1)
Starting FINAL training run with Stratified Data and Improved Hyperparameters...
Epoch 1/100  33/33 ━━━━ 141s 4s/step - accuracy: 0.5769 - loss: 0.6852 - val_accuracy: 0.7692 - val_loss: 0.6484
[... epochs 2-30 condensed: steady improvement at ~130s/epoch ...]
Epoch 31/100 33/33 ━━━━ 127s 4s/step - accuracy: 0.8808 - loss: 0.2388 - val_accuracy: 0.9538 - val_loss: 0.1419   <-- best val_loss
[... epochs 32-35 condensed: no further improvement ...]
Epoch 36/100 33/33 ━━━━ 134s 4s/step - accuracy: 0.9202 - loss: 0.1891 - val_accuracy: 0.9462 - val_loss: 0.1480   <-- EarlyStopping halts here
The final-epoch metrics show strong alignment: training accuracy (0.9202) and validation accuracy (0.9462) are tightly coupled, with losses tracking each other closely, and EarlyStopping restored the best weights from Epoch 31 (val_loss 0.1419, val_accuracy 0.9538). This indicates that regularization (data augmentation and Dropout) together with the stratified validation set effectively prevented overfitting, resulting in a reliable, generalizable model.
4. Model Evaluation (Post-Training)¶
The model’s stability on unseen data is evaluated by comparing training and validation metrics. Stratified sampling and regularization (Data Augmentation, Dropout) were applied to ensure reliable validation results and effective generalization.
The following code plots the performance metrics recorded during model.fit():
#Code generated with AI support
import matplotlib.pyplot as plt
# --- Plot Training & Validation Accuracy ---
plt.figure(figsize=(8, 5))
plt.plot(history.history['accuracy'], label='Train Accuracy', linewidth=2)
plt.plot(history.history['val_accuracy'], label='Validation Accuracy', linewidth=2)
plt.title('Model Accuracy History (Tuned Model)', fontsize=14)
plt.ylabel('Accuracy', fontsize=12)
plt.xlabel('Epoch', fontsize=12)
plt.legend(loc='lower right', fontsize=10)
plt.grid(True)
plt.tight_layout()
plt.show()
# --- Plot Training & Validation Loss ---
plt.figure(figsize=(8, 5))
plt.plot(history.history['loss'], label='Train Loss', linewidth=2)
plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
plt.title('Model Loss History (Tuned Model)', fontsize=14)
plt.ylabel('Loss', fontsize=12)
plt.xlabel('Epoch', fontsize=12)
plt.legend(loc='upper right', fontsize=10)
plt.grid(True)
plt.tight_layout()
plt.show()
The Accuracy History plot demonstrates effective training. Both the training and validation accuracy curves rise sharply and in sync, showing that the learning rate of 0.0005 enabled fast and stable convergence. The small, consistent gap between the two curves indicates that the model generalizes well and avoids significant overfitting. Around Epoch 20, the curves begin to plateau, suggesting that the model has learned most of the meaningful patterns in the training data. From there, the Early Stopping mechanism stands ready to intervene after 5 epochs without improvement in validation loss, ensuring efficient and well-regulated training.
The Loss History plot confirms the model’s stability. Both the training and validation loss curves decrease smoothly. Like the accuracy trends, the validation loss closely tracks the training loss, indicating consistent learning across both datasets. Training halts once the validation loss stops improving within the defined patience window, ensuring the model stops at its point of best generalization. Overall, the convergence patterns across both plots demonstrate that the model is highly stable, and the weights restored by Early Stopping capture the optimal balance between minimizing training error and maximizing performance on unseen data.
5. Decision Threshold Selection¶
The main business goal is to maximize the detection of defective parts, requiring a minimum Recall of 98.00% for the Defective class (Class 0). Since the model produces probabilities between 0 and 1, the standard classification threshold of 0.5 must be adjusted to meet this strict sensitivity target. To find the optimal threshold, the procedure uses components of the Receiver Operating Characteristic (ROC) curve to empirically identify the first point where the target recall level is achieved.
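On synthetic scores, the search looks like this (a sketch with made-up labels and probabilities; note that roc_curve treats label 1 as the positive class, so the TPR being scanned is recall for whichever class is coded 1):

```python
import numpy as np
from sklearn.metrics import roc_curve

# Synthetic ground truth (1 = positive class) and predicted probabilities
y_true = np.array([1, 1, 1, 1, 0, 0, 0, 0, 1, 0])
y_score = np.array([0.9, 0.8, 0.4, 0.7, 0.3, 0.2, 0.35, 0.1, 0.6, 0.55])

target_recall = 0.98
fpr, tpr, thresholds = roc_curve(y_true, y_score)

# Thresholds are scanned from strict to lenient; take the first one whose
# true-positive rate (recall for the positive class) meets the target
idx = np.where(tpr >= target_recall)[0][0]
print(f"Threshold {thresholds[idx]:.2f} achieves recall {tpr[idx]:.2f}")
```

Because tpr is monotonically non-decreasing as thresholds loosen, the first index meeting the target gives the strictest threshold that satisfies the recall constraint.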
The following code implements the procedure to find the optimal decision threshold and evaluates the final performance metrics on the stratified validation set.
#Code generated with AI support
# --- Configuration for Evaluation ---
evaluation_dataset = validation_dataset
target_recall = 0.98
# --- Step 1: Generate True Labels and Predicted Probabilities ---
print("Generating predictions on the evaluation dataset...")
# 1. SAFELY EXTRACT TRUE LABELS (y_true)
y_true_list = []
# Loop structure: for x_batch, y_batch in dataset
for _, y_batch in evaluation_dataset:
    y_true_list.append(y_batch.numpy())  # tensor -> numpy
# Concatenate all batches; .flatten() ensures the final shape is (N_samples,)
y_true = np.concatenate(y_true_list, axis=0).flatten()
num_samples = len(y_true)
print(f"y_true shape after concatenation: {y_true.shape}")
# 2. GENERATE REAL PREDICTIONS FROM THE TRAINED MODEL
y_pred_probs = model.predict(evaluation_dataset, verbose=1).flatten()
print(f"y_pred_probs shape after prediction: {y_pred_probs.shape}")
# --- Step 2: Find the Optimal Threshold for Target Recall ---
# NOTE: roc_curve treats label 1 ('good') as the positive class, so tpr below is
# recall for the 'good' class. To target the Defective class (label 0) directly,
# rerun this search on the defect score (1 - y_pred_probs) with pos_label=0.
fpr, tpr, thresholds = roc_curve(y_true, y_pred_probs)
# Find the index where the True Positive Rate first meets or exceeds the target
try:
    threshold_index = np.where(tpr >= target_recall)[0][0]
except IndexError:
    print(f"Warning: Target Recall of {target_recall:.2%} was not met. Using max achievable Recall.")
    threshold_index = np.argmax(tpr)
optimal_threshold = thresholds[threshold_index]
print(f"\n--- Decision Threshold Recommendation ---")
print(f"Target Recall: {target_recall:.2%}")
print(f"Recommended Optimal Threshold: {optimal_threshold:.4f}")
print(f"Achieved Recall at this threshold: {tpr[threshold_index]:.4f}")
# --- Step 3: Evaluate Performance at the Optimal Threshold ---
# Generate hard predictions (0 or 1) using the custom optimal threshold
y_pred_hard = (y_pred_probs >= optimal_threshold).astype(int)
# Explicitly specify labels=[0, 1] so both classes are reported even if one is absent
class_report_dict = classification_report(
    y_true,
    y_pred_hard,
    target_names=class_names,
    labels=[0, 1],
    output_dict=True
)
cm = confusion_matrix(y_true, y_pred_hard, labels=[0, 1])
print(f"\n--- Classification Report at Threshold {optimal_threshold:.4f} ---")
print(classification_report(y_true, y_pred_hard, target_names=class_names, labels=[0, 1]))
# --- Step 4: Visualize Results (Confusion Matrix) ---
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title(f'Confusion Matrix at Optimal Threshold ({optimal_threshold:.4f})')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.show()
Generating predictions on the evaluation dataset...
y_true shape after concatenation: (260,)
9/9 ━━━━━━━━━━━━━━━━━━━━ 10s 1s/step
y_pred_probs shape after prediction: (260,)

--- Decision Threshold Recommendation ---
Target Recall: 98.00%
Recommended Optimal Threshold: 0.3555
Achieved Recall at this threshold: 0.9904

--- Classification Report at Threshold 0.3555 ---
              precision    recall  f1-score   support

   defective       0.99      0.91      0.95       156
        good       0.88      0.99      0.93       104

    accuracy                           0.94       260
   macro avg       0.94      0.95      0.94       260
weighted avg       0.95      0.94      0.94       260
At the recommended threshold of 0.3555, the search achieved a recall of 99.04%, surpassing the 98.00% target. An important caveat, however: roc_curve treats label 1 ('good') as the positive class, so that 99.04% is recall for the Good class. For the critical Defective class, the classification report shows precision of 0.99 but recall of only 0.91 (142 of 156 defective parts caught), which falls short of the business target. Lowering the threshold below 0.5 makes the model more willing to call a part 'good', which is the opposite of the intended safety adjustment.

The Confusion Matrix tells the same story: a single false positive (one good part scrapped) against fourteen false negatives (defective parts passed as good). To aim the sensitivity at the Defective class, the threshold search should be rerun on the defect score $1 - \hat{p}$ (equivalently, with pos_label=0), raising the bar a part must clear before shipping and trading Good-class recall for Defective-class recall.
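Because roc_curve's TPR tracks the class labeled 1, targeting the Defective class (label 0) requires passing the defect score as the positive-class score, e.g. via pos_label=0. A sketch on synthetic data (not our validation run):

```python
import numpy as np
from sklearn.metrics import roc_curve

# Synthetic labels (0 = defective, 1 = good) and model outputs p = P(good)
y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
p_good = np.array([0.30, 0.55, 0.20, 0.45, 0.90, 0.80, 0.70, 0.85, 0.60, 0.75])

# Treat 'defective' as positive: score with P(defect) = 1 - p and pos_label=0
fpr, tpr, thresholds = roc_curve(y_true, 1.0 - p_good, pos_label=0)

idx = np.where(tpr >= 0.98)[0][0]       # first point reaching 98% defect recall
cut = thresholds[idx]                   # cutoff applied to the defect score
y_pred_defective = (1.0 - p_good) >= cut

defect_recall = (y_pred_defective & (y_true == 0)).sum() / (y_true == 0).sum()
print(f"Defect-score cutoff {cut:.2f} -> defective recall {defect_recall:.2f}")
```

With this orientation, tpr really is Defective-class recall, and the resulting cutoff applies to $1 - \hat{p}$ rather than to $\hat{p}$ itself.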
6. Error Gallery Analysis¶
#Code generated with AI support
import random
import matplotlib.pyplot as plt
import numpy as np
# --- Dependencies Check (Assume these are defined in your preceding cells) ---
# evaluation_dataset: The batched TensorFlow dataset.
# y_pred_probs: Array of prediction probabilities (float, 0 to 1).
# y_pred_hard: Array of hard predictions (0 or 1).
# class_names: List of class labels. (We define a fallback here for reliability)
try:
    class_names  # raises NameError if the earlier cells were not run
except NameError:
    # Class 0 is Defective, Class 1 is Good
    class_names = ['Defective (0)', 'Good (1)']
# --- Step 6: Error Gallery Analysis (Robust Image Display) ---
# Unbatch evaluation dataset and convert to numpy arrays
X_eval_display, y_eval_display = [], []
# Iterate through the batched dataset and prepare images for display
for x_batch, y_batch in evaluation_dataset:
    x_np_display = x_batch.numpy().copy()
    y_np = y_batch.numpy()
    # Matplotlib expects uint8 pixel data in the 0-255 range. Check the max
    # value to decide whether the batch is normalized (0-1) or already 0-255.
    if np.max(x_np_display) <= 1.05:  # max near 1 -> assume normalized
        x_np_display = x_np_display * 255.0
    # Clip to the valid range and cast to uint8 regardless of original scaling
    x_np_display = np.clip(x_np_display, 0, 255).astype(np.uint8)
    X_eval_display.append(x_np_display)
    y_eval_display.append(y_np)
# Concatenate all batches into a single array
X_eval = np.concatenate(X_eval_display, axis=0)
y_eval = np.concatenate(y_eval_display, axis=0).flatten()
# Sanity check
assert len(X_eval) == len(y_pred_probs) == len(y_pred_hard), \
f"Mismatch between images ({len(X_eval)}), predictions ({len(y_pred_probs)}), or labels lengths ({len(y_pred_hard)})."
# --- Identify Error Types ---
# Class 0 is Defective, Class 1 is Good
# False Positives: true Good (1), predicted Defective (0)
false_positives = np.where((y_eval == 1) & (y_pred_hard == 0))[0]
# False Negatives (missed defects -- the critical error): true Defective (0), predicted Good (1)
false_negatives = np.where((y_eval == 0) & (y_pred_hard == 1))[0]
print(f"False Positives (Good classified as Defective): {len(false_positives)}")
print(f"False Negatives (Defective classified as Good): {len(false_negatives)}")
# --- Helper to Display Images ---
def show_error_gallery(indices, title, n=9):
    """Display up to n samples from a given set of indices."""
    n = min(n, len(indices))
    if n == 0:
        print(f"No samples for {title}.")
        return
    # Use all indices if there are fewer than n, otherwise sample randomly
    indices_to_plot = indices if len(indices) <= n else random.sample(list(indices), n)
    cols = 3
    rows = (len(indices_to_plot) + cols - 1) // cols
    plt.figure(figsize=(12, 4 * rows))
    for i, idx in enumerate(indices_to_plot):
        plt.subplot(rows, cols, i + 1)
        img = X_eval[idx]
        # Ensure the image is shaped (H, W, 3) or (H, W) for imshow
        if img.ndim == 4:
            img = img.squeeze()
        if img.ndim == 3 and img.shape[-1] == 1:  # greyscale (H, W, 1) -> (H, W)
            plt.imshow(img.squeeze(), cmap="gray")
        elif img.ndim == 3 and img.shape[-1] == 3:  # RGB (H, W, 3)
            plt.imshow(img)
        else:
            # Fallback for unexpected shapes: treat as 2D grayscale
            plt.imshow(img.squeeze(), cmap="gray")
        # The sigmoid outputs P(Class 1) = P(Good), so P(Defect) = 1 - p
        prob_defect = 1.0 - y_pred_probs[idx]
        plt.title(
            f"{title}\nTrue: {class_names[int(y_eval[idx])]} | "
            f"Pred: {class_names[int(y_pred_hard[idx])]}\n"
            f"P(Defect)={prob_defect:.4f}"
        )
        plt.axis("off")
    plt.suptitle(f"{title} — {len(indices_to_plot)} Samples", fontsize=16)
    plt.tight_layout()
    plt.show()
# --- Visualize Common Failure Modes ---
# Display all available samples for the critical errors (up to n per gallery)
show_error_gallery(false_negatives, "False Negatives (Missed Defects - CRITICAL)", n=6)
show_error_gallery(false_positives, "False Positives (Good classified as Defective)", n=6)
False Positives (Good classified as Defective): 1
False Negatives (Defective classified as Good): 14
7. System Deployment & Improvement Strategies¶
Current Status: On the stratified validation set, the tuned system flags suspect parts with near-perfect precision (0.99) and catches 91% of defective castings (142 of 156). The 98% recall target for the Defective class is not yet met at the 0.3555 cutoff, so closing that gap is the top priority before full deployment, protecting both the bottom line and our reputation.
Deployment: A part ships as "good" only when the model's predicted probability of being good reaches the tuned 0.3555 cutoff; anything below is held as "defective" for manual review. Before go-live, the cutoff should be re-derived from the defect score (Section 5) so the sensitivity targets the Defective class directly.
Trade-offs: At the current operating point, the manual re-inspection load is minimal (one good part flagged out of 104), but fourteen defective parts slipped through. Shifting the threshold toward safety will increase re-inspection of good parts; that extra work is a small price for keeping defective items off the line.
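The deployment rule can be sketched as a small routing helper (assuming, as in Section 5, that the model's sigmoid output p is P(good) and the tuned cutoff is 0.3555):

```python
CUTOFF = 0.3555  # tuned decision threshold on p = P(good) from the ROC analysis

def route_part(p_good: float) -> str:
    """Route a casting based on the model's predicted probability of 'good'.

    Parts at or above the cutoff ship as 'good'; everything below is held
    as 'defective' for manual re-inspection.
    """
    return "good" if p_good >= CUTOFF else "defective"

print(route_part(0.90))  # confidently good -> ships
print(route_part(0.20))  # likely flawed   -> held for inspection
```

If the threshold is later re-derived on the defect score, only the CUTOFF constant and comparison direction need updating; the routing interface stays the same.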
7.1. Learning From Mistakes & Improving¶
Missed Defects (Critical Failures): The system sometimes misses very faint or hard-to-see defects, like tiny cracks or subtle flaws hidden by shadows.
- Plan: Collect more examples of these tricky defects and tweak image processing (like boosting contrast) so the system can spot them more reliably.
False Alarms (Extra Inspections): Sometimes good parts get flagged because of reflections, dust, or normal surface marks.
- Plan: Improve lighting and camera setup to reduce shadows and glares. Also, train the system with more examples of “good” parts that have these harmless features so it learns to ignore them.
Once the decision threshold is re-derived against the Defective class, the system will be ready for deployment. Over time, small tweaks to the data, image processing, and QC setup will make it even more efficient, reducing extra manual checks without compromising quality.