Multivariate Time Series Classification - Human Activity Data from Wireless Sensor Network

Dataset and Overview

This dataset contains temporal data from a Wireless Sensor Network worn by an actor performing the activities: bending, cycling, lying down, sitting, standing, walking. It contains 7 folders that represent seven types of activities. In each folder, there are multiple files each of which represents an instant of a human performing an activity. Each file containis 6 time series collected from activities of the same person, which are called avg rss12, var rss12, avg rss13, var rss13, vg rss23, and ar rss23. There are 88 instances in the dataset, each of which contains 6 time series and each time series has 480 consecutive values.

This project tackles multivariate time-series classification using the UCI AReM human-activity dataset collected from a wireless sensor network (six synchronized signals per instance, 480 samples each).

Worflow

organize time series data into train/test folders
engineer time-domain features per signal—min, max, mean, median, quartiles, standard deviation, zero-crossings, cross-correlation, peak/trough counts;
segment each instance into ℓ equal chunks and extract the same features within each segment to capture within-instance dynamics;
perform class imbalance correction via undersampling majority
build and compare classifiers for both a binary task (bending vs. non-bending) and multiclass activity recognition.

Binary Classification Comparison:

a p-value-guided RFE + logistic regression pipeline
1-penalized logistic regression that performs embedded feature selection.

Multiclass Classification Comparison:

evaluate multinomial logistic regression (L1) and Naive Bayes variants, including a Gaussian NB + PCA reduction to mitigate feature correlation.

Comparison Framework

Model selection uses nested/outer cross-validation and test-set evaluation with ROC/AUC, accuracy, and confusion matrices.

1. Split into train and test folders




def remove_empty_dirs(train_dir, test_dir):
    for d in (train_dir, test_dir):
        if os.path.exists(d):
            shutil.rmtree(d)
        os.makedirs(d)

def ensure_dirs():
    for activity in os.listdir(root_dir):
        activity_path = os.path.join(root_dir, activity)
        if not os.path.isdir(activity_path):
            continue
        os.makedirs(os.path.join(train_dir, activity))
        os.makedirs(os.path.join(test_dir, activity))
        
        if activity in bending_folders:
            test_ids = {1, 2}
        else:
            test_ids = {1, 2, 3}
        split_test_train_data(activity, activity_path, test_ids)


def split_test_train_data(activity, activity_path, test_ids):
        for fname in os.listdir(activity_path):
            if not fname.lower().endswith(".csv"):
                continue
            try:
                idx = int(fname.replace("dataset", "").replace(".csv", ""))
            except ValueError:
                continue
            src = os.path.join(activity_path, fname)
            if idx in test_ids:
                dst = os.path.join(test_dir, activity, fname)
            else:
                dst = os.path.join(train_dir, activity, fname)
            shutil.copy(src, dst)

remove_empty_dirs(train_dir, test_dir)
ensure_dirs()

import os
import shutil
import pandas as pd
root_dir = "activity+recognition+system+based+on+multisensor+data+fusion+arem 2"
train_dir = os.path.join(root_dir, "train")
test_dir = os.path.join(root_dir, "test")
bending_folders = ("bending1", "bending2")

file_path = '/Users/mishkin/Desktop/USC Classes/EE559_local/Homework4/activity+recognition+system+based+on+multisensor+data+fusion+arem 2/bending2/dataset1.csv'
df = pd.read_csv(file_path, comment='#', header=None, names=colnames)

with open(file_path, 'r') as f:
    lines = [next(f) for _ in range(10)]

for i, line in enumerate(lines, start=1):
    print(f"Line {i}: {line.rstrip()}")

Line 1: # Task: bending2
Line 2: # Frequency (Hz): 20
Line 3: # Clock (millisecond): 250
Line 4: # Duration (seconds): 120
Line 5: # Columns: time,avg_rss12,var_rss12,avg_rss13,var_rss13,avg_rss23,var_rss23
Line 6: 0,23.75,0.43,24.00,0.00,24.67,0.47
Line 7: 250,22.67,0.94,24.00,0.00,24.67,0.94
Line 8: 500,21.33,0.47,24.00,0.00,24.75,0.83
Line 9: 750,21.00,0.00,24.00,0.00,25.25,1.09
Line 10: 1000,23.00,0.71,23.75,0.43,24.75,0.83

Clean Dataset 4 for bending 2 due to parsing error

def col4_bending2():
    cols = ['time',
            'avg_rss12','var_rss12',
            'avg_rss13','var_rss13',
            'avg_rss23','var_rss23']

    df_clean = pd.read_csv(
        '/Users/mishkin/Desktop/USC Classes/EE559_local/Homework4/train/bending2/dataset4.csv',   # ← your actual path here
        comment='#',header=None, names=cols,delim_whitespace=True                    )

    return df_clean

1 - Time Series Features

time series capture the behavior of a signal over a specificed window collected at certain intervals. There is no domain transformation involved in the time series data. Often, statistical information is collecte about the signal:

min is used to show the smallest bound of a signal
max is used to show the upper bound of a signal
mean of the signal over a specific time window.
standard devation
Zero-Crossings: counts the instances when the signal transits through a zero value.
Cross-Correlation: measure similarity between two signals relative to delay
Number of Peaks and Troughs

2 - Extraction

Extracting the time-domain features minimum, maximum, mean, median, standard deviation, first quartile, and third quartile for all of the 6 time series in each instance.

instance_dict = {}
colnames = ['time', 'avg_rss12', 'var_rss12', 'avg_rss13', 'var_rss13', 'avg_rss23', 'var_rss23']
#list index: time-series
#format: {activity_dataset_num1: [[min, max, mean, median, standard_dev, 1st_quartile, 3rd_quartile]]}

instance_dict = {}
colnames = ['time', 'avg_rss12', 'var_rss12', 'avg_rss13', 'var_rss13', 'avg_rss23', 'var_rss23']
bend4_special = '/Users/mishkin/Desktop/USC Classes/EE559_local/Homework4/train/bending2/dataset4.csv'

#get all  colmn statistics for a dataframe sample
def get_all_stats(df):
    stats = []
    for col in df.columns[1:]:
        min_val = df[col].min()
        max_val = df[col].max()
        mean_val = df[col].mean()
        median_val = df[col].median()
        std_dev = df[col].std()
        first_quartile = df[col].quantile(0.25)
        third_quartile = df[col].quantile(0.75)
        stats.append([min_val, max_val, mean_val, median_val, std_dev, first_quartile, third_quartile])
    return stats



def iterate_over_files():
    for activity in os.listdir(root_dir):
        activity_path = os.path.join(root_dir, activity)
        if not os.path.isdir(activity_path):
            continue
        for fname in os.listdir(activity_path):
            if not fname.lower().endswith(".csv"):
                continue
            file_path = os.path.join(activity_path, fname)
            if file_path == bend4_special:
                df = col4_bending2() # Special case for bending2 dataset4
            else:
                df = pd.read_csv(file_path,comment='#', header=None, names=colnames,engine='python', on_bad_lines='skip')
            stats = get_all_stats(df)
            fname_key = f"{activity}_{fname.replace('.csv', '')}"
            print(f"Stats for {fname_key}: {stats}")
            instance_dict[fname_key] = stats  

iterate_over_files()

display(instance_dict)

stats = ['min', 'max', 'mean', 'median', 'std', '1st_quart', '3rd_quart']
n_series = 6

records = []
index = []
for key, arr in instance_dict.items():
    flat_arr = []
    for time_series_features in arr:     
        flat_arr.extend(time_series_features)  
    records.append(flat_arr)
    index.append(key)

columns = []
for i in range(1, n_series+1):
    for stat in stats:
        columns.append(f'{stat}{i}')

df = pd.DataFrame(records, index=index, columns=columns)
df.index.name = 'Instance'

display(df)

	min1	max1	mean1	median1	std1	1st_quart1	3rd_quart1	min2	max2	mean2	...	std5	1st_quart5	3rd_quart5	min6	max6	mean6	median6	std6	1st_quart6	3rd_quart6
Instance
bending1_dataset7	36.25	48.00	43.969125	44.50	1.618364	43.310	44.67	0.0	1.50	0.413125	...	3.318301	20.5000	23.7500	0.0	2.96	0.555312	0.490	0.487826	0.0000	0.830
bending1_dataset6	37.00	48.00	43.454958	43.25	1.386098	42.500	45.00	0.0	1.58	0.378083	...	2.488862	22.2500	24.0000	0.0	5.26	0.679646	0.500	0.622534	0.4300	0.870
bending1_dataset4	33.00	47.75	42.179812	43.50	3.670666	39.150	45.00	0.0	3.00	0.696042	...	3.849448	30.4575	36.3300	0.0	2.18	0.613521	0.500	0.524317	0.0000	1.000
bending1_dataset5	33.00	45.75	41.678063	41.75	2.243490	41.330	42.75	0.0	2.83	0.535979	...	2.411026	28.4575	31.2500	0.0	1.79	0.383292	0.430	0.389164	0.0000	0.500
bending1_dataset1	37.25	45.00	40.624792	40.50	1.476967	39.250	42.00	0.0	1.30	0.358604	...	2.188449	33.0000	36.0000	0.0	1.92	0.570583	0.430	0.582915	0.0000	1.300
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
cycling_dataset12	18.50	44.25	35.752354	36.00	4.614802	33.000	39.33	0.0	12.60	3.328104	...	3.120057	14.0000	18.0625	0.0	9.39	3.069667	2.770	1.748326	1.7975	4.060
cycling_dataset15	24.25	45.00	37.177042	36.25	3.581301	34.500	40.25	0.0	8.58	2.374208	...	2.890347	17.9500	21.7500	0.0	9.34	2.921729	2.500	1.852600	1.5000	3.900
cycling_dataset14	23.33	43.50	36.248768	36.75	3.824632	33.415	39.25	0.0	9.71	2.737307	...	3.617405	15.7500	21.0000	0.0	11.15	3.532463	3.110	1.965267	2.1700	4.625
cycling_dataset8	26.25	44.25	36.957458	36.29	3.434863	34.500	40.25	0.0	8.64	2.420083	...	2.851673	14.0000	18.2500	0.0	8.34	2.934625	2.525	1.631380	1.6600	4.030
cycling_dataset9	27.75	44.67	37.142359	36.33	3.762442	34.000	40.50	0.0	10.76	2.422150	...	2.687173	15.0000	18.7500	0.0	8.75	2.825720	2.590	1.637312	1.5900	3.740

88 rows × 42 columns

3 - Standard Deviation

Estimating the standard deviation of each of the time-domain features extracted from the data. We build a 90% bootsrap confidence interval for the standard deviation of each feature.

#bootstrap sample
#We need to re-create 1000 samples of each feature. each value can be drawn from original feature with replacement 
col = [1,2,3,4,6,3,2,3]
def bootstrap(col):
    bootstrap_list = []
    bootstrap_arr = np.random.choice(col, size= (len(col), 1000), replace= True)
    #compute the standard deviation of each column
    bootstrap_std = bootstrap_arr.std(axis=0, ddof =1)
    lower_ci = np.quantile(bootstrap_std, 0.05)
    upper_ci = np.quantile(bootstrap_std, 0.95)
    return [bootstrap_std.mean(),lower_ci, upper_ci]

train_stats_std_b = train_stats.apply(bootstrap, axis =0)
train_stats_std = train_stats.std(axis =0, ddof=1)
train_stats_std_b.rename(index= {0: "b_std", 1: "l_ci", 2: "u_ci"}, inplace=True)
train_stats_std.name = 'std'
compare_std = pd.concat([train_stats_std ,train_stats_std_b.T], axis=1, )
display(compare_std)


rows = []
for feat in train_stats.columns:
    col = train_stats[feat]
    bs = np.random.choice(col, size=(len(col),1000), replace=True)
    sds = bs.std(axis=0, ddof=1)
    for val in sds:
        rows.append({'feature': feat, 'bootstrap_sd': val})

df_boot = pd.DataFrame(rows)
display(df_boot)
# Use FacetGrid to plot each feature's distribution side-by-side
g = sns.FacetGrid(df_boot, col='feature', col_wrap=6, sharex=False, sharey=False, height=2.5)
g.map(sns.histplot, 'bootstrap_sd', stat='density', kde=True)


for ax in g.axes.flat:
    feat = ax.get_title().split(' = ')[1]
    sds = df_boot.loc[df_boot['feature']== feat, 'bootstrap_sd']
    l_ci = np.quantile(sds, .05)
    h_ci = np.quantile(sds, .95)
    ax.axvline(l_ci, color='red', linestyle='--')
    ax.axvline(h_ci, color ='red', linestyle ='--')
plt.tight_layout()
plt.show()

display(ci_df)

	feature	obs_sd	ci_lower	ci_upper
0	min1	9.515445	8.272678	10.785692
1	max1	4.369322	3.338807	5.297051
2	mean1	5.305297	4.710636	5.894650
3	median1	5.409056	4.804663	6.008572
4	std1	1.762087	1.569118	1.948228
5	1st_quart1	6.118809	5.573342	6.657207
6	3rd_quart1	5.109643	4.335365	5.856706
7	min2	0.000000	0.000000	0.000000
8	max2	5.033882	4.623830	5.403929
9	mean2	1.565228	1.395385	1.703508
10	median2	1.404245	1.236852	1.541850
11	std2	0.879099	0.801783	0.941140
12	1st_quart2	0.940994	0.829606	1.035603
13	3rd_quart2	2.113288	1.892708	2.293791
14	min3	2.939616	2.761722	3.106292
15	max3	4.847358	4.175199	5.473759
16	mean3	3.985389	3.421560	4.490846
17	median3	4.013397	3.438490	4.538375
18	std3	0.941275	0.762230	1.124497
19	1st_quart3	4.196608	3.637085	4.708010
20	3rd_quart3	4.147858	3.548299	4.691338
21	min4	0.000000	0.000000	0.000000
22	max4	2.171183	1.974183	2.358420
23	mean4	1.159533	1.075816	1.221463
24	median4	1.139456	1.057452	1.201339
25	std4	0.455671	0.421401	0.486129
26	1st_quart4	0.838600	0.774024	0.889930
27	3rd_quart4	1.543658	1.433446	1.625677
28	min5	6.089107	4.390495	7.519351
29	max5	5.708524	4.745236	6.577859
30	mean5	5.643204	4.422241	6.720106
31	median5	5.780655	4.516529	6.903632
32	std5	1.019078	0.810070	1.221376
33	1st_quart5	6.061727	4.795095	7.202457
34	3rd_quart5	5.500200	4.367876	6.522408
35	min6	0.045577	0.000000	0.078476
36	max6	2.504568	2.251554	2.761453
37	mean6	1.148308	1.061613	1.214289
38	median6	1.080284	0.994092	1.147727
39	std6	0.514702	0.479660	0.545218
40	1st_quart6	0.754364	0.689663	0.806713
41	3rd_quart6	1.515057	1.401202	1.600523

scatterplot — Scatterplot of aggregate time domain features

4 - Best Time domain features

Features that have larger standard deviations are often more informative for distinguishing between different conditions. We would also like features that provide characteristic representation of the signal. Therefore, we can use:

min: captures the lower extremes of the signal
max: captures the upper extremes of the signal
mean: captures the central tendancy of the signal

5 - Scatterplots of Best Time Domain Features

skip over dataset 1,2, in bending, and 1,2,3 in all others to seperate training from test instances

import matplotlib.pyplot as plt
series = [1,2,6]
feat = ['min', 'max', 'mean']
cols = [ '_'.join([ft,str(ser)]) for ft in feat for ser in series]
rows = [f"bending1_{x}" for x in range(3, 8)]
rows = [f"bending2_{x}" for x in range(3, 8)]
train_stats_main_ft = train_stats[cols]
train_stats_main_ft.loc[[f"bending1_dataset{x}" for x in range(3,8)] , 'category'] = 'bending'
train_stats_main_ft.loc[[f"bending2_dataset{x}" for x in range(3,7)] , 'category'] = 'bending'
train_stats_main_ft['category'].fillna("other", inplace=True)
display(train_stats_main_ft)
sns.pairplot(train_stats_main_ft, hue = 'category')

Time Series Classification: Binary and Multiclass Classification

1 - Chunking Time Series Instances and Plotting

We break each time series in our training set into two (approximately) equal length time series. Now instead of 6 time series for each of the training instances, we have 12 time series for each training instance. Again, we depict scatter plots of the features extracted from both parts of the time series 1,2, and 6.

the scatterplot of both the classes (bending and non-bending) appear to be slightly more separable and eaiser to see when we split the time series data and compute the min, max, median, std, 25th, and 75th percentile.

root_dir = "/Users/mishkin/Desktop/USC Classes/EE559_local/Homework4"
train_dir = os.path.join(root_dir, "train")
test_dir = os.path.join(root_dir, "test")

def iterate_training_files():
    colnames = ['time','avg_rss12','var_rss12','avg_rss13','var_rss13','avg_rss23','var_rss23']
    instance_dict = {}
    for activity in os.listdir(train_dir):
        activity_path = os.path.join(train_dir, activity)
        if not os.path.isdir(activity_path):
            continue
        for fname in os.listdir(activity_path):
            if not fname.lower().endswith(".csv"):
                continue
            file_path = os.path.join(activity_path, fname)
            if file_path == bend4_special:
                df = col4_bending2() # Special case for bending2 dataset4
            else:
                df = pd.read_csv(file_path,comment='#', header=None, names=colnames,engine='python', on_bad_lines='skip')
            fname_key = f"{activity}_{fname.replace('.csv', '')}"
            instance_dict[fname_key] = df
    return instance_dict

dataframes = iterate_training_files()
print(len(dataframes))

def get_stats(dataframes):
    stats_dict = {}
    for fname, (df1, df2) in dataframes.items():
            stats_1 = get_all_stats(df1)
            stats_2 = get_all_stats(df2)
            stats_dict[f"{fname}_split1"] = stats_1  
            stats_dict[f"{fname}_split2"] = stats_2  
    return stats_dict

stat_dict = get_stats(dataframes)



def flatten_stats(stats_dict):
        stats = ['min', 'max', 'mean', 'median', 'std', '1st_quart', '3rd_quart']
        n_series = 6
        records = []
        index = []
        for key, arr in stat_dict.items():
                flat_arr = []
                for time_series_features in arr:     
                        flat_arr.extend(time_series_features)  
                records.append(flat_arr)
                index.append(key)

        columns = []
        for i in range(1, n_series+1):
                for stat in stats:
                        columns.append(f'{stat}{i}')
        df = pd.DataFrame(records, index=index, columns=columns)
        df.index.name = 'Instance'
        return df

df = flatten_stats(stat_dict)
display(df)

	min1	max1	mean1	median1	std1	1st_quart1	3rd_quart1	min2	max2	mean2	...	std5	1st_quart5	3rd_quart5	min6	max6	mean6	median6	std6	1st_quart6	3rd_quart6
Instance
bending1_dataset7_split1	36.67	45.00	43.486208	43.710	1.281976	42.33	44.50	0.0	1.30	0.378667	...	4.271610	19.2500	24.00	0.0	2.28	0.524875	0.470	0.486847	0.0000	0.8300
bending1_dataset7_split2	36.25	48.00	44.452042	44.500	1.771251	44.25	44.75	0.0	1.50	0.447583	...	1.929494	21.0000	23.50	0.0	2.96	0.585750	0.500	0.487918	0.4225	0.8300
bending1_dataset6_split1	39.00	48.00	44.117042	45.000	1.359836	43.50	45.00	0.0	1.30	0.250042	...	2.967487	21.7500	24.25	0.0	5.26	0.815417	0.710	0.718016	0.4300	1.1200
bending1_dataset6_split2	37.00	46.50	42.792875	42.670	1.058792	42.50	43.00	0.0	1.58	0.506125	...	1.881721	22.5000	23.75	0.0	4.06	0.543875	0.500	0.473445	0.4300	0.7100
bending1_dataset4_split1	33.75	47.75	43.278875	45.000	3.473355	42.00	45.25	0.0	3.00	0.673292	...	1.650105	36.0000	37.00	0.0	1.53	0.640958	0.765	0.554542	0.0000	1.2200
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
cycling_dataset14_split2	23.33	43.50	36.160000	36.500	3.991725	33.31	39.27	0.0	9.63	2.775958	...	3.427143	16.1875	21.00	0.0	11.15	3.533958	3.085	1.994636	2.1700	4.5075
cycling_dataset8_split1	26.25	43.40	36.865417	36.250	3.462793	34.31	40.00	0.0	7.46	2.387250	...	3.009900	13.7500	18.25	0.0	8.26	2.964792	2.500	1.655499	1.6550	4.0375
cycling_dataset8_split2	29.25	44.25	37.049500	36.415	3.411449	34.50	40.25	0.0	8.64	2.452917	...	2.681881	14.3300	18.00	0.0	8.34	2.904458	2.570	1.609793	1.6900	3.7700
cycling_dataset9_split1	27.75	44.00	37.132636	36.670	3.808255	34.00	40.33	0.0	10.76	2.388870	...	2.579995	14.7500	18.75	0.0	8.75	2.917782	2.600	1.684056	1.6800	3.9000
cycling_dataset9_split2	31.00	44.67	37.152042	36.250	3.724195	33.81	40.50	0.0	7.79	2.455292	...	2.784775	15.0000	18.75	0.0	7.93	2.734042	2.550	1.587605	1.5000	3.7050

138 rows × 42 columns


def is_test_instance(idx):
    activity, rest = idx.split('_dataset')
    ds = rest.split('_')[0]
    if activity in ('bending1','bending2'):
        return ds in ('1','2')
    else:
        return ds in ('1','2','3')

# build your masks
test_mask   = df.index.map(is_test_instance)
train_mask  = ~test_mask
is_bending  = df.index.str.startswith('bending')
is_split1   = df.index.str.endswith('_split1')
is_split2   = df.index.str.endswith('_split2')

series = [1,2,6]
stats  = ['min','mean','max']
pairs  = [(i,j) for i in series for j in series if i<j]

for stat in stats:
    for i,j in pairs:
        x = f'{stat}{i}'
        y = f'{stat}{j}'

        plt.figure(figsize=(6,5))
        # bending split1
        plt.scatter(
            df.loc[train_mask & is_bending & is_split1, x],
            df.loc[train_mask & is_bending & is_split1, y],
            c='red', marker='o', label='bending split1', alpha=0.7
        )
        # bending split2
        plt.scatter(
            df.loc[train_mask & is_bending & is_split2, x],
            df.loc[train_mask & is_bending & is_split2, y],
            c='red', marker='^', label='bending split2', alpha=0.7
        )
        # other split1
        plt.scatter(
            df.loc[train_mask & ~is_bending & is_split1, x],
            df.loc[train_mask & ~is_bending & is_split1, y],
            c='blue', marker='o', label='other split1', alpha=0.7
        )
        # other split2
        plt.scatter(
            df.loc[train_mask & ~is_bending & is_split2, x],
            df.loc[train_mask & ~is_bending & is_split2, y],
            c='blue', marker='^', label='other split2', alpha=0.7
        )

        plt.xlabel(x)
        plt.ylabel(y)
        plt.title(f"{stat} of series {i} vs {j} — train only")
        plt.legend(loc='best')
        plt.tight_layout()
        plt.show()

Time Series Segmentation Comparison: RFE versus Logistic Regression + L1

Binary Classification with RFE + P-value Model Approach

We break each time series in our training set into l ∈ {1, 2, . . . , 20} time series of approximately equal length and use logistic regression to solve the binary classification problem, using time-domain features.

we calculate the p-values for our logistic regression parameters in each model corresponding to each value of l and refit a logistic regression model using our pruned set of features.
Use 5-fold cross-validation to determine the best value of the pair (l, p), where p is the number of features used in recursive feature elimination.
Since we want to let our algorithm gain the knowledge of the various predictors available, the correct way would be to do the RFE along with Cross Validation.
feature dimension d(42) grows with ℓ as 42ℓ

1- SKLEARN Transformer Class

ℓ: Number of splits for each time series instance
dynamically build training dataframes of size (69, 42 * ℓ)

source for transformer object

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin


class TimeSeriesFeaturizer(BaseEstimator, TransformerMixin):
    def __init__(self, n_splits=1):
        self.n_splits = n_splits
        self._stat_funcs = [
            np.nanmin,
            np.nanmax,
            np.nanmean,
            np.nanmedian,
            lambda x: np.nanstd(x, ddof=1),
            lambda x: np.nanpercentile(x, 25),
            lambda x: np.nanpercentile(x, 75),
        ]

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        rows = []
        for df in X:
            df_copy = df.copy()
            data = df_copy.iloc[:, 1:].to_numpy(dtype=float)  # shape (T, n_series)
            T, n_series = data.shape
            inst_feats = []
            #for each time series, compute the 7 features
            for idx in range(n_series):
                #select the column from corrspdongin to the time series data
                time_series = data[:, idx]
                #split the time series data into n_splits 
                splits = np.array_split(time_series, self.n_splits)
                #in every split, #compute every feature (7) for this time series channel (out of 6)
                for split in splits:
                    for fn in self._stat_funcs:
                        #we will have a list of 7 features  * l_splits for each time series
                        inst_feats.append(fn(split))
                #repeat to appen the next 7 features  * l_splits for all time series (6)
            #each row (sample) in the output will contain 7*6* l_splits features 
            rows.append(inst_feats)
        return np.asarray(rows)

raw_dfs_train = []   
y = []   
for key, df in dataframes.items():
    raw_dfs_train.append(df)  
    y.append(1 if key.startswith('bending') else 0)
y = np.array(y, dtype=int)

print(f"Number of DataFrames in raw_dfs: {len(raw_dfs_train)}")
print("Columns in raw_dfs[0]:", list(raw_dfs_train[0].columns))

import numpy as np
import sys
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


pipe = Pipeline([
    ('featurize', TimeSeriesFeaturizer()),
    ('selector', RFECV(
        estimator=LogisticRegression(solver='liblinear', C=sys.maxsize),
        step=1,
        cv=StratifiedKFold(5, shuffle=True, random_state=1),  
        scoring='roc_auc',
        n_jobs=-1,
        min_features_to_select=1
    ))
])

param_grid = {'featurize__n_splits': list(range(1,21))}
outer_cv   = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

grid = GridSearchCV(pipe, param_grid=param_grid, cv=outer_cv, scoring='roc_auc',n_jobs=-1,refit=True)
grid.fit(raw_dfs_train, y)

print("Best ℓ:", grid.best_params_['featurize__n_splits'])
print("Best p:", grid.best_estimator_.named_steps['selector'].n_features_)
print("Outer‐CV ROC‐AUC:", grid.best_score_)

Best ℓ: 2

Best p: 4

Outer‐CV ROC‐AUC: 1.0

final_model = grid.best_estimator_

2 - RFE + p-value Classifier: AUC and ROC on Training Data

We report the confusion matrix and show the ROC and AUC for our classifier on train data. We report the parameters of our logistic regression βi ’s as well as the p-values associated with them.

from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

y_pred_prob = final_model.predict_proba(raw_dfs_train)[:, 1]
y_pred = final_model.predict(raw_dfs_train)

cm = confusion_matrix(y, y_pred)
print("Confusion matrix (train):")
print(cm)

fpr, tpr, thresholds = roc_curve(y, y_pred_prob)
auc_score = roc_auc_score(y, y_pred_prob)

plt.figure(figsize=(6,5))
plt.plot(fpr, tpr, label=f"AUC = {auc_score:.3f}")
plt.plot([0,1], [0,1], '--', label="chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve (Train)")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()

Confusion matrix (train):
[[60  0]
 [ 0  9]]

Confusion Matrix (Train)	Predicted 0	Predicted 1
Actual 0	60	0
Actual 1	0	9

import statsmodels.api as sm

X_full = final_model.named_steps['featurize'].transform(raw_dfs_train)
mask   = final_model.named_steps['selector'].support_
X_sel  = X_full[:, mask]
X_sm   = sm.add_constant(X_sel)

logit_model = sm.Logit(y, X_sm)
result=logit_model.fit(maxiter = 10)
result.summary()

Warning: Maximum number of iterations has been exceeded.
         Current function value: 0.000022
         Iterations: 10

/Users/mishkin/Desktop/USC Classes/EE559_local/.venv/lib/python3.11/site-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "

Logit Regression Results
Dep. Variable:	y	No. Observations:	69
Model:	Logit	Df Residuals:	43
Method:	MLE	Df Model:	25
Date:	Tue, 15 Jul 2025	Pseudo R-squ.:	0.9999
Time:	09:47:11	Log-Likelihood:	-0.0015323
converged:	False	LL-Null:	-26.718
Covariance Type:	nonrobust	LLR p-value:	0.0007868

	coef	std err	z	P>\|z\|	[0.025	0.975]
const	-5.2870	621.018	-0.009	0.993	-1222.459	1211.885
x1	0.1499	29.832	0.005	0.996	-58.319	58.619
x2	1.6624	103.556	0.016	0.987	-201.304	204.628
x3	-1.5198	77.650	-0.020	0.984	-153.710	150.671
x4	-1.4596	128.997	-0.011	0.991	-254.288	251.369
x5	-0.7231	123.478	-0.006	0.995	-242.735	241.289
x6	1.5962	64.833	0.025	0.980	-125.475	128.668
x7	-1.4437	30.268	-0.048	0.962	-60.768	57.880
x8	0.7432	35.295	0.021	0.983	-68.435	69.921
x9	2.4598	104.890	0.023	0.981	-203.121	208.041
x10	-0.3915	43.320	-0.009	0.993	-85.297	84.513
x11	0.7134	86.018	0.008	0.993	-167.878	169.305
x12	-0.7760	82.758	-0.009	0.993	-162.979	161.427
x13	0.4047	26.193	0.015	0.988	-50.932	51.742
x14	0.2386	28.401	0.008	0.993	-55.426	55.903
x15	11.8560	330.609	0.036	0.971	-636.125	659.838
x16	-1.8487	132.386	-0.014	0.989	-261.321	257.624
x17	-5.7389	167.384	-0.034	0.973	-333.806	322.328
x18	-3.4516	135.440	-0.025	0.980	-268.909	262.006
x19	-0.3535	14.827	-0.024	0.981	-29.413	28.706
x20	1.8497	104.655	0.018	0.986	-203.270	206.969
x21	-4.3567	88.373	-0.049	0.961	-177.564	168.851
x22	-1.4647	104.504	-0.014	0.989	-206.289	203.359
x23	0.4390	35.094	0.013	0.990	-68.345	69.223
x24	1.2981	52.323	0.025	0.980	-101.253	103.849
x25	-7.9668	158.703	-0.050	0.960	-319.019	303.086

Possibly complete quasi-separation: A fraction 0.94 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.

3 - RFE + p-value Classifier on Test Set

Testing the classifier on the test set. We transform the test set into the same number of time series pur training set was broke into based on the rsults of RFE using ℓ = 4.

Classifier was tested using the features extracted from the test set.
Compared the accuracy on the test set with the cross-validation accuracy we obtained previously.


def iterate_testing_files():
    colnames = ['time','avg_rss12','var_rss12','avg_rss13','var_rss13','avg_rss23','var_rss23']
    test_dict = {}
    for activity in os.listdir(test_dir):
        activity_path = os.path.join(test_dir, activity)
        if not os.path.isdir(activity_path):
            continue
        for fname in os.listdir(activity_path):
            if not fname.lower().endswith('.csv'):
                continue
            file_path = os.path.join(activity_path, fname)
            if file_path == bend4_special:
                df = col4_bending2()
            else:
                df = pd.read_csv(file_path,comment='#',header=None, names=colnames, engine='python', on_bad_lines='skip')
            key = f"{activity}_{fname.replace('.csv','')}"
            test_dict[key] = df
    return test_dict

test_dataframes = iterate_testing_files()

raw_dfs_test = []
y_test       = []
for key, df in test_dataframes.items():
    raw_dfs_test.append(df)
    y_test.append(1 if key.startswith('bending') else 0)
y_test = np.array(y_test, dtype=int)

test_accuracy = final_model.score(raw_dfs_test, y_test)
print(f"Test set accuracy: {test_accuracy:.3f}")

from sklearn.metrics import confusion_matrix
y_pred_test = final_model.predict(raw_dfs_test)
cm_test = confusion_matrix(y_test, y_pred_test, labels=[0,1])
print("Confusion matrix (test):")
print(cm_test)

print(f"Mean outer‐CV accuracy was {np.mean(outer_scores):.3f}")
print(f"Test accuracy is           {test_accuracy:.3f}")

Metric	Value
Test accuracy	1.000
Mean outer-CV accuracy	0.971

Confusion Matrix	Predicted 0	Predicted 1
Actual 0	15	0
Actual 1	0	4

6 - Observations

Peformance:

The test accuracy is very similar to the cross-validation accuracy.

Well-seperated classes?

The classes are not not well separated; this is why we had to use the stratified split to deal with the class imbalance and reduce instability in the logistic regression parameter calculations.

7 - Imbalance + Case Control

There are 15 positive anf 4 negative samples in the test set. and 60 positive(bending), while only 9 negative in the training set.
we mst undersample the majority class (non-bending) down to the size of the minority (bending)

Given the above, we train a logistic regression model on a case-controlled training set to see whether we can get better results.

raw_dfs_train = []   
y = []   
for key, df in dataframes.items():
    raw_dfs_train.append(df)  
    y.append(1 if key.startswith('bending') else 0)
y = np.array(y, dtype=int)

print(f"Number of DataFrames in raw_dfs: {len(raw_dfs_train)}")
print("Columns in raw_dfs[0]:", list(raw_dfs_train[0].columns))

raw_dfs_test = []
y_test       = []
for key, df in test_dataframes.items():
    raw_dfs_test.append(df)
    y_test.append(1 if key.startswith('bending') else 0)
y_test = np.array(y_test, dtype=int)

Number of DataFrames in raw_dfs: 69
Columns in raw_dfs[0]: ['time', 'avg_rss12', 'var_rss12', 'avg_rss13', 'var_rss13', 'avg_rss23', 'var_rss23']

from sklearn.utils import resample
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
import numpy as np
import matplotlib.pyplot as plt

featurizer = final_model.named_steps['featurize']
selector = final_model.named_steps['rfe']
X_train_full = featurizer.transform(raw_dfs_train)
mask = selector.support_

X_train_sel  = X_train_full[:, mask]
X_test_full  = featurizer.transform(raw_dfs_test)
X_test_sel = X_test_full[:, mask]

X_maj = X_train_sel[y==0]
y_maj = y[y==0]
X_min = X_train_sel[y==1]
y_min = y[y==1]

X_maj_down, y_maj_down = resample(X_maj, y_maj, replace=False,n_samples=len(y_min),random_state=42)
X_bal = np.vstack((X_maj_down, X_min))
y_bal = np.concatenate((y_maj_down, y_min))

clf = LogisticRegression(solver='liblinear')
clf.fit(X_bal, y_bal)


y_test_prob = clf.predict_proba(X_test_sel)[:,1]
y_test_pred = clf.predict(X_test_sel)

cm = confusion_matrix(y_test, y_test_pred, labels=[0,1])
print("Confusion matrix (test, case–control):")
print(cm)
fpr, tpr, _ = roc_curve(y_test, y_test_prob)
auc_score  = roc_auc_score(y_test, y_test_prob)
plt.figure(figsize=(5,5))
plt.plot(fpr, tpr, label=f"AUC = {auc_score:.3f}")
plt.plot([0,1],[0,1],'--',label="chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve (Test, case–control)")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()

Confusion matrix (test, case–control):
[[15  0]
 [ 0  4]]

y_bal_pred = clf.predict(X_bal)
y_bal_prob = clf.predict_proba(X_bal)[:, 1]

cm_bal = confusion_matrix(y_bal, y_bal_pred, labels=[0,1])
print("Confusion matrix (balanced train):")
print(cm_bal)
fpr_bal, tpr_bal, _ = roc_curve(y_bal, y_bal_prob)
auc_bal = roc_auc_score(y_bal, y_bal_prob)

plt.figure(figsize=(5,5))
plt.plot(fpr_bal, tpr_bal, label=f"AUC = {auc_bal:.3f}")
plt.plot([0,1], [0,1], '--', label="chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve (Balanced Training Set)")
plt.legend(loc="lower right")
plt.tight_layout()
plt.show()

Confusion matrix (balanced train):
[[9 0]
 [0 9]]

Binary Classification Using L1-penalized logistic regression

1 - Time Series Segmentation and L1 Binary Regression Model

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import accuracy_score


pipe_l1 = Pipeline([
    ('featurize', TimeSeriesFeaturizer()),
    ('logreg', LogisticRegressionCV(penalty='l1', solver='liblinear', cv=5,scoring='accuracy', n_jobs=-1, max_iter=50))
])


param_grid = {'featurize__n_splits': list(range(1, 21)),}
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

grid_l1 = GridSearchCV(
    pipe_l1,
    param_grid=param_grid,
    cv=outer_cv,          
    scoring='accuracy',
    n_jobs=-1,
    refit=True
)


grid_l1.fit(raw_dfs_train, y)

best_l = grid_l1.best_params_['featurize__n_splits']
best_C = grid_l1.best_estimator_.named_steps['logreg'].C_[0]

print(f"Best number of splits ℓ: {best_l}")
print(f"Best inverse‐penalty C:   {best_C:.2e}")
print(f"Outer‐CV accuracy:        {grid_l1.best_score_:.3f}")

X_test_feats = grid_l1.best_estimator_.named_steps['featurize'].transform(raw_dfs_test)
mask = grid_l1.best_estimator_.named_steps['logreg'].coef_.ravel() != 0
X_test_sel = X_test_feats[:, mask]

# Predict & report accuracy
y_pred_test  = grid_l1.best_estimator_.named_steps['logreg'].predict(X_test_feats)
test_acc  = accuracy_score(y_test, y_pred_test)
print(f"Test accuracy (L1 model): {test_acc:.3f}")

Best number of splits ℓ: 1

Best inverse‐penalty C: 3.59e-01

Outer‐CV accuracy: 0.986

Test accuracy (L1 model): 1.000

Overall Comparison of Binary Classifiers

P-value + RFE often achieves perfect separation on the training set as notes with the AUC being close to 1 but tends to overfit to the trainign data in thise case. The likelihood function becomes unbounded due to perfect separation, and the p-values are shown as 1 – counterintuitive to model coeff being statistically significant.

L1 was way easier to implement because it handles the feature seelction automatically and most libraris has build in support for l1-regression. The p values are harder to calclate and require R.

Multiclass Classifier

we build an L1- penalized multinomial regression model to classify all activities.

1 - Time Series Segmentation + Multinomial Regression Model

from sklearn.preprocessing import LabelEncoder

raw_dfs_train, y_train = [], []
for key, df in dataframes.items():
    raw_dfs_train.append(df)
    activity = key.split('_dataset')[0]
    y_train.append(activity)

raw_dfs_test, y_test = [], []
for key, df in test_dataframes.items():
    raw_dfs_test.append(df)
    activity = key.split('_dataset')[0]
    y_test.append(activity)

le = LabelEncoder().fit(y_train)  
y_train_enc = le.transform(y_train)
y_test_enc  = le.transform(y_test)
classes     = le.classes_     
n_classes   = len(classes)

pipe_multi = Pipeline([
    ('featurize', TimeSeriesFeaturizer()), 
    ('logreg',   LogisticRegressionCV(solver='saga', multi_class='multinomial', cv=5,scoring='accuracy',n_jobs=-1, max_iter = 2000))
])

outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
param_grid = {'featurize__n_splits': list(range(1,21))}

grid_multi = GridSearchCV(
    pipe_multi,
    param_grid=param_grid,
    cv=outer_cv,   
    scoring='accuracy',
    n_jobs=-1,
    refit=True, 
)

grid_multi.fit(raw_dfs_train, y_train_enc)

best_l  = grid_multi.best_params_['featurize__n_splits']
best_C  = grid_multi.best_estimator_.named_steps['logreg'].C_[0]
cv_accuracy  = grid_multi.best_score_

print(f"Best ℓ = {best_l}, best C = {best_C:.2e}, outer‐CV acc = {cv_accuracy:.3f}")

Best ℓ = 1,

Best C = 2.78e+00,

Outer‐CV acc = 0.798

2 - ROC and Confustion Matrix

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
test_acc = grid_multi.score(raw_dfs_test, y_test_enc)
print(f"Test‐set accuracy (multiclass): {test_acc:.3f}")
y_pred_test = grid_multi.predict(raw_dfs_test)
cm = confusion_matrix(y_test_enc, y_pred_test, labels=range(n_classes))
disp = ConfusionMatrixDisplay(cm, display_labels=classes)
disp.plot(cmap="Blues", xticks_rotation="vertical")

Test‐set accuracy (multiclass): 0.789

from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.preprocessing import label_binarize
y_test_bin = label_binarize(y_test_enc, classes=range(n_classes))
y_score   = grid_multi.predict_proba(raw_dfs_test)

import matplotlib.pyplot as plt

fpr, tpr, roc_auc = {}, {}, {}
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    roc_auc[i] = roc_auc_score(y_test_bin[:, i], y_score[:, i])

plt.figure(figsize=(6,6))
for i, cls in enumerate(classes):
    plt.plot(fpr[i], tpr[i], label=f"{cls} (AUC={roc_auc[i]:.2f})")
plt.plot([0,1],[0,1],'k--',label="chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Multiclass OVR ROC Curves")
plt.legend(loc="lower right", fontsize="small")
plt.tight_layout()
plt.show()

Gaussian Naive Bayes versus Multinomial Naive Bayes

1 - Time Series Segmentation and Model

from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import ConfusionMatrixDisplay

pipe_gnb = Pipeline([
    ('featurize', TimeSeriesFeaturizer()), 
    ('gnb',       GaussianNB())
])
pipe_mnb = Pipeline([
    ('featurize', TimeSeriesFeaturizer()), 
    ('mnb',       MultinomialNB())
])

param_grid = {'featurize__n_splits': list(range(1,21))}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

grid_gnb = GridSearchCV(pipe_gnb, param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
grid_mnb = GridSearchCV(pipe_mnb, param_grid, cv=cv, scoring='accuracy', n_jobs=-1)

grid_gnb.fit(raw_dfs_train, y)
grid_mnb.fit(raw_dfs_train, y)

best_l_gnb = grid_gnb.best_params_['featurize__n_splits']
cv_acc_gnb = grid_gnb.best_score_
best_l_mnb = grid_mnb.best_params_['featurize__n_splits']
cv_acc_mnb = grid_mnb.best_score_

print(f"GNB: best ℓ = {best_l_gnb}, CV acc = {cv_acc_gnb:.3f}")
print(f"MNB: best ℓ = {best_l_mnb}, CV acc = {cv_acc_mnb:.3f}")

best_gnb = grid_gnb.best_estimator_
best_mnb = grid_mnb.best_estimator_
test_acc_gnb = best_gnb.score(raw_dfs_test, y_test)
test_acc_mnb = best_mnb.score(raw_dfs_test, y_test)

print(f"GNB test accuracy = {test_acc_gnb:.3f}")
print(f"MNB test accuracy = {test_acc_mnb:.3f}")

for name, clf in [('GaussianNB', best_gnb), ('MultinomialNB', best_mnb)]:
    y_pred = clf.predict(raw_dfs_test)
    cm = confusion_matrix(y_test, y_pred, labels=[0,1])
    disp = ConfusionMatrixDisplay(cm, display_labels=['other','bending'])
    disp.plot(cmap='Blues')
    plt.title(f"{name} (ℓ={clf.named_steps['featurize'].n_splits}) CM")
    plt.show()

GNB: best ℓ = 18, CV acc = 0.841
MNB: best ℓ = 1, CV acc = 0.927
GNB test accuracy = 0.684
MNB test accuracy = 1.000

Gaussian Naive Bayes with PCA Reduction

using the previosuly computed best_ℓ for the Naive Bayes Gaussian classifier, we select the best number of features (p-value) using PCA components via cross-validation
Gaussian Bayes assumes independent features, and PCA can help reduce correlation between features

raw_dfs_train = []   
y = []   
for key, df in dataframes.items():
    raw_dfs_train.append(df)  
    y.append(1 if key.startswith('bending') else 0)
y = np.array(y, dtype=int)

print(f"Number of DataFrames in raw_dfs: {len(raw_dfs_train)}")
print("Columns in raw_dfs[0]:", list(raw_dfs_train[0].columns))

raw_dfs_test = []
y_test       = []
for key, df in test_dataframes.items():
    raw_dfs_test.append(df)
    y_test.append(1 if key.startswith('bending') else 0)
y_test = np.array(y_test, dtype=int)

Number of DataFrames in raw_dfs: 69
Columns in raw_dfs[0]: ['time', 'avg_rss12', 'var_rss12', 'avg_rss13', 'var_rss13', 'avg_rss23', 'var_rss23']

from sklearn.decomposition import PCA

pipe = Pipeline([
    ('featurize', TimeSeriesFeaturizer(n_splits=best_l)),
    ('pca',       PCA()),           
    ('nb',        GaussianNB())
])

max_p = min(len(raw_dfs_train), 42 * best_l)
param_grid = {'pca__n_components': list(range(2, max_p+1, 2))}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
grid = GridSearchCV(pipe, param_grid, cv=cv, scoring='accuracy', n_jobs=-1, refit=True)
grid.fit(raw_dfs_train, y_train_enc)

best_p = grid.best_params_['pca__n_components']
cv_acc_pca = grid.best_score_
print(f"Best ℓ = {best_l}, Best # PCs = {best_p}, CV accuracy = {cv_acc_pca:.3f}")
test_acc_pca = grid.score(raw_dfs_test, y_test_enc)
print(f"Test‐set accuracy (PCA + NB): {test_acc_pca:.3f}")

X_train_feats = grid.best_estimator_.named_steps['featurize'].transform(raw_dfs_train)
pca = grid.best_estimator_.named_steps['pca']
X2 = pca.transform(X_train_feats)[:, :2]

plt.figure(figsize=(6,5))
for cls in range(len(classes)):
    mask = (y_train_enc == cls)
    plt.scatter(X2[mask,0], X2[mask,1], alpha=0.7, label=classes[cls])
plt.xlabel("PC1"); plt.ylabel("PC2")
plt.title(f"PCA scatter (ℓ={best_l}, p={best_p})")
plt.legend(loc="best", fontsize="small")
plt.tight_layout()
plt.show()

y_pred_test = grid.predict(raw_dfs_test)
cm = confusion_matrix(y_test_enc, y_pred_test, labels=range(len(classes)))
disp = ConfusionMatrixDisplay(cm, display_labels=classes)
disp.plot(cmap="Blues", xticks_rotation="vertical")
plt.title("Confusion Matrix (PCA + NB)")
plt.tight_layout()
plt.show()

Best ℓ = 1

Best # PCs = 8

CV accuracy = 0.868

Test‐set accuracy (PCA + NB): 0.842

from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, roc_auc_score
import numpy as np

y_test_bin = label_binarize(y_test_enc, classes=np.arange(len(classes)))
y_score   = grid.predict_proba(raw_dfs_test)

plt.figure(figsize=(6,6))
for i, cls in enumerate(classes):
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_score[:, i])
    auc = roc_auc_score(y_test_bin[:, i], y_score[:, i])
    plt.plot(fpr, tpr, label=f"{cls} (AUC={auc:.2f})")

fpr_micro, tpr_micro, _ = roc_curve(y_test_bin.ravel(), y_score.ravel())
auc_micro = roc_auc_score(y_test_bin, y_score, average="micro")
plt.plot(fpr_micro, tpr_micro, '--', color='gray', label=f"micro-avg (AUC={auc_micro:.2f})")
plt.plot([0,1], [0,1], 'k--', label="chance")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title(f"One-vs-Rest ROC (ℓ={best_l}, p={best_p})")
plt.legend(loc="lower right", fontsize="small")
plt.tight_layout()
plt.show()

Summary of Multiclass Classification Models

The Multinomial Naive Bayes Classifier is better for multi-class classification since its AUC per class is better than boththe Gaussian Naive Bayes as well as logistic regression with L1 regularization