-
Notifications
You must be signed in to change notification settings - Fork 1
/
statAnal.py
executable file
·118 lines (94 loc) · 4.57 KB
/
statAnal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
"""
statAnal.py
Test for statistical differences between
two total-runtime experiments:
without Numba (control) vs. with Numba (test)
SHSH <sandy.herho@email.ucr.edu>
30/12/23
"""
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu, wilcoxon
import matplotlib.pyplot as plt
import seaborn as sns
# Apply matplotlib's "bmh" style sheet globally, so every figure drawn below uses it.
plt.style.use("bmh")
def process_data(file_path):
    """Load the runtime table and return its two samples as NumPy arrays.

    Reads the CSV at ``file_path``, prints the table's summary statistics
    rounded to three decimals, and returns the "control" and "test" columns
    as a ``(control, test)`` tuple.
    """
    frame = pd.read_csv(file_path)

    # Show the descriptive statistics before handing the raw samples back.
    summary = frame.describe().round(3)
    print(summary)

    return tuple(frame[column].to_numpy() for column in ("control", "test"))
def print_test_results(test_name, test_statistic, p_value, *, alpha=0.01):
    """Print a statistical test's statistic, p-value, and significance verdict.

    Args:
        test_name: Human-readable name of the test; it appears in the header
            line and in the verdict sentence.
        test_statistic: Test statistic to report.
        p_value: P-value to report and compare against ``alpha``.
        alpha: Significance level. The difference is reported as significant
            only when ``p_value < alpha``. Defaults to 0.01, the threshold
            that was previously hard-coded here.
    """
    print(f"{test_name}:")
    print(f"Test statistic: {test_statistic}")
    print(f"P-value: {p_value}")
    if p_value < alpha:
        print(f"The {test_name} indicates a significant difference between the control and test groups.")
    else:
        print(f"The {test_name} does not indicate a significant difference between the control and test groups.")
    # "\n" plus print's own newline leaves two blank lines between reports.
    print("\n")
def plot_and_save_histogram(control, test, filename):
    """Draw overlaid runtime histograms for both samples and save the figure.

    Both samples are plotted on the current axes with 15 bins and a KDE
    curve, labelled 'without Numba' (control) and 'with Numba' (test). The
    figure is written to ``filename`` at 400 dpi and then closed.
    """
    ax = plt.gca()

    # Same histogram settings for both samples; only the data and label vary.
    for runtimes, legend_label in ((control, 'without Numba'), (test, 'with Numba')):
        sns.histplot(runtimes, bins=15, kde=True, label=legend_label, ax=ax)

    ax.set_xlabel("Total Runtime [seconds]", fontsize=16)
    ax.set_ylabel("Count", fontsize=16)
    ax.legend()

    plt.savefig(filename, dpi=400)
    plt.close()
def plot_and_save_boxplot(control, test, filename):
    """Draw side-by-side runtime boxplots for both samples and save the figure.

    The samples are concatenated and grouped by a per-observation label, the
    two category ticks are relabelled 'without Numba' / 'with Numba', and the
    figure is written to ``filename`` at 400 dpi and then closed.
    """
    # One group label per observation, in the same order as the stacked values.
    group_labels = ['Control'] * len(control) + ['Test'] * len(test)
    runtimes = np.concatenate([control, test])
    sns.boxplot(x=group_labels, y=runtimes)

    # Replace the internal group names with the labels shown to the reader.
    plt.xticks([0, 1], ['without Numba', 'with Numba'])
    plt.xlabel("Treatment", fontsize=16)
    plt.ylabel("Total Runtime [seconds]", fontsize=16)

    plt.savefig(filename, dpi=400)
    plt.close()
def plot_and_save_violinplot(control, test, filename):
# Plot and save violinplot
vio = pd.DataFrame({'Value': np.concatenate([control, test]),
'Treatment': ['without Numba'] * len(control) + ['with Numba'] * len(test)})