forked from ShengranHu/ADAS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
76 lines (56 loc) · 2.62 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import random
import string
from collections import namedtuple
import numpy as np
# Record type for one multiple-choice item: the question text, four answer
# choices, and `correct_index` (presumably the position of the correct choice;
# the index base is not shown in this file -- confirm against callers).
Example = namedtuple('Example', ['question', 'choice1', 'choice2', 'choice3', 'choice4', 'correct_index'])
# Prompt template for a four-option multiple-choice question. The placeholders
# {Question}, {A}, {B}, {C} and {D} are filled in via str.format (see
# format_multichoice_question). The trailing .strip() removes the leading and
# trailing whitespace that the triple-quoted literal would otherwise carry.
QUERY_TEMPLATE_MULTICHOICE = """
Answer the following multiple choice question.
{Question}
(A) {A}
(B) {B}
(C) {C}
(D) {D}
""".strip()
def format_multichoice_question(row):
    """
    Render one multiple-choice question as prompt text.

    Args:
    - row (mapping): Unpacked as keyword arguments into the template; it must
      provide the keys Question, A, B, C and D (extra keys are ignored).

    Returns:
    - str: QUERY_TEMPLATE_MULTICHOICE with its placeholders filled from row.
    """
    template = QUERY_TEMPLATE_MULTICHOICE
    prompt = template.format(**row)
    return prompt
def random_id(length=4):
    """
    Generate a random alphanumeric identifier.

    Args:
    - length (int): Number of characters to generate.

    Returns:
    - str: `length` characters drawn with replacement from the ASCII letters
      (upper and lower case) and the digits 0-9.
    """
    # Candidate symbols: a-z, A-Z, then 0-9 (order matters for seeded runs).
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
def bootstrap_confidence_interval(data, num_bootstrap_samples=100000, confidence_level=0.95):
    """
    Calculate the bootstrap confidence interval for the mean of 1D accuracy data.
    Also returns the median of the bootstrap means.

    Args:
    - data (list or array of float): 1D list or array of data points.
    - num_bootstrap_samples (int): Number of bootstrap samples.
    - confidence_level (float): The desired confidence level (e.g., 0.95 for 95%).

    Returns:
    - str: Formatted string with the confidence interval (labelled with the
      requested confidence level) and the median of the bootstrap means, both
      as percentages with one decimal place.
    """
    # Convert data to a numpy array for easier manipulation
    data = np.array(data)
    sample_size = len(data)

    # Resample with replacement and record the mean of each bootstrap sample.
    # One np.random.choice call per sample keeps the order of random draws the
    # same as a plain loop, so seeded runs stay reproducible.
    bootstrap_means = np.array([
        np.mean(np.random.choice(data, size=sample_size, replace=True))
        for _ in range(num_bootstrap_samples)
    ])

    # Split the excluded probability mass evenly between the two tails
    lower_percentile = (1.0 - confidence_level) / 2.0
    upper_percentile = 1.0 - lower_percentile
    ci_lower = np.percentile(bootstrap_means, lower_percentile * 100)
    ci_upper = np.percentile(bootstrap_means, upper_percentile * 100)

    # Compute the median of the bootstrap means
    median = np.median(bootstrap_means)

    # Convert to percentages for display
    ci_lower_percent = ci_lower * 100
    ci_upper_percent = ci_upper * 100
    median_percent = median * 100

    # Derive the label from the requested level instead of hard-coding "95%";
    # the 'g' format drops trailing zeros, so 0.95 -> "95" and 0.975 -> "97.5".
    level_label = f"{confidence_level * 100:g}%"

    # Return the formatted string with confidence interval and median
    return (
        f"{level_label} Bootstrap Confidence Interval: "
        f"({ci_lower_percent:.1f}%, {ci_upper_percent:.1f}%), Median: {median_percent:.1f}%"
    )