-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmisc.py
123 lines (94 loc) · 4.02 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import sys
import json
import numpy as np
from . import lm_consistency as LM
from . import im_consistency as IM
from .chair import *
def hallucination_file_to_dict(hallucinated_json):
    """Load a hallucination JSON file and index its sentences by image id.

    Args:
        hallucinated_json: Path to a JSON file whose top-level object has a
            "sentences" list; every entry must carry an "image_id" key.

    Returns:
        A dict mapping each "image_id" to its full sentence entry. If
        several entries share an image id, the last one wins.

    Raises:
        KeyError: If "sentences" or an entry's "image_id" is missing.
    """
    # A context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(hallucinated_json) as f:
        hallucination_data = json.load(f)
    return {h["image_id"]: h for h in hallucination_data["sentences"]}
def get_sentence_scores_from_hallucination_file(hallucination_file):
    """Return the "overall_metrics" entry of a hallucination JSON file.

    Args:
        hallucination_file: Path to a JSON file whose top-level object has
            an "overall_metrics" key.

    Returns:
        Whatever value is stored under "overall_metrics".

    Raises:
        KeyError: If the file has no "overall_metrics" key.
    """
    # Close the file as soon as it has been parsed.
    with open(hallucination_file) as f:
        hallucination = json.load(f)
    return hallucination["overall_metrics"]
def get_consistency(tag, annotation_path, robust=False):
    """Return the CHAIRi score plus LM and IM consistency for a model tag.

    If "./output/hallucination/hallucinated_words_<tag>.json" does not exist
    yet, CHAIR is first run on "generated_sentences/<tag>.json" and the
    result is handed to ``save_hallucinated_words``.

    Args:
        tag: Identifier used to build the caption and hallucination paths.
        annotation_path: Passed to ``CHAIR`` when the hallucination file has
            to be computed.
        robust: Forwarded to ``LM.read_vocab`` and ``IM.get_label_dicts``.

    Returns:
        A tuple ``(chair_i, lm_consistency, im_consistency)`` where
        ``chair_i`` is the "CHAIRi" entry of the file's overall metrics.
    """
    # Load hallucination dict. If it does not exist, make it!
    hallucinated_json = "./output/hallucination/hallucinated_words_%s.json" % tag
    if not os.path.exists(hallucinated_json):
        print("Computing hallucination file for tag %s" % tag)
        # Format the caption path once and reuse it for every CHAIR step.
        sentence_file = "generated_sentences/%s.json" % tag
        _, imids, _ = load_generated_captions(sentence_file)
        evaluator = CHAIR(imids, annotation_path)
        evaluator.get_annotations()
        cap_dict = evaluator.compute_chair(sentence_file)
        # NOTE(review): presumably this writes `hallucinated_json`; the
        # output path is chosen inside save_hallucinated_words -- confirm.
        save_hallucinated_words(sentence_file, cap_dict)

    hallucination_by_imid = hallucination_file_to_dict(hallucinated_json)

    # LM consistency
    word_to_idx = LM.read_vocab(robust)
    blank_lm_predictions = LM.get_blank_prediction_path(tag)
    lm_consistency = LM.get_lm_consistency(
        hallucination_by_imid, blank_lm_predictions, word_to_idx, quiet=True
    )

    # IM consistency
    predicted_label_dict, gt_label_dict = IM.get_label_dicts(robust)
    im_consistency = IM.get_im_consistency(
        hallucination_by_imid, predicted_label_dict, gt_label_dict
    )

    # get chair scores for completeness
    scores = get_sentence_scores_from_hallucination_file(hallucinated_json)
    return scores["CHAIRi"], lm_consistency, im_consistency
def score_correlation(cap_file, quiet=False):
    """Correlate per-sentence CIDEr, METEOR and SPICE with ``1 - CHAIRi``.

    Args:
        cap_file: Path to a JSON file with a "sentences" list. Each entry
            needs a "metrics" dict holding "METEOR", "CIDEr", "CHAIRi" and
            a SPICE F-score under ["SPICE"]["All"]["f"].
        quiet: When false, the three correlations are printed.

    Returns:
        A tuple ``(cider_corr, meteor_corr, spice_corr)`` of Pearson
        correlation coefficients as computed by ``np.corrcoef``.
    """
    # Close the file as soon as it has been parsed.
    with open(cap_file) as f:
        caps = json.load(f)

    metrics = [cap["metrics"] for cap in caps["sentences"]]
    meteors = np.array([info["METEOR"] for info in metrics])
    ciders = np.array([info["CIDEr"] for info in metrics])
    spices = np.array([info["SPICE"]["All"]["f"] for info in metrics])
    # Correlate against 1 - CHAIRi, so a higher value means less
    # hallucination.
    hallucinations = np.array([1 - info["CHAIRi"] for info in metrics])

    # corrcoef returns a 2x2 matrix; the off-diagonal entry is the
    # correlation between the two inputs.
    cider_corr = np.corrcoef(ciders, hallucinations)[1, 0]
    meteor_corr = np.corrcoef(meteors, hallucinations)[1, 0]
    spice_corr = np.corrcoef(spices, hallucinations)[1, 0]

    if not quiet:
        print("CIDEr and hallucination: %0.03f" % cider_corr)
        print("METEOR and hallucination: %0.03f" % meteor_corr)
        print("SPICE and hallucination: %0.03f" % spice_corr)

    return cider_corr, meteor_corr, spice_corr
def predictive_metrics(hallucinated_json_1, hallucinated_json_2):
    """
    Can sentence metrics predict hallucination? In section 3.4 of paper.

    Sentences of each file are grouped into ten bins by SPICE F-score
    (scaled by 100, bin width 10) and the mean "CHAIRs" value per bin is
    computed; an empty bin contributes 0.

    Args:
        hallucinated_json_1: Path to the first hallucination JSON file.
        hallucinated_json_2: Path to the second hallucination JSON file.
            Both need a "sentences" list whose entries have a "metrics"
            dict with "CHAIRs" and ["SPICE"]["All"]["f"].

    Returns:
        A list of ten values: per-bin mean CHAIRs of file 1 minus that of
        file 2.
    """

    def load(path):
        # Close each file as soon as it has been parsed.
        with open(path) as f:
            return json.load(f)

    def bin_by_spice(data):
        # bin by spice scores
        scored = [
            (cap["metrics"]["SPICE"]["All"]["f"] * 100, cap["metrics"]["CHAIRs"])
            for cap in data["sentences"]
        ]

        hist = []
        for lower in range(0, 100, 10):
            upper = lower + 10
            # Bins are half-open [lower, upper), except the last one, which
            # is closed on the right so a perfect SPICE score (exactly 100
            # after scaling) is counted instead of silently dropped.
            is_last = upper == 100
            in_bin = [
                chair
                for pct, chair in scored
                if lower <= pct < upper or (is_last and pct == upper)
            ]
            hist.append(np.mean(in_bin) if in_bin else 0)
        return hist

    score_histogram_1 = bin_by_spice(load(hallucinated_json_1))
    score_histogram_2 = bin_by_spice(load(hallucinated_json_2))

    return list(np.array(score_histogram_1) - np.array(score_histogram_2))