diff --git a/bigcodebench/data/bigcodebench.py b/bigcodebench/data/bigcodebench.py index b113944..1c5c02e 100644 --- a/bigcodebench/data/bigcodebench.py +++ b/bigcodebench/data/bigcodebench.py @@ -16,7 +16,7 @@ BIGCODEBENCH_HF = "bigcode/bigcodebench" BIGCODEBENCH_VERSION = "v0.1.0_hf" -def _ready_bigcodebench_path(subset="full", version="default") -> str: +def _ready_bigcodebench_path(subset="full", version="default", offline=False) -> str: if BIGCODEBENCH_OVERRIDE_PATH: return BIGCODEBENCH_OVERRIDE_PATH @@ -29,17 +29,17 @@ def _ready_bigcodebench_path(subset="full", version="default") -> str: try: dataset = load_dataset(BIGCODEBENCH_HF+extra, split=BIGCODEBENCH_VERSION) - make_cache(url, dataset, path) + make_cache(url, dataset, path, offline=offline) except: if os.path.exists(path): os.remove(path) - make_cache(url, None, path, gh=True) + make_cache(url, None, path, gh=True, offline=offline) return path def get_bigcodebench( - err_incomplete=True, subset="full", version="default" + err_incomplete=True, subset="full", version="default", offline=False ) -> Dict[str, Dict]: """Get BigCodeBench from BigCode's github repo and return as a list of parsed dicts. @@ -56,19 +56,19 @@ def get_bigcodebench( """ # Check if open eval file exists in CACHE_DIR data_path = _ready_bigcodebench_path( - subset=subset, version=version + subset=subset, version=version, offline=offline ) data = {task["task_id"]: task for task in stream_jsonl(data_path)} if err_incomplete: completeness_check("BigCodeBench", data) return data -def get_bigcodebench_hash(subset="full", version="default") -> str: +def get_bigcodebench_hash(subset="full", version="default", offline=False) -> str: """Get the hash of BigCodeBench. 
Returns: str: The hash of BigCodeBench """ - data_path = _ready_bigcodebench_path(subset, version="default") + data_path = _ready_bigcodebench_path(subset, version=version, offline=offline) with open(data_path, "rb") as f: data = f.read() return hashlib.md5(data).hexdigest() diff --git a/bigcodebench/data/constant.py b/bigcodebench/data/constant.py new file mode 100644 index 0000000..1002409 --- /dev/null +++ b/bigcodebench/data/constant.py @@ -0,0 +1,520 @@ + +OFFLINE_IDS = ( + '0', + '1', + '2', + '3', + '4', + '5', + '6', + '7', + '8', + '9', + '10', + '11', + '12', + '14', + '15', + '16', + '17', + '18', + '19', + '20', + '21', + '22', + '23', + '24', + '25', + '26', + '27', + '29', + '30', + '31', + '33', + '34', + '35', + '36', + '37', + '38', + '39', + '40', + '41', + '42', + '43', + '44', + '45', + '46', + '47', + '48', + '49', + '50', + '51', + '52', + '53', + '54', + '55', + '56', + '57', + '58', + '60', + '61', + '62', + '63', + '64', + '65', + '66', + '67', + '68', + '69', + '70', + '71', + '72', + '73', + '75', + '84', + '85', + '86', + '87', + '88', + '89', + '90', + '91', + '92', + '93', + '94', + '95', + '96', + '97', + '98', + '99', + '100', + '102', + '103', + '104', + '105', + '106', + '107', + '108', + '109', + '110', + '111', + '112', + '113', + '114', + '115', + '116', + '117', + '118', + '119', + '120', + '121', + '122', + '123', + '124', + '125', + '126', + '127', + '128', + '130', + '131', + '132', + '133', + '134', + '135', + '136', + '137', + '138', + '139', + '140', + '141', + '142', + '143', + '148', + '149', + '150', + '151', + '152', + '153', + '155', + '156', + '157', + '159', + '160', + '161', + '162', + '163', + '164', + '165', + '166', + '167', + '168', + '169', + '171', + '172', + '173', + '174', + '175', + '177', + '178', + '179', + '180', + '182', + '184', + '187', + '190', + '191', + '193', + '194', + '196', + '197', + '198', + '199', + '200', + '201', + '202', + '204', + '205', + '206', + '208', + '209', + '210', + '212', + 
'213', + '214', + '216', + '217', + '218', + '219', + '220', + '221', + '222', + '223', + '224', + '225', + '226', + '227', + '228', + '229', + '230', + '231', + '232', + '233', + '234', + '235', + '236', + '237', + '238', + '239', + '240', + '241', + '242', + '243', + '244', + '245', + '246', + '247', + '248', + '249', + '250', + '251', + '252', + '253', + '254', + '255', + '256', + '257', + '258', + '259', + '260', + '261', + '262', + '263', + '264', + '265', + '266', + '267', + '268', + '269', + '270', + '271', + '275', + '276', + '277', + '278', + '279', + '280', + '281', + '282', + '283', + '286', + '287', + '288', + '289', + '291', + '292', + '293', + '294', + '295', + '296', + '297', + '298', + '299', + '300', + '301', + '302', + '303', + '304', + '305', + '306', + '307', + '308', + '309', + '310', + '311', + '312', + '313', + '316', + '317', + '318', + '319', + '320', + '321', + '322', + '323', + '324', + '325', + '326', + '327', + '328', + '329', + '330', + '331', + '333', + '334', + '335', + '336', + '337', + '338', + '340', + '341', + '342', + '343', + '344', + '345', + '346', + '347', + '348', + '349', + '350', + '351', + '352', + '353', + '354', + '355', + '356', + '357', + '358', + '359', + '360', + '361', + '362', + '363', + '364', + '365', + '366', + '367', + '368', + '369', + '370', + '371', + '372', + '373', + '374', + '375', + '377', + '378', + '379', + '380', + '381', + '382', + '383', + '384', + '385', + '386', + '387', + '388', + '389', + '391', + '392', + '393', + '394', + '395', + '396', + '398', + '399', + '400', + '403', + '404', + '405', + '406', + '407', + '408', + '409', + '410', + '411', + '412', + '414', + '415', + '416', + '417', + '418', + '419', + '420', + '422', + '423', + '424', + '425', + '426', + '427', + '428', + '429', + '430', + '431', + '432', + '433', + '434', + '435', + '436', + '437', + '438', + '439', + '440', + '441', + '442', + '443', + '444', + '445', + '446', + '447', + '448', + '449', + '450', + '451', + '452', + 
'453', + '454', + '455', + '456', + '457', + '458', + '459', + '460', + '461', + '462', + '463', + '464', + '465', + '466', + '467', + '468', + '469', + '470', + '471', + '472', + '473', + '474', + '475', + '476', + '477', + '478', + '479', + '480', + '481', + '482', + '483', + '484', + '485', + '486', + '487', + '488', + '489', + '491', + '492', + '493', + '494', + '495', + '496', + '497', + '499', + '500', + '501', + '502', + '503', + '504', + '505', + '506', + '507', + '508', + '509', + '510', + '511', + '512', + '513', + '514', + '515', + '516', + '517', + '518', + '519', + '520', + '521', + '522', + '523', + '524', + '525', + '526', + '527', + '528', + '529', + '530', + '531', + '532', + '533', + '534', + '535', + '536', + '537', + '538', + '539', + '540', + '541', + '542', + '543', + '544', + '545', + '546', + '547', + '548', + '549', + '550', + '551', + '552', + '553', + '554', + '555', + '556', + '557', + '558', + '559', + '560', + '561', + '562', + '563', + '564', + '565', + '566', + '567', + '568', + '569', + '570' +) \ No newline at end of file diff --git a/bigcodebench/data/utils.py b/bigcodebench/data/utils.py index fa91abe..8a2a524 100644 --- a/bigcodebench/data/utils.py +++ b/bigcodebench/data/utils.py @@ -8,6 +8,8 @@ import wget from appdirs import user_cache_dir +from .constant import OFFLINE_IDS + CACHE_DIR = user_cache_dir("bigcodebench") @@ -18,7 +20,7 @@ def get_dataset_metadata(version: str, subset: str="full"): return url, cache_path -def make_cache(gzip_url, hf_data, cache_path, gh=False): +def make_cache(gzip_url, hf_data, cache_path, gh=False, offline=False): # Check if open eval file exists in CACHE_DIR if not os.path.exists(cache_path): @@ -32,14 +34,25 @@ def make_cache(gzip_url, hf_data, cache_path, gh=False): with gzip.open(gz_path, "rb") as f: data = f.read().decode("utf-8") + # If offline, then parse the json then check the task_id + if offline: + json_data = [json.loads(line) for line in data.split('\n') if line] + json_data = 
[item for item in json_data if item.get("task_id", "").split("/")[-1] in OFFLINE_IDS] + # create CACHE_DIR if not exists if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR) # Write the original open eval file to CACHE_DIR with open(cache_path, "w") as f: - f.write(data) + if offline: + for item in json_data: + f.write(json.dumps(item) + '\n') + else: + f.write(data) else: + if offline: + hf_data = hf_data.filter(lambda instance: instance["task_id"].split("/")[-1] in OFFLINE_IDS) hf_data.to_json(cache_path) diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py index 8cc91d8..d2c001b 100644 --- a/bigcodebench/evaluate.py +++ b/bigcodebench/evaluate.py @@ -125,8 +125,8 @@ def evaluate(flags): assert flags.samples.endswith(".jsonl") result_path = flags.samples.replace(".jsonl", f"_{extra}eval_results.json") - problems = get_bigcodebench(subset=flags.subset) - dataset_hash = get_bigcodebench_hash(subset=flags.subset) + problems = get_bigcodebench(subset=flags.subset, offline=flags.offline) + dataset_hash = get_bigcodebench_hash(subset=flags.subset, offline=flags.offline) if not flags.no_gt: expected_time = get_groundtruth(n_workers, problems, dataset_hash, flags.check_gt_only, flags.max_as_limit, flags.max_data_limit, flags.max_stack_limit) @@ -328,6 +328,7 @@ def main(): parser.add_argument( "--no-gt", action="store_true", help="Check the groundtruth" ) + parser.add_argument("--offline", action="store_true") args = parser.parse_args() evaluate(args) diff --git a/bigcodebench/generate.py b/bigcodebench/generate.py index 679300c..de4b49b 100644 --- a/bigcodebench/generate.py +++ b/bigcodebench/generate.py @@ -23,6 +23,7 @@ def codegen( n_samples=1, id_range=None, resume=True, + offline=False, ): with Progress( TextColumn(f"BigCodeBench--{split.capitalize()} ({subset.capitalize()}) •" + "[progress.percentage]{task.percentage:>3.0f}%"), @@ -32,7 +33,7 @@ def codegen( TimeElapsedColumn(), ) as p: - dataset = get_bigcodebench(subset=subset) + 
dataset = get_bigcodebench(subset=subset, offline=offline) if model.is_direct_completion() and split == "instruct": raise Exception("Base model does not support direct completion for instruct tasks") @@ -121,6 +122,7 @@ def main(): parser.add_argument("--trust_remote_code", action="store_true") parser.add_argument("--tokenizer_legacy", action="store_true") parser.add_argument("--tokenizer_name", default=None, type=str) + parser.add_argument("--offline", action="store_true") args = parser.parse_args() @@ -164,7 +166,8 @@ def main(): strip_newlines=args.strip_newlines, n_samples=args.n_samples, resume=args.resume, - id_range=args.id_range + id_range=args.id_range, + offline=args.offline )