Skip to content

[Dataset]Add GAIA Datasets #2051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions dataset-index.yml
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,12 @@
paper: https://aclanthology.org/D19-1632.pdf
configpath: opencompass/configs/datasets/flores/flores_gen.py
configpath_llmjudge: ''
- gaia:
name: GAIA
category: Tool Utilization
paper: https://arxiv.org/abs/2311.12983
configpath: opencompass/configs/datasets/GAIA/gaia_gen.py
configpath_llmjudge: ''
- game24:
name: Game24
category: Math
Expand Down
52 changes: 52 additions & 0 deletions opencompass/configs/datasets/GAIA/gaia_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import GAIADataset
from opencompass.utils.text_postprocessors import first_capital_postprocess

gaia_reader_cfg = dict(
input_columns=['question', 'file_path'],
output_column='answerKey',
test_split='test')

gaia_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
'''You are a general AI assistant. I will ask you a question. Report your thoughts, and
finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
list of numbers and/or strings.
If you are asked for a number, don’t use comma to write your number neither use units such as $ or
percent sign unless specified otherwise.
If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the
digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element
to be put in the list is a number or a string.\nGAIA Question: {question}\nFile Path: {file_path}\n'''
),
]),
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer),
)
gaia_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_role='BOT',
pred_postprocessor=dict(type=first_capital_postprocess),
)

gaia_datasets = [
dict(
abbr='gaia-validation',
type=GAIADataset,
path='opencompass/gaia',
local_mode=False,
reader_cfg=gaia_reader_cfg,
infer_cfg=gaia_infer_cfg,
eval_cfg=gaia_eval_cfg,
)
]
1 change: 1 addition & 0 deletions opencompass/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from .eprstmt import * # noqa: F401, F403
from .FinanceIQ import * # noqa: F401, F403
from .flores import * # noqa: F401, F403
from .gaia import * # noqa: F401, F403
from .game24 import * # noqa: F401, F403
from .gaokao_math import * # noqa: F401, F403
from .GaokaoBench import * # noqa: F401, F403
Expand Down
60 changes: 60 additions & 0 deletions opencompass/datasets/gaia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import json
import os
from os import environ

from datasets import Dataset

from opencompass.registry import LOAD_DATASET
from opencompass.utils.datasets_info import DATASETS_MAPPING

from .base import BaseDataset


@LOAD_DATASET.register_module()
class GAIADataset(BaseDataset):

@staticmethod
def load(path, local_mode: bool = False):
rows = []
if environ.get('DATASET_SOURCE') == 'HF':
from datasets import load_dataset
try:
hf_id = DATASETS_MAPPING[path]['hf_id']
# 因为ModelScope的GAIA数据集读取存在问题,所以从huggingface读取
ds = load_dataset(hf_id, '2023_all', split='validation')
rows = []
for item in ds:
rows.append({
'question': item['Question'],
'answerKey': item['Final answer'],
'file_path': item['file_path'],
'file_name': item['file_name'],
'level': item['Level']
})
except Exception as e:
print(f'Error loading local file: {e}')
else:
# 从本地读取
compass_data_cache = os.environ.get('COMPASS_DATA_CACHE')
local_path = DATASETS_MAPPING[path]['local']
local_path = os.path.join(compass_data_cache, local_path)
with open(local_path, 'r', encoding='utf-8') as f:
for line in f:
line = json.loads(line.strip())
# 构建数据行
row_data = {
'question': line['Question'],
'answerKey': line['Final answer'],
'file_name': line['file_name'],
'level': line['Level']
}

# 只有在file_name不为空时设置file_path
if line['file_name']:
file_name = line['file_name']
row_data['file_path'] = f'{local_path}/{file_name}'
else:
row_data['file_path'] = ''

rows.append(row_data)
return Dataset.from_list(rows)
6 changes: 6 additions & 0 deletions opencompass/utils/datasets_info.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
DATASETS_MAPPING = {
# GAIA Datasets
"opencompass/gaia": {
"ms_id": None,
"hf_id": "gaia-benchmark/GAIA",
"local": "./data/gaia/2023/validation/metadata.jsonl",
},
# ADVGLUE Datasets
"opencompass/advglue-dev": {
"ms_id": None,
Expand Down