mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 05:48:36 +08:00
feat(evaluation): Added INSTRUCTION_TEMPLATE_NAME to run_infer.py in swe_bench (#10270)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev> Co-authored-by: mamoodi <mamoodiha@gmail.com>
This commit is contained in:
parent
bef6b1afee
commit
74ba21bad0
@ -93,6 +93,9 @@ export USE_HINT_TEXT=true # Ignore this if you are not sure.
|
||||
|
||||
# Specify a condenser configuration for memory management (default: NoOpCondenser)
|
||||
export EVAL_CONDENSER=summarizer_for_eval # Name of the condenser config group in config.toml
|
||||
|
||||
# Specify the instruction prompt template file name
|
||||
export INSTRUCTION_TEMPLATE_NAME=swe_custom.j2 # Name of the file in the swe_bench/prompts folder.
|
||||
```
|
||||
|
||||
Let's say you'd like to run 10 instances using `llm.eval_gpt4_1106_preview` and CodeActAgent,
|
||||
|
||||
@ -108,7 +108,9 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio
|
||||
llm_model = metadata.llm_config.model
|
||||
|
||||
# Determine the template file based on mode and LLM
|
||||
if mode.startswith('swt'):
|
||||
if metadata.instruction_template_name:
|
||||
template_name = metadata.instruction_template_name
|
||||
elif mode.startswith('swt'):
|
||||
template_name = 'swt.j2'
|
||||
elif mode == 'swe':
|
||||
if 'gpt-4.1' in llm_model:
|
||||
@ -122,6 +124,7 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio
|
||||
logger.error(f'Unexpected evaluation mode: {mode}. Falling back to default.')
|
||||
template_name = 'swe_default.j2'
|
||||
|
||||
logger.debug(f'Using instruction template file: {template_name}')
|
||||
# Set up Jinja2 environment
|
||||
# Assuming templates are in 'evaluation/benchmarks/swe_bench/prompts' relative to this script
|
||||
prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts')
|
||||
|
||||
@ -53,6 +53,7 @@ class EvalMetadata(BaseModel):
|
||||
data_split: str | None = None
|
||||
details: dict[str, Any] | None = None
|
||||
condenser_config: CondenserConfig | None = None
|
||||
instruction_template_name: str | None = None
|
||||
|
||||
|
||||
class EvalOutput(BaseModel):
|
||||
@ -205,6 +206,7 @@ def make_metadata(
|
||||
condenser_config=condenser_config
|
||||
if condenser_config
|
||||
else NoOpCondenserConfig(),
|
||||
instruction_template_name=os.environ.get('INSTRUCTION_TEMPLATE_NAME'),
|
||||
)
|
||||
metadata_json = metadata.model_dump_json()
|
||||
logger.info(f'Metadata: {metadata_json}')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user