Skip to content

WebArena-Verified

Task

ServiceNow/webarena-verified

WebArenaVerifiedTask¶

Attributes¶

Attributes:

Name	Type	Description
`sites`	`tuple[WebArenaSite, ...]`	List of platforms involved (e.g., gitlab, shopping_admin).
`task_id`	`int`	Unique identifier for the task.
`intent_template_id`	`int`	Groups tasks from the same template.
`start_urls`	`tuple[NonEmptyStr, ...]`	Initial URLs where the task begins.
`intent`	`NonEmptyStr`	Natural language description of what to accomplish.
`intent_template`	`NonEmptyStr`	Template with placeholders (e.g., 'Get top-{{n}} products').
`instantiation_dict`	`SerializableMappingProxyType`	Values used to fill template placeholders.
`eval`	`tuple[EvaluatorCfg, ...]`	Array of evaluator configurations.
`revision`	`Annotated[int, Field(ge=1)]`	Integer revision number tracking task changes (minimum 1).

Source code in src/webarena_verified/types/task.py

class WebArenaVerifiedTask(BaseModel):
    """Pydantic model for a WebArena Verified task.

    Instances are immutable (``frozen=True``) and reject unknown fields
    (``extra="forbid"``). After field validation, ``eval`` is checked to
    contain at least one ``AgentResponseEvaluatorCfg``; the convenience
    properties below (``expected_agent_response``, ``expected_action``,
    ``is_*_task``) all read from the first such entry.
    """

    sites: tuple[WebArenaSite, ...]
    """List of platforms involved (e.g., gitlab, shopping_admin)."""

    task_id: int
    """Unique identifier for the task."""

    intent_template_id: int
    """Groups tasks from the same template."""

    start_urls: tuple[NonEmptyStr, ...]
    """Initial URLs where the task begins."""

    intent: NonEmptyStr
    """Natural language description of what to accomplish."""

    eval: tuple[EvaluatorCfg, ...]
    """Array of evaluator configurations."""

    intent_template: NonEmptyStr
    """Template with placeholders (e.g., 'Get top-{{n}} products')."""

    instantiation_dict: SerializableMappingProxyType
    """Values used to fill template placeholders."""

    revision: Annotated[int, Field(ge=1)]
    """Integer revision number tracking task changes (minimum 1)."""

    # NOTE(review): `arbitrary_types_allowed` is presumably required by one of
    # the project-defined field types above -- confirm before removing it.
    model_config = ConfigDict(
        frozen=True,
        extra="forbid",
        arbitrary_types_allowed=True,
    )

    @model_validator(mode="after")
    def check_eval_has_agent_response(self) -> Self:
        """Validate that eval contains at least one AgentResponseEvaluatorCfg.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If no item in ``eval`` is an ``AgentResponseEvaluatorCfg``.
        """
        if not any(isinstance(item, AgentResponseEvaluatorCfg) for item in self.eval):
            raise ValueError("eval must contain at least one AgentResponseEval item")
        return self

    @property
    def expected_agent_response(self) -> FinalAgentResponse:
        """Return ``expected`` from the first AgentResponseEvaluatorCfg in eval.

        Raises:
            ValueError: If ``eval`` holds no ``AgentResponseEvaluatorCfg``.
        """
        for item in self.eval:
            if isinstance(item, AgentResponseEvaluatorCfg):
                return item.expected
        # Defensive: `check_eval_has_agent_response` guarantees a match for
        # validated instances, so this is only reachable if validation was
        # bypassed.
        raise ValueError("No AgentResponseEval found in eval")

    @property
    def expected_action(self) -> str:
        """Return the ``task_type`` of the expected agent response."""
        return self.expected_agent_response.task_type

    @property
    def network_event_evaluator_cfgs(self) -> tuple[NetworkEventEvaluatorCfg, ...]:
        """Return all NetworkEventEvaluatorCfg items in eval, in their original order."""
        return tuple(item for item in self.eval if isinstance(item, NetworkEventEvaluatorCfg))

    @property
    def is_navigate_task(self) -> bool:
        """Check if this is a navigate task."""
        return self.expected_agent_response.is_navigate

    @property
    def is_mutate_task(self) -> bool:
        """Check if this is a mutate task."""
        return self.expected_agent_response.is_mutate

    @property
    def is_retrieve_task(self) -> bool:
        """Check if this is a retrieve task."""
        return self.expected_agent_response.is_retrieve

    @property
    def sites_str(self) -> str:
        """Return the site values, sorted and joined with hyphens.

        Sorting makes the result independent of the order of ``sites``.
        """
        return "-".join(sorted(site.value for site in self.sites))

    def __str__(self) -> str:
        """Pretty print task with key information, one field per line."""
        return (
            f"WebArenaVerifiedTask(\n"
            f"  task_id={self.task_id},\n"
            f"  intent_template_id={self.intent_template_id},\n"
            f"  sites={list(self.sites)},\n"
            f"  intent={self.intent!r},\n"
            f"  start_urls={list(self.start_urls)},\n"
            f")"
        )

    def __repr__(self) -> str:
        """Single-line repr with task id, template id and the hyphen-joined sites."""
        return (
            f"WebArenaVerifiedTask(task_id={self.task_id}, "
            f"intent_template_id={self.intent_template_id}, "
            f"sites=[{self.sites_str}])"
        )

`sites` `instance-attribute` ¶

`task_id` `instance-attribute` ¶

`start_urls` `instance-attribute` ¶

`intent` `instance-attribute` ¶

`intent_template` `instance-attribute` ¶

`instantiation_dict` `instance-attribute` ¶

`eval` `instance-attribute` ¶

`revision` `instance-attribute` ¶

Example Task¶

{
  "task_id": 7,
  "intent_template_id": 79,
  "sites": ["map"],
  "start_urls": ["__MAP__"],
  "intent": "Get the name, state, and zip code of all international airports that are within a driving distance of 50 km to Carnegie Mellon University. Use \"name\" for the name, \"state\" for the state, and \"postcode\" for the postcode.",
  "intent_template": "Get the name, state, and zip code of all {{airport_type}} that are within a driving distance of {{radius}} to {{start}}. {{retrieved_data_format_spec}}.",
  "instantiation_dict": {
    "airport_type": "international airports",
    "start": "Carnegie Mellon University",
    "radius": "50 km",
    "retrieved_data_format_spec": "Use \"name\" for the name, \"state\" for the state, and \"postcode\" for the postcode"
  },
  "eval": [
    {
      "evaluator": "AgentResponseEvaluator",
      "ordered": false,
      "results_schema": {
        "type": "array",
        "items": {
          "type": "object",
          "properties": {
            "name": {"type": "string"},
            "state": {"type": "string"},
            "postcode": {"type": "string"}
          }
        }
      },
      "expected": {
        "task_type": "retrieve",
        "status": "SUCCESS",
        "retrieved_data": [
          {
            "name": "Pittsburgh International Airport",
            "state": "Pennsylvania",
            "postcode": "15231"
          }
        ]
      }
    }
  ],
  "revision": 2
}