AgentBench

· v1.0.0

Medium Risk

Benchmark your OpenClaw agent across 40 real-world tasks. Tests file creation, research, data analysis, multi-step workflows, memory, error handling, and tool efficiency. Not a coding benchmark — measures your agent setup and config.

H:3 D:4 A:2 C:1

⚠️ Hazard Flags

EXEC FS_READ_WORKSPACE FS_READ_USER FS_WRITE_WORKSPACE FS_WRITE_USER FS_WRITE_SYSTEM NET_EGRESS_ANY NET_INGRESS CREDS_ENV CREDS_BROWSER PI_WEB

📋 Capabilities

Execution

✅ Shell execution
❌ Code execution
❌ Install dependencies
❌ Persistence
Privilege: user

Filesystem

✅ Read workspace
✅ Write workspace
✅ Read home
✅ Write home
❌ Read system
✅ Write system
❌ Delete

Network

Egress: any
✅ Ingress

Credentials

✅ Environment vars
❌ Credential files
✅ Browser data
❌ Keychain

Actions

❌ send messages
❌ post public
❌ purchase
❌ transfer money
❌ deploy
❌ delete external

🔒 Containment

Level: maximum

Required:

SANDBOX_CONTAINER: Code execution capability

Recommended:

LOG_ACTIONS: Audit trail for all actions

⚡ Risks

Unauthorized tool use: MCP_SYS_CRITICAL_ACCESS, INSTRUCTED_GIT_CLONE_AND_BUILD (severity: high)

Mitigation: Avoid accessing system directories unless absolutely necessary.

Social engineering indicators: SOCIAL_ENG_VAGUE_DESCRIPTION (severity: low)

Mitigation: Provide a clear, detailed description of the skill's functionality.

Data exfiltration patterns: DATA_EXFIL_SENSITIVE_FILES (severity: high)

Mitigation: Do not access credential files or sensitive system files

Want a deeper analysis?

This report was generated by static analysis. Get an LLM-powered deep review with behavioral reasoning and attack surface mapping.

🧠 Deep Analysis — $5.00

🚨 Incident Response

Kill switch: Stop the agent process

Containment: Review logs for unexpected actions

Recovery: Depends on skill capabilities

📄 Raw SSDS JSON (click to expand)

{
  "meta": {
    "document_id": "ssds:auto:agentbench:1.0.0",
    "ssds_version": "0.2.0",
    "scanner_version": "0.4.0+fe6fd9123d50",
    "created_at": "2026-03-05T14:51:34.999Z",
    "created_by": {
      "agent": "safeagentskills-cli/generate-ssds"
    },
    "language": "en",
    "notes": "Auto-generated SSDS. Manual review recommended."
  },
  "skill": {
    "name": "AgentBench",
    "version": "1.0.0",
    "format": "agent_skill",
    "description": "Benchmark your OpenClaw agent across 40 real-world tasks. Tests file creation, research, data analysis, multi-step workflows, memory, error handling, and tool efficiency. Not a coding benchmark — measures your agent setup and config.",
    "publisher": "unknown",
    "source": {
      "channel": "local"
    },
    "artifact": {
      "sha256": "95a2277c0a10dc74c544b5936fd219b3735286aa2a6843727f83664fdcee6a84",
      "hash_method": "files_sorted"
    }
  },
  "capabilities": {
    "execution": {
      "can_exec_shell": true,
      "can_exec_code": false,
      "privilege_level": "user",
      "can_install_deps": false,
      "can_persist": false
    },
    "filesystem": {
      "reads_workspace": true,
      "reads_user_home": true,
      "reads_system": false,
      "writes_workspace": true,
      "writes_user_home": true,
      "writes_system": true,
      "can_delete": false
    },
    "network": {
      "egress": "any",
      "ingress": true
    },
    "credentials": {
      "reads_env_vars": true,
      "reads_credential_files": false,
      "reads_browser_data": true,
      "reads_keychain": false
    },
    "services": [
      {
        "service": "email",
        "operations": [
          "read",
          "write"
        ]
      }
    ],
    "actions": {
      "can_send_messages": false,
      "can_post_public": false,
      "can_purchase": false,
      "can_transfer_money": false,
      "can_deploy": false,
      "can_delete_external": false
    },
    "prompt_injection_surfaces": [
      "web"
    ],
    "content_types": [
      "general"
    ]
  },
  "hazards": {
    "hdac": {
      "H": 3,
      "D": 4,
      "A": 2,
      "C": 1
    },
    "flags": [
      "EXEC",
      "FS_READ_WORKSPACE",
      "FS_READ_USER",
      "FS_WRITE_WORKSPACE",
      "FS_WRITE_USER",
      "FS_WRITE_SYSTEM",
      "NET_EGRESS_ANY",
      "NET_INGRESS",
      "CREDS_ENV",
      "CREDS_BROWSER",
      "PI_WEB"
    ],
    "custom_flags": [
      {
        "code": "NET_INGRESS",
        "name": "Network Server",
        "description": "Listens for incoming connections in: tasks/error-handling/misleading-error-message/setup.sh, tasks/memory/interleaved-projects/setup.sh"
      },
      {
        "code": "TOOL_ABUSE",
        "name": "Unauthorized Tool Use",
        "description": "MCP_SYS_CRITICAL_ACCESS, INSTRUCTED_GIT_CLONE_AND_BUILD: Access to critical system directories"
      },
      {
        "code": "SOCIAL_ENGINEERING",
        "name": "Social Engineering Risk",
        "description": "SOCIAL_ENG_VAGUE_DESCRIPTION: Skill description is too vague or missing"
      },
      {
        "code": "DATA_EXFILTRATION",
        "name": "Data Exfiltration Risk",
        "description": "DATA_EXFIL_SENSITIVE_FILES: Accessing sensitive system or credential files"
      }
    ],
    "confidence": {
      "level": "medium",
      "basis": [
        "static_analysis"
      ],
      "notes": "Detected 4 security patterns (22 vendored rule hits). Review recommended."
    },
    "rationale": {
      "H": "H3: Shell/code execution or persistence detected",
      "D": "D4: Critical: Credential theft or data exfiltration",
      "A": "A2: Service integrations detected",
      "C": "C1: General content"
    }
  },
  "containment": {
    "level": "maximum",
    "required": [
      {
        "control": "SANDBOX_CONTAINER",
        "reason": "Code execution capability"
      }
    ],
    "recommended": [
      {
        "control": "LOG_ACTIONS",
        "reason": "Audit trail for all actions"
      }
    ],
    "uncontained_risk": "Risk level depends on manual review of actual capabilities."
  },
  "risks": {
    "risks": [
      {
        "risk": "Unauthorized tool use: MCP_SYS_CRITICAL_ACCESS, INSTRUCTED_GIT_CLONE_AND_BUILD",
        "severity": "high",
        "mitigation": "Avoid accessing system directories unless absolutely necessary."
      },
      {
        "risk": "Social engineering indicators: SOCIAL_ENG_VAGUE_DESCRIPTION",
        "severity": "low",
        "mitigation": "Provide clear, detailed description of skill functionality"
      },
      {
        "risk": "Data exfiltration patterns: DATA_EXFIL_SENSITIVE_FILES",
        "severity": "high",
        "mitigation": "Do not access credential files or sensitive system files"
      }
    ],
    "limitations": [
      "Static analysis only - runtime behavior not verified"
    ]
  },
  "incident_response": {
    "kill_switch": [
      "Stop the agent process"
    ],
    "containment": [
      "Review logs for unexpected actions"
    ],
    "recovery": [
      "Depends on skill capabilities"
    ]
  },
  "evidence": [
    {
      "evidence_id": "EV:file-1",
      "type": "file_excerpt",
      "title": "lib/metrics.sh",
      "file_path": "lib/metrics.sh"
    },
    {
      "evidence_id": "EV:file-2",
      "type": "file_excerpt",
      "title": "README.md",
      "file_path": "README.md"
    },
    {
      "evidence_id": "EV:file-3",
      "type": "file_excerpt",
      "title": "SKILL.md",
      "file_path": "SKILL.md"
    },
    {
      "evidence_id": "EV:file-4",
      "type": "file_excerpt",
      "title": "tasks/data-analysis/log-pattern-detection/setup.sh",
      "file_path": "tasks/data-analysis/log-pattern-detection/setup.sh"
    },
    {
      "evidence_id": "EV:file-5",
      "type": "file_excerpt",
      "title": "tasks/data-analysis/multi-format-reconciliation/setup.sh",
      "file_path": "tasks/data-analysis/multi-format-reconciliation/setup.sh"
    },
    {
      "evidence_id": "EV:file-6",
      "type": "file_excerpt",
      "title": "tasks/error-handling/cascading-failures/setup.sh",
      "file_path": "tasks/error-handling/cascading-failures/setup.sh"
    },
    {
      "evidence_id": "EV:file-7",
      "type": "file_excerpt",
      "title": "tasks/error-handling/misleading-error-message/setup.sh",
      "file_path": "tasks/error-handling/misleading-error-message/setup.sh"
    },
    {
      "evidence_id": "EV:file-8",
      "type": "file_excerpt",
      "title": "tasks/error-handling/partial-recovery/setup.sh",
      "file_path": "tasks/error-handling/partial-recovery/setup.sh"
    },
    {
      "evidence_id": "EV:file-9",
      "type": "file_excerpt",
      "title": "tasks/file-creation/migration-script/setup.sh",
      "file_path": "tasks/file-creation/migration-script/setup.sh"
    },
    {
      "evidence_id": "EV:file-10",
      "type": "file_excerpt",
      "title": "tasks/file-creation/project-scaffold/setup.sh",
      "file_path": "tasks/file-creation/project-scaffold/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-1",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] lib/metrics.sh:1: #!/usr/bin/env bash",
      "file_path": "lib/metrics.sh"
    },
    {
      "evidence_id": "EV:cisco-2",
      "type": "file_excerpt",
      "title": "INSTRUCTED_GIT_CLONE_AND_BUILD [MEDIUM] README.md:14: git clone https://github.com/agentbench/agentbench-openclaw.git ~/.openclaw/skil",
      "file_path": "README.md"
    },
    {
      "evidence_id": "EV:cisco-3",
      "type": "file_excerpt",
      "title": "SOCIAL_ENG_VAGUE_DESCRIPTION [LOW] SKILL.md:1: ---",
      "file_path": "SKILL.md"
    },
    {
      "evidence_id": "EV:cisco-4",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] tasks/data-analysis/log-pattern-detection/setup.sh:1: #!/usr/bin/env bash",
      "file_path": "tasks/data-analysis/log-pattern-detection/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-5",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] tasks/data-analysis/multi-format-reconciliation/setup.sh:1: #!/usr/bin/env bash",
      "file_path": "tasks/data-analysis/multi-format-reconciliation/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-6",
      "type": "file_excerpt",
      "title": "DATA_EXFIL_SENSITIVE_FILES [HIGH] tasks/error-handling/cascading-failures/setup.sh:73: with open(filepath, 'r') as f:",
      "file_path": "tasks/error-handling/cascading-failures/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-7",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] tasks/error-handling/cascading-failures/setup.sh:1: #!/usr/bin/env bash",
      "file_path": "tasks/error-handling/cascading-failures/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-8",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] tasks/error-handling/misleading-error-message/setup.sh:1: #!/usr/bin/env bash",
      "file_path": "tasks/error-handling/misleading-error-message/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-9",
      "type": "file_excerpt",
      "title": "DATA_EXFIL_SENSITIVE_FILES [HIGH] tasks/error-handling/partial-recovery/setup.sh:43: with open(filepath, 'r') as f:",
      "file_path": "tasks/error-handling/partial-recovery/setup.sh"
    },
    {
      "evidence_id": "EV:cisco-10",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] tasks/error-handling/partial-recovery/setup.sh:1: #!/usr/bin/env bash",
      "file_path": "tasks/error-handling/partial-recovery/setup.sh"
    }
  ]
}