Memory Bench Pioneer · v2.0.0

Medium Risk

"Be one of the first to benchmark your agent's memory — and help shape how AI remembers. Runs a peer-review-grade evaluation suite (LLM-as-judge, nDCG/MAP/MRR with 95% CIs, ablation studies) against your live memory system and submits anonymized results to the ENGRAM/CORTEX research papers. Your data stays private; only aggregate stats leave. Works with agent-memory-ultimate. For the bold few who believe AI memory should be measured, not guessed at."

HDAC: H:3 D:4 A:3 C:1

⚠️ Hazard Flags

EXEC FS_READ_WORKSPACE FS_READ_USER FS_WRITE_WORKSPACE FS_WRITE_USER FS_DELETE NET_EGRESS_ANY CREDS_ENV CREDS_FILES ACT_POST_PUBLIC PI_WEB

📋 Capabilities

Execution

✅ Shell execution
❌ Code execution
❌ Install dependencies
❌ Persistence
Privilege: user

Filesystem

✅ Read workspace
✅ Write workspace
✅ Read home
✅ Write home
❌ Read system
✅ Delete

Network

Egress: any
❌ Ingress

Credentials

✅ Environment vars
✅ Credential files
❌ Browser data
❌ Keychain

Actions

❌ Send messages
✅ Post public
❌ Purchase
❌ Transfer money
❌ Deploy
❌ Delete external

🔒 Containment

Level: maximum

Required:

APPROVE_POST: Can post publicly
SANDBOX_CONTAINER: Code execution capability

Recommended:

LOG_ACTIONS: Audit trail for all actions

⚡ Risks

Unauthorized tool use: MCP_SYS_FILE_DESTRUCTION, MCP_SYS_CRITICAL_ACCESS (severity: critical)

Mitigation: Remove destructive file operations.

Social engineering indicators: SOCIAL_ENG_VAGUE_DESCRIPTION (severity: low)

Mitigation: Provide clear, detailed description of skill functionality

Data exfiltration patterns: DATA_EXFIL_NETWORK_REQUESTS, DATA_EXFIL_ENV_VARS (severity: medium)

Mitigation: Ensure network access is necessary and documented

Want a deeper analysis?

This report was generated by static analysis. Get an LLM-powered deep review with behavioral reasoning and attack surface mapping.

🧠 Deep Analysis — $5.00

🚨 Incident Response

Kill switch: Stop the agent process

Containment: Review logs for unexpected actions

Recovery: Depends on skill capabilities

📄 Raw SSDS JSON (click to expand)

{
  "meta": {
    "document_id": "ssds:auto:memory-bench-pioneer:2.0.0",
    "ssds_version": "0.2.0",
    "scanner_version": "0.4.0+fe6fd9123d50",
    "created_at": "2026-03-05T14:31:07.457Z",
    "created_by": {
      "agent": "safeagentskills-cli/generate-ssds"
    },
    "language": "en",
    "notes": "Auto-generated SSDS. Manual review recommended."
  },
  "skill": {
    "name": "Memory Bench Pioneer",
    "version": "2.0.0",
    "format": "agent_skill",
    "description": "\"Be one of the first to benchmark your agent's memory — and help shape how AI remembers. Runs a peer-review-grade evaluation suite (LLM-as-judge, nDCG/MAP/MRR with 95% CIs, ablation studies) against your live memory system and submits anonymized results to the ENGRAM/CORTEX research papers. Your data stays private; only aggregate stats leave. Works with agent-memory-ultimate. For the bold few who believe AI memory should be measured, not guessed at.\"",
    "publisher": "unknown",
    "source": {
      "channel": "local"
    },
    "artifact": {
      "sha256": "8542a8fdb53a9959b8431f307af6379479108be9c4ea3e0ee3c9c53ca26ed511",
      "hash_method": "files_sorted"
    }
  },
  "capabilities": {
    "execution": {
      "can_exec_shell": true,
      "can_exec_code": false,
      "privilege_level": "user",
      "can_install_deps": false,
      "can_persist": false
    },
    "filesystem": {
      "reads_workspace": true,
      "reads_user_home": true,
      "reads_system": false,
      "writes_workspace": true,
      "writes_user_home": true,
      "writes_system": false,
      "can_delete": true
    },
    "network": {
      "egress": "any",
      "ingress": false
    },
    "credentials": {
      "reads_env_vars": true,
      "reads_credential_files": true,
      "reads_browser_data": false,
      "reads_keychain": false
    },
    "services": [],
    "actions": {
      "can_send_messages": false,
      "can_post_public": true,
      "can_purchase": false,
      "can_transfer_money": false,
      "can_deploy": false,
      "can_delete_external": false
    },
    "prompt_injection_surfaces": [
      "web"
    ],
    "content_types": [
      "general"
    ]
  },
  "hazards": {
    "hdac": {
      "H": 3,
      "D": 4,
      "A": 3,
      "C": 1
    },
    "flags": [
      "EXEC",
      "FS_READ_WORKSPACE",
      "FS_READ_USER",
      "FS_WRITE_WORKSPACE",
      "FS_WRITE_USER",
      "FS_DELETE",
      "NET_EGRESS_ANY",
      "CREDS_ENV",
      "CREDS_FILES",
      "ACT_POST_PUBLIC",
      "PI_WEB"
    ],
    "custom_flags": [
      {
        "code": "FILE_DELETE",
        "name": "File Deletion",
        "description": "Can delete files in: scripts/submit.sh"
      },
      {
        "code": "TOOL_ABUSE",
        "name": "Unauthorized Tool Use",
        "description": "MCP_SYS_FILE_DESTRUCTION, MCP_SYS_CRITICAL_ACCESS: File destruction or wiping commands"
      },
      {
        "code": "SOCIAL_ENGINEERING",
        "name": "Social Engineering Risk",
        "description": "SOCIAL_ENG_VAGUE_DESCRIPTION: Skill description is too vague or missing"
      },
      {
        "code": "DATA_EXFILTRATION",
        "name": "Data Exfiltration Risk",
        "description": "DATA_EXFIL_NETWORK_REQUESTS, DATA_EXFIL_ENV_VARS: HTTP client library imports that enable external communication"
      }
    ],
    "confidence": {
      "level": "medium",
      "basis": [
        "static_analysis"
      ],
      "notes": "Detected 4 security patterns (5 vendored rule hits). Review recommended."
    },
    "rationale": {
      "H": "H3: Shell/code execution or persistence detected",
      "D": "D4: Critical: Credential theft or data exfiltration",
      "A": "A3: External actions (deploy/message/post)",
      "C": "C1: General content"
    }
  },
  "containment": {
    "level": "maximum",
    "required": [
      {
        "control": "APPROVE_POST",
        "reason": "Can post publicly"
      },
      {
        "control": "SANDBOX_CONTAINER",
        "reason": "Code execution capability"
      }
    ],
    "recommended": [
      {
        "control": "LOG_ACTIONS",
        "reason": "Audit trail for all actions"
      }
    ],
    "uncontained_risk": "Risk level depends on manual review of actual capabilities."
  },
  "risks": {
    "risks": [
      {
        "risk": "Unauthorized tool use: MCP_SYS_FILE_DESTRUCTION, MCP_SYS_CRITICAL_ACCESS",
        "severity": "critical",
        "mitigation": "Remove destructive file operations."
      },
      {
        "risk": "Social engineering indicators: SOCIAL_ENG_VAGUE_DESCRIPTION",
        "severity": "low",
        "mitigation": "Provide clear, detailed description of skill functionality"
      },
      {
        "risk": "Data exfiltration patterns: DATA_EXFIL_NETWORK_REQUESTS, DATA_EXFIL_ENV_VARS",
        "severity": "medium",
        "mitigation": "Ensure network access is necessary and documented"
      }
    ],
    "limitations": [
      "Static analysis only - runtime behavior not verified"
    ]
  },
  "incident_response": {
    "kill_switch": [
      "Stop the agent process"
    ],
    "containment": [
      "Review logs for unexpected actions"
    ],
    "recovery": [
      "Depends on skill capabilities"
    ]
  },
  "evidence": [
    {
      "evidence_id": "EV:file-1",
      "type": "file_excerpt",
      "title": "scripts/collect.py",
      "file_path": "scripts/collect.py"
    },
    {
      "evidence_id": "EV:file-2",
      "type": "file_excerpt",
      "title": "scripts/rate.py",
      "file_path": "scripts/rate.py"
    },
    {
      "evidence_id": "EV:file-3",
      "type": "file_excerpt",
      "title": "scripts/submit.sh",
      "file_path": "scripts/submit.sh"
    },
    {
      "evidence_id": "EV:file-4",
      "type": "file_excerpt",
      "title": "scripts/test_metrics.py",
      "file_path": "scripts/test_metrics.py"
    },
    {
      "evidence_id": "EV:file-5",
      "type": "file_excerpt",
      "title": "SKILL.md",
      "file_path": "SKILL.md"
    },
    {
      "evidence_id": "EV:file-6",
      "type": "file_excerpt",
      "title": "_meta.json",
      "file_path": "_meta.json"
    },
    {
      "evidence_id": "EV:cisco-1",
      "type": "file_excerpt",
      "title": "DATA_EXFIL_NETWORK_REQUESTS [MEDIUM] scripts/rate.py:164: import urllib.request",
      "file_path": "scripts/rate.py"
    },
    {
      "evidence_id": "EV:cisco-2",
      "type": "file_excerpt",
      "title": "DATA_EXFIL_ENV_VARS [MEDIUM] scripts/rate.py:404: api_key = args.api_key or os.environ.get(\"OPENAI_API_KEY\", \"\")",
      "file_path": "scripts/rate.py"
    },
    {
      "evidence_id": "EV:cisco-3",
      "type": "file_excerpt",
      "title": "MCP_SYS_FILE_DESTRUCTION [CRITICAL] scripts/submit.sh:61: trap 'rm -rf \"$TMPDIR\"' EXIT",
      "file_path": "scripts/submit.sh"
    },
    {
      "evidence_id": "EV:cisco-4",
      "type": "file_excerpt",
      "title": "MCP_SYS_CRITICAL_ACCESS [HIGH] scripts/submit.sh:1: #!/usr/bin/env bash",
      "file_path": "scripts/submit.sh"
    },
    {
      "evidence_id": "EV:cisco-5",
      "type": "file_excerpt",
      "title": "SOCIAL_ENG_VAGUE_DESCRIPTION [LOW] SKILL.md:1: ---",
      "file_path": "SKILL.md"
    }
  ]
}