Memory Bench Pioneer
· v2.0.0
"Be one of the first to benchmark your agent's memory — and help shape how AI remembers. Runs a peer-review-grade evaluation suite (LLM-as-judge, nDCG/MAP/MRR with 95% CIs, ablation studies) against your live memory system and submits anonymized results to the ENGRAM/CORTEX research papers. Your data stays private; only aggregate stats leave. Works with agent-memory-ultimate. For the bold few who believe AI memory should be measured, not guessed at."
⚠️ Hazard Flags
Capabilities
Execution
- ✅ Shell execution
- ❌ Code execution
- ❌ Install dependencies
- ❌ Persistence
- Privilege: user
Filesystem
- ✅ Read workspace
- ✅ Write workspace
- ✅ Read home
- ✅ Write home
- ❌ Read system
- ✅ Delete
Network
- Egress: any
- ❌ Ingress
Credentials
- ✅ Environment vars
- ✅ Credential files
- ❌ Browser data
- ❌ Keychain
Actions
Containment
Level: maximum
- APPROVE_POST: Can post publicly
- SANDBOX_CONTAINER: Code execution capability
- LOG_ACTIONS: Audit trail for all actions
⚡ Risks
Mitigation: Remove destructive file operations.
Mitigation: Provide clear, detailed description of skill functionality
Mitigation: Ensure network access is necessary and documented
Want a deeper analysis?
This report was generated by static analysis. Get an LLM-powered deep review with behavioral reasoning and attack surface mapping.
🧠 Deep Analysis — $5.00
🚨 Incident Response
Kill switch: Stop the agent process
Containment: Review logs for unexpected actions
Recovery: Depends on skill capabilities
Raw SSDS JSON (click to expand)
{
"meta": {
"document_id": "ssds:auto:memory-bench-pioneer:2.0.0",
"ssds_version": "0.2.0",
"scanner_version": "0.4.0+fe6fd9123d50",
"created_at": "2026-03-05T14:31:07.457Z",
"created_by": {
"agent": "safeagentskills-cli/generate-ssds"
},
"language": "en",
"notes": "Auto-generated SSDS. Manual review recommended."
},
"skill": {
"name": "Memory Bench Pioneer",
"version": "2.0.0",
"format": "agent_skill",
"description": "\"Be one of the first to benchmark your agent's memory — and help shape how AI remembers. Runs a peer-review-grade evaluation suite (LLM-as-judge, nDCG/MAP/MRR with 95% CIs, ablation studies) against your live memory system and submits anonymized results to the ENGRAM/CORTEX research papers. Your data stays private; only aggregate stats leave. Works with agent-memory-ultimate. For the bold few who believe AI memory should be measured, not guessed at.\"",
"publisher": "unknown",
"source": {
"channel": "local"
},
"artifact": {
"sha256": "8542a8fdb53a9959b8431f307af6379479108be9c4ea3e0ee3c9c53ca26ed511",
"hash_method": "files_sorted"
}
},
"capabilities": {
"execution": {
"can_exec_shell": true,
"can_exec_code": false,
"privilege_level": "user",
"can_install_deps": false,
"can_persist": false
},
"filesystem": {
"reads_workspace": true,
"reads_user_home": true,
"reads_system": false,
"writes_workspace": true,
"writes_user_home": true,
"writes_system": false,
"can_delete": true
},
"network": {
"egress": "any",
"ingress": false
},
"credentials": {
"reads_env_vars": true,
"reads_credential_files": true,
"reads_browser_data": false,
"reads_keychain": false
},
"services": [],
"actions": {
"can_send_messages": false,
"can_post_public": true,
"can_purchase": false,
"can_transfer_money": false,
"can_deploy": false,
"can_delete_external": false
},
"prompt_injection_surfaces": [
"web"
],
"content_types": [
"general"
]
},
"hazards": {
"hdac": {
"H": 3,
"D": 4,
"A": 3,
"C": 1
},
"flags": [
"EXEC",
"FS_READ_WORKSPACE",
"FS_READ_USER",
"FS_WRITE_WORKSPACE",
"FS_WRITE_USER",
"FS_DELETE",
"NET_EGRESS_ANY",
"CREDS_ENV",
"CREDS_FILES",
"ACT_POST_PUBLIC",
"PI_WEB"
],
"custom_flags": [
{
"code": "FILE_DELETE",
"name": "File Deletion",
"description": "Can delete files in: scripts/submit.sh"
},
{
"code": "TOOL_ABUSE",
"name": "Unauthorized Tool Use",
"description": "MCP_SYS_FILE_DESTRUCTION, MCP_SYS_CRITICAL_ACCESS: File destruction or wiping commands"
},
{
"code": "SOCIAL_ENGINEERING",
"name": "Social Engineering Risk",
"description": "SOCIAL_ENG_VAGUE_DESCRIPTION: Skill description is too vague or missing"
},
{
"code": "DATA_EXFILTRATION",
"name": "Data Exfiltration Risk",
"description": "DATA_EXFIL_NETWORK_REQUESTS, DATA_EXFIL_ENV_VARS: HTTP client library imports that enable external communication"
}
],
"confidence": {
"level": "medium",
"basis": [
"static_analysis"
],
"notes": "Detected 4 security patterns (5 vendored rule hits). Review recommended."
},
"rationale": {
"H": "H3: Shell/code execution or persistence detected",
"D": "D4: Critical: Credential theft or data exfiltration",
"A": "A3: External actions (deploy/message/post)",
"C": "C1: General content"
}
},
"containment": {
"level": "maximum",
"required": [
{
"control": "APPROVE_POST",
"reason": "Can post publicly"
},
{
"control": "SANDBOX_CONTAINER",
"reason": "Code execution capability"
}
],
"recommended": [
{
"control": "LOG_ACTIONS",
"reason": "Audit trail for all actions"
}
],
"uncontained_risk": "Risk level depends on manual review of actual capabilities."
},
"risks": {
"risks": [
{
"risk": "Unauthorized tool use: MCP_SYS_FILE_DESTRUCTION, MCP_SYS_CRITICAL_ACCESS",
"severity": "critical",
"mitigation": "Remove destructive file operations."
},
{
"risk": "Social engineering indicators: SOCIAL_ENG_VAGUE_DESCRIPTION",
"severity": "low",
"mitigation": "Provide clear, detailed description of skill functionality"
},
{
"risk": "Data exfiltration patterns: DATA_EXFIL_NETWORK_REQUESTS, DATA_EXFIL_ENV_VARS",
"severity": "medium",
"mitigation": "Ensure network access is necessary and documented"
}
],
"limitations": [
"Static analysis only - runtime behavior not verified"
]
},
"incident_response": {
"kill_switch": [
"Stop the agent process"
],
"containment": [
"Review logs for unexpected actions"
],
"recovery": [
"Depends on skill capabilities"
]
},
"evidence": [
{
"evidence_id": "EV:file-1",
"type": "file_excerpt",
"title": "scripts/collect.py",
"file_path": "scripts/collect.py"
},
{
"evidence_id": "EV:file-2",
"type": "file_excerpt",
"title": "scripts/rate.py",
"file_path": "scripts/rate.py"
},
{
"evidence_id": "EV:file-3",
"type": "file_excerpt",
"title": "scripts/submit.sh",
"file_path": "scripts/submit.sh"
},
{
"evidence_id": "EV:file-4",
"type": "file_excerpt",
"title": "scripts/test_metrics.py",
"file_path": "scripts/test_metrics.py"
},
{
"evidence_id": "EV:file-5",
"type": "file_excerpt",
"title": "SKILL.md",
"file_path": "SKILL.md"
},
{
"evidence_id": "EV:file-6",
"type": "file_excerpt",
"title": "_meta.json",
"file_path": "_meta.json"
},
{
"evidence_id": "EV:cisco-1",
"type": "file_excerpt",
"title": "DATA_EXFIL_NETWORK_REQUESTS [MEDIUM] scripts/rate.py:164: import urllib.request",
"file_path": "scripts/rate.py"
},
{
"evidence_id": "EV:cisco-2",
"type": "file_excerpt",
"title": "DATA_EXFIL_ENV_VARS [MEDIUM] scripts/rate.py:404: api_key = args.api_key or os.environ.get(\"OPENAI_API_KEY\", \"\")",
"file_path": "scripts/rate.py"
},
{
"evidence_id": "EV:cisco-3",
"type": "file_excerpt",
"title": "MCP_SYS_FILE_DESTRUCTION [CRITICAL] scripts/submit.sh:61: trap 'rm -rf \"$TMPDIR\"' EXIT",
"file_path": "scripts/submit.sh"
},
{
"evidence_id": "EV:cisco-4",
"type": "file_excerpt",
"title": "MCP_SYS_CRITICAL_ACCESS [HIGH] scripts/submit.sh:1: #!/usr/bin/env bash",
"file_path": "scripts/submit.sh"
},
{
"evidence_id": "EV:cisco-5",
"type": "file_excerpt",
"title": "SOCIAL_ENG_VAGUE_DESCRIPTION [LOW] SKILL.md:1: ---",
"file_path": "SKILL.md"
}
]
}