{
  "schema_version": "1.0",
  "artifact_scope": "public_summary",
  "disclosure": "This public artifact intentionally excludes dev-environment runtime details and working-tree provenance. Full benchmark artifact with runtime + provenance metadata remains available in the internal reviewer bundle.",
  "generated_at": "2026-05-18T13:21:42Z",
  "timestamp": "2026-05-18T13:21:42Z",
  "git": {
    "head": "979aa53b"
  },
  "runtime": {
    "python": "3.12.12"
  },
  "benchmark": {
    "type": "dated_in_repo_precision_recall_snapshot",
    "notes": [
      "This is a dated in-repo benchmark snapshot, not an external audit.",
      "The adversarial false-positive slices use case-level zero-false-positive precision because their gold labels are intentionally empty.",
      "The Saudi-name slices measure curated PERSON recall only and do not claim production-wide coverage."
    ],
    "corpora": [
      {
        "id": "policy_code",
        "kind": "false_positive",
        "gate_metric": "precision",
        "gate_min": 0.95,
        "case_count": 30
      },
      {
        "id": "operational_text",
        "kind": "false_positive",
        "gate_metric": "precision",
        "gate_min": 0.95,
        "case_count": 30
      },
      {
        "id": "structured_noise",
        "kind": "false_positive",
        "gate_metric": "precision",
        "gate_min": 0.95,
        "case_count": 30
      },
      {
        "id": "document_bodies",
        "kind": "false_positive",
        "gate_metric": "precision",
        "gate_min": 0.95,
        "case_count": 40
      },
      {
        "id": "bilingual_boilerplate",
        "kind": "false_positive",
        "gate_metric": "precision",
        "gate_min": 0.95,
        "case_count": 30
      },
      {
        "id": "numeric_ambiguity",
        "kind": "false_positive",
        "gate_metric": "precision",
        "gate_min": 0.95,
        "case_count": 30
      },
      {
        "id": "tribal_names",
        "kind": "name_recall",
        "gate_metric": "recall",
        "gate_min": 0.85,
        "case_count": 50
      },
      {
        "id": "female_names",
        "kind": "name_recall",
        "gate_metric": "recall",
        "gate_min": 0.85,
        "case_count": 40
      },
      {
        "id": "modern_names",
        "kind": "name_recall",
        "gate_metric": "recall",
        "gate_min": 0.85,
        "case_count": 30
      },
      {
        "id": "honorifics",
        "kind": "name_recall",
        "gate_metric": "recall",
        "gate_min": 0.85,
        "case_count": 20
      },
      {
        "id": "diaspora_variants",
        "kind": "name_recall",
        "gate_metric": "recall",
        "gate_min": 0.85,
        "case_count": 20
      }
    ],
    "gates": {
      "false_positive_case_precision_min": 0.95,
      "name_recall_min": 0.85
    }
  },
  "overall_pass": true,
  "summary": {
    "total_cases": 350,
    "total_expected_entities": 170,
    "total_false_positives": 2,
    "total_false_negatives": 0,
    "failed_slices": []
  },
  "entity_metrics": {
    "LOCATION": {
      "tp": 10,
      "fp": 0,
      "fn": 0,
      "precision": 1.0,
      "recall": 1.0
    },
    "PERSON": {
      "tp": 160,
      "fp": 1,
      "fn": 0,
      "precision": 0.9937888198757764,
      "recall": 1.0
    },
    "PROFESSION": {
      "tp": 0,
      "fp": 1,
      "fn": 0,
      "precision": 0.0,
      "recall": null
    }
  },
  "slice_metrics": {
    "bilingual_boilerplate": {
      "kind": "false_positive",
      "category": "bilingual_boilerplate",
      "gate_metric": "precision",
      "gate_min": 0.95,
      "case_count": 30,
      "expected_total": 0,
      "tp_total": 0,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "precision": 1.0,
      "precision_definition": "case_level_zero_false_positive_rate",
      "passed": true
    },
    "diaspora_variants": {
      "kind": "name_recall",
      "category": "diaspora_variants",
      "gate_metric": "recall",
      "gate_min": 0.85,
      "case_count": 20,
      "expected_total": 20,
      "tp_total": 20,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "recall": 1.0,
      "passed": true
    },
    "document_bodies": {
      "kind": "false_positive",
      "category": "document_bodies",
      "gate_metric": "precision",
      "gate_min": 0.95,
      "case_count": 40,
      "expected_total": 0,
      "tp_total": 0,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "precision": 1.0,
      "precision_definition": "case_level_zero_false_positive_rate",
      "passed": true
    },
    "female_names": {
      "kind": "name_recall",
      "category": "female_names",
      "gate_metric": "recall",
      "gate_min": 0.85,
      "case_count": 40,
      "expected_total": 42,
      "tp_total": 42,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "recall": 1.0,
      "passed": true
    },
    "honorifics": {
      "kind": "name_recall",
      "category": "honorifics",
      "gate_metric": "recall",
      "gate_min": 0.85,
      "case_count": 20,
      "expected_total": 20,
      "tp_total": 20,
      "fp_total": 1,
      "fn_total": 0,
      "false_positive_case_count": 1,
      "false_negative_case_count": 0,
      "recall": 1.0,
      "passed": true
    },
    "modern_names": {
      "kind": "name_recall",
      "category": "modern_names",
      "gate_metric": "recall",
      "gate_min": 0.85,
      "case_count": 30,
      "expected_total": 32,
      "tp_total": 32,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "recall": 1.0,
      "passed": true
    },
    "numeric_ambiguity": {
      "kind": "false_positive",
      "category": "numeric_ambiguity",
      "gate_metric": "precision",
      "gate_min": 0.95,
      "case_count": 30,
      "expected_total": 0,
      "tp_total": 0,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "precision": 1.0,
      "precision_definition": "case_level_zero_false_positive_rate",
      "passed": true
    },
    "operational_text": {
      "kind": "false_positive",
      "category": "operational_text",
      "gate_metric": "precision",
      "gate_min": 0.95,
      "case_count": 30,
      "expected_total": 0,
      "tp_total": 0,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "precision": 1.0,
      "precision_definition": "case_level_zero_false_positive_rate",
      "passed": true
    },
    "policy_code": {
      "kind": "false_positive",
      "category": "policy_code",
      "gate_metric": "precision",
      "gate_min": 0.95,
      "case_count": 30,
      "expected_total": 0,
      "tp_total": 0,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "precision": 1.0,
      "precision_definition": "case_level_zero_false_positive_rate",
      "passed": true
    },
    "structured_noise": {
      "kind": "false_positive",
      "category": "structured_noise",
      "gate_metric": "precision",
      "gate_min": 0.95,
      "case_count": 30,
      "expected_total": 0,
      "tp_total": 0,
      "fp_total": 0,
      "fn_total": 0,
      "false_positive_case_count": 0,
      "false_negative_case_count": 0,
      "precision": 1.0,
      "precision_definition": "case_level_zero_false_positive_rate",
      "passed": true
    },
    "tribal_names": {
      "kind": "name_recall",
      "category": "tribal_names",
      "gate_metric": "recall",
      "gate_min": 0.85,
      "case_count": 50,
      "expected_total": 56,
      "tp_total": 56,
      "fp_total": 1,
      "fn_total": 0,
      "false_positive_case_count": 1,
      "false_negative_case_count": 0,
      "recall": 1.0,
      "passed": true
    }
  },
  "diagnostics": {
    "false_positives": [
      {
        "case_id": "tribal-ar-011",
        "slice_id": "tribal_names",
        "entity_type": "PROFESSION",
        "start": 12,
        "end": 16,
        "text": "جهّز"
      },
      {
        "case_id": "honorific-019",
        "slice_id": "honorifics",
        "entity_type": "PERSON",
        "start": 40,
        "end": 46,
        "text": "الليلي"
      }
    ],
    "false_negatives": []
  }
}
