{
  "slug": "tennessee-eastman-process-2017",
  "title": "Additional Tennessee Eastman Process Simulation Data",
  "canonical_source_url": "https://doi.org/10.7910/DVN/6C3JR1",
  "provided_walkthrough_url": "https://medium.com/@mrunal68/tennessee-eastman-process-simulation-data-for-anomaly-detection-evaluation-d719dc133a7f",
  "original_benchmark_paper_url": "https://doi.org/10.1016/0098-1354(93)80018-I",
  "introducing_source": "wiki/sources/tennessee-eastman-process-2017.md",
  "dataset_type": "synthetic multivariate industrial process-control time series for anomaly detection and fault diagnosis",
  "domain": "chemical process control and industrial control systems",
  "temporal_structure": "RData dataframes with fault-free and faulty training/testing runs; each run is sampled every 3 minutes; training runs have 500 samples over 25 hours and testing runs have 960 samples over 48 hours",
  "data_structure": {
    "files": [
      "TEP_FaultFree_Training.RData",
      "TEP_FaultFree_Testing.RData",
      "TEP_Faulty_Training.RData",
      "TEP_Faulty_Testing.RData"
    ],
    "columns": [
      "faultNumber",
      "simulationRun",
      "sample",
      "52 process variables"
    ],
    "process_variable_groups": [
      "measured variables",
      "manipulated variables"
    ],
    "faults": "faultNumber 0 for normal operation and faults 1 through 20 for faulty runs"
  },
  "reported_scale": {
    "process_variables": 52,
    "states": 21,
    "fault_types": 20,
    "simulation_runs_per_fault_split": 500,
    "training_samples_per_run": 500,
    "testing_samples_per_run": 960,
    "sampling_period_minutes": 3,
    "fdbenchmark_rieth_tep_size": "1.84 GB"
  },
  "actions_or_interventions": "contains manipulated process variables that can be treated as control-input channels for next-state modeling, but the public Rieth et al. dataset is primarily an anomaly/fault benchmark; fault labels and fault injections are events or benchmark conditions, not logged operator remediation actions",
  "tasks": [
    "anomaly detection",
    "fault diagnosis",
    "fault classification",
    "industrial process monitoring",
    "control-input-conditioned next-state modeling with careful preprocessing"
  ],
  "action_conditioned_world_model_fit": "useful but not clean offline RL; measured variables plus manipulated variables can support action-conditioned transition modeling, while fault scenarios test robustness under exogenous disturbances and abnormal regimes",
  "known_limitations": [
    "synthetic simulator data rather than live plant data",
    "fault-detection benchmark framing rather than policy-learning benchmark framing",
    "direct Harvard Dataverse landing page required JavaScript verification in this environment",
    "license should be rechecked on the canonical Dataverse page before reuse"
  ],
  "license_note": "downstream references sometimes report CC0/Public Domain for the Harvard Dataverse artifact, but the direct landing page could not be verified in this environment; treat reuse terms as needing confirmation",
  "access_note": "canonical DOI points to Harvard Dataverse; this knowledge base records metadata only and does not mirror RData payloads",
  "created": "2026-06-09",
  "updated": "2026-06-09"
}
