{
  "slug": "neorl2-2025",
  "title": "NeoRL-2",
  "canonical_source_url": "https://github.com/polixir/NeoRL2",
  "official_dataset_url": "https://huggingface.co/datasets/polixirai/NeoRL2",
  "introducing_source_url": "https://arxiv.org/abs/2503.19267",
  "introducing_source": "wiki/sources/neorl2-2025.md",
  "dataset_type": "near-real-world offline reinforcement-learning benchmark with datasets and evaluation simulators",
  "domain": "offline reinforcement learning across industrial, healthcare, robotics, aircraft, nuclear-fusion, and safety-constrained control tasks",
  "temporal_structure": "trajectory transitions with current observation, action, reward, next observation, terminal/done flag, and trajectory index or split metadata",
  "collection_policy": "online training with reinforcement-learning algorithms or PID policies, followed by selection of suboptimal policies with returns from 50% to 80% of expert return to generate conservative offline datasets",
  "data_structure": {
    "github_dataset_fields": [
      "obs",
      "next_obs",
      "action",
      "reward",
      "done",
      "index"
    ],
    "huggingface_dataset_fields": [
      "observations",
      "actions",
      "rewards",
      "next_observations",
      "terminals"
    ],
    "paper_tasks": [
      "Pipeline",
      "Simglucose",
      "RocketRecovery",
      "RandomFrictionHopper",
      "DMSD",
      "Fusion",
      "SafetyHalfCheetah"
    ]
  },
  "task_shapes": {
    "Pipeline": {
      "observation_shape": 52,
      "action_shape": 1,
      "has_done": false,
      "max_timesteps": 1000
    },
    "Simglucose": {
      "observation_shape": 31,
      "action_shape": 1,
      "has_done": true,
      "max_timesteps": 480
    },
    "RocketRecovery": {
      "observation_shape": 7,
      "action_shape": 2,
      "has_done": true,
      "max_timesteps": 500
    },
    "RandomFrictionHopper": {
      "observation_shape": 13,
      "action_shape": 3,
      "has_done": true,
      "max_timesteps": 1000
    },
    "DMSD": {
      "observation_shape": 6,
      "action_shape": 2,
      "has_done": false,
      "max_timesteps": 100
    },
    "Fusion": {
      "observation_shape": 15,
      "action_shape": 6,
      "has_done": false,
      "max_timesteps": 100
    },
    "SafetyHalfCheetah": {
      "observation_shape": 18,
      "action_shape": 6,
      "has_done": false,
      "max_timesteps": 1000
    }
  },
  "benchmark_challenges": [
    "delay",
    "external factors",
    "global safety constraints",
    "rule-based behavior policy",
    "severely limited data"
  ],
  "reported_scale": {
    "paper_tasks": 7,
    "huggingface_rows": 980848,
    "huggingface_total_file_size": "130 MB",
    "typical_task_size": "about 100k transitions, with Fusion, RocketRecovery, and SafetyHalfCheetah smaller by design"
  },
  "actions_or_interventions": "explicit continuous action channels in offline RL transition data; this is a clean action-conditioned trajectory benchmark with rewards and evaluation simulators",
  "action_conditioned_world_model_fit": "Tier 1 fit for non-vision action-conditioned dynamics, especially for testing delays, exogenous factors, safety constraints, conservative behavior policies, limited data, and model-based offline RL brittleness",
  "known_limitations": [
    "simulated tasks are designed to reflect real-world challenges but are not direct business data",
    "paper reports that current baselines often fail to significantly outperform the data-collection behavior policy",
    "Hugging Face config metadata lists Salespromotion and Simglucose-high in addition to the seven paper/GitHub tasks",
    "license metadata conflicts between GitHub README and Hugging Face frontmatter"
  ],
  "license_note": "GitHub README says all datasets are CC BY 4.0 and code is Apache 2.0; Hugging Face frontmatter marks the dataset repository as apache-2.0, so reuse terms should be pinned from the intended artifact",
  "access_note": "public GitHub interface and Hugging Face dataset; this knowledge base records metadata only and does not mirror parquet payloads or simulators",
  "created": "2026-06-09",
  "updated": "2026-06-09"
}