Skip to content

Explore Prediction Bundles

This notebook loads pickled hierarchical predictors and allows you to explore the prediction bundles that were created.

import pickle
from pathlib import Path
import pandas as pd
import numpy as np

from patientflow.predict.hierarchy import (
    HierarchicalPredictor,
    FlowSelection,
    create_hierarchical_predictor,
)
from patientflow.predict.subspecialty import SubspecialtyPredictionInputs

Load Pickled Data

Update the path below to point to your pickle file.

# Update this path to your pickle file.
# NOTE(review): avoid hardcoded absolute user paths in shared notebooks —
# prefer a configurable data directory.
pickle_path = Path("/Users/zellaking/Downloads/hierarchical_predictors_with_inputs.pkl")

# Fail fast with a clear message instead of an opaque open() error.
if not pickle_path.exists():
    raise FileNotFoundError(f"Pickle file not found: {pickle_path}")

# SECURITY: pickle.load executes arbitrary code — only load files you trust.
with open(pickle_path, 'rb') as f:
    pickled_data = pickle.load(f)

print(f"Loaded pickle file: {pickle_path}")
print(f"Keys in pickle: {list(pickled_data.keys())}")

Loaded pickle file: /Users/zellaking/Downloads/hierarchical_predictors_with_inputs.pkl
Keys in pickle: ['all', 'elective', 'emergency']


/var/folders/lr/pm79dxzs0v70y4gz98dl13440000gn/T/ipykernel_12431/3782410358.py:5: DeprecationWarning: numpy.core.numeric is deprecated and has been renamed to numpy._core.numeric. The numpy._core namespace contains private NumPy internals and its use is discouraged, as NumPy internals can change without warning in any release. In practice, most real-world usage of numpy.core is to access functionality in the public NumPy API. If that is the case, use the public NumPy API. If not, you are using NumPy internals. If you would still like to access an internal attribute, use numpy._core.numeric._frombuffer.
  pickled_data = pickle.load(f)

Extract Data from 'all' Entry

# Unpack the components of the 'all' cohort entry that the rest of the
# notebook relies on: the pickled predictor, per-subspecialty inputs,
# the hierarchy mapping, its column names, and the root entity id.
if 'all' not in pickled_data:
    raise ValueError(f"Pickled data must contain 'all' key. Available keys: {list(pickled_data.keys())}")

all_data = pickled_data['all']

predictor = all_data['predictor']
subspecialty_data = all_data['subspecialty_data']
hierarchy_df = all_data['hierarchy_df']
column_mapping = all_data['column_mapping']
top_level_id = all_data['top_level_id']

spec_ids = list(subspecialty_data.keys())
print(f"Number of subspecialties: {len(subspecialty_data)}")
if len(subspecialty_data) > 10:
    print(f"Subspecialty IDs: {spec_ids[:10]}...")
else:
    print(f"Subspecialty IDs: {spec_ids}")
print(f"Top level ID: {top_level_id}")
print(f"\nHierarchy DataFrame shape: {hierarchy_df.shape}")
print(f"Column mapping: {column_mapping}")

Number of subspecialties: 291
Subspecialty IDs: ['Oncology - Gynae - Clinical', 'Neurology - Stroke ASU', 'Neurosurgery - Epilepsy', 'Haematology - BMT - Treatment', 'CYPPS - Psychiatry', 'Neurosurgery - Stereotactic Functional', 'Neurosurgery - Gamma Knife', 'Paediatric - Respiratory Medicine', 'Infectious Diseases - COVID-19 Medicines Delivery', 'Neurosurgery - Vascular']...
Top level ID: uclh

Hierarchy DataFrame shape: (290, 4)
Column mapping: {'sub_specialty': 'subspecialty', 'reporting_unit': 'reporting_unit', 'division': 'division', 'board': 'board'}

Inspect Hierarchy DataFrame

# Preview the hierarchy mapping: one row per subspecialty, with its
# reporting unit, division, and board.
hierarchy_df.head(10)

sub_specialty reporting_unit division board
0 Neurology - Upper Limb Rehab & Stroke Queen Square Specialist Hospitals Board
2 Oncology - Head and Neck - Clinical Oncology Cancer Services Cancer & Surgery Board
3 Oncology - Radiotherapy Oncology Cancer Services Cancer & Surgery Board
4 Neurology - Spasticity Rehab & Stroke Queen Square Specialist Hospitals Board
5 Neurology - Inherited Metabolic Diseases Specialist Services Queen Square Specialist Hospitals Board
6 Haematology - ITP Haematology Cancer Services Cancer & Surgery Board
7 ENT - Ears Adult ENT RNENT & EDH Specialist Hospitals Board
8 Paediatric - Proton Beam Therapy Children & Young Peoples Cancer Children and Young People Specialist Hospitals Board
9 Paediatric - Surgery Paediatric Surgery Children and Young People Specialist Hospitals Board
11 Orthopaedics - Upper Limb Trauma & Orthopaedics Surgical Specialties Cancer & Surgery Board
# Show the fan-in at each level of the hierarchy (unique values per column).
print("Hierarchy levels:")  # fixed: no placeholders, so no f-prefix needed
for col in hierarchy_df.columns:
    unique_count = hierarchy_df[col].nunique()
    print(f"  {col}: {unique_count} unique values")

Hierarchy levels:
  sub_specialty: 290 unique values
  reporting_unit: 55 unique values
  division: 14 unique values
  board: 3 unique values

Inspect Subspecialty Prediction Inputs

# Pull out one subspecialty's prediction inputs as a worked example;
# later cells reuse `first_spec_id` / `first_spec_inputs`.
spec_id_list = list(subspecialty_data.keys())
first_spec_id = spec_id_list[0]
first_spec_inputs = subspecialty_data[first_spec_id]

print(f"Example subspecialty: {first_spec_id}")
print(f"\n{first_spec_inputs}")

Example subspecialty: Oncology - Gynae - Clinical

SubspecialtyPredictionInputs(subspecialty='Oncology - Gynae - Clinical')
  INFLOWS:
    Admissions from current ED               PMF[0:10]: [0.970, 0.030, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] (E=0.0 of 27 patients in ED)
    ED yet-to-arrive admissions              λ = 0.014
    Non-ED emergency admissions              λ = 0.000
    Elective admissions                      λ = 0.075
    Elective transfers from other subspecialties PMF[0:2]: [1.000, 0.000] (E=0.0)
    Emergency transfers from other subspecialties PMF[0:7]: [0.957, 0.042, 0.001, 0.000, 0.000, 0.000, 0.000] (E=0.0)
  OUTFLOWS:
    Emergency inpatient departures           PMF[0:2]: [0.905, 0.095] (E=0.1 of 1 emergency patients in subspec)
    Elective inpatient departures            PMF[0:2]: [1.000, 0.000] (E=0.0 of 1 elective patients in subspec)
# Inspect inflows for the first subspecialty.
# Each inflow is either a PMF (array of probabilities over counts) or a
# Poisson rate, distinguished by `flow_type`.
print(f"Inflows for {first_spec_id}:")
for flow_id, flow_input in first_spec_inputs.inflows.items():
    if flow_input.flow_type == 'pmf':
        pmf = flow_input.distribution
        mean = np.sum(pmf * np.arange(len(pmf)))
        print(f"  {flow_id}: PMF with {len(pmf)} elements, sum={pmf.sum():.4f}, mean={mean:.4f}")
    elif flow_input.flow_type == 'poisson':
        lam = flow_input.distribution
        print(f"  {flow_id}: Poisson(λ={lam:.4f})")
    else:
        # Previously unknown flow types were silently skipped; surface them.
        print(f"  {flow_id}: unrecognised flow_type {flow_input.flow_type!r}")

Inflows for Oncology - Gynae - Clinical:
  ed_current: PMF with 28 elements, sum=1.0000, mean=0.0303
  ed_yta: Poisson(λ=0.0141)
  non_ed_yta: Poisson(λ=0.0000)
  elective_yta: Poisson(λ=0.0755)
  elective_transfers: PMF with 2 elements, sum=1.0000, mean=0.0000
  emergency_transfers: PMF with 7 elements, sum=1.0000, mean=0.0436
# Inspect outflows for the first subspecialty (mirrors the inflow cell).
print(f"Outflows for {first_spec_id}:")
for flow_id, flow_input in first_spec_inputs.outflows.items():
    if flow_input.flow_type == 'pmf':
        pmf = flow_input.distribution
        mean = np.sum(pmf * np.arange(len(pmf)))
        print(f"  {flow_id}: PMF with {len(pmf)} elements, sum={pmf.sum():.4f}, mean={mean:.4f}")
    elif flow_input.flow_type == 'poisson':
        lam = flow_input.distribution
        print(f"  {flow_id}: Poisson(λ={lam:.4f})")
    else:
        # Previously unknown flow types were silently skipped; surface them.
        print(f"  {flow_id}: unrecognised flow_type {flow_input.flow_type!r}")

Outflows for Oncology - Gynae - Clinical:
  emergency_departures: PMF with 2 elements, sum=1.0000, mean=0.0953
  elective_departures: PMF with 2 elements, sum=1.0000, mean=0.0000

Inspect Existing Prediction Bundles (from Pickled Predictor)

# Recover the flow selection used when the pickled predictor was built,
# and list which entities already have cached prediction bundles.
# `flow_selection` is reused by later cells.
if predictor.cache:
    cached_ids = list(predictor.cache.keys())
    first_bundle = next(iter(predictor.cache.values()))
    flow_selection = first_bundle.flow_selection

    print(f"Flow selection from pickled predictor: {flow_selection.cohort}")
    print(f"\nCached bundles in predictor: {len(predictor.cache)} entities")
    print(f"\nEntity IDs in cache:")
    for entity_id in cached_ids[:20]:
        print(f"  {entity_id}")
    if len(cached_ids) > 20:
        print(f"  ... and {len(cached_ids) - 20} more")

Flow selection from pickled predictor: all

Cached bundles in predictor: 364 entities

Entity IDs in cache:
  subspecialty:Oncology - Gynae - Clinical
  subspecialty:Neurology - Stroke ASU
  subspecialty:Neurosurgery - Epilepsy
  subspecialty:Haematology - BMT - Treatment
  subspecialty:CYPPS - Psychiatry
  subspecialty:Neurosurgery - Stereotactic Functional
  subspecialty:Neurosurgery - Gamma Knife
  subspecialty:Paediatric - Respiratory Medicine
  subspecialty:Infectious Diseases - COVID-19 Medicines Delivery
  subspecialty:Neurosurgery - Vascular
  subspecialty:Gastroenterology - Adolescent
  subspecialty:Gynaecology - Endometriosis
  subspecialty:Neurology - Neuro Ophthalmology
  subspecialty:Respiratory - Lung Cancer
  subspecialty:Obstetrics - Still Birth
  subspecialty:Neurology - Autonomics
  subspecialty:Urology - Stones
  subspecialty:Neurosurgery - General Cranial
  subspecialty:Gynaecology - Urogynaecology
  subspecialty:Oncology - Interventional Oncology Service
  ... and 344 more
# Inspect one prediction bundle from the cache.
# Swap the index for a different entity if desired.
entity_to_inspect = None
if predictor.cache:
    entity_to_inspect = list(predictor.cache.keys())[0]

if entity_to_inspect:
    bundle = predictor.cache[entity_to_inspect]
    print(f"\nPrediction Bundle for: {entity_to_inspect}")
    print(f"Entity ID: {bundle.entity_id}")
    print(f"Entity Type: {bundle.entity_type}")
    print(f"\n{bundle}")

Prediction Bundle for: subspecialty:Oncology - Gynae - Clinical
Entity ID: Oncology - Gynae - Clinical
Entity Type: subspecialty

PredictionBundle(subspecialty: Oncology - Gynae - Clinical)
  Arrivals:    PMF[0:10]: [0.849, 0.140, 0.011, 0.001, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] (E=0.2)
  Departures:  PMF[0:3]: [0.905, 0.095, 0.000] (E=0.1)
  Net flow:    PMF[-2:6]: [0.000, 0.081, 0.781, 0.127, 0.010, 0.001, 0.000, 0.000] (E=0.1)
  Flows:       selection cohort=all inflows(ed_current=True, ed_yta=True, non_ed_yta=True, elective_yta=True, transfers_in=True) outflows(departures=True)

Create New Predictions with Different k_sigma Values

You can create new predictions with different k_sigma values to see how they affect the results.

# Choose a k_sigma value to test.
# Larger k_sigma yields longer (less aggressively capped) PMFs — see the
# comparison table later in this notebook.
k_sigma = 8.0

# Create a new predictor over the same hierarchy with this k_sigma.
new_predictor = create_hierarchical_predictor(
    hierarchy_df,
    column_mapping,
    top_level_id,
    k_sigma=k_sigma,
)

print(f"Created new predictor with k_sigma={k_sigma}")

Created new predictor with k_sigma=8.0
# Generate predictions with the new k_sigma, reusing the flow_selection
# recovered from the pickled predictor so the comparison is like-for-like.
new_results = new_predictor.predict_all_levels(
    subspecialty_data,
    flow_selection=flow_selection
)

result_ids = list(new_results.keys())
print(f"Generated {len(new_results)} prediction bundles")
print(f"\nEntity IDs in results:")
for entity_id in result_ids[:20]:
    print(f"  {entity_id}")
if len(result_ids) > 20:
    print(f"  ... and {len(result_ids) - 20} more")

Generated 364 prediction bundles

Entity IDs in results:
  subspecialty:Oncology - Gynae - Clinical
  subspecialty:Neurology - Stroke ASU
  subspecialty:Neurosurgery - Epilepsy
  subspecialty:Haematology - BMT - Treatment
  subspecialty:CYPPS - Psychiatry
  subspecialty:Neurosurgery - Stereotactic Functional
  subspecialty:Neurosurgery - Gamma Knife
  subspecialty:Paediatric - Respiratory Medicine
  subspecialty:Infectious Diseases - COVID-19 Medicines Delivery
  subspecialty:Neurosurgery - Vascular
  subspecialty:Gastroenterology - Adolescent
  subspecialty:Gynaecology - Endometriosis
  subspecialty:Neurology - Neuro Ophthalmology
  subspecialty:Respiratory - Lung Cancer
  subspecialty:Obstetrics - Still Birth
  subspecialty:Neurology - Autonomics
  subspecialty:Urology - Stones
  subspecialty:Neurosurgery - General Cranial
  subspecialty:Gynaecology - Urogynaecology
  subspecialty:Oncology - Interventional Oncology Service
  ... and 344 more
# Inspect a specific prediction bundle from the new results.
# Try different entity IDs to explore; index 0 is the first entity
# inserted into the results dict (insertion-ordered).
entity_to_inspect = list(new_results.keys())[0]

bundle = new_results[entity_to_inspect]
print(f"\nPrediction Bundle for: {entity_to_inspect}")
print(f"\n{bundle}")

Prediction Bundle for: subspecialty:Oncology - Gynae - Clinical

PredictionBundle(subspecialty: Oncology - Gynae - Clinical)
  Arrivals:    PMF[0:10]: [0.849, 0.140, 0.011, 0.001, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] (E=0.2)
  Departures:  PMF[0:3]: [0.905, 0.095, 0.000] (E=0.1)
  Net flow:    PMF[-2:8]: [0.000, 0.081, 0.781, 0.128, 0.010, 0.000, 0.000, 0.000, 0.000, 0.000] (E=0.1)
  Flows:       selection cohort=all inflows(ed_current=True, ed_yta=True, non_ed_yta=True, elective_yta=True, transfers_in=True) outflows(departures=True)
# Compare PMF sizes for top-level entity.
# First try the conventional "hospital:<id>" key; fall back to scanning
# entity types in case the key format differs.
top_level_key = f"hospital:{top_level_id}" if f"hospital:{top_level_id}" in new_results else None
if not top_level_key:
    # Try to find top level by checking entity types
    for key, bundle in new_results.items():
        if bundle.entity_type == 'hospital':
            top_level_key = key
            break

if top_level_key:
    bundle = new_results[top_level_key]
    print(f"Top-level bundle: {top_level_key}")
    # PMF lengths and expected values for each of the three distributions.
    print(f"\nArrivals PMF length: {len(bundle.arrivals.probabilities)}")
    print(f"Arrivals expected value: {bundle.arrivals.expected_value:.2f}")
    print(f"\nDepartures PMF length: {len(bundle.departures.probabilities)}")
    print(f"Departures expected value: {bundle.departures.expected_value:.2f}")
    print(f"\nNet flow PMF length: {len(bundle.net_flow.probabilities)}")
    print(f"Net flow expected value: {bundle.net_flow.expected_value:.2f}")
    # Net flow can be negative, so its PMF carries an offset (support start).
    print(f"Net flow offset: {bundle.net_flow.offset}")

    # Show percentiles
    print(f"\nArrivals percentiles: {bundle.arrivals.percentiles}")
    print(f"Departures percentiles: {bundle.departures.percentiles}")
    print(f"Net flow percentiles: {bundle.net_flow.percentiles}")
else:
    print("Could not find top-level entity")

Top-level bundle: hospital:uclh

Arrivals PMF length: 189
Arrivals expected value: 106.17

Departures PMF length: 144
Departures expected value: 86.42

Net flow PMF length: 332
Net flow expected value: 19.75
Net flow offset: -143

Arrivals percentiles: {50: 106, 75: 113, 90: 119, 95: 123, 99: 130}
Departures percentiles: {50: 86, 75: 91, 90: 95, 95: 98, 99: 103}
Net flow percentiles: {50: 20, 75: 28, 90: 36, 95: 40, 99: 49}

Compare Different k_sigma Values

Compare how different k_sigma values affect PMF sizes and predictions.

# Test multiple k_sigma values and compare top-level PMF sizes / expectations.
k_sigma_values = [4.0, 6.0, 8.0, 10.0, 12.0]
comparison_results = {}

# FIX: the original rebound the module-level names `predictor` and
# `k_sigma` inside this loop, silently clobbering the pickled predictor
# (loaded earlier) and the k_sigma=8.0 configured above — a classic
# notebook hidden-state bug. Use loop-local names instead.
for ks in k_sigma_values:
    trial_predictor = create_hierarchical_predictor(
        hierarchy_df,
        column_mapping,
        top_level_id,
        k_sigma=ks,
    )

    results = trial_predictor.predict_all_levels(
        subspecialty_data,
        flow_selection=flow_selection
    )

    # Find the top-level (hospital) bundle for this run, if any.
    top_bundle = next(
        (b for b in results.values() if b.entity_type == 'hospital'), None
    )

    if top_bundle:
        comparison_results[ks] = {
            'arrivals_pmf_len': len(top_bundle.arrivals.probabilities),
            'departures_pmf_len': len(top_bundle.departures.probabilities),
            'net_flow_pmf_len': len(top_bundle.net_flow.probabilities),
            'arrivals_expected': top_bundle.arrivals.expected_value,
            'departures_expected': top_bundle.departures.expected_value,
            'net_flow_expected': top_bundle.net_flow.expected_value,
        }

# Display comparison table.
print(f"{'k_sigma':<10} {'Arr PMF':<12} {'Dep PMF':<12} {'Net PMF':<12} {'Arr E[X]':<12} {'Dep E[X]':<12} {'Net E[X]':<12}")
print("-" * 80)
for ks in sorted(comparison_results.keys()):
    r = comparison_results[ks]
    print(f"{ks:<10.1f} {r['arrivals_pmf_len']:<12} {r['departures_pmf_len']:<12} {r['net_flow_pmf_len']:<12} "
          f"{r['arrivals_expected']:<12.2f} {r['departures_expected']:<12.2f} {r['net_flow_expected']:<12.2f}")

k_sigma    Arr PMF      Dep PMF      Net PMF      Arr E[X]     Dep E[X]     Net E[X]    
--------------------------------------------------------------------------------
4.0        148          116          263          106.09       86.42        19.67       
6.0        169          130          298          106.16       86.42        19.74       
8.0        189          144          332          106.17       86.42        19.75       
10.0       209          158          366          106.17       86.42        19.75       
12.0       230          172          401          106.17       86.42        19.75

Explore Specific Subspecialty Predictions

Verify Net Flow Bounds Based on k_sigma

Check that net flow bounds are correct: net flow should range from [-max_departures, +max_arrivals] where the max values come from the capped arrivals and departures distributions.

def verify_net_flow_bounds(bundle):
    """Check that a bundle's net-flow support matches its arrivals/departures.

    Net flow = arrivals - departures, so its support should span
    [min_arrivals - max_departures, max_arrivals - min_departures], where
    the extremes come from the (offset-adjusted) capped PMFs:

    - max value of a distribution = offset + (len(probabilities) - 1)
    - min value of a distribution = offset

    Returns a dict of the intermediate quantities, expected vs. actual
    bounds, a per-check boolean for each bound, and an 'all_correct' flag.
    """
    arr_dist = bundle.arrivals
    dep_dist = bundle.departures
    net_dist = bundle.net_flow

    # Highest index carried by each capped PMF.
    arr_top_idx = len(arr_dist.probabilities) - 1
    dep_top_idx = len(dep_dist.probabilities) - 1

    # Translate indices into actual counts via each distribution's offset.
    arr_top_val = arr_dist.offset + arr_top_idx
    dep_top_val = dep_dist.offset + dep_top_idx

    # Expected support of (arrivals - departures).
    want_min = arr_dist.offset - dep_top_val
    want_max = arr_top_val - dep_dist.offset
    want_offset = want_min
    want_len = want_max - want_min + 1

    # Observed support of the stored net-flow PMF.
    got_offset = net_dist.offset
    got_min = net_dist.offset
    got_len = len(net_dist.probabilities)
    got_max = got_offset + got_len - 1

    checks = {
        'offset_correct': got_offset == want_offset,
        'min_value_correct': got_min == want_min,
        'max_value_correct': got_max == want_max,
        'pmf_length_correct': got_len == want_len,
    }

    return {
        'entity_id': bundle.entity_id,
        'entity_type': bundle.entity_type,
        'arr_offset': arr_dist.offset,
        'dep_offset': dep_dist.offset,
        'max_arrivals_idx': arr_top_idx,
        'max_departures_idx': dep_top_idx,
        'max_arrivals_value': arr_top_val,
        'max_departures_value': dep_top_val,
        'expected_offset': want_offset,
        'actual_offset': got_offset,
        'offset_correct': checks['offset_correct'],
        'expected_min_value': want_min,
        'actual_min_value': got_min,
        'min_value_correct': checks['min_value_correct'],
        'expected_max_value': want_max,
        'actual_max_value': got_max,
        'max_value_correct': checks['max_value_correct'],
        'expected_pmf_length': want_len,
        'actual_pmf_length': got_len,
        'pmf_length_correct': checks['pmf_length_correct'],
        'all_correct': all(checks.values()),
    }

# Verify bounds for one bundle from the most recent predictions.
if 'new_results' in locals() and new_results:
    bundle_to_check = list(new_results.values())[0]
    verification = verify_net_flow_bounds(bundle_to_check)
    v = verification

    def tick(ok):
        """Render a pass/fail mark for one verification check."""
        return '✓' if ok else '✗'

    print(f"Verification for: {v['entity_id']} ({v['entity_type']})")
    print(f"\nArrivals:")
    print(f"  Offset: {v['arr_offset']}, Max index: {v['max_arrivals_idx']}, Max value: {v['max_arrivals_value']}")
    print(f"\nDepartures:")
    print(f"  Offset: {v['dep_offset']}, Max index: {v['max_departures_idx']}, Max value: {v['max_departures_value']}")
    print(f"\nNet Flow Bounds:")
    print(f"  Expected offset: {v['expected_offset']}, Actual: {v['actual_offset']} {tick(v['offset_correct'])}")
    print(f"  Expected min value: {v['expected_min_value']}, Actual: {v['actual_min_value']} {tick(v['min_value_correct'])}")
    print(f"  Expected max value: {v['expected_max_value']}, Actual: {v['actual_max_value']} {tick(v['max_value_correct'])}")
    print(f"  Expected PMF length: {v['expected_pmf_length']}, Actual: {v['actual_pmf_length']} {tick(v['pmf_length_correct'])}")
    print(f"\n{'All checks passed!' if v['all_correct'] else 'Some checks failed!'}")
else:
    print("Run the cells above to generate predictions first.")

Verification for: Oncology - Gynae - Clinical (subspecialty)

Arrivals:
  Offset: 0, Max index: 38, Max value: 38

Departures:
  Offset: 0, Max index: 2, Max value: 2

Net Flow Bounds:
  Expected offset: -2, Actual: -2 ✓
  Expected min value: -2, Actual: -2 ✓
  Expected max value: 38, Actual: 38 ✓
  Expected PMF length: 41, Actual: 41 ✓

All checks passed!
# Verify bounds across all entities for a range of k_sigma values.
k_sigma_test_values = [4.0, 6.0, 8.0, 10.0, 12.0, 50.0, 100.0]
verification_results = {}

# FIX: the original rebound the module-level `predictor` inside this loop,
# silently replacing the pickled predictor loaded earlier (hidden-state
# bug on re-run). Use a loop-local name instead.
for ks in k_sigma_test_values:
    sweep_predictor = create_hierarchical_predictor(
        hierarchy_df,
        column_mapping,
        top_level_id,
        k_sigma=ks,
    )

    results = sweep_predictor.predict_all_levels(
        subspecialty_data,
        flow_selection=flow_selection
    )

    # Verify every bundle produced at this k_sigma.
    verification_results[ks] = {
        entity_id: verify_net_flow_bounds(bundle)
        for entity_id, bundle in results.items()
    }

# Summary: how many entities pass verification at each k_sigma.
print("Verification Summary:")
print(f"{'k_sigma':<10} {'Total Entities':<15} {'All Correct':<15} {'Failed':<15}")
print("-" * 60)

for ks in sorted(verification_results):
    checks = verification_results[ks]
    n_total = len(checks)
    n_ok = sum(1 for v in checks.values() if v['all_correct'])
    print(f"{ks:<10.1f} {n_total:<15} {n_ok:<15} {n_total - n_ok:<15}")

# Show per-entity details for any verification failures.
print("\n" + "="*60)
print("Detailed Failures (if any):")
print("="*60)

# (flag key, display label, expected key, actual key) for each bound check.
mismatch_fields = [
    ('offset_correct', 'Offset', 'expected_offset', 'actual_offset'),
    ('min_value_correct', 'Min value', 'expected_min_value', 'actual_min_value'),
    ('max_value_correct', 'Max value', 'expected_max_value', 'actual_max_value'),
    ('pmf_length_correct', 'PMF length', 'expected_pmf_length', 'actual_pmf_length'),
]

for k_sigma in sorted(verification_results.keys()):
    verifications = verification_results[k_sigma]
    failures = [(eid, v) for eid, v in verifications.items() if not v['all_correct']]
    if not failures:
        print(f"\nk_sigma = {k_sigma}: All entities passed verification ✓")
        continue
    print(f"\nk_sigma = {k_sigma}:")
    for entity_id, v in failures:
        print(f"  {entity_id}:")
        for ok_key, label, exp_key, act_key in mismatch_fields:
            if not v[ok_key]:
                print(f"    {label}: expected {v[exp_key]}, got {v[act_key]}")

Verification Summary:
k_sigma    Total Entities  All Correct     Failed         
------------------------------------------------------------
4.0        364             364             0              
6.0        364             364             0              
8.0        364             364             0              
10.0       364             364             0              
12.0       364             364             0              
50.0       364             364             0              
100.0      364             364             0

============================================================
Detailed Failures (if any):
============================================================

k_sigma = 4.0: All entities passed verification ✓

k_sigma = 6.0: All entities passed verification ✓

k_sigma = 8.0: All entities passed verification ✓

k_sigma = 10.0: All entities passed verification ✓

k_sigma = 12.0: All entities passed verification ✓

k_sigma = 50.0: All entities passed verification ✓

k_sigma = 100.0: All entities passed verification ✓
# Tabulate how k_sigma affects net flow bounds for the top-level entity.
print("How k_sigma affects net flow bounds (top-level entity):")
print(f"{'k_sigma':<10} {'Max Arr Val':<12} {'Max Dep Val':<12} {'Net Min':<10} {'Net Max':<10} {'Net PMF Len':<12} {'Status':<10}")
print("-" * 80)

for k_sigma in sorted(verification_results.keys()):
    # Pick out the hospital-level verification for this k_sigma, if present.
    top = next(
        (v for v in verification_results[k_sigma].values()
         if v['entity_type'] == 'hospital'),
        None,
    )
    if top is None:
        continue
    status = "✓" if top['all_correct'] else "✗"
    print(f"{k_sigma:<10.1f} {top['max_arrivals_value']:<12} {top['max_departures_value']:<12} "
          f"{top['actual_min_value']:<10} {top['actual_max_value']:<10} {top['actual_pmf_length']:<12} {status:<10}")

How k_sigma affects net flow bounds (top-level entity):
k_sigma    Max Arr Val  Max Dep Val  Net Min    Net Max    Net PMF Len  Status    
--------------------------------------------------------------------------------
4.0        147          115          -115       147        263          ✓         
6.0        168          129          -129       168        298          ✓         
8.0        188          143          -143       188        332          ✓         
10.0       208          157          -157       208        366          ✓         
12.0       229          171          -171       229        401          ✓         
50.0       614          436          -436       614        1051         ✓         
100.0      1122         563          -563       1122       1686         ✓
# Choose a subspecialty and show its bundle plus the raw PMF arrays.
spec_to_inspect = list(subspecialty_data)[0]
spec_key = f"subspecialty:{spec_to_inspect}"

if spec_key in new_results:
    spec_bundle = new_results[spec_key]
    print(f"Detailed view of subspecialty: {spec_to_inspect}")
    print(f"\n{spec_bundle}")

    # Leading slice of each raw PMF array.
    print(f"\n\nArrivals PMF (first 20 values):")
    print(spec_bundle.arrivals.probabilities[:20])

    print(f"\nDepartures PMF (first 20 values):")
    print(spec_bundle.departures.probabilities[:20])

    print(f"\nNet Flow PMF (first 20 values, offset={spec_bundle.net_flow.offset}):")
    print(spec_bundle.net_flow.probabilities[:20])
else:
    print(f"Could not find {spec_key} in results")

Detailed view of subspecialty: Oncology - Gynae - Clinical

PredictionBundle(subspecialty: Oncology - Gynae - Clinical)
  Arrivals:    PMF[0:10]: [0.849, 0.140, 0.011, 0.001, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000] (E=0.2)
  Departures:  PMF[0:3]: [0.905, 0.095, 0.000] (E=0.1)
  Net flow:    PMF[-2:8]: [0.000, 0.081, 0.781, 0.128, 0.010, 0.000, 0.000, 0.000, 0.000, 0.000] (E=0.1)
  Flows:       selection cohort=all inflows(ed_current=True, ed_yta=True, non_ed_yta=True, elective_yta=True, transfers_in=True) outflows(departures=True)


Arrivals PMF (first 20 values):
[8.48694244e-01 1.39805058e-01 1.09366768e-02 5.45197028e-04
 1.83986641e-05 4.18752076e-07 6.35445372e-09 6.36642404e-11
 4.18183760e-13 1.80973266e-15 5.31576184e-18 1.09678047e-20
 1.63744117e-23 1.81122916e-26 1.51211119e-29 9.66733306e-33
 4.78723204e-36 1.85225818e-39 5.63508145e-43 1.35349652e-46]

Departures PMF (first 20 values):
[0.90469798 0.09530202 0.        ]

Net Flow PMF (first 20 values, offset=-2):
[0.00000000e+00 8.08822721e-02 7.81135676e-01 1.27523642e-01
 9.94634782e-03 4.94992082e-04 1.66851423e-05 3.79449751e-07
 5.75492880e-09 5.76367637e-11 3.78502476e-13 1.63776810e-15
 4.81020427e-18 9.92411128e-21 1.48156234e-23 1.63875948e-26
 1.36809608e-29 8.74647296e-33 4.33117570e-36 1.67578794e-39]