Data Handling Quickstart Guide¶

This guide demonstrates how to use the data_handlers package for handling random variables and named values.

Basic Usage of Named Values¶

from process_manager import data_handlers as dh

# Create named values with automatic state management
name = dh.NamedValue(name="name", value="John Doe")  # Value is set immediately
age = dh.NamedValue(name="age")  # Value is initially unset

# Check and set values
print(name.value)  # Output: "John Doe"
try:
    print(age.value)  # Raises ValueError: Value 'age' has not been set yet
except ValueError as e:
    print(e)

# Set the value for age
age.value = 25  # Sets and freezes the value
try:
    age.value = 26  # Raises ValueError - value is frozen
except ValueError as e:
    print(e)  # Output: Value 'age' has already been set and is frozen...

# Use force_set_value to change a frozen value
age.force_set_value(26)  # Successfully changes the value
print(age.value)  # Output: 26

Managing Named Value Collections¶

# Create a hash to store named values
nv_hash = dh.NamedValueHash()

# Register named values
nv_hash.register_value(name)
nv_hash.register_value(age)

# Access values through the hash
print(nv_hash.get_raw_value("name"))  # Output: "John Doe"
print(nv_hash.get_raw_value("age"))   # Output: 26

# Create ordered lists of named values
nv_list = dh.NamedValueList()
nv_list.append(name)
nv_list.append(age)

# Access values by index or name
print(nv_list[0].value)  # Output: "John Doe"
print(nv_list.get_value("age").value)  # Output: 26

Working with Random Variables¶

import numpy as np

# Create random variable distributions
normal_dist = dh.NormalDistribution(name="height", mu=170, sigma=10)
uniform_dist = dh.UniformDistribution(name="weight", low=60, high=90)
categories = np.array(["A", "B", "C"])
cat_dist = dh.CategoricalDistribution(
    name="blood_type",
    categories=categories,
    probabilities=np.array([0.4, 0.1, 0.5])
)

# Create a hash to store random variables
rv_hash = dh.RandomVariableHash()

# Register variables and get samples
height = normal_dist.register_to_hash(rv_hash, size=5)
weight = uniform_dist.register_to_hash(rv_hash, size=5)
blood_type = cat_dist.register_to_hash(rv_hash, size=5)

# View the samples
print(f"Heights: {height.value}")      # e.g., [168.3, 175.2, 162.1, 171.8, 169.5]
print(f"Weights: {weight.value}")      # e.g., [75.3, 82.1, 68.4, 71.2, 88.9]
print(f"Blood Types: {blood_type.value}")  # e.g., ['A', 'C', 'A', 'C', 'C']

Serialization and Deserialization¶

# Serialize named values hash
nv_json = nv_hash.model_dump_json(indent=2)
print(nv_json)

Example output:

{
  "objects": {
    "name": {
      "name": "name",
      "type": "NamedValue",
      "state": "set",
      "stored_value": "John Doe"
    },
    "age": {
      "name": "age",
      "type": "NamedValue",
      "state": "set",
      "stored_value": 26
    }
  }
}

# Serialize random variables hash
rv_json = rv_hash.model_dump_json(indent=2)
print(rv_json)

Example output:

href="#__codelineno-6-1">{ "objects": { "height": { "name": "height", "type": "NormalDistribution", "mu": 170, "sigma": 10, "seed": null }, "weight": { "name": "weight", "type": "UniformDistribution", "low": 60, "high": 90, "seed": null }, "blood_type": { "name": "blood_type", "type": "CategoricalDistribution", "categories": ["A", "B", "C"], "probabilities": [0.4, 0.1, 0.5], "seed": null } } }

# Load from serialized data
new_nv_hash = dh.NamedValueHash.model_validate_json(nv_json)
new_rv_hash = dh.RandomVariableHash.model_validate_json(rv_json)

# Save to and load from files
with open("nv_hash.json", "w") as f:
    f.write(nv_hash.model_dump_json(indent=2))

with open("nv_hash.json", "r") as f:
    loaded_nv_hash = dh.NamedValueHash.model_validate_json(f.read())

Advanced Named Value Features¶

from process_manager import data_handlers as dh

# Type-safe value handling
integer_value = dh.NamedValue[int](name="count")  # Explicitly typed as int
try:
    integer_value.value = "not an integer"  # Raises TypeError
except TypeError as e:
    print(e)
integer_value.value = 42

# Working with collections
values_list = dh.NamedValueList()
integer_value.append_to_value_list(values_list)  # Method chaining
values_list.append(dh.NamedValue("price", 10.99))

# Iterate over values
for value in values_list.get_values():
    print(f"{value.name}: {value.value}")

# Filter values by type
number_values = values_list.get_value_by_type(int)

Best Practices¶

Always use type hints with NamedValue for better type safety:

temperature = dh.NamedValue[float](name="temp")
name = dh.NamedValue[str](name="user_name")

Handle unset values appropriately:

value = dh.NamedValue(name="example")
if value._state == dh.NamedValueState.UNSET:
    # Handle unset value case
    value.value = default_value

Use force_set_value() sparingly and only when you need to override frozen values:

# Prefer setting values once
config = dh.NamedValue(name="config", value=initial_config)

# Only use force_set_value when absolutely necessary
if needs_update:
    config.force_set_value(new_config)

Leverage the built-in serialization for persistence:

# Save state
saved_state = value.model_dump_json()

# Restore state
restored_value = dh.NamedValue.model_validate_json(saved_state)