1r"""Lazy, unit-aware HDF readers for PSI MAS and POT3D magnetohydrodynamic output.
2
3This module reads three-dimensional field variables from Predictive Science Inc.'s
4MAS and POT3D solvers through a single interface that spans both HDF4 (``.hdf``) and
5HDF5 (``.h5``) files. Readers are *lazy*: metadata is resolved at construction from
6the filename and HDF attributes, while array data is transferred from disk only on
7access, and full-dataset reads are cached on the reader.
8
9.. rubric:: Entry point
10
11:func:`PsiData` is the sole public symbol and the intended way to use this module.
12It is a factory that inspects the file extension and ``model`` argument and returns
13the matching concrete reader; the underlying ``_Hdf*`` classes should never be
14instantiated directly.
15
16.. code-block:: python
17
18 from psi_io.mhd_io import PsiData
19
20 PsiData('br001001.hdf', model='mas') # MAS HDF4
21 PsiData('br001.h5', model='pot3d', unit='Gauss') # POT3D HDF5 (unit declared)
22 PsiData('emission.h5', mesh='MMM', scales='X,Y,Z', order='C', ...) # Custom reader
23
24.. rubric:: Reader interface
25
26The object returned by :func:`PsiData` is an :class:`_HdfData` instance. Refer to
27the following classes for the complete reference:
28
29- :class:`_HdfData` — the main field reader. :meth:`_HdfData.read` slices by index
30 and :meth:`_HdfData.vslice` slices by physical coordinate value with linear
31 interpolation. Both return :class:`~astropy.units.Quantity` data in physical
32 ``(r, θ, φ)`` order, with optional unit conversion and mesh remapping.
33- :class:`_HdfArray` — the array interface inherited by every reader, defining the
34 metadata properties (``name``, ``desc``, ``unit``, ``mesh``, ``shape``, ``dtype``,
35 ``data_cached``, ``interp_cached`` …) and the base :meth:`_HdfArray.read`.
36- ``reader.scales`` — a named tuple of per-axis coordinate readers ``(r, t, p)``
37 that expose the same :class:`_HdfArray` interface as the main reader.
38
39.. note::
40 PSI HDF files are stored in Fortran column-major order, so ``reader.shape`` and
41 the on-disk layout are ``(Nφ, Nθ, Nr)`` with the radial axis last. All
42 slicing arguments and returned coordinate scales are nevertheless given in
43 physical ``(r, θ, φ)`` order.
44
45.. rubric:: Supported quantities
46
47MAS provides 19 field variables — magnetic field, velocity, and current-density
48components, temperatures, density, pressure, Alfvén/Elsässer wave energy, and
49coronal heating — while POT3D provides the three magnetic-field components. Each
50quantity's canonical unit and mesh stagger are defined in :mod:`psi_io.models`, and
51the corresponding normalization constants in :mod:`psi_io.units`. POT3D output is
52unnormalized; see the :func:`PsiData` warning on declaring its unit.
53"""
54
55from __future__ import annotations
56
57__all__ = ['PsiData',]
58
59import re
60import warnings
61from abc import abstractmethod, ABC
62from collections import namedtuple, UserDict
63from collections.abc import Sequence, Iterable, Collection
64from functools import partial
65from itertools import repeat, chain
66from pathlib import Path
67from types import MappingProxyType
68from typing import TYPE_CHECKING, Optional, Literal, ClassVar, Mapping
69import numpy as np
70import h5py as h5
71import astropy.units as u
72from astropy.table import QTable
73from numpy.lib.recfunctions import structured_to_unstructured
74try:
75 from scipy.interpolate import RegularGridInterpolator
76except ImportError:
77 RegularGridInterpolator = None # type: ignore[assignment,misc]
78
79if TYPE_CHECKING:
80 from astropy.units.typing import UnitLike, QuantityLike
81
82try:
83 import pyhdf.SD as h4
84except ImportError:
85 h4 = None
86
87from psi_io.mesh import (MeshCodeType,
88 _remesh_array,
89 ArrayOrdering, Mesh, MeshLike,
90 )
91from psi_io.models import (ModelType,
92 extract_quantity_from_filepath,
93 extract_sequence_from_filepath,
94 get_model_prop_caller,
95 get_psi_scale_properties,
96 _PROP_GETTER_MAPPING,
97 _PSI_SCALE_PROPS_MAPPING, )
98from psi_io.units import decompose_mas_units
99from psi_io.psi_io import (PathLike,
100 PSI_DATA_ID,
101 SDC_TYPE_CONVERSIONS,
102 _dispatch_by_ext,
103 _except_no_scipy, )
104
119
120
[docs]
121class CacheWarning(UserWarning):
122 """Warning raised when a cache operation is ignored or conflicts with the cache mode.
123
124 Emitted by :meth:`_HdfArray.load` when caching is disabled (``cache=None``),
125 and by :meth:`_HdfArray.clear` when the cache mode is ``'eager'`` and
126 ``clear()`` is called explicitly.
127
128 Examples
129 --------
130 >>> import warnings
131 >>> from psi_io.mhd_io import CacheWarning
132 >>> issubclass(CacheWarning, UserWarning)
133 True
134 """
135
136
137HdfVersionType = Literal[4, 5]
138"""Literal type alias for the two supported HDF file format versions.
139
140``4`` — HDF4, accessed via pyhdf (optional dependency).
141``5`` — HDF5, accessed via h5py.
142"""
143
144
145_HDF_EXT_MAPPING = MappingProxyType({'h4': '.hdf', 'h5': '.h5', })
146"""Mapping from HDF version string to file extension.
147
148Used by :class:`_HdfData.__init__` to validate that the supplied file has an
149extension consistent with the concrete class's format mixin.
150
151``'h5'`` → ``'.h5'`` (HDF5 files, read via h5py)
152``'h4'`` → ``'.hdf'`` (HDF4 files, read via pyhdf)
153"""
154
155
156_CODE_UNIT_ALIASES = {'native', 'code', 'model', 'psi'}
157"""Set of strings that request code-unit output from :meth:`_HdfInterface.read`.
158
159When the ``units`` argument to ``read()`` is one of these strings, the data are
160returned in MAS code units (dimensionless ratios) without any physical conversion.
161"""
162
163
164_REAL_UNIT_ALIASES = {'real', 'phys', 'physical', 'cgs'}
165"""Set of strings that request decomposed CGS output from :meth:`_HdfInterface.read`.
166
167When the ``units`` argument to ``read()`` is one of these strings, the data are
168converted to physical CGS units via :func:`~psi_io.units.decompose_mas_units`.
169"""
170
171_BASE_SLOTS = ('_ref', '_id', '_cache', '_name', '_desc', '_unit', '_scalar', '_mesh', '_order', '_vcache',)
172"""Slot names shared by all :class:`_HdfArray` subclasses."""
173
174_SCALE_SLOTS = _BASE_SLOTS
175"""Slot names for :class:`_HdfScale` subclasses (identical to :data:`_BASE_SLOTS`)."""
176
177_DATA_SLOTS = _BASE_SLOTS + ('_filepath', '_sequence', '_model', '_scales', '_icache')
178"""Slot names for :class:`_HdfData` subclasses; extends :data:`_BASE_SLOTS` with data-reader fields."""
179
180
181METADATA_SCHEMA = dict.fromkeys(['name', 'desc', 'unit', 'scalar', 'mesh', 'order', 'sequence', 'model', 'scales'])
182"""Template dictionary of recognized HDF dataset-level metadata keys.
183
184Keys that appear in an HDF dataset's attribute mapping and also appear here are
185extracted and merged with keyword arguments during :meth:`_HdfData._parse_inputs`.
186The value for each key is always ``None`` in this template; it is used only for
187membership testing.
188
189Keys
190----
191name : str
192 Canonical lower-case quantity identifier (e.g. ``'br'``).
193desc : str
194 Human-readable quantity description.
195unit : str
196 String representation of the code-to-physical unit.
197scalar : bool
198 Whether the quantity is a scalar (``True``) or vector component (``False``).
199mesh : int
200 Integer mesh stagger code.
201order : str
202 Array memory layout (``'F'`` or ``'C'``).
203sequence : int
204 Time-step sequence number.
205model : str
206 PSI model type (``'mas'`` or ``'pot3d'``).
207scales : sequence of str
208 Names of the coordinate scale axes.
209"""
210
211SCALES_SCHEMA = dict.fromkeys(['name', 'desc', 'unit',])
212"""Template dictionary of recognized HDF scale-dataset metadata keys.
213
214Used analogously to :data:`METADATA_SCHEMA` for the one-dimensional coordinate
215scale arrays.
216
217Keys
218----
219name : str
220 Coordinate axis name (``'r'``, ``'t'``, or ``'p'``).
221desc : str
222 Human-readable axis description.
223unit : str
224 String representation of the coordinate unit.
225"""
226
227CacheType = Optional[Literal['lazy', 'eager']]
228"""Type alias for the three valid cache modes.
229
230``'lazy'``
231 Cache the full data array on the first full-array read.
232``'eager'``
233 Cache immediately at construction time via :meth:`_HdfArray.load`.
234``None``
235 Never cache; every read goes to disk.
236"""
237
238
239def _interpolate_dim(arr: QuantityLike,
240 axis: int,
241 value: QuantityLike,
242 scale: QuantityLike,
243 ) -> QuantityLike:
244 """Linearly interpolate *arr* along *axis* to *value*, collapsing that axis to size 1.
245
246 Both *value* and *scale* must carry compatible units. The interpolated
247 result retains all other axes unchanged; the axis dimension is reduced from
248 2 to 1.
249
250 Parameters
251 ----------
252 arr : QuantityLike
253 Data array of shape ``(..., 2, ...)`` where ``2`` is along *axis*.
254 axis : int
255 Index of the axis to interpolate and collapse.
256 value : QuantityLike
257 Target coordinate value. Must have the same unit as *scale*.
258 scale : QuantityLike
259 Two-element coordinate array bounding *value*.
260
261 Returns
262 -------
263 out : QuantityLike
264 Interpolated array with the *axis* dimension reduced to 1.
265
266 Raises
267 ------
268 ValueError
269 If ``arr.shape[axis] != 2`` or ``len(scale) != 2``.
270
271 Examples
272 --------
273 >>> import numpy as np
274 >>> import astropy.units as u
275 >>> arr = np.array([[1.0, 2.0]]) * u.Gauss
276 >>> scale = np.array([0.0, 1.0]) * u.R_sun
277 >>> _interpolate_dim(arr, axis=1, value=0.5 * u.R_sun, scale=scale)
278 <Quantity [[1.5]] G>
279 """
280 if arr.shape[axis] != 2 or len(scale) != 2:
281 raise ValueError("Interpolation is only supported for 2-element arrays and scales.")
282 t = (value - scale[0]) / (scale[1] - scale[0])
283 slc_lo = [slice(None)] * arr.ndim
284 slc_hi = [slice(None)] * arr.ndim
285 slc_lo[axis] = slice(None, -1)
286 slc_hi[axis] = slice(1, None)
287 return (1.0 - t) * arr[tuple(slc_lo)] + t * arr[tuple(slc_hi)]
288
289
290def _slice_array(data: QuantityLike,
291 scales: Sequence[Optional[QuantityLike]],
292 values: Sequence[Optional[QuantityLike]],
293 order: ArrayOrdering = 'F') -> QuantityLike:
294 """Interpolate *data* to physical coordinate values along each non-``None`` axis.
295
296 Iterates over axes in storage order (reversed from physical order when
297 ``order='F'``) and calls :func:`_interpolate_dim` for each axis whose
298 entry in *values* is not ``None``. Axes with ``None`` entries are left
299 unchanged.
300
301 Parameters
302 ----------
303 data : QuantityLike
304 Data array to interpolate.
305 scales : sequence of QuantityLike or None
306 Two-element coordinate windows for each axis, in the same order as
307 *values*. ``None`` entries correspond to axes that are not interpolated.
308 values : sequence of QuantityLike or None
309 Target coordinate values for each axis. ``None`` means skip that axis.
310 order : {'F', 'C'}, optional
311 Array memory layout. ``'F'`` reverses the axis iteration order so that
312 axis 0 corresponds to the last physical axis. Default is ``'F'``.
313
314 Returns
315 -------
316 out : QuantityLike
317 *data* with each non-``None`` axis collapsed to size 1 by interpolation.
318
319 Examples
320 --------
321 >>> import numpy as np
322 >>> import astropy.units as u
323 >>> data = np.ones((2, 3)) * u.Gauss
324 >>> scale = np.array([0.0, 1.0]) * u.R_sun
325 >>> result = _slice_array(data, [scale, None], [0.5 * u.R_sun, None], order='C')
326 >>> result.shape
327 (1, 3)
328 """
329 if order == 'F':
330 values, scales = reversed(values), reversed(scales)
331 for i, (v, s) in enumerate(zip(values, scales)):
332 if v is not None:
333 data = _interpolate_dim(data, i, v, s)
334 return data
335
336
337def _expand_args(*args, ndim: int) -> tuple:
338 """Expand *args* to a length-*ndim* tuple, replacing ``Ellipsis`` and padding with ``None``.
339
340 Parameters
341 ----------
342 *args : object
343 User-supplied dimension arguments. An ``Ellipsis`` is replaced by the
344 appropriate number of ``None`` values so that the total length equals *ndim*.
345 If fewer than *ndim* arguments are provided, trailing ``None``\\ s are appended.
346 ndim : int
347 Target length of the returned tuple.
348
349 Returns
350 -------
351 out : tuple
352 Length-*ndim* tuple of dimension arguments.
353
354 Raises
355 ------
356 ValueError
357 If the number of explicit arguments (excluding ``Ellipsis``) exceeds *ndim*.
358
359 Examples
360 --------
361 >>> _expand_args(0, 1, ndim=3)
362 (0, 1, None)
363 >>> _expand_args(..., 5, ndim=3)
364 (None, None, 5)
365 >>> _expand_args(ndim=2)
366 (None, None)
367 """
368 if Ellipsis in args:
369 n_missing = ndim - (len(args) - 1)
370 idx = args.index(Ellipsis)
371 args = args[:idx] + (None,) * n_missing + args[idx + 1:]
372 if len(args) < ndim:
373 args += (None,) * (ndim - len(args))
374 elif len(args) > ndim:
375 raise ValueError(f"Too many dimension arguments: expected at most {ndim}, got {len(args)}.")
376 return args
377
378
379def _expand_quantity_filter(quantities: Iterable[str]) -> set[str]:
380 """Expand shorthand quantity group names to a flat set of canonical identifiers.
381
382 The single-letter codes ``'b'``, ``'j'``, and ``'v'`` each expand to the
383 three spherical-component variants (e.g. ``'b'`` → ``{'br', 'bt', 'bp'}``).
384 All other strings are passed through unchanged after lowercasing.
385
386 Parameters
387 ----------
388 quantities : iterable of str
389 Quantity identifiers or group codes to expand.
390
391 Returns
392 -------
393 out : set[str]
394 Flat set of lower-case canonical quantity names.
395
396 Examples
397 --------
398 >>> sorted(_expand_quantity_filter(['b', 'rho']))
399 ['bp', 'br', 'bt', 'rho']
400 >>> sorted(_expand_quantity_filter(['vr', 'V']))
401 ['vp', 'vr', 'vt']
402 """
403 out: set[str] = set()
404 for q in quantities:
405 q = q.lower()
406 if q in {'b', 'j', 'v'}:
407 out.update(f"{q}{c}" for c in 'rtp')
408 else:
409 out.add(q)
410 return out
411
412
413def _parse_islice_args(*args,
414 shape: tuple[int, ...],
415 remesh: tuple[bool, ...],):
416 """Validate and normalize index-space slice arguments for each array axis.
417
418 Converts each element of *args* to a :class:`slice` and adjusts the stop
419 index when the axis needs remeshing (to include the extra element required
420 for averaging).
421
422 Parameters
423 ----------
424 *args : None | int | slice | tuple
425 One argument per axis in physical ``(r, t, p)`` order. Passed through
426 :func:`_cast_to_slice`.
427 shape : tuple[int, ...]
428 Array shape in physical order.
429 remesh : tuple[bool, ...]
430 Per-axis remesh flags from :meth:`Mesh.__rshift__`.
431
432 Yields
433 ------
434 out : slice
435 Validated slice for each axis.
436
437 Raises
438 ------
439 ValueError
440 If a slice yields an empty dimension or uses a non-unit step.
441
442 Examples
443 --------
444 >>> list(_parse_islice_args(None, 1, shape=(10, 5), remesh=(False, False)))
445 [slice(None, None, None), slice(1, 2, None)]
446 """
447 for arg, size, rmesh in zip(args, shape, remesh):
448 slice_ = _cast_to_slice(arg)
449 if rmesh and slice_.stop is not None:
450 slice_ = slice(slice_.start, slice_.stop + 1, slice_.step)
451 start, stop, step = slice_.indices(size - bool(rmesh))
452 if stop <= start:
453 raise ValueError(f"Slice argument {arg!r} yields an empty dimension.")
454 if step != 1:
455 raise ValueError(f"Slice argument {arg!r} has a step size of {step}, but only step=1 is supported.")
456 yield slice_
457
458
459def _parse_vslice_args(*args,
460 scales: tuple[int, ...],
461 remesh: tuple[bool, ...]):
462 """Parse value-space slice arguments, returning coordinate values and index slices.
463
464 For each axis argument, determines whether it is an index-space argument
465 (``None`` or ``slice``) or a physical-coordinate argument
466 (:class:`~astropy.units.Quantity`, a bare scalar including :class:`int`, or
467 a 2-element sequence of coordinate bounds). Physical values are converted
468 to the scale unit, ``NaN`` is treated as an open bound, and the surrounding
469 index window is computed via :func:`numpy.searchsorted`.
470
471 .. note::
472 Unlike :meth:`PsiData.read`, an :class:`int` argument here is **not** an
473 array index — it is a physical coordinate value (in the scale's native
474 unit) that triggers interpolation. Only ``None`` and ``slice`` are
475 index-space.
476
477 Parameters
478 ----------
479 *args : None | slice | QuantityLike
480 One argument per axis in physical ``(r, t, p)`` order.
481 scales : tuple
482 Scale reader objects for each axis; used for unit conversion and
483 coordinate lookup.
484 remesh : tuple[bool, ...]
485 Per-axis remesh flags from :meth:`Mesh.__rshift__`.
486
487 Yields
488 ------
489 value : tuple[QuantityLike | None, QuantityLike | None]
490 Target coordinate value(s) for interpolation, or ``(None, None)`` for
491 index-space axes.
492 slice_ : slice
493 Index window into the axis that brackets the target coordinate.
494
495 Raises
496 ------
497 ValueError
498 If a physical-coordinate argument has more than 2 elements, or if it
499 yields an empty index window.
500
501 Examples
502 --------
503 >>> # Index-space arguments pass through as (None, None) / full slice
504 >>> list(_parse_vslice_args(None, scales=[None], remesh=[False]))
505 [((None, None), slice(None, None, None))]
506 """
507 for arg, scale, rmesh in zip(args, scales, remesh):
508 if arg is None or isinstance(arg, slice):
509 yield (None, None), _cast_to_slice(arg)
510 continue
511 arg = u.Quantity(arg, unit=scale.unit, ndmin=1)
512 if arg.size not in {1, 2}:
513 raise ValueError(f"Invalid argument {arg!r}: expected a scalar or 2-element sequence.")
514 nan_mask = np.isnan(arg)
515 if nan_mask[0]:
516 arg[0] = -np.inf
517 if nan_mask[-1]:
518 arg[-1] = np.inf
519 if np.all(np.isinf(arg)):
520 yield (None, None), slice(None)
521 continue
522 offset = int(rmesh)
523 n = scale.size
524 raw = np.clip(np.searchsorted(scale[:], arg), 1 + offset, n - 1 - offset).tolist()
525 start, stop = raw[0] - 1 - offset, raw[-1] + 1 + offset
526 if stop <= start:
527 raise ValueError(f"Slice argument {arg!r} yields an empty dimension.")
528 yield arg, slice(start, stop)
529
530
531def _apply_units(data: u.Quantity,
532 unit: Optional[UnitLike]) -> u.Quantity:
533 """Apply a unit conversion to *data*, returning a :class:`~u.Quantity`.
534
535 Parameters
536 ----------
537 data : Quantity
538 Data in code units.
539 unit : str | Unit | None
540 Requested output unit. ``None`` is a no-op. Special string aliases:
541 ``'native'`` / ``'code'`` / ``'model'`` / ``'psi'`` — return *data*
542 unchanged; ``'real'`` / ``'phys'`` / ``'physical'`` / ``'cgs'`` —
543 decompose to CGS base unit via
544 :func:`~psi_io.units.decompose_mas_units`. Any other value is
545 forwarded to :meth:`~u.Quantity.to`.
546
547 Returns
548 -------
549 out : Quantity
550 *data* in the requested unit.
551
552 Raises
553 ------
554 astropy.units.UnitConversionError
555 If *unit* is not compatible with the unit of *data*.
556
557 Examples
558 --------
559 >>> import astropy.units as u
560 >>> data = 1.0 * u.Gauss
561 >>> _apply_units(data, None) is data
562 True
563 >>> _apply_units(data, 'native').unit
564 Unit("G")
565 """
566 if unit is None:
567 return data
568 ounit = str(unit).lower()
569 if ounit in _CODE_UNIT_ALIASES:
570 return data
571 if ounit in _REAL_UNIT_ALIASES:
572 return decompose_mas_units(data)
573 return data.to(unit)
574
575
576def _cast_to_slice(input: None | int | slice | Sequence) -> slice:
577 """Convert a dimension index argument to a :class:`slice` object.
578
579 Parameters
580 ----------
581 input : None, int, slice, list, or tuple
582 - ``None`` → ``slice(None)`` (full axis).
583 - :class:`int` → ``slice(input, input + 1)`` (single element, axis retained).
584 - :class:`slice` → returned unchanged.
585 - :class:`list` or :class:`tuple` → ``slice(*input)`` (unpacked as
586 ``(start, stop)`` or ``(start, stop, step)``).
587
588 Returns
589 -------
590 out : slice
591
592 Raises
593 ------
594 TypeError
595 If *input* is not one of the recognized types.
596
597 Examples
598 --------
599 >>> _cast_to_slice(None)
600 slice(None, None, None)
601 >>> _cast_to_slice(3)
602 slice(3, 4, None)
603 >>> _cast_to_slice((2, 8))
604 slice(2, 8, None)
605 >>> _cast_to_slice(slice(1, 5, 2))
606 slice(1, 5, 2)
607 """
608 if input is None:
609 return slice(None)
610 elif isinstance(input, int):
611 return slice(input, input + 1) if input >= 0 else slice(input - 1, input)
612 elif isinstance(input, slice):
613 return input
614 elif isinstance(input, Collection):
615 return slice(*input)
616 else:
617 raise TypeError(f"Invalid slice argument: {input!r}. Expected int or 2-element sequence.")
618
619# =============================================================================
620# Abstract interface
621# =============================================================================
622
[docs]
623class _HdfArray(ABC):
624 """Abstract base class for a single HDF dataset with optional caching.
625
626 Provides the common interface for both data arrays (:class:`_HdfData`) and
627 coordinate scale arrays (:class:`_HdfScale`). Concrete subclasses supply
628 the HDF-version-specific implementations of the abstract properties
629 (:attr:`_shape`, :attr:`dtype`, :attr:`size`, :attr:`nbytes`, :attr:`ndim`,
630 :attr:`attrs`) and the :meth:`_dataset` factory.
631
632 Subclasses must not be instantiated directly. Use :func:`PsiData` or the
633 concrete classes :class:`H4Data`, :class:`H5Data`, :class:`H4Scale`, and
634 :class:`H5Scale`.
635
636 Attributes
637 ----------
638 _vcache : np.ndarray | None
639 In-memory copy of the full dataset array, or ``None`` when not cached.
640 _cache : CacheType
641 Active cache mode: ``'lazy'``, ``'eager'``, or ``None``.
642
643 See Also
644 --------
645 _HdfData : Subclass that adds interpolation and scale management.
646 _HdfScale : Subclass for one-dimensional coordinate arrays.
647 PsiData : Public factory function.
648 """
649
650 __slots__ = ()
651
652 _HDFN: ClassVar[HdfVersionType]
653
654 def __init__(self,
655 *args,
656 cache: CacheType = 'lazy',
657 **kwargs):
658 """Initialize the array with metadata and optionally load data into cache.
659
660 Parameters
661 ----------
662 *args : object
663 Passed to :meth:`_parse_inputs` by subclass constructors.
664 cache : CacheType, optional
665 Cache mode. ``'lazy'`` caches on first full read, ``'eager'`` loads
666 immediately, ``None`` disables caching entirely. Default is
667 ``'lazy'``.
668 **kwargs : object
669 Metadata keyword arguments forwarded to :meth:`_parse_inputs`.
670
671 Raises
672 ------
673 ValueError
674 If *cache* is not one of ``'lazy'``, ``'eager'``, or ``None``, or if
675 metadata cannot be resolved from *kwargs*.
676 """
677 self._vcache = None
678 self._cache = cache and cache.lower()
679 if self._cache not in {'lazy', 'eager', None}:
680 raise ValueError(f"Invalid cache method: {cache!r}. "
681 f"Expected 'lazy', 'eager', or None.")
682
683 try:
684 self._set_metadata(**self._parse_inputs(**kwargs))
685 except (TypeError, ValueError) as e:
686 raise ValueError("Missing or incompatible metadata") from e
687
688 if self._cache == 'eager':
689 self.load(recursive=False)
690
691
692 def __str__(self):
693 """Return the quantity name as a string.
694
695 Returns
696 -------
697 out : str
698 The :attr:`name` of the dataset (e.g. ``'br'``).
699
700 Examples
701 --------
702 >>> str(reader) # doctest: +SKIP
703 'br'
704 """
705 return f"{self.name}"
706
707 def __repr__(self):
708 """Return a detailed string representation including key metadata.
709
710 Returns
711 -------
712 out : str
713 String of the form
714 ``ClassName(name=... [...], order=..., shape=..., unit=..., mesh=..., cached=...)``.
715
716 Examples
717 --------
718 >>> repr(reader) # doctest: +SKIP
719 "H5Data(name='br' [...], order='F', shape=(...), unit=..., mesh=..., cached=False)"
720 """
721 return (f"{self.__class__.__name__}("
722 f"name={self.name!r} [{self.desc}], "
723 f"order={self.order!r}, "
724 f"shape={self.shape!r}, "
725 f"unit={self.unit!r}, "
726 f"mesh={self.mesh!r}, "
727 f"cached={self.cached!r})")
728
729 def __getitem__(self, args: str | int | slice | tuple):
730 """Index into the dataset, returning from cache when available.
731
732 When *args* is a string, delegates to :meth:`_dataset` (attribute
733 lookup on the HDF file object). Otherwise applies the index tuple to
734 the cached array if available, or reads directly from the HDF file and
735 caches the result for full-array reads.
736
737 Parameters
738 ----------
739 args : str | int | slice | tuple
740 Index expression. String values select HDF sub-datasets by name.
741
742 Returns
743 -------
744 out : np.ndarray
745 Indexed data.
746
747 Examples
748 --------
749 >>> reader[0] # first element along axis 0 # doctest: +SKIP
750 >>> reader[:] # full array (may populate cache) # doctest: +SKIP
751 """
752 if isinstance(args, str):
753 return self._dataset(args)
754
755 if not isinstance(args, tuple):
756 args = (args,)
757 if self._reverse:
758 args = args[::-1]
759 if self._vcache is not None:
760 return self._vcache[args]
761 else:
762 odata = self.dataset[args]
763 if self._cache and odata.shape == self._shape:
764 self._vcache = odata
765 return odata
766
[docs]
767 def select(self, id_: str) -> Sequence:
768 """Return the HDF dataset or sub-dataset identified by *id_*.
769
770 Parameters
771 ----------
772 id_ : str
773 Dataset key within the open HDF file.
774
775 Returns
776 -------
777 out : Sequence
778 The format-specific dataset object.
779
780 Examples
781 --------
782 >>> reader.select('Data-Set-2') # doctest: +SKIP
783 """
784 return self._dataset(id_)
785
786 @property
787 @abstractmethod
788 def _shape(self) -> tuple[int, ...]:
789 """Raw dataset shape in HDF storage order (not reversed for Fortran arrays)."""
790 ...
791
792 @property
793 @abstractmethod
794 def dtype(self) -> np.dtype:
795 """Element data type of the HDF dataset.
796
797 Returns
798 -------
799 out : np.dtype
800 NumPy dtype (typically ``float32``).
801 """
802 ...
803
804 @property
805 @abstractmethod
806 def size(self) -> int:
807 """Total number of elements in the dataset.
808
809 Returns
810 -------
811 out : int
812 Product of all dimension sizes.
813 """
814 ...
815
816 @property
817 @abstractmethod
818 def nbytes(self) -> int:
819 """Dataset size in bytes.
820
821 Returns
822 -------
823 out : int
824 ``size * dtype.itemsize``.
825 """
826 ...
827
828 @property
829 @abstractmethod
830 def ndim(self) -> int:
831 """Number of spatial dimensions in the dataset.
832
833 Returns
834 -------
835 out : int
836 Always ``3`` for MAS/POT3D field variables; ``1`` for scale arrays.
837 """
838 ...
839
840 @property
841 @abstractmethod
842 def attrs(self) -> dict:
843 """HDF dataset-level attributes as a plain Python dictionary.
844
845 Returns
846 -------
847 out : dict
848 Mapping of attribute name strings to their values.
849 """
850 ...
851
852 @property
853 def name(self) -> str:
854 """Canonical lower-case quantity identifier.
855
856 Returns
857 -------
858 out : str
859 E.g. ``'br'``, ``'vr'``, ``'t'``, ``'r'``.
860 """
861 return self._name
862
863 @property
864 def desc(self) -> str:
865 """Human-readable description of the physical quantity.
866
867 Returns
868 -------
869 out : str
870 E.g. ``'MAS Magnetic Field (Radial Component)'``.
871 """
872 return self._desc
873
874 @desc.setter
875 def desc(self, value: str):
876 """Set the human-readable description."""
877 self._desc = str(value)
878
879 @property
880 def unit(self) -> u.Unit:
881 """Astropy unit for converting from code units to physical units.
882
883 Returns
884 -------
885 out : Unit
886 E.g. :data:`~psi_io.units.MAS_b` for MAS magnetic field.
887 """
888 return self._unit
889
890 @unit.setter
891 def unit(self, value: UnitLike):
892 """Set the physical unit from a unit-like value."""
893 self._unit = u.Unit(str(value))
894
895 @property
896 def mesh(self) -> Mesh:
897 """Yee-grid stagger code for this dataset.
898
899 Returns
900 -------
901 out : Mesh
902 :class:`~psi_io.mesh.Mesh` instance encoding per-axis stagger.
903 """
904 return self._mesh
905
906 @property
907 def shape(self) -> tuple[int, ...]:
908 """Array shape in physical ``(r, t, p)`` order.
909
910 For Fortran-order (``order='F'``) arrays the raw HDF shape is reversed.
911
912 Returns
913 -------
914 out : tuple[int, ...]
915 Dimension sizes in physical coordinate order.
916 """
917 return self._shape[::-1] if self._reverse else self._shape
918
919 @property
920 def order(self) -> ArrayOrdering:
921 """Memory layout of the stored array.
922
923 Returns
924 -------
925 out : str
926 ``'F'`` for Fortran (column-major, PSI default) or ``'C'`` for C
927 (row-major).
928 """
929 return self._order
930
931 @property
932 def data_cached(self) -> bool:
933 """Whether the full data array is currently held in memory.
934
935 Returns
936 -------
937 out : bool
938 ``True`` if :attr:`_vcache` is not ``None``.
939 """
940 return self._vcache is not None
941
942 @property
943 def cached(self) -> bool:
944 """Alias for :attr:`data_cached`.
945
946 Returns
947 -------
948 out : bool
949 ``True`` if the data array is cached.
950 """
951 return self.data_cached
952
953 @property
954 def cache(self) -> str:
955 """Active cache mode.
956
957 Returns
958 -------
959 out : str | None
960 ``'lazy'``, ``'eager'``, or ``None``.
961 """
962 return self._cache
963
964 @cache.setter
965 def cache(self, method: CacheType):
966 """Set the cache mode and trigger load or clear as appropriate.
967
968 Parameters
969 ----------
970 method : CacheType
971 New cache mode. Setting ``'eager'`` calls :meth:`load`; setting
972 ``None`` calls :meth:`clear`.
973
974 Raises
975 ------
976 ValueError
977 If *method* is not ``'lazy'``, ``'eager'``, or ``None``.
978 """
979 self._cache = method and method.lower()
980 if self._cache not in {'lazy', 'eager', None}:
981 raise ValueError(f"Invalid cache method: {method!r}. "
982 f"Expected 'lazy', 'eager', or None.")
983 if self._cache == 'eager':
984 self.load()
985 elif self._cache is None:
986 self.clear()
987
988 @property
989 def _reverse(self) -> bool:
990 """Whether axis order should be reversed when indexing (Fortran-order arrays)."""
991 return self.order == 'F'
992
993 @property
994 def dataset(self):
995 """The primary HDF dataset object for this reader.
996
997 Returns
998 -------
999 out : object
1000 Format-specific dataset handle for the main data array.
1001 """
1002 return self._dataset(self._id)
1003
1004 @abstractmethod
1005 def _dataset(self, id_: str):
1006 """Return the HDF dataset identified by *id_*."""
1007 ...
1008
1009 @abstractmethod
1010 def _parse_inputs(self, **kwargs) -> dict:
1011 """Parse and merge file attributes with keyword overrides into a metadata dict."""
1012 ...
1013
1014 @abstractmethod
1015 def _set_metadata(self, **kwargs) -> None:
1016 """Apply the merged metadata dictionary to instance attributes."""
1017 ...
1018
1040
1041
[docs]
1042 def read(self,
1043 *args: None | int | tuple[int | None, ...] | slice,
1044 unit: Optional[str | UnitLike] = None,
1045 mesh: Optional[MeshLike] = None,
1046 order: Optional[ArrayOrdering] = None,
1047 scales: bool = False) -> u.Quantity | tuple[u.Quantity, ...]:
1048 """Read data by index with optional unit conversion.
1049
1050 .. attention::
1051
1052 When reading/slicing data, the ``*args`` are **always** supplied in physical (*e.g.*
1053 :math:`(r, \\theta, \\phi)`) order. However, unless specified with the ``order``
1054 argument, the sliced data array will be returned in storage order.
1055
1056 .. attention::
1057
1058 The ``scales`` argument is only functional on the main data array reader
1059 (:class:`_HdfData`). It is accepted by scale readers (:class:`_HdfScale`)
1060 for pass-through compatibility but has no effect there.
1061
1062 Parameters
1063 ----------
1064 *args : int | tuple | slice | None
1065 Index-space axis arguments in physical ``(r, t, p)`` order.
1066 unit : UnitLike | None, optional
1067 Output unit. Default is ``None`` (code units).
1068 mesh : MeshLike | None, optional
1069 Target stagger mesh. Default is ``None`` (no remeshing).
1070 order : ArrayOrdering | None, optional
1071 Transpose the output if it differs from storage order.
1072 Default is ``None``.
1073 scales : bool, optional
1074 If ``True`` (default), return coordinate slices alongside data.
1075
1076 Returns
1077 -------
1078 data : Quantity
1079 Sliced data array with the specified mesh staggering, units, and ordering
1080 applied.
1081 *scales : Quantity
1082 Sliced scales (only returned when *scales* is ``True``).
1083
1084 Examples
1085 --------
1086 >>> data, = reader.read(scales=True) # doctest: +SKIP
1087 """
1088 remesh = self.mesh >> mesh
1089 args = _expand_args(*args, ndim=self.ndim)
1090 sargs = tuple(_parse_islice_args(*args, shape=self.shape, remesh=remesh))
1091 odata = _apply_units(self._read(*sargs, remesh=remesh), unit)
1092 if not scales:
1093 return odata
1094 return (odata,)
1095
[docs]
1096 def slice(self, *args, **kwargs) -> u.Quantity | tuple[u.Quantity, ...]:
1097 """Alias for :meth:`read`.
1098
1099 Parameters
1100 ----------
1101 *args : object
1102 Forwarded to :meth:`read`.
1103 **kwargs : object
1104 Forwarded to :meth:`read`.
1105
1106 Returns
1107 -------
1108 out : Quantity | tuple[Quantity, ...]
1109 Same as :meth:`read`.
1110 """
1111 return self.read(*args, **kwargs)
1112
1113 def _read(self, *args, remesh: tuple[bool,...]) -> u.Quantity:
1114 """Read and remesh the dataset slice, applying the physical unit.
1115
1116 Parameters
1117 ----------
1118 *args : slice
1119 Per-axis slice objects in storage order.
1120 remesh : tuple[bool, ...]
1121 Per-axis remesh flags.
1122
1123 Returns
1124 -------
1125 out : Quantity
1126 Sliced and remeshed data multiplied by :attr:`unit`.
1127 """
1128 return _remesh_array(self[args], remesh=remesh, order=self.order) * self.unit
1129
[docs]
1130 def load(self, **kwargs):
1131 """Load the full dataset into the in-memory cache.
1132
1133 Has no effect (emits :exc:`CacheWarning`) when ``cache=None``.
1134
1135 Parameters
1136 ----------
1137 **kwargs : object
1138 Accepted but ignored; present for subclass override compatibility.
1139
1140 Examples
1141 --------
1142 >>> reader.load() # doctest: +SKIP
1143 >>> reader.data_cached # doctest: +SKIP
1144 True
1145 """
1146 if self._cache is None:
1147 warnings.warn(f"{self.__class__.__name__}({self}) has caching disabled; load() has no effect.", CacheWarning, stacklevel=3)
1148 return
1149 self._vcache = self.dataset[:]
1150
[docs]
1151 def clear(self, **kwargs):
1152 """Release the in-memory data cache.
1153
1154 Emits :exc:`CacheWarning` when ``cache='eager'`` to flag an explicit
1155 clear that conflicts with the cache mode.
1156
1157 Parameters
1158 ----------
1159 **kwargs : object
1160 Accepted but ignored; present for subclass override compatibility.
1161
1162 Examples
1163 --------
1164 >>> reader.clear() # doctest: +SKIP
1165 >>> reader.data_cached # doctest: +SKIP
1166 False
1167 """
1168 if self._cache == 'eager':
1169 warnings.warn(f"{self.__class__.__name__}({self}) has eager caching enabled; clear() was called explicitly.", CacheWarning, stacklevel=3)
1170 self._vcache = None
1171
1172
1173class _HdfScale(_HdfArray, ABC):
1174 """Abstract base class for a one-dimensional HDF coordinate scale array.
1175
1176 Wraps a single 1-D coordinate dataset stored alongside a PSI data file
1177 (radial, co-latitude, or longitude scale). Concrete implementations
1178 are :class:`H4Scale` and :class:`H5Scale`.
1179
1180 Attributes
1181 ----------
1182 _ref : _HdfData
1183 Parent data reader that owns this scale.
1184 _id : str | None
1185 Dataset key within the parent's HDF file.
1186
1187 See Also
1188 --------
1189 H4Scale : HDF4 concrete implementation.
1190 H5Scale : HDF5 concrete implementation.
1191 """
1192
1193 def __init__(self,
1194 parent: '_HdfData',
1195 dataset_id: Optional[str],
1196 **kwargs):
1197 """Initialize a scale reader from the parent data reader.
1198
1199 Parameters
1200 ----------
1201 parent : _HdfData
1202 The data reader that owns this coordinate scale.
1203 dataset_id : str | None
1204 HDF dataset key for the scale array; ``None`` uses a default key.
1205 **kwargs : object
1206 Metadata keyword arguments forwarded to :meth:`_HdfArray.__init__`.
1207 """
1208 self._ref = parent
1209 self._id = dataset_id
1210 super().__init__(**kwargs)
1211
1212 def validate_metadata(self) -> None:
1213 """Validate scale-specific metadata, warning on dimensionality or name issues."""
1214 super().validate_metadata()
1215 if 1 != self.ndim != len(self.mesh) != len(self.shape):
1216 warnings.warn(f'Scale {self} has {self.ndim} dimensions; expected 1.', MetaDataWarning, stacklevel=3)
1217 if self.name not in _PSI_SCALE_PROPS_MAPPING:
1218 warnings.warn(f"{self.__class__.__name__}({self}) has an unrecognized scale name {self.name!r}. "
1219 f"Check that the correct name is declared at instantiation or written to the HDF dataset's attribute mapping.", MetaDataWarning, stacklevel=3)
1220 elif self.name[0] == 't' and not self.mesh and not np.isclose(self.read(0)[0], 0 * self.unit, rtol=0.0, atol=1e-12):
1221 warnings.warn(f"{self.__class__.__name__}({self}) has a main-mesh stagger but non-zero values at the inner boundary. "
1222 f"Check that the correct mesh code is declared at instantiation or written to the HDF dataset's attribute mapping.", MetaDataWarning, stacklevel=3)
1223 elif self.name[0] == 'p' and not self.mesh and not np.isclose(self.read(0)[0], 0 * self.unit, rtol=0.0, atol=1e-12):
1224 warnings.warn(f"{self.__class__.__name__}({self}) has a main-mesh stagger but non-zero values at the inner boundary. "
1225 f"Check that the correct mesh code is declared at instantiation or written to the HDF dataset's attribute mapping.", MetaDataWarning, stacklevel=3)
1226
1227 def _parse_inputs(self, **kwargs):
1228 """Merge file attributes with keyword overrides, resolving PSI defaults by name."""
1229 input_attrs = {k: v for k, v in kwargs.items()}
1230 file_attrs = {k: v for k, v in self.attrs.items() if k in SCALES_SCHEMA}
1231 combined_attrs = {**file_attrs, **input_attrs}
1232
1233 if (name := combined_attrs.get('name')) in _PSI_SCALE_PROPS_MAPPING:
1234 native_attrs = get_psi_scale_properties(name)._asdict()
1235 else:
1236 native_attrs = {}
1237
1238 return {**native_attrs, **combined_attrs}
1239
1240 def _set_metadata(self,
1241 *,
1242 name: str,
1243 unit: str = '',
1244 desc: str = '',
1245 validate: bool = False) -> None:
1246 """Apply resolved metadata to instance attributes for a scale array."""
1247 self._name: str = str(name)
1248 self._desc: str = str(desc)
1249 self._unit: u.Unit = u.Unit(str(unit))
1250 self._scalar: bool = True
1251 self._mesh: Mesh = self._ref.mesh[self._ref._scales.index(self._name)]
1252 self._order: str = 'C'
1253 if validate:
1254 self.validate_metadata()
1255
[docs]
1256class _HdfData(_HdfArray, ABC):
1257 """Abstract base class for a PSI MAS or POT3D HDF data reader.
1258
1259 Extends :class:`_HdfArray` with file lifecycle management, coordinate scale
1260 readers, value-space slicing, and spatial interpolation. Concrete
1261 implementations are :class:`H4Data` (HDF4 backend) and :class:`H5Data`
1262 (HDF5 backend).
1263
1264 Instances should be obtained via :func:`PsiData`, not constructed directly.
1265
1266 Attributes
1267 ----------
1268 _filepath : pathlib.Path
1269 Absolute path to the open HDF file.
1270 _icache : RegularGridInterpolator | None
1271 Cached scipy interpolator, or ``None`` if not yet built.
1272
1273 See Also
1274 --------
1275 H4Data : HDF4 concrete implementation.
1276 H5Data : HDF5 concrete implementation.
1277 PsiData : Public factory function.
1278 """
1279
1280 def __init__(self,
1281 ifile: PathLike,
1282 dataset_id: Optional[str] = None,
1283 **kwargs):
1284 """Open an HDF file and initialize the reader with resolved metadata.
1285
1286 Parameters
1287 ----------
1288 ifile : PathLike
1289 Path to the HDF file. Must exist and have the correct extension for
1290 the concrete subclass (``'.h5'`` for :class:`H5Data`, ``'.hdf'`` for
1291 :class:`H4Data`).
1292 dataset_id : str | None, optional
1293 Dataset key within the HDF file. Defaults to the PSI standard
1294 identifier for the given format.
1295 **kwargs : object
1296 Metadata keyword arguments (``model``, ``name``, ``unit``, ``mesh``,
1297 etc.) forwarded to :meth:`_parse_inputs` and :meth:`_set_metadata`.
1298
1299 Raises
1300 ------
1301 FileNotFoundError
1302 If *ifile* does not exist.
1303 ValueError
1304 If the file extension does not match the expected format, or if
1305 metadata cannot be resolved.
1306 """
1307 ifile = Path(ifile)
1308 hdfv = f'h{self._HDFN}'
1309 if not ifile.is_file():
1310 raise FileNotFoundError(f"File '{ifile}' does not exist.")
1311 if ifile.suffix != _HDF_EXT_MAPPING[hdfv]:
1312 raise ValueError(f"File '{ifile}' does not have the correct extension for "
1313 f"{self._HDFN} files (expected '{_HDF_EXT_MAPPING[hdfv]}' extension).")
1314
1315 self._filepath: Path = ifile
1316 self._ref = self.read_file(ifile)
1317 self._id = dataset_id or PSI_DATA_ID[hdfv]
1318 self._icache = None
1319 super().__init__(**kwargs)
1320
1321 def __enter__(self):
1322 """Open (or re-open) the file and return ``self`` for use as a context manager."""
1323 self.open()
1324 return self
1325
1326 def __exit__(self, *args):
1327 """Close the file handle when exiting the context manager."""
1328 self.close()
1329
1330 def __del__(self):
1331 """Close the file handle when the object is garbage-collected."""
1332 self.delete()
1333
[docs]
1334 @classmethod
1335 @abstractmethod
1336 def read_file(cls, ifile: PathLike):
1337 """Open the HDF file at *ifile* and return the format-specific file handle."""
1338 ...
1339
1340 @property
1341 def sequence(self) -> int:
1342 """Time-step sequence number extracted from the filename or file attributes.
1343
1344 Returns
1345 -------
1346 out : int
1347 E.g. ``1001`` for a file named ``br001001.h5``.
1348 """
1349 return self._sequence
1350
1351 @sequence.setter
1352 def sequence(self, value: int):
1353 """Set the sequence number."""
1354 self._sequence = int(value)
1355
1356 @property
1357 def model(self) -> str:
1358 """PSI model type string.
1359
1360 Returns
1361 -------
1362 out : str
1363 ``'mas'``, ``'pot3d'``, or ``'custom'``.
1364 """
1365 return self._model
1366
1367 @property
1368 def scales(self) -> tuple:
1369 """Named tuple of coordinate scale readers ``(r, t, p)``.
1370
1371 Each element is a :class:`_HdfScale` instance that wraps the
1372 corresponding one-dimensional coordinate array.
1373
1374 Returns
1375 -------
1376 out : tuple
1377 Named tuple with fields matching the scale names (``r``, ``t``,
1378 ``p`` by default).
1379 """
1380 return self._scales
1381
1382 @property
1383 def interp_cached(self) -> bool:
1384 """Whether a :class:`~scipy.interpolate.RegularGridInterpolator` is cached.
1385
1386 Returns
1387 -------
1388 out : bool
1389 ``True`` if :attr:`_icache` is not ``None``.
1390 """
1391 return self._icache is not None
1392
[docs]
1393 @abstractmethod
1394 def open(self):
1395 """Re-open the file handle if it was previously closed."""
1396 ...
1397
[docs]
1398 @abstractmethod
1399 def close(self):
1400 """Close the open file handle and set the internal reference to ``None``."""
1401 ...
1402
[docs]
1403 @abstractmethod
1404 def delete(self):
1405 """Release the file handle during garbage collection (called by ``__del__``)."""
1406 ...
1407
1408 @abstractmethod
1409 def _get_dims(self) -> Sequence:
1410 """Return the dimension labels from the open HDF file in physical order."""
1411 ...
1412
1413 @abstractmethod
1414 def _set_scales(self, scales: Sequence) -> type[tuple]:
1415 """Build scale readers from dimension labels and attach them to ``_scales``."""
1416 Scales = namedtuple('Scales', scales)
1417 self._scales = Scales._fields
1418 return Scales
1419
1438
1439 def _parse_inputs(self, **kwargs) -> dict:
1440 """Merge file attributes with keyword overrides, resolving model defaults."""
1441 input_attrs = {k: v for k, v in kwargs.items()}
1442 file_attrs = {k: v for k, v in self.attrs.items() if k in METADATA_SCHEMA}
1443 combined_attrs = {**file_attrs, **input_attrs}
1444
1445 combined_attrs.setdefault('model', 'custom')
1446 if combined_attrs['model'] in _PROP_GETTER_MAPPING:
1447 combined_attrs.setdefault('name', extract_quantity_from_filepath(self._filepath, ''))
1448 combined_attrs.setdefault('sequence', extract_sequence_from_filepath(self._filepath, 0))
1449 prop_getter = get_model_prop_caller(combined_attrs['model'])
1450 native_attrs = prop_getter(combined_attrs['name'])._asdict()
1451 else:
1452 native_attrs = {}
1453
1454 return {**native_attrs, **combined_attrs}
1455
1456 def _set_metadata(self,
1457 *,
1458 model: Optional[ModelType | str],
1459 name: str,
1460 mesh: MeshLike,
1461 scalar: bool,
1462 order: ArrayOrdering,
1463 scales: Sequence,
1464 unit: str = '',
1465 sequence: int = 0,
1466 desc: str = '',
1467 validate: bool = True) -> None:
1468 """Apply resolved metadata to instance attributes for a data reader."""
1469 self._model: str = str(model)
1470 self._name: str = str(name)
1471 self._desc: str = str(desc)
1472 self._unit: u.Unit = u.Unit(str(unit))
1473 self._scalar: bool = bool(scalar)
1474 self._mesh: Mesh = Mesh.parse(mesh, self.ndim)
1475 self._order: str = str(order).upper()
1476 self._sequence: int = int(sequence)
1477 self._set_scales(scales)
1478 if validate:
1479 self.validate_metadata()
1480 for scale in self.scales:
1481 scale.validate_metadata()
1482
[docs]
1483 def read(self,
1484 *args: None | int | tuple[int | None, ...] | slice,
1485 unit: Optional[str | UnitLike] = None,
1486 mesh: Optional[MeshLike] = None,
1487 order: Optional[ArrayOrdering] = None,
1488 scales: bool = True) -> u.Quantity | tuple[u.Quantity, ...]:
1489 """Read data by index with optional unit conversion and coordinate scales.
1490
1491 .. attention::
1492
1493 When reading/slicing data, the ``*args`` are **always** supplied in physical (*e.g.*
1494 :math:`(r, \\theta, \\phi)`) order. However, unless specified with the ``order``
1495 argument, the sliced data array will be returned in storage order.
1496
1497 Parameters
1498 ----------
1499 *args : int | tuple | slice | None
1500 Index-space axis arguments in physical ``(r, t, p)`` order.
1501 unit : UnitLike | None, optional
1502 Output unit. Default is ``None`` (code units).
1503 mesh : MeshLike | None, optional
1504 Target stagger mesh. Default is ``None`` (no remeshing).
1505 order : ArrayOrdering | None, optional
1506 Transpose the output if it differs from storage order.
1507 Default is ``None``.
1508 scales : bool, optional
1509 If ``True`` (default), return coordinate slices alongside data.
1510
1511 Returns
1512 -------
1513 data : Quantity
1514 Sliced data array with the specified mesh staggering, units, and ordering
1515 applied.
1516 *scales : Quantity
1517 Sliced scales (only returned when *scales* is ``True``).
1518
1519 Examples
1520 --------
1521 >>> data, r, t, p = reader.read() # doctest: +SKIP
1522 >>> data_gauss = reader.read(scales=False, unit='Gauss') # doctest: +SKIP
1523 """
1524 remesh = self.mesh >> mesh
1525 args = _expand_args(*args, ndim=self.ndim)
1526 sargs = tuple(_parse_islice_args(*args, shape=self.shape, remesh=remesh))
1527 odata = _apply_units(self._read(*sargs, remesh=remesh), unit=unit)
1528 if order is not None and order.upper() != self.order:
1529 odata = odata.T
1530 if not scales:
1531 return odata
1532 oscales = (scale._read(sarg, remesh=rmesh) for scale, sarg, rmesh in zip(self.scales, sargs, remesh))
1533 return odata, *oscales
1534
[docs]
1535 def interp(self,
1536 data,
1537 unit: Optional[str | UnitLike] = None,
1538 **kwargs
1539 ) -> u.Quantity:
1540 """Interpolate the dataset at arbitrary spatial positions.
1541
1542 Builds or reuses a :class:`~scipy.interpolate.RegularGridInterpolator`
1543 and evaluates it at the positions given by *data*. When caching is
1544 disabled (``cache=None``), a minimal bounding-box slice is read on each
1545 call. When caching is enabled, the interpolator is cached and reused for
1546 subsequent calls that fall within the same grid extent.
1547
1548 Parameters
1549 ----------
1550 data : ArrayLike | Table
1551 Positions to interpolate, as a :class:`~astropy.table.Table`-like :math:`N \\times S` array,
1552 where :math:`\\lvert N \\rvert` is the number of positions, and :math:`\\lvert S \\rvert`
1553 is the number of scales. When columns do not possess a unit, they are cast to the
1554 corresponding scale's units.
1555 unit : UnitLike | None, optional
1556 Output unit. Default is ``None`` (code units).
1557 **kwargs : object
1558 Forwarded to :class:`~scipy.interpolate.RegularGridInterpolator`.
1559 Notable keywords: ``bounds_error`` (default ``True``),
1560 ``fill_value`` (default ``None``).
1561
1562 Returns
1563 -------
1564 out : Quantity
1565 Interpolated values of shape ``(N,)``.
1566
1567 Raises
1568 ------
1569 ImportError
1570 If scipy is not installed.
1571
1572 Examples
1573 --------
1574 >>> import numpy as np
1575 >>> import astropy.units as u
1576 >>> positions = np.column_stack([[1.5, 2.0], [1.57, 1.57], [0.1, 0.2]])
1577 >>> result = reader.interp(positions) # doctest: +SKIP
1578 """
1579 _except_no_scipy()
1580 positions = QTable(data, names=self.scales._fields, units=[scale.unit for scale in self.scales])
1581 positions = structured_to_unstructured(positions.as_array())
1582
1583 bounds_error = kwargs.get('bounds_error', True)
1584 vslice_args = [(np.min(positions[:, i]), np.max(positions[:, i]))
1585 for i in range(positions.shape[-1])]
1586
1587 if self._cache is None:
1588 data, *scales = self.vslice(*vslice_args, bounds_error=bounds_error, order='C')
1589 return _apply_units(
1590 RegularGridInterpolator(scales, data, **kwargs)(positions) << self.unit,
1591 unit=unit,
1592 )
1593
1594 needs_build = (
1595 self._icache is None
1596 or any(lo < g[0] or hi > g[-1]
1597 for g, (lo, hi) in zip(self._icache.grid, vslice_args))
1598 )
1599
1600 if needs_build:
1601 if self.data_cached:
1602 arr = self._vcache[:].T if self._reverse else self._vcache[:]
1603 self._icache = RegularGridInterpolator(
1604 [scale[:] for scale in self.scales], arr, **kwargs
1605 )
1606 else:
1607 data, *scales = self.vslice(*vslice_args, bounds_error=bounds_error, order='C')
1608 self._icache = RegularGridInterpolator(scales, data, **kwargs)
1609
1610 return _apply_units(self._icache(positions) << self.unit, unit=unit)
1611
1612
[docs]
1613 def vslice(self,
1614 *args,
1615 unit: Optional[str | UnitLike] = None,
1616 mesh: Optional[MeshCodeType] = None,
1617 order: Optional[ArrayOrdering] = None,
1618 scales: bool = True,
1619 bounds_error: bool = True,
1620 ) -> u.Quantity | tuple[u.Quantity, ...]:
1621 """Read data by physical coordinate value with linear interpolation.
1622
1623 Extends :meth:`read` to accept physical coordinate values as positional
1624 arguments. A scalar, :class:`int`, or :class:`~astropy.units.Quantity`
1625 argument for an axis locates the two nearest grid points and linearly
1626 interpolates to the target value. Only ``None`` and ``slice`` arguments
1627 are index-space and handled identically to :meth:`read`.
1628
1629 .. attention::
1630
1631 When reading/slicing data, the ``*args`` are **always** supplied in physical (*e.g.*
1632 :math:`(r, \\theta, \\phi)`) order. However, unless specified with the ``order``
1633 argument, the sliced data array will be returned in storage order.
1634
1635 .. note::
1636 Unlike :meth:`read`, an :class:`int` argument is treated as a physical
1637 coordinate **value** (in the axis's native unit), not an array index.
1638 Use a ``slice`` to select by index.
1639
1640 Parameters
1641 ----------
1642 *args : QuantityLike | slice | None
1643 One argument per axis in physical ``(r, t, p)`` order. A
1644 :class:`~astropy.units.Quantity`, bare scalar, or :class:`int`
1645 triggers interpolation to that coordinate value; ``None`` and
1646 ``slice`` are index-space and do not interpolate.
1647 unit : UnitLike | None, optional
1648 Output unit. Default is ``None`` (code units).
1649 mesh : MeshCodeType | None, optional
1650 Target stagger mesh. Default is ``None``.
1651 order : ArrayOrdering | None, optional
1652 Transpose output if it differs from storage order. Default is ``None``.
1653 scales : bool, optional
1654 If ``True`` (default), return coordinate slices alongside the data.
1655 bounds_error : bool, optional
1656 If ``True`` (default), raise :exc:`ValueError` when a physical value
1657 is outside the coordinate range.
1658
1659 Returns
1660 -------
1661 data : Quantity
1662 Interpolated or sliced data array.
1663 *scales : Quantity
1664 Sliced scales (only returned when ``scales`` is ``True``).
1665
1666 Raises
1667 ------
1668 ValueError
1669 If *bounds_error* is ``True`` and a physical value falls outside the
1670 coordinate range.
1671
1672 Examples
1673 --------
1674 >>> # Extract the r = 2.5 solar radii surface
1675 >>> data, r, t, p = reader.vslice(2.5 * u.R_sun) # doctest: +SKIP
1676 """
1677 remesh = self.mesh >> mesh
1678 args = _expand_args(*args, ndim=self.ndim)
1679 varg_pairs = _parse_vslice_args(*args, scales=self.scales, remesh=remesh)
1680 slice_values, slice_args = map(list, zip(*varg_pairs))
1681 slice_mask = [len(sv) == 1 for sv in slice_values]
1682
1683 remeshed_scales = [scale._read(sarg, remesh=rmesh)
1684 for scale, sarg, rmesh in zip(self.scales, slice_args, remesh)]
1685 for i, (svalue, rmesh) in enumerate(zip(slice_values, remesh)):
1686 if rmesh:
1687 if svalue[0] is not None and not np.isinf(svalue[0]) and svalue[0] > remeshed_scales[i][1]:
1688 slice_args[i] = slice(slice_args[i].start + 1, slice_args[i].stop, slice_args[i].step)
1689 remeshed_scales[i] = remeshed_scales[i][1:]
1690 if svalue[-1] is not None and not np.isinf(svalue[-1]) and svalue[-1] < remeshed_scales[i][-2]:
1691 slice_args[i] = slice(slice_args[i].start, slice_args[i].stop - 1, slice_args[i].step)
1692 remeshed_scales[i] = remeshed_scales[i][:-1]
1693 if bounds_error:
1694 if svalue[0] is not None and not np.isinf(svalue[0]) and svalue[0] < remeshed_scales[i][0]:
1695 raise ValueError(f"Value {svalue[0]} is below the interpolation range {remeshed_scales[i][0]}.")
1696 if svalue[-1] is not None and not np.isinf(svalue[-1]) and svalue[-1] > remeshed_scales[i][-1]:
1697 raise ValueError(f"Value {svalue[-1]} is above the interpolation range {remeshed_scales[i][-1]}.")
1698
1699 pre_slice_data = _apply_units(self._read(*slice_args, remesh=remesh), unit)
1700 if all(not sm for sm in slice_mask):
1701 if order is not None and order.upper() != self.order:
1702 pre_slice_data = pre_slice_data.T
1703 if not scales:
1704 return pre_slice_data
1705 return pre_slice_data, *remeshed_scales
1706
1707 pre_slice_scales = [sscale if sm else None for sscale, sm in zip(remeshed_scales, slice_mask)]
1708 pre_slice_values = [sv if sm else None for sv, sm in zip(slice_values, slice_mask)]
1709
1710 sliced_data = _slice_array(pre_slice_data, pre_slice_scales, pre_slice_values, self.order)
1711 if order is not None and order.upper() != self.order:
1712 sliced_data = sliced_data.T
1713 if not scales:
1714 return sliced_data
1715 sliced_scales = (psvalue if psvalue is not None else sscale for psvalue, sscale in zip(pre_slice_values, remeshed_scales))
1716 return sliced_data, *sliced_scales
1717
[docs]
1718 def load(self, interp: bool = False, recursive: bool = True):
1719 """Load the data array and optionally build the interpolator into memory.
1720
1721 Parameters
1722 ----------
1723 interp : bool, optional
1724 If ``True``, also build and cache the
1725 :class:`~scipy.interpolate.RegularGridInterpolator` after loading
1726 the data. Requires scipy. Default is ``False``.
1727 recursive : bool, optional
1728 If ``True`` (default), also call :meth:`load` on each coordinate
1729 scale reader.
1730
1731 Examples
1732 --------
1733 >>> reader.load() # doctest: +SKIP
1734 >>> reader.data_cached # doctest: +SKIP
1735 True
1736 >>> reader.load(interp=True) # doctest: +SKIP
1737 >>> reader.interp_cached # doctest: +SKIP
1738 True
1739 """
1740 if self._cache is None:
1741 warnings.warn(f"{self.__class__.__name__}({self}) has caching disabled; load() has no effect.", CacheWarning, stacklevel=3)
1742 return
1743 super().load()
1744 if recursive:
1745 for scale in self.scales:
1746 scale.load()
1747 if interp:
1748 _except_no_scipy()
1749 arr = self._vcache[:].T if self._reverse else self._vcache[:]
1750 self._icache = RegularGridInterpolator(
1751 [scale[:] for scale in self.scales], arr
1752 )
1753
[docs]
1754 def clear(self, data: bool = True, interp: bool = True, recursive: bool = True):
1755 """Release cached data and/or the cached interpolator.
1756
1757 Parameters
1758 ----------
1759 data : bool, optional
1760 If ``True`` (default), release the in-memory data array cache.
1761 interp : bool, optional
1762 If ``True`` (default), release the cached interpolator.
1763 recursive : bool, optional
1764 If ``True`` (default) and *data* is ``True``, also call
1765 :meth:`clear` on each coordinate scale reader.
1766
1767 Examples
1768 --------
1769 >>> reader.clear() # doctest: +SKIP
1770 >>> reader.data_cached, reader.interp_cached # doctest: +SKIP
1771 (False, False)
1772 """
1773 if self._cache == 'eager':
1774 warnings.warn(f"{self.__class__.__name__}({self}) has eager caching enabled; clear() was called explicitly.", CacheWarning, stacklevel=3)
1775 if data:
1776 super().clear()
1777 if recursive:
1778 for scale in self.scales:
1779 scale.clear()
1780 if interp:
1781 self._icache = None
1782
1783
1784class _H5ArrayMixin:
1785 """Mixin that provides HDF5 (h5py) property implementations for :class:`_HdfArray`.
1786
1787 Sets ``_HDFN = 5`` and implements :attr:`_shape`, :attr:`dtype`, :attr:`size`,
1788 :attr:`nbytes`, :attr:`ndim`, :attr:`attrs`, and :meth:`_dataset` using the
1789 :class:`h5py.Dataset` interface.
1790
1791 See Also
1792 --------
1793 _H4ArrayMixin : Analogous mixin for HDF4 files.
1794 """
1795
1796 __slots__ = ()
1797 _HDFN = 5
1798
1799 @property
1800 def _shape(self) -> tuple[int, ...]:
1801 """Raw dataset shape from the h5py Dataset object."""
1802 return self.dataset.shape
1803
1804 @property
1805 def dtype(self) -> np.dtype:
1806 """NumPy dtype from the h5py Dataset object."""
1807 return self.dataset.dtype
1808
1809 @property
1810 def size(self) -> int:
1811 """Total element count from the h5py Dataset object."""
1812 return self.dataset.size
1813
1814 @property
1815 def nbytes(self) -> int:
1816 """Byte size from the h5py Dataset object."""
1817 return self.dataset.nbytes
1818
1819 @property
1820 def ndim(self) -> int:
1821 """Number of dimensions from the h5py Dataset object."""
1822 return self.dataset.ndim
1823
1824 @property
1825 def attrs(self) -> dict:
1826 """HDF5 dataset attributes as a plain dict."""
1827 return dict(self.dataset.attrs)
1828
1829 def _dataset(self, id_: str):
1830 """Return the h5py Dataset at key *id_* from the open file."""
1831 return self._ref[id_]
1832
1833
1834class _H4ArrayMixin:
1835 """Mixin that provides HDF4 (pyhdf) property implementations for :class:`_HdfArray`.
1836
1837 Sets ``_HDFN = 4`` and implements :attr:`_shape`, :attr:`dtype`, :attr:`size`,
1838 :attr:`nbytes`, :attr:`ndim`, :attr:`attrs`, and :meth:`_dataset` using the
1839 ``pyhdf.SD`` interface.
1840
1841 See Also
1842 --------
1843 _H5ArrayMixin : Analogous mixin for HDF5 files.
1844 """
1845
1846 __slots__ = ()
1847 _HDFN = 4
1848
1849 @property
1850 def _shape(self) -> tuple[int, ...]:
1851 """Raw dataset shape from pyhdf SDS ``info()``."""
1852 shape_ = self.dataset.info()[2]
1853 return (shape_,) if not isinstance(shape_, Iterable) else tuple(shape_)
1854
1855 @property
1856 def dtype(self) -> np.dtype:
1857 """NumPy dtype mapped from the pyhdf SDC type code."""
1858 return SDC_TYPE_CONVERSIONS[self.dataset.info()[3]]
1859
1860 @property
1861 def size(self) -> int:
1862 """Total element count (product of all dimension sizes)."""
1863 return int(np.prod(self.shape))
1864
1865 @property
1866 def nbytes(self) -> int:
1867 """Dataset size in bytes (``size * dtype.itemsize``)."""
1868 return self.size * self.dtype.itemsize
1869
1870 @property
1871 def ndim(self) -> int:
1872 """Number of dimensions from pyhdf SDS ``info()``."""
1873 return self.dataset.info()[1]
1874
1875 @property
1876 def attrs(self) -> dict:
1877 """HDF4 dataset attributes as a plain dict."""
1878 return self.dataset.attributes()
1879
1880 def _dataset(self, id_: str):
1881 """Return the pyhdf SDS object at key *id_* from the open SD file."""
1882 return self._ref.select(id_)
1883
1884
1885class H4Scale(_H4ArrayMixin, _HdfScale):
1886 """HDF4 coordinate scale reader.
1887
1888 Combines :class:`_H4ArrayMixin` (pyhdf property implementations) with
1889 :class:`_HdfScale` (PSI scale metadata and validation).
1890
1891 See Also
1892 --------
1893 H5Scale : HDF5 equivalent.
1894 H4Data : The data reader that owns instances of this class.
1895 """
1896
1897
1898class H5Scale(_H5ArrayMixin, _HdfScale):
1899 """HDF5 coordinate scale reader.
1900
1901 Combines :class:`_H5ArrayMixin` (h5py property implementations) with
1902 :class:`_HdfScale` (PSI scale metadata and validation).
1903
1904 See Also
1905 --------
1906 H4Scale : HDF4 equivalent.
1907 H5Data : The data reader that owns instances of this class.
1908 """
1909
1910
1911class H4Data(_H4ArrayMixin, _HdfData):
1912 """HDF4 PSI data reader.
1913
1914 Combines :class:`_H4ArrayMixin` (pyhdf property implementations) with
1915 :class:`_HdfData` (PSI data metadata, slicing, and interpolation). Uses
1916 ``pyhdf.SD`` to open ``.hdf`` files.
1917
1918 Instances are normally obtained via :func:`PsiData`.
1919
1920 See Also
1921 --------
1922 H5Data : HDF5 equivalent.
1923 PsiData : Public factory function.
1924 """
1925 @classmethod
1926 def read_file(cls, ifile: PathLike):
1927 """Open an HDF4 file for reading and return the pyhdf ``SD`` object."""
1928 return h4.SD(str(ifile), h4.SDC.READ)
1929
1930 def open(self):
1931 """Re-open the HDF4 file if it was previously closed. Returns ``self``."""
1932 if not self._ref:
1933 self._ref = self.read_file(self._filepath)
1934 return self
1935
1936 def close(self):
1937 """Close the HDF4 file handle via ``end()``."""
1938 if self._ref is not None:
1939 self._ref.end()
1940 self._ref = None
1941
1942 def delete(self):
1943 """Close the HDF4 file handle during garbage collection."""
1944 _ref = getattr(self, '_ref', None)
1945 if _ref is not None:
1946 _ref.end()
1947 self._ref = None
1948
1949 def _get_dims(self) -> Sequence:
1950 sds = self.dataset
1951 dims = tuple(sds.dimensions(full=1).items())
1952 return dims[::-1] if self._reverse else dims
1953
1954 def _set_scales(self,
1955 scales: Sequence,) -> None:
1956 Scales = super()._set_scales(scales)
1957 dims = self._get_dims()
1958 self._scales: Scales = Scales(*(H4Scale(self, dim_label, cache=self._cache, name=scale)
1959 for (dim_label, dim_proxy), scale in zip(dims, Scales._fields)))
1960
1961
1962class H5Data(_H5ArrayMixin, _HdfData):
1963 """HDF5 PSI data reader.
1964
1965 Combines :class:`_H5ArrayMixin` (h5py property implementations) with
1966 :class:`_HdfData` (PSI data metadata, slicing, and interpolation). Uses
1967 ``h5py.File`` to open ``.h5`` files.
1968
1969 Instances are normally obtained via :func:`PsiData`.
1970
1971 See Also
1972 --------
1973 H4Data : HDF4 equivalent.
1974 PsiData : Public factory function.
1975 """
1976
1977 @classmethod
1978 def read_file(cls, ifile: PathLike):
1979 """Open an HDF5 file for reading and return the :class:`h5py.File` handle."""
1980 return h5.File(ifile, 'r')
1981
1982 def open(self):
1983 """Re-open the HDF5 file if it was previously closed. Returns ``self``."""
1984 if not self._ref:
1985 self._ref = self.read_file(self._filepath)
1986 return self
1987
1988 def close(self):
1989 """Close the HDF5 file handle. Returns ``self``."""
1990 if self._ref is not None:
1991 self._ref.close()
1992 self._ref = None
1993
1994 def delete(self):
1995 """Close the file handle during garbage collection (called by ``__del__``)."""
1996 _ref = getattr(self, '_ref', None)
1997 if _ref is not None:
1998 _ref.close()
1999 self._ref = None
2000
2001 def _get_dims(self) -> Sequence:
2002 return self.dataset.dims
2003
2004 def _set_scales(self,
2005 scales: Sequence,) -> None:
2006 Scales = super()._set_scales(scales)
2007 dims = self._get_dims()
2008 self._scales: Scales = Scales(*(H5Scale(self, dim.label, cache=self._cache, name=scale) for
2009 dim, scale in zip(dims, Scales._fields)))
2010
2011
[docs]
2012def PsiData(ifile: PathLike, /,
2013 *args,
2014 **kwargs):
2015 """Open a PSI MAS or POT3D HDF file and return the appropriate data reader.
2016
2017 Inspects the file extension and *model* argument, selects the correct
2018 concrete reader (HDF5 or HDF4 backend), and returns it. No data are read
2019 from disk at construction time — metadata is resolved from the filename and
2020 HDF file attributes, and data transfer happens only inside :meth:`read` or
2021 :meth:`vslice`. Full-array reads are cached automatically; partial reads
2022 are not.
2023
2024 The returned object is an :class:`_HdfData` instance; see :class:`_HdfData`
2025 and the inherited :class:`_HdfArray` interface for the full set of metadata
2026 properties (``name``, ``desc``, ``unit``, ``mesh``, ``scales``, ``shape``,
2027 ``dtype`` …) and reader methods. In brief, use :meth:`~_HdfData.read` to load
2028 a slice by index and :meth:`~_HdfData.vslice` to slice by physical coordinate
2029 value with linear interpolation; both return :class:`~astropy.units.Quantity`
2030 data in physical ``(r, θ, φ)`` order, and the object supports the
2031 context-manager protocol.
2032
2033 By default the reader is **lazy** (``cache='lazy'``): array data is
2034 transferred from disk only on access, and a full-array read is then cached on
2035 the reader. Pass ``cache='eager'`` to load the data immediately at
2036 construction, or ``cache=None`` to disable caching entirely.
2037
2038 .. warning:: **POT3D unit convention**
2039
2040 POT3D applies no normalization to its output. The stored values are in
2041 whatever physical unit the input photospheric magnetogram used — most
2042 commonly Gauss, but this is not encoded in the file. The default
2043 ``unit`` for POT3D is ``dimensionless_unscaled`` (scale factor 1), so
2044 ``read(unit='physical')`` will not perform a meaningful conversion
2045 unless the correct unit is supplied at construction:
2046
2047 .. code-block:: python
2048
2049 reader = PsiData('br001.h5', model='pot3d', unit='Gauss')
2050 data, r, t, p = reader.read()
2051
2052 .. rubric:: Metadata resolution
2053
2054 Every metadata field is drawn from up to three sources, in *decreasing*
2055 priority:
2056
2057 1. **Keyword arguments** passed to this function, named after the
2058 :class:`~psi_io.models.ModelProps` fields (``name``, ``desc``, ``unit``,
2059 ``scalar``, ``mesh``, ``order``, ``sequence``, ``scales``).
2060 2. The **HDF dataset attribute dictionary** (``reader.attrs``), for any of
2061 those same field names stored on the file.
2062 3. The **model-mapping defaults** in :mod:`psi_io.models`, consulted only
2063 when *model* names a recognized PSI model.
2064
2065 A value supplied at a higher level overrides the levels beneath it: an
2066 explicit keyword argument always wins over a file attribute, which in turn
2067 overrides the model default. This is how the keyword arguments below can
2068 *correct* or *complete* metadata that is missing, wrong, or absent from the
2069 file.
2070
2071 When *model* is ``'mas'`` or ``'pot3d'``, the quantity ``name`` and
2072 ``sequence`` additionally fall back to the values parsed from the filename
2073 stem (e.g. ``br001001`` → ``name='br'``, ``sequence=1001``), and the
2074 resolved ``name`` selects the :class:`~psi_io.models.ModelProps` entry that
2075 provides the default ``unit``, ``mesh``, ``scalar``, ``order``, ``scales``,
2076 and ``desc``.
2077
2078 When *model* is the default ``'custom'``, **no defaults are inferred** —
2079 every field must be given as a keyword argument or be present in the file
2080 attribute dictionary. ``name``, ``mesh``, ``scalar``, ``order``, and
2081 ``scales`` are required; ``unit`` (defaults to dimensionless), ``sequence``
2082 (defaults to ``0``), and ``desc`` (defaults to ``''``) are optional. If a
2083 required field cannot be resolved, :exc:`ValueError` (*"Missing or
2084 incompatible metadata"*) is raised.
2085
2086 Parameters
2087 ----------
2088 ifile : PathLike
2089 Path to the HDF4 (``.hdf``) or HDF5 (``.h5``) file.
2090 model : ModelType, optional
2091 PSI model type. Defaults to ``'custom'``. When ``'mas'`` or ``'pot3d'``
2092 is given, the reader resolves the quantity name, unit, mesh stagger, and
2093 other metadata from the corresponding mapping in :mod:`psi_io.models`.
2094 With the default ``'custom'``, no metadata is inferred and the required
2095 fields (``name``, ``mesh``, ``scalar``, ``order``, ``scales``) must be
2096 supplied as keyword arguments.
2097 dataset_id : str, optional
2098 Dataset name within the HDF file. Defaults to the PSI standard
2099 identifier for the given format.
2100 name : str, optional
2101 Override the quantity name inferred from the filename or file attributes.
2102 sequence : int, optional
2103 Override the time-step sequence number.
2104 unit : UnitLike, optional
2105 Override the code-to-physical unit from the quantity's
2106 :class:`~psi_io.models.ModelProps` entry. Accepts any string parseable by
2107 :class:`~astropy.units.Unit` or a :class:`~astropy.units.Unit` instance.
2108 mesh : MeshCodeType, optional
2109 Override the mesh stagger from the quantity's
2110 :class:`~psi_io.models.ModelProps` entry. Required when *model* is
2111 ``'custom'`` and no ``mesh`` attribute is stored on the file.
2112 scalar : bool, optional
2113 Whether the quantity is a scalar (rather than a component of a vector)
2114 field. Required when *model* is ``'custom'``.
2115 order : ArrayOrdering, optional
2116 Memory layout of the stored array — ``'F'`` (Fortran / column-major, the
2117 PSI default) or ``'C'`` (row-major). Determines whether :attr:`shape`
2118 and the slicing axes are reversed relative to the on-disk layout.
2119 Required when *model* is ``'custom'``.
2120 scales : Sequence, optional
2121 Names of the coordinate axes, in physical order (e.g.
2122 ``('r', 't', 'p')``). The argument does three things:
2123
2124 - **Names** the axes — the strings become the fields of the
2125 ``reader.scales`` named tuple and the ``name`` of each coordinate
2126 scale reader.
2127 - **Orders** them — the names are zipped positionally with the file's
2128 stored dimension arrays, so the *i*-th name labels the *i*-th
2129 dimension; the ordering must therefore match the physical axis order.
2130 - **Fixes the dimensionality** — ``len(scales)`` sets the expected
2131 dataset rank; :meth:`~_HdfData.validate_metadata` emits a
2132 :exc:`MetaDataWarning` if it disagrees with ``ndim`` (or with the
2133 length of *mesh* / :attr:`shape`), or if any named axis does not match
2134 the size of its corresponding data dimension.
2135
2136 Required when *model* is ``'custom'``.
2137 desc : str, optional
2138 Override the human-readable description. Optional; defaults to ``''``.
2139 cache : CacheType, optional
2140 Cache mode. ``'lazy'`` (default) caches the array on the first full
2141 read, ``'eager'`` loads it immediately, and ``None`` disables caching.
2142
2143 Returns
2144 -------
2145 out : H5Data | H4Data
2146 Open reader implementing the full :class:`_HdfData` API. Concrete type
2147 depends on the file extension.
2148
2149 Raises
2150 ------
2151 ValueError
2152 If the file extension / model combination is unsupported or required
2153 metadata cannot be resolved.
2154 FileNotFoundError
2155 If *ifile* does not exist.
2156
2157 See Also
2158 --------
2159 astropy.units.Unit : Unit constructor — accepts strings, compound
2160 expressions, and :class:`~astropy.units.Unit` instances.
2161 astropy.units.Quantity.to : Unit conversion used internally when
2162 a ``unit`` string is supplied to :meth:`read`.
2163 _HdfData : Base data reader interface.
2164 _HdfArray : Base data array interface.
2165
2166 Examples
2167 --------
2168 Read a MAS radial field — full array with coordinate scales, then convert:
2169
2170 >>> from psi_io.mhd_io import PsiData # doctest: +SKIP
2171 >>> reader = PsiData('br001001.h5') # doctest: +SKIP
2172 >>> data, r, t, p = reader.read() # code units (MAS_b) # doctest: +SKIP
2173 >>> data, r, t, p = reader.read(unit='Gauss') # convert to Gauss # doctest: +SKIP
2174
2175 Use as a context manager:
2176
2177 >>> with PsiData('vr001001.h5') as reader: # doctest: +SKIP
2178 ... data, r, t, p = reader.read(unit='km/s')
2179
2180 Inspect metadata without loading data:
2181
2182 >>> reader = PsiData('rho001001.h5') # doctest: +SKIP
2183 >>> reader.name # 'rho' # doctest: +SKIP
2184 >>> reader.unit # MAS_n # doctest: +SKIP
2185 >>> reader.mesh # Mesh(HALF, HALF, HALF) # doctest: +SKIP
2186 >>> reader.data_cached # False # doctest: +SKIP
2187 """
2188 return _dispatch_by_ext(ifile, H4Data, H5Data, *args, **kwargs)