Difference between revisions of "A new I/O file format"
(→Ideas) |
|||
Line 39: | Line 39: | ||
Names should be illustrative only and don't matter; only the connection between these concepts are relevant. | Names should be illustrative only and don't matter; only the connection between these concepts are relevant. | ||
+ | |||
+ | == Prototype, written in a C++ style == | ||
+ | |||
+ | <pre> | ||
+ | // Note: Unless noted otherwise, all pointers are non-null | ||
+ | |||
+ | // When translating to HDF5: | ||
+ | // - structs become objects, usually groups | ||
+ | // - simple struct fields (int, string) become attributes of the group | ||
+ | // - pointers become links inside the group | ||
+ | // - sets containing non-pointers become objects inside the group | ||
+ | // - sets containing pointers become links inside a subgroup of the group | ||
+ | // - vectors of simple types (int, string) become attributes | ||
+ | // - other vectors become objects inside a subgroup the group, sorted | ||
+ | // alphabetically | ||
+ | |||
+ | // Tensor types | ||
+ | |||
+ | struct TensorType { | ||
+ | int dimension; | ||
+ | int rank; | ||
+ | set<TensorComponent> storedcomponents; | ||
+ | bool invariant() const { | ||
+ | return dimension >= 0 && rank >= 0 && | ||
+ | storedcomponent.size() < pow(tensortype.dimension, tensortype.rank); | ||
+ | } | ||
+ | }; | ||
+ | set<TensorType> tensortypes; | ||
+ | |||
+ | struct TensorComponent { | ||
+ | TensorType &tensortype; | ||
+ | // We use objects to denote most concepts, but we make an exception | ||
+ | // for tensor component indices and tangent space basis vectors, | ||
+ | // which we number consecutively starting from zero. This simplifies | ||
+ | // the representation, and it introduces a canonical order (e.g. x, | ||
+ | // y, z) among the tangent space directions that people are | ||
+ | // expecting. | ||
+ | int storedcomponent; | ||
+ | vector<int> indexvalues; | ||
+ | bool invariant() const { | ||
+ | bool inv = storedcomponent >= 0 && | ||
+ | storedcomponent < tensortype.storedcomponents.size() && | ||
+ | indexvalues.size() == tensortype->rank; | ||
+ | for (int iv : indexvalues) | ||
+ | inv &= iv >= 0 && iv < tensortype->dimension; | ||
+ | return inv; | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | TensorType Scalar3D{"Scalar3D", 3, 0, {TensorComponent{Scalar3D, {}}}}; | ||
+ | |||
+ | TensorType Vector3D{"Vector3D", | ||
+ | 3, | ||
+ | 1, | ||
+ | {TensorComponent{Vector3D, {0}}, | ||
+ | TensorComponent{Vector3D, {1}}, | ||
+ | TensorComponent{Vector3D, {2}}}}; | ||
+ | |||
+ | TensorType SymmetricTensor3D{"SymmetricTensor3D", | ||
+ | 3, | ||
+ | 2, | ||
+ | {TensorComponent{SymmetricTensor3D, {0, 0}}, | ||
+ | TensorComponent{SymmetricTensor3D, {0, 1}}, | ||
+ | TensorComponent{SymmetricTensor3D, {0, 2}}, | ||
+ | TensorComponent{SymmetricTensor3D, {1, 0}}, | ||
+ | TensorComponent{SymmetricTensor3D, {1, 1}}, | ||
+ | TensorComponent{SymmetricTensor3D, {2, 2}}}}; | ||
+ | |||
+ | // High-level continuum concepts | ||
+ | |||
+ | struct Manifold { | ||
+ | int dimension; | ||
+ | set<Discretization> discretizations; | ||
+ | set<Field *> fields; | ||
+ | bool invariant() const { return dimension >= 0; } | ||
+ | }; | ||
+ | set<Manifold> manifolds; | ||
+ | |||
+ | struct TangentSpace { | ||
+ | int dimension; | ||
+ | set<Basis> bases; | ||
+ | set<Field *> fields; | ||
+ | bool invariant() const { return dimension >= 0; } | ||
+ | }; | ||
+ | set<TangentSpace> tangentspaces; | ||
+ | |||
+ | struct Field { | ||
+ | Manifold *manifold; | ||
+ | TangentSpace *tangentspace; | ||
+ | TensorType *tensortype; | ||
+ | set<DiscreteField> discretefields; | ||
+ | bool invariant() const { | ||
+ | return tangentspace->dimension == tensortype->dimension; | ||
+ | } | ||
+ | }; | ||
+ | set<Field> Fields; | ||
+ | |||
+ | // Manifold discretizations | ||
+ | |||
+ | struct Discretization { | ||
+ | Manifold &manifold; | ||
+ | set<DiscretizationBlock> discretizationblocks; | ||
+ | bool invariant() const { return true; } | ||
+ | }; | ||
+ | |||
+ | struct DiscretizationBlock { | ||
+ | // Discretization of a certain region, represented by contiguous data | ||
+ | Discretization &discretization; | ||
+ | // bounding box? in terms of coordinates? | ||
+ | // connectivity? neighbouring blocks? | ||
+ | // overlaps? | ||
+ | bool invariant() const { return true; } | ||
+ | }; | ||
+ | |||
+ | // Tangent space bases | ||
+ | |||
+ | struct Basis { | ||
+ | TangentSpace &tangentspace; | ||
+ | vector<BasisVector> basisvectors; | ||
+ | set<CoordinateBasis *> coordinatebases; | ||
+ | bool invariant() const { | ||
+ | return basisvectors.size() == tangentspace->dimension; | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | struct BasisVector { | ||
+ | Basis &basis; | ||
+ | // Since a BasisVector denotes essentially only an integer, we | ||
+ | // should be able to replace it by an integer. Not sure this is | ||
+ | // worthwhile. This essentially only gives names to directions; could use | ||
+ | // vector<string> in TangentSpace for this instead. | ||
+ | int direction; | ||
+ | set<CoordinateBasisElement *> coordinatebasiselements; | ||
+ | bool invariant() const { | ||
+ | return direction >= 0 && direction < basis->basisvectors.size() && | ||
+ | basis->basisvectors[direction] == this; | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | // Discrete fields | ||
+ | |||
+ | struct DiscreteField { | ||
+ | Field &field; | ||
+ | Discretization *discretization; | ||
+ | Basis *basis; | ||
+ | set<DiscreteFieldBlock> discretefieldblocks; | ||
+ | bool invariant() const { | ||
+ | return field->manifold == discretization->manifold && | ||
+ | field->tangentspace == basis->tangentspace; | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | struct DiscreteFieldBlock { | ||
+ | // Discrete field on a particular region (discretization block) | ||
+ | DiscreteField &discretefield; | ||
+ | DiscretizationBlock *discretizationblock; | ||
+ | set<DiscreteFieldBlockData> discretefieldblockdata; | ||
+ | bool invariant() const { return true; } | ||
+ | }; | ||
+ | |||
+ | struct DiscreteFieldBlockData { | ||
+ | // Tensor component for a discrete field on a particular region | ||
+ | DiscreteFieldBlock &discretefieldblock; | ||
+ | TensorComponent *tensorcomponent; | ||
+ | hid_t hdf5_dataset; | ||
+ | bool invariant() const { | ||
+ | return discretefieldblock.discretefield->field->tensortype == | ||
+ | tensorcomponent->tensortype; | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | // Coordinates | ||
+ | |||
+ | struct CoordinateSystem { | ||
+ | Manifold *manifold; | ||
+ | vector<CoordinateField> coordinatefields; | ||
+ | set<CoordinateBasis *> coordinatebases; | ||
+ | bool invariant() const { return true; } | ||
+ | }; | ||
+ | set<Coordinate> coordinates; | ||
+ | |||
+ | struct CoordinateField { | ||
+ | CoordinateSystem &coordinatesystem; | ||
+ | int direction; | ||
+ | Field *field; | ||
+ | bool invariant() const { | ||
+ | return direction >= 0 && | ||
+ | direction < coordinatesystem->coordinatefields.size() && | ||
+ | coordinatesystem->coordinatefields[direction] == this; | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | struct CoordinateBasis { | ||
+ | CoordinateSystem *coordinatesystem; | ||
+ | Basis *basis; | ||
+ | vector<CoordinateBasisElement> coordinatebasiselements; | ||
+ | }; | ||
+ | |||
+ | struct CoordinateBasisElement { | ||
+ | CoordinateBasis &coordinatebasis; | ||
+ | CoordinateField *coordinatefield; | ||
+ | BasisVector *basisvector; | ||
+ | bool invariant() const { | ||
+ | return coordinatefield->direction == basisvector->direction; | ||
+ | } | ||
+ | }; | ||
+ | </pre> |
Revision as of 13:59, 6 October 2015
Contents
Thoughts on a rather generic AMR I/O file format and library
(That would be useful for the Einstein Toolkit and related projects)
General Thoughts
Uses
- generic file I/O, i.e. exporting/importing data
- checkpointing, recovery
- visualization
- post-processing
- long-term archival
Necessary Properties
- needs to be based on HDF5, or an equivalent portrable, widely-supported file format
- needs to support efficient parallel I/O (including quick discovery of file contents, quick reading of individual datasets)
- needs to be portable
- should be useful for similar projects, e.g. Enzo
- must support unigrid, AMR, multi-block, DGFE, staggered grids
- must be usable from C, C++, Fortran, Python, Mathematica
- should be implemented in a portable stand-alone library that can easily be used in various projects
- must remain accessible without such a library
Ideas
We introduce various concepts that abstract various properties. Data are described in terms of these concepts instead of using ad-hoc descriptions for datasets.
It seems useful that each concept has abstract properties and several concrete realizations.
Concepts:
- project (consisting of simulations)
- manifold (consisting of discretizations)
- coordinate systems (various types)
- tangent spaces (what is the connection to a coordinate system?)
- tensors (described via bases) (should bases be related to tangent spaces?)
- generic variable types (such as "floating"), implemented via concrete types (such as float32, float64)
- fields, as defined by the user
Names should be illustrative only and don't matter; only the connection between these concepts are relevant.
Prototype, written in a C++ style
// Note: Unless noted otherwise, all pointers are non-null // When translating to HDF5: // - structs become objects, usually groups // - simple struct fields (int, string) become attributes of the group // - pointers become links inside the group // - sets containing non-pointers become objects inside the group // - sets containing pointers become links inside a subgroup of the group // - vectors of simple types (int, string) become attributes // - other vectors become objects inside a subgroup the group, sorted // alphabetically // Tensor types struct TensorType { int dimension; int rank; set<TensorComponent> storedcomponents; bool invariant() const { return dimension >= 0 && rank >= 0 && storedcomponent.size() < pow(tensortype.dimension, tensortype.rank); } }; set<TensorType> tensortypes; struct TensorComponent { TensorType &tensortype; // We use objects to denote most concepts, but we make an exception // for tensor component indices and tangent space basis vectors, // which we number consecutively starting from zero. This simplifies // the representation, and it introduces a canonical order (e.g. x, // y, z) among the tangent space directions that people are // expecting. int storedcomponent; vector<int> indexvalues; bool invariant() const { bool inv = storedcomponent >= 0 && storedcomponent < tensortype.storedcomponents.size() && indexvalues.size() == tensortype->rank; for (int iv : indexvalues) inv &= iv >= 0 && iv < tensortype->dimension; return inv; } }; TensorType Scalar3D{"Scalar3D", 3, 0, {TensorComponent{Scalar3D, {}}}}; TensorType Vector3D{"Vector3D", 3, 1, {TensorComponent{Vector3D, {0}}, TensorComponent{Vector3D, {1}}, TensorComponent{Vector3D, {2}}}}; TensorType SymmetricTensor3D{"SymmetricTensor3D", 3, 2, {TensorComponent{SymmetricTensor3D, {0, 0}}, TensorComponent{SymmetricTensor3D, {0, 1}}, TensorComponent{SymmetricTensor3D, {0, 2}}, TensorComponent{SymmetricTensor3D, {1, 0}}, TensorComponent{SymmetricTensor3D, {1, 1}}, TensorComponent{SymmetricTensor3D, {2, 2}}}}; // High-level continuum concepts struct Manifold { int dimension; set<Discretization> discretizations; set<Field *> fields; bool invariant() const { return dimension >= 0; } }; set<Manifold> manifolds; struct TangentSpace { int dimension; set<Basis> bases; set<Field *> fields; bool invariant() const { return dimension >= 0; } }; set<TangentSpace> tangentspaces; struct Field { Manifold *manifold; TangentSpace *tangentspace; TensorType *tensortype; set<DiscreteField> discretefields; bool invariant() const { return tangentspace->dimension == tensortype->dimension; } }; set<Field> Fields; // Manifold discretizations struct Discretization { Manifold &manifold; set<DiscretizationBlock> discretizationblocks; bool invariant() const { return true; } }; struct DiscretizationBlock { // Discretization of a certain region, represented by contiguous data Discretization &discretization; // bounding box? in terms of coordinates? // connectivity? neighbouring blocks? // overlaps? bool invariant() const { return true; } }; // Tangent space bases struct Basis { TangentSpace &tangentspace; vector<BasisVector> basisvectors; set<CoordinateBasis *> coordinatebases; bool invariant() const { return basisvectors.size() == tangentspace->dimension; } }; struct BasisVector { Basis &basis; // Since a BasisVector denotes essentially only an integer, we // should be able to replace it by an integer. Not sure this is // worthwhile. This essentially only gives names to directions; could use // vector<string> in TangentSpace for this instead. int direction; set<CoordinateBasisElement *> coordinatebasiselements; bool invariant() const { return direction >= 0 && direction < basis->basisvectors.size() && basis->basisvectors[direction] == this; } }; // Discrete fields struct DiscreteField { Field &field; Discretization *discretization; Basis *basis; set<DiscreteFieldBlock> discretefieldblocks; bool invariant() const { return field->manifold == discretization->manifold && field->tangentspace == basis->tangentspace; } }; struct DiscreteFieldBlock { // Discrete field on a particular region (discretization block) DiscreteField &discretefield; DiscretizationBlock *discretizationblock; set<DiscreteFieldBlockData> discretefieldblockdata; bool invariant() const { return true; } }; struct DiscreteFieldBlockData { // Tensor component for a discrete field on a particular region DiscreteFieldBlock &discretefieldblock; TensorComponent *tensorcomponent; hid_t hdf5_dataset; bool invariant() const { return discretefieldblock.discretefield->field->tensortype == tensorcomponent->tensortype; } }; // Coordinates struct CoordinateSystem { Manifold *manifold; vector<CoordinateField> coordinatefields; set<CoordinateBasis *> coordinatebases; bool invariant() const { return true; } }; set<Coordinate> coordinates; struct CoordinateField { CoordinateSystem &coordinatesystem; int direction; Field *field; bool invariant() const { return direction >= 0 && direction < coordinatesystem->coordinatefields.size() && coordinatesystem->coordinatefields[direction] == this; } }; struct CoordinateBasis { CoordinateSystem *coordinatesystem; Basis *basis; vector<CoordinateBasisElement> coordinatebasiselements; }; struct CoordinateBasisElement { CoordinateBasis &coordinatebasis; CoordinateField *coordinatefield; BasisVector *basisvector; bool invariant() const { return coordinatefield->direction == basisvector->direction; } };