Difference between revisions of "A new I/O file format"
(→Ideas) |
|||
| Line 39: | Line 39: | ||
Names should be illustrative only and don't matter; only the connection between these concepts are relevant. | Names should be illustrative only and don't matter; only the connection between these concepts are relevant. | ||
| + | |||
| + | == Prototype, written in a C++ style == | ||
| + | |||
| + | <pre> | ||
| + | // Note: Unless noted otherwise, all pointers are non-null | ||
| + | |||
| + | // When translating to HDF5: | ||
| + | // - structs become objects, usually groups | ||
| + | // - simple struct fields (int, string) become attributes of the group | ||
| + | // - pointers become links inside the group | ||
| + | // - sets containing non-pointers become objects inside the group | ||
| + | // - sets containing pointers become links inside a subgroup of the group | ||
| + | // - vectors of simple types (int, string) become attributes | ||
| + | // - other vectors become objects inside a subgroup the group, sorted | ||
| + | // alphabetically | ||
| + | |||
| + | // Tensor types | ||
| + | |||
| + | struct TensorType { | ||
| + | int dimension; | ||
| + | int rank; | ||
| + | set<TensorComponent> storedcomponents; | ||
| + | bool invariant() const { | ||
| + | return dimension >= 0 && rank >= 0 && | ||
| + | storedcomponent.size() < pow(tensortype.dimension, tensortype.rank); | ||
| + | } | ||
| + | }; | ||
| + | set<TensorType> tensortypes; | ||
| + | |||
| + | struct TensorComponent { | ||
| + | TensorType &tensortype; | ||
| + | // We use objects to denote most concepts, but we make an exception | ||
| + | // for tensor component indices and tangent space basis vectors, | ||
| + | // which we number consecutively starting from zero. This simplifies | ||
| + | // the representation, and it introduces a canonical order (e.g. x, | ||
| + | // y, z) among the tangent space directions that people are | ||
| + | // expecting. | ||
| + | int storedcomponent; | ||
| + | vector<int> indexvalues; | ||
| + | bool invariant() const { | ||
| + | bool inv = storedcomponent >= 0 && | ||
| + | storedcomponent < tensortype.storedcomponents.size() && | ||
| + | indexvalues.size() == tensortype->rank; | ||
| + | for (int iv : indexvalues) | ||
| + | inv &= iv >= 0 && iv < tensortype->dimension; | ||
| + | return inv; | ||
| + | } | ||
| + | }; | ||
| + | |||
| + | TensorType Scalar3D{"Scalar3D", 3, 0, {TensorComponent{Scalar3D, {}}}}; | ||
| + | |||
| + | TensorType Vector3D{"Vector3D", | ||
| + | 3, | ||
| + | 1, | ||
| + | {TensorComponent{Vector3D, {0}}, | ||
| + | TensorComponent{Vector3D, {1}}, | ||
| + | TensorComponent{Vector3D, {2}}}}; | ||
| + | |||
| + | TensorType SymmetricTensor3D{"SymmetricTensor3D", | ||
| + | 3, | ||
| + | 2, | ||
| + | {TensorComponent{SymmetricTensor3D, {0, 0}}, | ||
| + | TensorComponent{SymmetricTensor3D, {0, 1}}, | ||
| + | TensorComponent{SymmetricTensor3D, {0, 2}}, | ||
| + | TensorComponent{SymmetricTensor3D, {1, 0}}, | ||
| + | TensorComponent{SymmetricTensor3D, {1, 1}}, | ||
| + | TensorComponent{SymmetricTensor3D, {2, 2}}}}; | ||
| + | |||
| + | // High-level continuum concepts | ||
| + | |||
| + | struct Manifold { | ||
| + | int dimension; | ||
| + | set<Discretization> discretizations; | ||
| + | set<Field *> fields; | ||
| + | bool invariant() const { return dimension >= 0; } | ||
| + | }; | ||
| + | set<Manifold> manifolds; | ||
| + | |||
| + | struct TangentSpace { | ||
| + | int dimension; | ||
| + | set<Basis> bases; | ||
| + | set<Field *> fields; | ||
| + | bool invariant() const { return dimension >= 0; } | ||
| + | }; | ||
| + | set<TangentSpace> tangentspaces; | ||
| + | |||
| + | struct Field { | ||
| + | Manifold *manifold; | ||
| + | TangentSpace *tangentspace; | ||
| + | TensorType *tensortype; | ||
| + | set<DiscreteField> discretefields; | ||
| + | bool invariant() const { | ||
| + | return tangentspace->dimension == tensortype->dimension; | ||
| + | } | ||
| + | }; | ||
| + | set<Field> Fields; | ||
| + | |||
| + | // Manifold discretizations | ||
| + | |||
| + | struct Discretization { | ||
| + | Manifold &manifold; | ||
| + | set<DiscretizationBlock> discretizationblocks; | ||
| + | bool invariant() const { return true; } | ||
| + | }; | ||
| + | |||
| + | struct DiscretizationBlock { | ||
| + | // Discretization of a certain region, represented by contiguous data | ||
| + | Discretization &discretization; | ||
| + | // bounding box? in terms of coordinates? | ||
| + | // connectivity? neighbouring blocks? | ||
| + | // overlaps? | ||
| + | bool invariant() const { return true; } | ||
| + | }; | ||
| + | |||
| + | // Tangent space bases | ||
| + | |||
| + | struct Basis { | ||
| + | TangentSpace &tangentspace; | ||
| + | vector<BasisVector> basisvectors; | ||
| + | set<CoordinateBasis *> coordinatebases; | ||
| + | bool invariant() const { | ||
| + | return basisvectors.size() == tangentspace->dimension; | ||
| + | } | ||
| + | }; | ||
| + | |||
| + | struct BasisVector { | ||
| + | Basis &basis; | ||
| + | // Since a BasisVector denotes essentially only an integer, we | ||
| + | // should be able to replace it by an integer. Not sure this is | ||
| + | // worthwhile. This essentially only gives names to directions; could use | ||
| + | // vector<string> in TangentSpace for this instead. | ||
| + | int direction; | ||
| + | set<CoordinateBasisElement *> coordinatebasiselements; | ||
| + | bool invariant() const { | ||
| + | return direction >= 0 && direction < basis->basisvectors.size() && | ||
| + | basis->basisvectors[direction] == this; | ||
| + | } | ||
| + | }; | ||
| + | |||
| + | // Discrete fields | ||
| + | |||
| + | struct DiscreteField { | ||
| + | Field &field; | ||
| + | Discretization *discretization; | ||
| + | Basis *basis; | ||
| + | set<DiscreteFieldBlock> discretefieldblocks; | ||
| + | bool invariant() const { | ||
| + | return field->manifold == discretization->manifold && | ||
| + | field->tangentspace == basis->tangentspace; | ||
| + | } | ||
| + | }; | ||
| + | |||
| + | struct DiscreteFieldBlock { | ||
| + | // Discrete field on a particular region (discretization block) | ||
| + | DiscreteField &discretefield; | ||
| + | DiscretizationBlock *discretizationblock; | ||
| + | set<DiscreteFieldBlockData> discretefieldblockdata; | ||
| + | bool invariant() const { return true; } | ||
| + | }; | ||
| + | |||
| + | struct DiscreteFieldBlockData { | ||
| + | // Tensor component for a discrete field on a particular region | ||
| + | DiscreteFieldBlock &discretefieldblock; | ||
| + | TensorComponent *tensorcomponent; | ||
| + | hid_t hdf5_dataset; | ||
| + | bool invariant() const { | ||
| + | return discretefieldblock.discretefield->field->tensortype == | ||
| + | tensorcomponent->tensortype; | ||
| + | } | ||
| + | }; | ||
| + | |||
| + | // Coordinates | ||
| + | |||
| + | struct CoordinateSystem { | ||
| + | Manifold *manifold; | ||
| + | vector<CoordinateField> coordinatefields; | ||
| + | set<CoordinateBasis *> coordinatebases; | ||
| + | bool invariant() const { return true; } | ||
| + | }; | ||
| + | set<Coordinate> coordinates; | ||
| + | |||
| + | struct CoordinateField { | ||
| + | CoordinateSystem &coordinatesystem; | ||
| + | int direction; | ||
| + | Field *field; | ||
| + | bool invariant() const { | ||
| + | return direction >= 0 && | ||
| + | direction < coordinatesystem->coordinatefields.size() && | ||
| + | coordinatesystem->coordinatefields[direction] == this; | ||
| + | } | ||
| + | }; | ||
| + | |||
| + | struct CoordinateBasis { | ||
| + | CoordinateSystem *coordinatesystem; | ||
| + | Basis *basis; | ||
| + | vector<CoordinateBasisElement> coordinatebasiselements; | ||
| + | }; | ||
| + | |||
| + | struct CoordinateBasisElement { | ||
| + | CoordinateBasis &coordinatebasis; | ||
| + | CoordinateField *coordinatefield; | ||
| + | BasisVector *basisvector; | ||
| + | bool invariant() const { | ||
| + | return coordinatefield->direction == basisvector->direction; | ||
| + | } | ||
| + | }; | ||
| + | </pre> | ||
Revision as of 13:59, 6 October 2015
Contents
Thoughts on a rather generic AMR I/O file format and library
(That would be useful for the Einstein Toolkit and related projects)
General Thoughts
Uses
- generic file I/O, i.e. exporting/importing data
- checkpointing, recovery
- visualization
- post-processing
- long-term archival
Necessary Properties
- needs to be based on HDF5, or an equivalent portable, widely-supported file format
- needs to support efficient parallel I/O (including quick discovery of file contents, quick reading of individual datasets)
- needs to be portable
- should be useful for similar projects, e.g. Enzo
- must support unigrid, AMR, multi-block, DGFE, staggered grids
- must be usable from C, C++, Fortran, Python, Mathematica
- should be implemented in a portable stand-alone library that can easily be used in various projects
- must remain accessible without such a library
Ideas
We introduce various concepts that abstract various properties. Data are described in terms of these concepts instead of using ad-hoc descriptions for datasets.
It seems useful that each concept has abstract properties and several concrete realizations.
Concepts:
- project (consisting of simulations)
- manifold (consisting of discretizations)
- coordinate systems (various types)
- tangent spaces (what is the connection to a coordinate system?)
- tensors (described via bases) (should bases be related to tangent spaces?)
- generic variable types (such as "floating"), implemented via concrete types (such as float32, float64)
- fields, as defined by the user
Names should be illustrative only and don't matter; only the connections between these concepts are relevant.
Prototype, written in a C++ style
// Note: Unless noted otherwise, all pointers are non-null
// When translating to HDF5:
// - structs become objects, usually groups
// - simple struct fields (int, string) become attributes of the group
// - pointers become links inside the group
// - sets containing non-pointers become objects inside the group
// - sets containing pointers become links inside a subgroup of the group
// - vectors of simple types (int, string) become attributes
// - other vectors become objects inside a subgroup of the group, sorted
// alphabetically
// Tensor types
struct TensorType {
int dimension;
int rank;
set<TensorComponent> storedcomponents;
bool invariant() const {
return dimension >= 0 && rank >= 0 &&
storedcomponent.size() < pow(tensortype.dimension, tensortype.rank);
}
};
set<TensorType> tensortypes;
// One stored component of a TensorType, identified by its position among the
// stored components and by the index values it stands for.
struct TensorComponent {
  TensorType &tensortype;
  // We use objects to denote most concepts, but we make an exception
  // for tensor component indices and tangent space basis vectors,
  // which we number consecutively starting from zero. This simplifies
  // the representation, and it introduces a canonical order (e.g. x,
  // y, z) among the tangent space directions that people are
  // expecting.
  int storedcomponent;
  vector<int> indexvalues;
  // Consistency check: storedcomponent is a valid position among the tensor
  // type's stored components, there is exactly one index value per tensor
  // index, and every index value lies in [0, dimension).
  bool invariant() const {
    // tensortype is a reference, so its members are reached with '.'.
    bool inv = storedcomponent >= 0 &&
               storedcomponent < tensortype.storedcomponents.size() &&
               indexvalues.size() == tensortype.rank;
    for (int iv : indexvalues)
      inv &= iv >= 0 && iv < tensortype.dimension;
    return inv;
  }
};
// Scalar in three dimensions: rank 0, a single stored component with no
// index values. TensorComponent's fields are (tensortype, storedcomponent,
// indexvalues), so the stored-component number 0 is given explicitly.
TensorType Scalar3D{"Scalar3D", 3, 0, {TensorComponent{Scalar3D, 0, {}}}};
// Vector in three dimensions: rank 1, one stored component per direction.
// Each component lists its stored-component number before its index values;
// without it, the braces meant for indexvalues would initialize the
// storedcomponent field instead.
TensorType Vector3D{"Vector3D",
                    3,
                    1,
                    {TensorComponent{Vector3D, 0, {0}},
                     TensorComponent{Vector3D, 1, {1}},
                     TensorComponent{Vector3D, 2, {2}}}};
// Symmetric rank-2 tensor in three dimensions. Only the six independent
// components (the upper triangle, i <= j) are stored; (1, 0) equals (0, 1) by
// symmetry and is therefore not listed, while (1, 2) is.
// Each component lists its stored-component number before its index values.
TensorType SymmetricTensor3D{"SymmetricTensor3D",
                             3,
                             2,
                             {TensorComponent{SymmetricTensor3D, 0, {0, 0}},
                              TensorComponent{SymmetricTensor3D, 1, {0, 1}},
                              TensorComponent{SymmetricTensor3D, 2, {0, 2}},
                              TensorComponent{SymmetricTensor3D, 3, {1, 1}},
                              TensorComponent{SymmetricTensor3D, 4, {1, 2}},
                              TensorComponent{SymmetricTensor3D, 5, {2, 2}}}};
// High-level continuum concepts
// A manifold of a given dimension. It holds its discretizations by value and
// refers to fields through pointers.
struct Manifold {
  int dimension;
  set<Discretization> discretizations;
  set<Field *> fields;
  // A manifold is well-formed when its dimension is not negative.
  bool invariant() const {
    const bool nonnegative = !(dimension < 0);
    return nonnegative;
  }
};
// Top-level collection of manifolds.
set<Manifold> manifolds;
// A tangent space of a given dimension. It holds its bases by value and
// refers to fields through pointers.
struct TangentSpace {
  int dimension;
  set<Basis> bases;
  set<Field *> fields;
  // A tangent space is well-formed when its dimension is zero or positive.
  bool invariant() const {
    return 0 <= dimension;
  }
};
// Top-level collection of tangent spaces.
set<TangentSpace> tangentspaces;
// A field: links a manifold, a tangent space and a tensor type, and holds the
// field's discrete representations by value.
struct Field {
  Manifold *manifold;
  TangentSpace *tangentspace;
  TensorType *tensortype;
  set<DiscreteField> discretefields;
  // The tensor type must have the same dimension as the tangent space.
  bool invariant() const {
    const int spacedim = tangentspace->dimension;
    const int tensordim = tensortype->dimension;
    return spacedim == tensordim;
  }
};
// Top-level collection of fields.
set<Field> Fields;
// Manifold discretizations
// A discretization of a manifold, made up of discretization blocks.
struct Discretization {
  Manifold &manifold; // the manifold this discretization belongs to
  set<DiscretizationBlock> discretizationblocks;
  // No constraints are checked for a discretization; always succeeds.
  bool invariant() const {
    return true;
  }
};
// Discretization of a certain region, represented by contiguous data.
struct DiscretizationBlock {
  Discretization &discretization; // the discretization this block is part of
  // Open design questions:
  // bounding box? in terms of coordinates?
  // connectivity? neighbouring blocks?
  // overlaps?

  // No constraints are checked for a block; always succeeds.
  bool invariant() const {
    return true;
  }
};
// Tangent space bases
// A basis of a tangent space: an ordered list of basis vectors, plus links to
// the coordinate bases that refer to it.
struct Basis {
  TangentSpace &tangentspace;
  vector<BasisVector> basisvectors;
  set<CoordinateBasis *> coordinatebases;
  // A basis must have exactly one basis vector per tangent space dimension.
  bool invariant() const {
    // tangentspace is a reference, so its members are reached with '.'.
    return basisvectors.size() == tangentspace.dimension;
  }
};
// One vector of a Basis, identified by its direction number.
struct BasisVector {
  Basis &basis;
  // Since a BasisVector denotes essentially only an integer, we
  // should be able to replace it by an integer. Not sure this is
  // worthwhile. This essentially only gives names to directions; could use
  // vector<string> in TangentSpace for this instead.
  int direction;
  set<CoordinateBasisElement *> coordinatebasiselements;
  // The direction must be a valid position in the owning basis, and the
  // vector stored at that position must be this very object.
  bool invariant() const {
    // basis is a reference ('.' access). Identity is checked by comparing
    // addresses, since `this` is a pointer and the vector holds objects.
    // The range check comes first so the subscript is only evaluated for a
    // valid position.
    return direction >= 0 && direction < basis.basisvectors.size() &&
           &basis.basisvectors[direction] == this;
  }
};
// Discrete fields
// A field expressed on a particular discretization and in a particular basis,
// made up of discrete field blocks.
struct DiscreteField {
  Field &field;
  Discretization *discretization;
  Basis *basis;
  set<DiscreteFieldBlock> discretefieldblocks;
  // The discretization must belong to the field's manifold, and the basis
  // must belong to the field's tangent space.
  bool invariant() const {
    // field is a reference ('.' access) whose members are pointers, while
    // Discretization::manifold and Basis::tangentspace are references; take
    // their addresses so that pointers are compared with pointers.
    return field.manifold == &discretization->manifold &&
           field.tangentspace == &basis->tangentspace;
  }
};
// Discrete field on a particular region (discretization block).
struct DiscreteFieldBlock {
  DiscreteField &discretefield; // the discrete field this block is part of
  DiscretizationBlock *discretizationblock;
  set<DiscreteFieldBlockData> discretefieldblockdata;
  // No constraints are checked for a discrete field block; always succeeds.
  bool invariant() const {
    return true;
  }
};
// Tensor component for a discrete field on a particular region, together
// with the HDF5 dataset handle associated with it.
struct DiscreteFieldBlockData {
  DiscreteFieldBlock &discretefieldblock;
  TensorComponent *tensorcomponent;
  hid_t hdf5_dataset;
  // The tensor component must belong to the tensor type of the field that
  // this data is (indirectly) part of.
  bool invariant() const {
    // discretefieldblock, discretefield and field are all references ('.'
    // access). Field::tensortype is a pointer while
    // TensorComponent::tensortype is a reference, hence the address-of.
    return discretefieldblock.discretefield.field.tensortype ==
           &tensorcomponent->tensortype;
  }
};
// Coordinates
// A coordinate system on a manifold: an ordered list of coordinate fields,
// plus links to the coordinate bases that refer to it.
struct CoordinateSystem {
  Manifold *manifold;
  vector<CoordinateField> coordinatefields;
  set<CoordinateBasis *> coordinatebases;
  // No constraints are checked for a coordinate system; always succeeds.
  bool invariant() const { return true; }
};
// Top-level collection of coordinate systems. The element type is
// CoordinateSystem; no type named Coordinate is declared in this prototype.
set<CoordinateSystem> coordinates;
// One coordinate of a CoordinateSystem: ties a direction number to the field
// that holds the coordinate values.
struct CoordinateField {
  CoordinateSystem &coordinatesystem;
  int direction;
  Field *field;
  // The direction must be a valid position in the owning coordinate system,
  // and the coordinate field stored at that position must be this object.
  bool invariant() const {
    // coordinatesystem is a reference ('.' access). Identity is checked by
    // comparing addresses, since `this` is a pointer and the vector holds
    // objects. The range check comes first so the subscript is only
    // evaluated for a valid position.
    return direction >= 0 &&
           direction < coordinatesystem.coordinatefields.size() &&
           &coordinatesystem.coordinatefields[direction] == this;
  }
};
// Connects a coordinate system with a tangent space basis through an ordered
// list of coordinate basis elements.
struct CoordinateBasis {
  CoordinateSystem *coordinatesystem;
  Basis *basis;
  vector<CoordinateBasisElement> coordinatebasiselements;
  // Provided so that every struct in the prototype offers invariant().
  // No constraints are checked here yet; always succeeds.
  bool invariant() const { return true; }
};
// One element of a CoordinateBasis: pairs a coordinate field with a basis
// vector.
struct CoordinateBasisElement {
  CoordinateBasis &coordinatebasis; // the coordinate basis this belongs to
  CoordinateField *coordinatefield;
  BasisVector *basisvector;
  // The paired coordinate field and basis vector must have the same
  // direction number.
  bool invariant() const {
    const int fielddirection = coordinatefield->direction;
    const int vectordirection = basisvector->direction;
    return fielddirection == vectordirection;
  }
};