Difference between revisions of "A new I/O file format"

From Einstein Toolkit Documentation
Jump to: navigation, search
 
(Ideas)
Line 39: Line 39:
  
 
Names should be illustrative only and don't matter; only the connections between these concepts are relevant.
 
Names should be illustrative only and don't matter; only the connections between these concepts are relevant.
 +
 +
== Prototype, written in a C++ style ==
 +
 +
<pre>
 +
// Note: Unless noted otherwise, all pointers are non-null
 +
 +
// When translating to HDF5:
 +
// - structs become objects, usually groups
 +
// - simple struct fields (int, string) become attributes of the group
 +
// - pointers become links inside the group
 +
// - sets containing non-pointers become objects inside the group
 +
// - sets containing pointers become links inside a subgroup of the group
 +
// - vectors of simple types (int, string) become attributes
 +
// - other vectors become objects inside a subgroup of the group, sorted
 +
//  alphabetically
 +
 +
// Tensor types
 +
 +
struct TensorType {
 +
  int dimension;
 +
  int rank;
 +
  set<TensorComponent> storedcomponents;
 +
  bool invariant() const {
 +
    return dimension >= 0 && rank >= 0 &&
 +
          storedcomponent.size() < pow(tensortype.dimension, tensortype.rank);
 +
  }
 +
};
 +
set<TensorType> tensortypes;
 +
 +
struct TensorComponent {
 +
  TensorType &tensortype;
 +
  // We use objects to denote most concepts, but we make an exception
 +
  // for tensor component indices and tangent space basis vectors,
 +
  // which we number consecutively starting from zero. This simplifies
 +
  // the representation, and it introduces a canonical order (e.g. x,
 +
  // y, z) among the tangent space directions that people are
 +
  // expecting.
 +
  int storedcomponent;
 +
  vector<int> indexvalues;
 +
  bool invariant() const {
 +
    bool inv = storedcomponent >= 0 &&
 +
              storedcomponent < tensortype.storedcomponents.size() &&
 +
              indexvalues.size() == tensortype->rank;
 +
    for (int iv : indexvalues)
 +
      inv &= iv >= 0 && iv < tensortype->dimension;
 +
    return inv;
 +
  }
 +
};
 +
 +
TensorType Scalar3D{"Scalar3D", 3, 0, {TensorComponent{Scalar3D, {}}}};
 +
 +
TensorType Vector3D{"Vector3D",
 +
                    3,
 +
                    1,
 +
                    {TensorComponent{Vector3D, {0}},
 +
                    TensorComponent{Vector3D, {1}},
 +
                    TensorComponent{Vector3D, {2}}}};
 +
 +
TensorType SymmetricTensor3D{"SymmetricTensor3D",
 +
                            3,
 +
                            2,
 +
                            {TensorComponent{SymmetricTensor3D, {0, 0}},
 +
                              TensorComponent{SymmetricTensor3D, {0, 1}},
 +
                              TensorComponent{SymmetricTensor3D, {0, 2}},
 +
                              TensorComponent{SymmetricTensor3D, {1, 0}},
 +
                              TensorComponent{SymmetricTensor3D, {1, 1}},
 +
                              TensorComponent{SymmetricTensor3D, {2, 2}}}};
 +
 +
// High-level continuum concepts
 +
 +
struct Manifold {
 +
  int dimension;
 +
  set<Discretization> discretizations;
 +
  set<Field *> fields;
 +
  bool invariant() const { return dimension >= 0; }
 +
};
 +
set<Manifold> manifolds;
 +
 +
struct TangentSpace {
 +
  int dimension;
 +
  set<Basis> bases;
 +
  set<Field *> fields;
 +
  bool invariant() const { return dimension >= 0; }
 +
};
 +
set<TangentSpace> tangentspaces;
 +
 +
struct Field {
 +
  Manifold *manifold;
 +
  TangentSpace *tangentspace;
 +
  TensorType *tensortype;
 +
  set<DiscreteField> discretefields;
 +
  bool invariant() const {
 +
    return tangentspace->dimension == tensortype->dimension;
 +
  }
 +
};
 +
set<Field> Fields;
 +
 +
// Manifold discretizations
 +
 +
struct Discretization {
 +
  Manifold &manifold;
 +
  set<DiscretizationBlock> discretizationblocks;
 +
  bool invariant() const { return true; }
 +
};
 +
 +
struct DiscretizationBlock {
 +
  // Discretization of a certain region, represented by contiguous data
 +
  Discretization &discretization;
 +
  // bounding box? in terms of coordinates?
 +
  // connectivity? neighbouring blocks?
 +
  // overlaps?
 +
  bool invariant() const { return true; }
 +
};
 +
 +
// Tangent space bases
 +
 +
struct Basis {
 +
  TangentSpace &tangentspace;
 +
  vector<BasisVector> basisvectors;
 +
  set<CoordinateBasis *> coordinatebases;
 +
  bool invariant() const {
 +
    return basisvectors.size() == tangentspace->dimension;
 +
  }
 +
};
 +
 +
struct BasisVector {
 +
  Basis &basis;
 +
  // Since a BasisVector denotes essentially only an integer, we
 +
  // should be able to replace it by an integer. Not sure this is
 +
  // worthwhile. This essentially only gives names to directions; could use
 +
  // vector<string> in TangentSpace for this instead.
 +
  int direction;
 +
  set<CoordinateBasisElement *> coordinatebasiselements;
 +
  bool invariant() const {
 +
    return direction >= 0 && direction < basis->basisvectors.size() &&
 +
          basis->basisvectors[direction] == this;
 +
  }
 +
};
 +
 +
// Discrete fields
 +
 +
struct DiscreteField {
 +
  Field &field;
 +
  Discretization *discretization;
 +
  Basis *basis;
 +
  set<DiscreteFieldBlock> discretefieldblocks;
 +
  bool invariant() const {
 +
    return field->manifold == discretization->manifold &&
 +
          field->tangentspace == basis->tangentspace;
 +
  }
 +
};
 +
 +
struct DiscreteFieldBlock {
 +
  // Discrete field on a particular region (discretization block)
 +
  DiscreteField &discretefield;
 +
  DiscretizationBlock *discretizationblock;
 +
  set<DiscreteFieldBlockData> discretefieldblockdata;
 +
  bool invariant() const { return true; }
 +
};
 +
 +
struct DiscreteFieldBlockData {
 +
  // Tensor component for a discrete field on a particular region
 +
  DiscreteFieldBlock &discretefieldblock;
 +
  TensorComponent *tensorcomponent;
 +
  hid_t hdf5_dataset;
 +
  bool invariant() const {
 +
    return discretefieldblock.discretefield->field->tensortype ==
 +
          tensorcomponent->tensortype;
 +
  }
 +
};
 +
 +
// Coordinates
 +
 +
struct CoordinateSystem {
 +
  Manifold *manifold;
 +
  vector<CoordinateField> coordinatefields;
 +
  set<CoordinateBasis *> coordinatebases;
 +
  bool invariant() const { return true; }
 +
};
 +
set<Coordinate> coordinates;
 +
 +
struct CoordinateField {
 +
  CoordinateSystem &coordinatesystem;
 +
  int direction;
 +
  Field *field;
 +
  bool invariant() const {
 +
    return direction >= 0 &&
 +
          direction < coordinatesystem->coordinatefields.size() &&
 +
          coordinatesystem->coordinatefields[direction] == this;
 +
  }
 +
};
 +
 +
struct CoordinateBasis {
 +
  CoordinateSystem *coordinatesystem;
 +
  Basis *basis;
 +
  vector<CoordinateBasisElement> coordinatebasiselements;
 +
};
 +
 +
struct CoordinateBasisElement {
 +
  CoordinateBasis &coordinatebasis;
 +
  CoordinateField *coordinatefield;
 +
  BasisVector *basisvector;
 +
  bool invariant() const {
 +
    return coordinatefield->direction == basisvector->direction;
 +
  }
 +
};
 +
</pre>

Revision as of 13:59, 6 October 2015

Thoughts on a rather generic AMR I/O file format and library

(That would be useful for the Einstein Toolkit and related projects)

General Thoughts

Uses

  • generic file I/O, i.e. exporting/importing data
  • checkpointing, recovery
  • visualization
  • post-processing
  • long-term archival

Necessary Properties

  • needs to be based on HDF5, or an equivalent portable, widely-supported file format
  • needs to support efficient parallel I/O (including quick discovery of file contents, quick reading of individual datasets)
  • needs to be portable
  • should be useful for similar projects, e.g. Enzo
  • must support unigrid, AMR, multi-block, DGFE, staggered grids
  • must be usable from C, C++, Fortran, Python, Mathematica
  • should be implemented in a portable stand-alone library that can easily be used in various projects
  • must remain accessible without such a library

Ideas

We introduce various concepts that abstract various properties. Data are described in terms of these concepts instead of using ad-hoc descriptions for datasets.

It seems useful that each concept has abstract properties and several concrete realizations.

Concepts:

  • project (consisting of simulations)
  • manifold (consisting of discretizations)
  • coordinate systems (various types)
  • tangent spaces (what is the connection to a coordinate system?)
  • tensors (described via bases) (should bases be related to tangent spaces?)
  • generic variable types (such as "floating"), implemented via concrete types (such as float32, float64)
  • fields, as defined by the user

Names should be illustrative only and don't matter; only the connections between these concepts are relevant.

Prototype, written in a C++ style

// Note: Unless noted otherwise, all pointers are non-null

// When translating to HDF5:
// - structs become objects, usually groups
// - simple struct fields (int, string) become attributes of the group
// - pointers become links inside the group
// - sets containing non-pointers become objects inside the group
// - sets containing pointers become links inside a subgroup of the group
// - vectors of simple types (int, string) become attributes
// - other vectors become objects inside a subgroup of the group, sorted
//   alphabetically

// Tensor types

struct TensorType {
  int dimension;
  int rank;
  set<TensorComponent> storedcomponents;
  bool invariant() const {
    return dimension >= 0 && rank >= 0 &&
           storedcomponent.size() < pow(tensortype.dimension, tensortype.rank);
  }
};
set<TensorType> tensortypes;

// One stored component of a tensor type, identified by its position among
// the stored components and by the index values it stands for.
struct TensorComponent {
  TensorType &tensortype;
  // We use objects to denote most concepts, but we make an exception
  // for tensor component indices and tangent space basis vectors,
  // which we number consecutively starting from zero. This simplifies
  // the representation, and it introduces a canonical order (e.g. x,
  // y, z) among the tangent space directions that people are
  // expecting.
  int storedcomponent;
  vector<int> indexvalues;
  // Holds when storedcomponent is a valid position within the owning
  // tensor type's stored components, there is one index value per tensor
  // rank, and every index value lies in [0, dimension).
  bool invariant() const {
    // tensortype is a reference, so its members are accessed with '.'
    bool inv = storedcomponent >= 0 &&
               storedcomponent < tensortype.storedcomponents.size() &&
               indexvalues.size() == tensortype.rank;
    for (int iv : indexvalues)
      inv &= iv >= 0 && iv < tensortype.dimension;
    return inv;
  }
};

// Scalar in 3 dimensions: rank 0, a single stored component without indices.
// Each TensorComponent lists {tensortype, storedcomponent, indexvalues}.
// NOTE(review): the leading name string has no matching field in TensorType
// as declared in this prototype -- confirm whether a name field is intended.
TensorType Scalar3D{"Scalar3D", 3, 0, {TensorComponent{Scalar3D, 0, {}}}};

// Vector in 3 dimensions: rank 1, one stored component per direction.
// Each TensorComponent lists {tensortype, storedcomponent, indexvalues};
// stored components are numbered consecutively starting from zero.
// NOTE(review): the leading name string has no matching field in TensorType
// as declared in this prototype -- confirm whether a name field is intended.
TensorType Vector3D{"Vector3D",
                    3,
                    1,
                    {TensorComponent{Vector3D, 0, {0}},
                     TensorComponent{Vector3D, 1, {1}},
                     TensorComponent{Vector3D, 2, {2}}}};

// Symmetric rank-2 tensor in 3 dimensions. Only the six independent
// components (i <= j) are stored: (0,0) (0,1) (0,2) (1,1) (1,2) (2,2).
// Each TensorComponent lists {tensortype, storedcomponent, indexvalues};
// stored components are numbered consecutively starting from zero.
// NOTE(review): the leading name string has no matching field in TensorType
// as declared in this prototype -- confirm whether a name field is intended.
TensorType SymmetricTensor3D{"SymmetricTensor3D",
                             3,
                             2,
                             {TensorComponent{SymmetricTensor3D, 0, {0, 0}},
                              TensorComponent{SymmetricTensor3D, 1, {0, 1}},
                              TensorComponent{SymmetricTensor3D, 2, {0, 2}},
                              TensorComponent{SymmetricTensor3D, 3, {1, 1}},
                              TensorComponent{SymmetricTensor3D, 4, {1, 2}},
                              TensorComponent{SymmetricTensor3D, 5, {2, 2}}}};

// High-level continuum concepts

// A manifold of a given dimension. It holds its discretizations by value
// and refers to fields by pointer.
struct Manifold {
  int dimension;
  set<Discretization> discretizations;
  set<Field *> fields;
  // The dimension must not be negative
  bool invariant() const {
    const bool nonnegative = dimension >= 0;
    return nonnegative;
  }
};
// Top-level collection of manifolds
set<Manifold> manifolds;

// A tangent space of a given dimension. It holds its bases by value and
// refers to fields by pointer.
struct TangentSpace {
  int dimension;
  set<Basis> bases;
  set<Field *> fields;
  // A negative dimension is not allowed
  bool invariant() const {
    return !(dimension < 0);
  }
};
// Top-level collection of tangent spaces
set<TangentSpace> tangentspaces;

// A field: it points to a manifold, a tangent space and a tensor type, and
// holds its discrete representations by value.
struct Field {
  Manifold *manifold;
  TangentSpace *tangentspace;
  TensorType *tensortype;
  set<DiscreteField> discretefields;
  // The tangent space and the tensor type must agree on the dimension
  bool invariant() const {
    const int tangentdim = tangentspace->dimension;
    const int tensordim = tensortype->dimension;
    return tangentdim == tensordim;
  }
};
// Top-level collection of fields
set<Field> Fields;

// Manifold discretizations

// A discretization of a manifold, made up of discretization blocks
struct Discretization {
  Manifold &manifold;
  set<DiscretizationBlock> discretizationblocks;
  // No constraints are defined yet; always holds
  bool invariant() const {
    return true;
  }
};

// Discretization of a certain region, represented by contiguous data
struct DiscretizationBlock {
  Discretization &discretization;
  // Open questions:
  // - bounding box? in terms of coordinates?
  // - connectivity? neighbouring blocks?
  // - overlaps?

  // No constraints are defined yet; always holds
  bool invariant() const {
    return true;
  }
};

// Tangent space bases

// A basis of a tangent space, given as an ordered list of basis vectors
struct Basis {
  TangentSpace &tangentspace;
  vector<BasisVector> basisvectors;
  set<CoordinateBasis *> coordinatebases;
  // There must be exactly one basis vector per tangent space dimension
  bool invariant() const {
    // tangentspace is a reference, so its members are accessed with '.'
    return basisvectors.size() == tangentspace.dimension;
  }
};

// One vector of a basis, identified by its direction number
struct BasisVector {
  Basis &basis;
  // Since a BasisVector denotes essentially only an integer, we
  // should be able to replace it by an integer. Not sure this is
  // worthwhile. This essentially only gives names to directions; could use
  // vector<string> in TangentSpace for this instead.
  int direction;
  set<CoordinateBasisElement *> coordinatebasiselements;
  // Holds when direction is a valid position in the owning basis and the
  // basis vector stored at that position is this very object.
  bool invariant() const {
    // basis is a reference ('.' access); the element's address is taken so
    // that it can be compared with the pointer 'this'
    return direction >= 0 && direction < basis.basisvectors.size() &&
           &basis.basisvectors[direction] == this;
  }
};

// Discrete fields

// A field represented on a particular discretization with respect to a
// particular basis; it holds its per-block pieces by value.
struct DiscreteField {
  Field &field;
  Discretization *discretization;
  Basis *basis;
  set<DiscreteFieldBlock> discretefieldblocks;
  // The discretization must belong to the field's manifold, and the basis
  // must belong to the field's tangent space.
  bool invariant() const {
    // field is a reference ('.' access). Its manifold and tangentspace are
    // pointers, while Discretization::manifold and Basis::tangentspace are
    // references, so the addresses of the latter are compared.
    return field.manifold == &discretization->manifold &&
           field.tangentspace == &basis->tangentspace;
  }
};

// Discrete field on a particular region (discretization block)
struct DiscreteFieldBlock {
  DiscreteField &discretefield;
  DiscretizationBlock *discretizationblock;
  set<DiscreteFieldBlockData> discretefieldblockdata;
  // No constraints are defined yet; always holds
  bool invariant() const {
    return true;
  }
};

// Tensor component for a discrete field on a particular region
struct DiscreteFieldBlockData {
  DiscreteFieldBlock &discretefieldblock;
  TensorComponent *tensorcomponent;
  hid_t hdf5_dataset;
  // The tensor component must belong to the tensor type of the field that
  // this data is part of.
  bool invariant() const {
    // discretefieldblock, discretefield and field are references ('.'
    // access). Field::tensortype is a pointer and
    // TensorComponent::tensortype a reference, so the address of the
    // latter is compared.
    return discretefieldblock.discretefield.field.tensortype ==
           &tensorcomponent->tensortype;
  }
};

// Coordinates

// A coordinate system on a manifold, given as an ordered list of
// coordinate fields; it refers to coordinate bases by pointer.
struct CoordinateSystem {
  Manifold *manifold;
  vector<CoordinateField> coordinatefields;
  set<CoordinateBasis *> coordinatebases;
  // No constraints are defined yet; always holds
  bool invariant() const { return true; }
};
// Top-level collection of coordinate systems. The element type is
// CoordinateSystem; no type named Coordinate is declared in this prototype.
set<CoordinateSystem> coordinates;

// One coordinate of a coordinate system: a direction number together with
// the field that represents this coordinate.
struct CoordinateField {
  CoordinateSystem &coordinatesystem;
  int direction;
  Field *field;
  // Holds when direction is a valid position in the owning coordinate
  // system and the coordinate field stored there is this very object.
  bool invariant() const {
    // coordinatesystem is a reference ('.' access); the element's address
    // is taken so that it can be compared with the pointer 'this'
    return direction >= 0 &&
           direction < coordinatesystem.coordinatefields.size() &&
           &coordinatesystem.coordinatefields[direction] == this;
  }
};

// Relates a coordinate system to a basis; it points to both and holds an
// ordered list of coordinate basis elements.
// NOTE(review): unlike the other structs in this prototype, no invariant()
// is declared here -- confirm whether one is intended.
struct CoordinateBasis {
  CoordinateSystem *coordinatesystem;
  Basis *basis;
  vector<CoordinateBasisElement> coordinatebasiselements;
};

// One element of a coordinate basis: it pairs a coordinate field with a
// basis vector.
struct CoordinateBasisElement {
  CoordinateBasis &coordinatebasis;
  CoordinateField *coordinatefield;
  BasisVector *basisvector;
  // The coordinate field and the basis vector must have the same direction
  bool invariant() const {
    const int fielddirection = coordinatefield->direction;
    const int vectordirection = basisvector->direction;
    return fielddirection == vectordirection;
  }
};