openMSX
serialize.hh
Go to the documentation of this file.
1 #ifndef SERIALIZE_HH
2 #define SERIALIZE_HH
3 
4 #include "serialize_core.hh"
5 #include "SerializeBuffer.hh"
6 #include "XMLElement.hh"
7 #include "TypeInfo.hh"
8 #include "StringOp.hh"
9 #include "inline.hh"
10 #include "unreachable.hh"
11 #include <zlib.h>
12 #include <string>
13 #include <type_traits>
14 #include <cstring>
15 #include <vector>
16 #include <map>
17 #include <sstream>
18 #include <cassert>
19 #include <memory>
20 
21 namespace openmsx {
22 
23 template<typename T> class MemBuffer;
24 template<typename T> struct SerializeClassVersion;
25 
26 // In this section, the archive classes are defined.
27 //
28 // Archives can be categorized in two ways:
29 // - backing stream they use
30 // - input or output (each backing stream has exactly one input and one
31 // output variant)
32 //
33 // ATM these backing streams are implemented:
34 // - Mem
35 // Stores the stream in memory. Is meant to be very compact and very fast.
36 // It does not support versioning (it's not possible to load this stream
37 // in a newer version of openMSX). It is also not platform independent
38 // (e.g. integers are stored using native platform endianness).
39 // The main use case for this archive format is regular in-memory
40 // snapshots, for example to support replay/rewind.
41 // - XML
42 // Stores the stream in an XML file. These files are meant to be portable
43 // to different architectures (e.g. little/big endian, 32/64 bit system).
44 // There is version information in the stream, so it should be possible
45 // to load streams created with older openMSX versions in a newer openMSX.
46 // The XML files are meant to be human readable. Having editable XML files
47 // is not a design goal (e.g. simply changing a value will probably work,
48 // but swapping the position of two tags or adding or removing tags can
49 // easily break the stream).
50 // - Text
51 // This stores the stream in a flat ASCII file (one item per line). This
52 // format is only written as a proof-of-concept to test the design. It's
53 // not meant to be used in practice.
54 //
55 // Archive code is heavily templatized. It uses the CRTP (curiously recurring
56 // template pattern; a base class templatized on its derived class) to
57 // implement static polymorphism. This means there is practically no run-time
58 // overhead of using this mechanism compared to 6 separately handcoded functions
59 // (Mem/XML/Text x input/output).
60 // TODO At least in theory, still need to verify this in practice.
61 // Though my experience is that gcc is generally very good in this.
62 
63 template<typename Derived> class ArchiveBase
64 {
65 public:
74  template<typename Base, typename T>
75  void serializeBase(T& t)
76  {
77  const char* tag = BaseClassName<Base>::getName();
78  self().serialize(tag, static_cast<Base&>(t));
79  }
80 
97  template<typename Base, typename T>
98  void serializeInlinedBase(T& t, unsigned version)
99  {
100  ::openmsx::serialize(self(), static_cast<Base&>(t), version);
101  }
102 
103  // Each concrete archive class also has the following methods:
104  // Because of the implementation with static polymorphism, this
105  // interface is not explictly visible in the base class.
106  //
107  //
108  // template<typename T> void serializeWithID(const char* tag, const T& t, ...)
109  //
110  // This is _the_most_important_ method of the serialization
111  // framework. Depending on the concrete archive type (loader/saver)
112  // this method will load or save the given type 't'. In case of an XML
113  // archive the 'tag' parameter will be used as tagname.
114  //
115  // At the end there are still a number of optional parameters (in the
116  // current implementation there can be between 0 and 3, but can be
117  // extened when needed). These are 'global' constructor parameters,
118  // constructor parameters that are not stored in the stream, but that
119  // are needed to reconstruct the object (for example can be references
120  // to structures that were already stored in the stream). So these
121  // parameters are only actually used while loading.
122  // TODO document this in more detail in some section where the
123  // (polymorphic) constructors are also described.
124  //
125  //
126  // void serialize_blob(const char* tag, const void* data, size_t len)
127  //
128  // Serialize the given data as a binary blob.
129  // This cannot be part of the serialize() method above because we
130  // cannot know whether a byte-array should be serialized as a blob
131  // or as a collection of bytes (IOW we cannot decide it based on the
132  // type).
133  //
134  //
135  // template<typename T> void serialize(const char* tag, const T& t)
136  //
137  // This is much like the serializeWithID() method above, but it doesn't
138  // store an ID with this element. This means that it's not possible,
139  // later on in the stream, to refer to this element. For many elements
140  // you know this will not happen. This method results in a slightly
141  // more compact stream.
142  //
143  // Note that for primitive types we already don't store an ID, because
144  // pointers to primitive types are not supported (at least not ATM).
145  //
146  //
147  // template<typename T> void serializePointerID(const char* tag, const T& t)
148  //
149  // Serialize a pointer by storing the ID of the object it points to.
150  // This only works if the object was already serialized. The only
151  // reason to use this method instead of the more general serialize()
152  // method is that this one does not instantiate the object
153  // construction code. (So in some cases you can avoid having to
154  // provide specializations of SerializeConstructorArgs.)
155  //
156  //
157  // template<typename T> void serializePolymorphic(const char* tag, const T& t)
158  //
159  // Serialize a value-type whose concrete type is not yet known at
160  // compile-time (polymorphic pointers are already handled by the
161  // generic serialize() method).
162  //
163  // The difference between pointer and value-types is that for
164  // pointers, the de-serialize code also needs to construct the
165  // object, while for value-types, the object (with the correct
166  // concrete type) is already constructed, it only needs to be
167  // initialized.
168  //
169  //
170  // bool versionAtLeast(unsigned actual, unsigned required) const
171  // bool versionBelow (unsigned actual, unsigned required) const
172  //
173  // Checks whether the actual version is respective 'bigger or equal'
174  // or 'strictly lower' than the required version. So in fact these are
175  // equivalent to respectively:
176  // return actual >= required;
177  // return actual < required;
178  // Note that these two methods are the exact opposite of each other.
179  // Though for memory-archives and output-archives we know that the
180  // actual version is always equal to the latest class version and the
181  // required version can never be bigger than this latest version, so
182  // in these cases the methods can be optimized to respectively:
183  // return true;
184  // return false;
185  // By using these methods instead of direct comparisons, the compiler
186  // is able to perform more dead-code-elimination.
187 
188 /*internal*/
189  // These must be public for technical reasons, but they should only
190  // be used by the serialization framework.
191 
193  bool needVersion() const { return true; }
194 
198  bool translateEnumToString() const { return false; }
199 
206  template<typename T> void attribute(const char* name, T& t)
207  {
208  self().serialize(name, t);
209  }
210  void attribute(const char* name, const char* value);
211 
217  bool canHaveOptionalAttributes() const { return false; }
218 
223  bool hasAttribute(const char* /*name*/)
224  {
225  UNREACHABLE; return false;
226  }
227 
231  bool findAttribute(const char* /*name*/, unsigned& /*value*/)
232  {
233  UNREACHABLE; return false;
234  }
235 
244  bool canCountChildren() const { return false; }
245 
250  int countChildren() const
251  {
252  UNREACHABLE; return 0;
253  }
254 
263  void beginTag(const char* /*tag*/)
264  {
265  // nothing
266  }
273  void endTag(const char* /*tag*/)
274  {
275  // nothing
276  }
277 
278  // These (internal) methods should be implemented in the concrete
279  // archive classes.
280  //
281  // template<typename T> void save(const T& t)
282  //
283  // Should only be implemented for OuputArchives. Is called to
284  // store primitive types in the stream. In the end all structures
285  // are broken down to primitive types, so all data that ends up
286  // in the stream passes via this method (ok, depending on how
287  // attribute() and serialize_blob() is implemented, that data may
288  // not pass via save()).
289  //
290  // Often this method will be overloaded to handle certain types in a
291  // specific way.
292  //
293  //
294  // template<typename T> void load(T& t)
295  //
296  // Should only be implemented for InputArchives. This is similar (but
297  // opposite) to the save() method above. Loading of primitive types
298  // is done via this method.
299 
300  // void beginSection()
301  // void endSection()
302  // void skipSection(bool skip)
303  // The methods beginSection() and endSection() can only be used in
304  // output archives. These mark the location of a section that can
305  // later be skipped during loading.
306  // The method skipSection() can only be used in input archives. It
307  // optionally skips a section that was marked during saving.
308  // For every beginSection() call in the output, there must be a
309  // corresponding skipSection() call in the input (even if you don't
310  // actually want to skip the section).
311 
312 protected:
316  inline Derived& self()
317  {
318  return static_cast<Derived&>(*this);
319  }
320 };
321 
322 // The part of OutputArchiveBase that doesn't depend on the template parameter
324 {
325 public:
326  inline bool isLoader() const { return false; }
327  inline bool versionAtLeast(unsigned /*actual*/, unsigned /*required*/) const
328  {
329  return true;
330  }
331  inline bool versionBelow(unsigned /*actual*/, unsigned /*required*/) const
332  {
333  return false;
334  }
335 
336  void skipSection(bool /*skip*/)
337  {
338  UNREACHABLE;
339  }
340 
341 /*internal*/
342  #ifdef linux
343  // This routine is not portable, for example it breaks in
344  // windows (mingw) because there the location of the stack
345  // is _below_ the heap.
346  // But this is anyway only used to check assertions. So for now
347  // only do that in linux.
348  static NEVER_INLINE bool addressOnStack(const void* p)
349  {
350  // This is not portable, it assumes:
351  // - stack grows downwards
352  // - heap is at lower address than stack
353  // Also in c++ comparison between pointers is only defined when
354  // the two pointers point to objects in the same array.
355  int dummy;
356  return &dummy < p;
357  }
358  #endif
359 
360  // Generate a new ID for the given pointer and store this association
361  // for later (see getId()).
362  template<typename T> unsigned generateId(const T* p)
363  {
364  // For composed structures, for example
365  // struct A { ... };
366  // struct B { A a; ... };
367  // The pointer to the outer and inner structure can be the
368  // same while we still want a different ID to refer to these
369  // two. That's why we use a std::pair<void*, TypeInfo> as key
370  // in the map.
371  // For polymorphic types you do sometimes use a base pointer
372  // to refer to a subtype. So there we only use the pointer
373  // value as key in the map.
374  if (std::is_polymorphic<T>::value) {
375  return generateID1(p);
376  } else {
377  return generateID2(p, typeid(T));
378  }
379  }
380 
381  template<typename T> unsigned getId(const T* p)
382  {
383  if (std::is_polymorphic<T>::value) {
384  return getID1(p);
385  } else {
386  return getID2(p, typeid(T));
387  }
388  }
389 
390 protected:
392 
393 private:
394  unsigned generateID1(const void* p);
395  unsigned generateID2(const void* p, const std::type_info& typeInfo);
396  unsigned getID1(const void* p);
397  unsigned getID2(const void* p, const std::type_info& typeInfo);
398 
399  std::map<std::pair<const void*, TypeInfo>, unsigned> idMap;
400  std::map<const void*, unsigned> polyIdMap;
401  unsigned lastId;
402 };
403 
404 template<typename Derived>
405 class OutputArchiveBase : public ArchiveBase<Derived>, public OutputArchiveBase2
406 {
407 public:
408  template<typename Base, typename T>
409  void serializeInlinedBase(T& t, unsigned version)
410  {
411  // same implementation as base class, but with extra check
414  "base and derived must have same version when "
415  "using serializeInlinedBase()");
416  ArchiveBase<Derived>::template serializeInlinedBase<Base>(t, version);
417  }
418  // Main saver method. Heavy lifting is done in the Saver class.
419  template<typename T> void serializeWithID(const char* tag, const T& t)
420  {
421  this->self().beginTag(tag);
422  Saver<T> saver;
423  saver(this->self(), t, true);
424  this->self().endTag(tag);
425  }
426  // 3 methods below implement 'global constructor arguments'. Though
427  // the saver archives completely ignore those extra parameters. We
428  // anyway need to provide them because the same (templatized) code
429  // path is used both for saving and loading.
430  template<typename T, typename T1>
431  void serializeWithID(const char* tag, const T& t, T1 /*t1*/)
432  {
433  serializeWithID(tag, t);
434  }
435  template<typename T, typename T1, typename T2>
436  void serializeWithID(const char* tag, const T& t, T1 /*t1*/, T2 /*t2*/)
437  {
438  serializeWithID(tag, t);
439  }
440  template<typename T, typename T1, typename T2, typename T3>
441  void serializeWithID(const char* tag, const T& t, T1 /*t1*/, T2 /*t2*/, T3 /*t3*/)
442  {
443  serializeWithID(tag, t);
444  }
445 
446  // Default implementation is to base64-encode the blob and serialize
447  // the resulting string. But memory archives will memcpy the blob.
448  void serialize_blob(const char* tag, const void* data, size_t len);
449 
450  template<typename T> void serialize(const char* tag, const T& t)
451  {
452  this->self().beginTag(tag);
453  Saver<T> saver;
454  saver(this->self(), t, false);
455  this->self().endTag(tag);
456  }
457  template<typename T> void serializePointerID(const char* tag, const T& t)
458  {
459  this->self().beginTag(tag);
460  IDSaver<T> saver;
461  saver(this->self(), t);
462  this->self().endTag(tag);
463  }
464  template<typename T> void serializePolymorphic(const char* tag, const T& t)
465  {
466  static_assert(std::is_polymorphic<T>::value,
467  "must be a polymorphic type");
468  PolymorphicSaverRegistry<Derived>::save(tag, this->self(), t);
469  }
470 
471  // You shouldn't use this, it only exists for backwards compatibility
472  void serializeChar(const char* tag, char c)
473  {
474  this->self().beginTag(tag);
475  this->self().saveChar(c);
476  this->self().endTag(tag);
477  }
478 
479 protected:
481 };
482 
483 
484 // Part of InputArchiveBase that doesn't depend on the template parameter
486 {
487 public:
488  inline bool isLoader() const { return true; }
489 
491  {
492  UNREACHABLE;
493  }
494  void endSection()
495  {
496  UNREACHABLE;
497  }
498 
499 /*internal*/
500  void* getPointer(unsigned id);
501  void addPointer(unsigned id, const void* p);
502 
503  template<typename T> void resetSharedPtr(std::shared_ptr<T>& s, T* r)
504  {
505  if (!r) {
506  s.reset();
507  return;
508  }
509  auto it = sharedPtrMap.find(r);
510  if (it == sharedPtrMap.end()) {
511  s.reset(r);
512  sharedPtrMap[r] = s;
513  } else {
514  s = std::static_pointer_cast<T>(it->second);
515  }
516  }
517 
518 protected:
520 
521 private:
522  std::map<unsigned, void*> idMap;
523  std::map<void*, std::shared_ptr<void>> sharedPtrMap;
524 };
525 
526 template<typename Derived>
527 class InputArchiveBase : public ArchiveBase<Derived>, public InputArchiveBase2
528 {
529 public:
530  template<typename T>
531  void serializeWithID(const char* tag, T& t)
532  {
533  doSerialize(tag, t, std::tuple<>());
534  }
535  template<typename T, typename T1>
536  void serializeWithID(const char* tag, T& t, T1 t1)
537  {
538  doSerialize(tag, t, std::tuple<T1>(t1));
539  }
540  template<typename T, typename T1, typename T2>
541  void serializeWithID(const char* tag, T& t, T1 t1, T2 t2)
542  {
543  doSerialize(tag, t, std::tuple<T1, T2>(t1, t2));
544  }
545  template<typename T, typename T1, typename T2, typename T3>
546  void serializeWithID(const char* tag, T& t, T1 t1, T2 t2, T3 t3)
547  {
548  doSerialize(tag, t, std::tuple<T1, T2, T3>(t1, t2, t3));
549  }
550  void serialize_blob(const char* tag, void* data, size_t len);
551 
552  template<typename T>
553  void serialize(const char* tag, T& t)
554  {
555  this->self().beginTag(tag);
556  typedef typename std::remove_const<T>::type TNC;
557  auto& tnc = const_cast<TNC&>(t);
558  Loader<TNC> loader;
559  loader(this->self(), tnc, std::make_tuple(), -1); // don't load id
560  this->self().endTag(tag);
561  }
562  template<typename T> void serializePointerID(const char* tag, const T& t)
563  {
564  this->self().beginTag(tag);
565  typedef typename std::remove_const<T>::type TNC;
566  auto& tnc = const_cast<TNC&>(t);
567  IDLoader<TNC> loader;
568  loader(this->self(), tnc);
569  this->self().endTag(tag);
570  }
571  template<typename T> void serializePolymorphic(const char* tag, T& t)
572  {
573  static_assert(std::is_polymorphic<T>::value,
574  "must be a polymorphic type");
575  PolymorphicInitializerRegistry<Derived>::init(tag, this->self(), &t);
576  }
577 
578  // You shouldn't use this, it only exists for backwards compatibility
579  void serializeChar(const char* tag, char& c)
580  {
581  this->self().beginTag(tag);
582  this->self().loadChar(c);
583  this->self().endTag(tag);
584  }
585 
586 /*internal*/
587  // Actual loader method. Heavy lifting is done in the Loader class.
588  template<typename T, typename TUPLE>
589  void doSerialize(const char* tag, T& t, TUPLE args, int id = 0)
590  {
591  this->self().beginTag(tag);
592  typedef typename std::remove_const<T>::type TNC;
593  auto& tnc = const_cast<TNC&>(t);
594  Loader<TNC> loader;
595  loader(this->self(), tnc, args, id);
596  this->self().endTag(tag);
597  }
598 
599 protected:
601 };
602 
603 
604 class MemOutputArchive : public OutputArchiveBase<MemOutputArchive>
605 {
606 public:
608  {
609  }
610 
612  {
613  assert(openSections.empty());
614  }
615 
616  bool needVersion() const { return false; }
617 
618  template <typename T> void save(const T& t)
619  {
620  put(&t, sizeof(t));
621  }
622  inline void saveChar(char c)
623  {
624  save(c);
625  }
626  void save(const std::string& s);
627  void serialize_blob(const char*, const void* data, size_t len);
628 
630  {
631  size_t skip = 0; // filled in later
632  save(skip);
633  size_t beginPos = buffer.getPosition();
634  openSections.push_back(beginPos);
635  }
636  void endSection()
637  {
638  assert(!openSections.empty());
639  size_t endPos = buffer.getPosition();
640  size_t beginPos = openSections.back();
641  openSections.pop_back();
642  size_t skip = endPos - beginPos;
643  buffer.insertAt(beginPos - sizeof(skip),
644  &skip, sizeof(skip));
645  }
646 
648 
649 private:
650  void put(const void* data, size_t len)
651  {
652  if (len) {
653  buffer.insert(data, len);
654  }
655  }
656 
657  OutputBuffer buffer;
658  std::vector<size_t> openSections;
659 };
660 
661 class MemInputArchive : public InputArchiveBase<MemInputArchive>
662 {
663 public:
664  MemInputArchive(const byte* data, size_t size)
665  : buffer(data, size)
666  {
667  }
668 
669  bool needVersion() const { return false; }
670  inline bool versionAtLeast(unsigned /*actual*/, unsigned /*required*/) const
671  {
672  return true;
673  }
674  inline bool versionBelow(unsigned /*actual*/, unsigned /*required*/) const
675  {
676  return false;
677  }
678 
679  template<typename T> void load(T& t)
680  {
681  get(&t, sizeof(t));
682  }
683  inline void loadChar(char& c)
684  {
685  load(c);
686  }
687  void load(std::string& s);
688  void serialize_blob(const char*, void* data, size_t len);
689 
690  void skipSection(bool skip)
691  {
692  size_t num;
693  load(num);
694  if (skip) {
695  buffer.skip(num);
696  }
697  }
698 
699 private:
700  void get(void* data, size_t len)
701  {
702  if (len) {
703  buffer.read(data, len);
704  }
705  }
706 
707  InputBuffer buffer;
708 };
709 
711 
712 class XmlOutputArchive : public OutputArchiveBase<XmlOutputArchive>
713 {
714 public:
715  XmlOutputArchive(const std::string& filename);
717 
718  template <typename T> void saveImpl(const T& t)
719  {
720  // TODO make sure floating point is printed with enough digits
721  // maybe print as hex?
723  }
724  template <typename T> void save(const T& t)
725  {
726  saveImpl(t);
727  }
728  void saveChar(char c);
729  void save(const std::string& str);
730  void save(bool b);
731  void save(unsigned char b);
732  void save(signed char c);
733  void save(char c);
734  void save(int i); // these 3 are not strictly needed
735  void save(unsigned u); // but having them non-inline
736  void save(unsigned long long ull); // saves quite a bit of code
737 
738  void beginSection() { /*nothing*/ }
739  void endSection() { /*nothing*/ }
740 
741 //internal:
742  inline bool translateEnumToString() const { return true; }
743  inline bool canHaveOptionalAttributes() const { return true; }
744  inline bool canCountChildren() const { return true; }
745 
746  void beginTag(const char* tag);
747  void endTag(const char* tag);
748 
749  template<typename T> void attributeImpl(const char* name, const T& t)
750  {
751  attribute(name, StringOp::toString(t));
752  }
753  template<typename T> void attribute(const char* name, const T& t)
754  {
755  attributeImpl(name, t);
756  }
757  void attribute(const char* name, const std::string& str);
758  void attribute(const char* name, int i);
759  void attribute(const char* name, unsigned u);
760 
761 private:
762  gzFile file;
763  XMLElement root;
764  std::vector<XMLElement*> current;
765 };
766 
767 class XmlInputArchive : public InputArchiveBase<XmlInputArchive>
768 {
769 public:
770  XmlInputArchive(const std::string& filename);
771 
772  inline bool versionAtLeast(unsigned actual, unsigned required) const
773  {
774  return actual >= required;
775  }
776  inline bool versionBelow(unsigned actual, unsigned required) const
777  {
778  return actual < required;
779  }
780 
781  template<typename T> void load(T& t)
782  {
783  std::string str;
784  load(str);
785  std::istringstream is(str);
786  is >> t;
787  }
788  void loadChar(char& c);
789  void load(std::string& t);
790  void load(bool& b);
791  void load(unsigned char& b);
792  void load(signed char& c);
793  void load(char& c);
794  void load(int& i); // these 3 are not strictly needed
795  void load(unsigned& u); // but having them non-inline
796  void load(unsigned long long& ull); // saves quite a bit of code
797 
798  void skipSection(bool /*skip*/) { /*nothing*/ }
799 
800 //internal:
801  inline bool translateEnumToString() const { return true; }
802  inline bool canHaveOptionalAttributes() const { return true; }
803  inline bool canCountChildren() const { return true; }
804 
805  void beginTag(const char* tag);
806  void endTag(const char* tag);
807 
808  template<typename T> void attributeImpl(const char* name, T& t)
809  {
810  std::string str;
811  attribute(name, str);
812  std::istringstream is(str);
813  is >> t;
814  }
815  template<typename T> void attribute(const char* name, T& t)
816  {
817  attributeImpl(name, t);
818  }
819  void attribute(const char* name, std::string& t);
820  void attribute(const char* name, int& i);
821  void attribute(const char* name, unsigned& u);
822 
823  bool hasAttribute(const char* name);
824  bool findAttribute(const char* name, unsigned& value);
825  int countChildren() const;
826 
827 private:
828  XMLElement elem;
829  std::vector<std::pair<const XMLElement*, size_t>> elems;
830 };
831 
832 #define INSTANTIATE_SERIALIZE_METHODS(CLASS) \
833 template void CLASS::serialize(MemInputArchive&, unsigned); \
834 template void CLASS::serialize(MemOutputArchive&, unsigned); \
835 template void CLASS::serialize(XmlInputArchive&, unsigned); \
836 template void CLASS::serialize(XmlOutputArchive&, unsigned);
837 
838 } // namespace openmsx
839 
840 #endif