openMSX
serialize.cc
Go to the documentation of this file.
1 #include "serialize.hh"
2 #include "Base64.hh"
3 #include "HexDump.hh"
4 #include "XMLLoader.hh"
5 #include "XMLElement.hh"
6 #include "ConfigException.hh"
7 #include "XMLException.hh"
8 #include "snappy.hh"
9 #include "MemBuffer.hh"
10 #include "StringOp.hh"
11 #include "FileOperations.hh"
12 #include "Version.hh"
13 #include "Date.hh"
14 #include <cstring>
15 #include <limits>
16 
17 using std::string;
18 
19 namespace openmsx {
20 
21 template<typename Derived>
22 void ArchiveBase<Derived>::attribute(const char* name, const char* value)
23 {
24  string valueStr(value);
25  self().attribute(name, valueStr);
26 }
27 template class ArchiveBase<MemOutputArchive>;
28 template class ArchiveBase<XmlOutputArchive>;
29 
31 
33  : lastId(0)
34 {
35 }
36 
37 unsigned OutputArchiveBase2::generateID1(const void* p)
38 {
39  #ifdef linux
40  assert("Can't serialize ID of object located on the stack" &&
41  !addressOnStack(p));
42  #endif
43  ++lastId;
44  assert(polyIdMap.find(p) == end(polyIdMap));
45  polyIdMap[p] = lastId;
46  return lastId;
47 }
48 unsigned OutputArchiveBase2::generateID2(
49  const void* p, const std::type_info& typeInfo)
50 {
51  #ifdef linux
52  assert("Can't serialize ID of object located on the stack" &&
53  !addressOnStack(p));
54  #endif
55  ++lastId;
56  auto key = std::make_pair(p, std::type_index(typeInfo));
57  assert(idMap.find(key) == end(idMap));
58  idMap[key] = lastId;
59  return lastId;
60 }
61 
62 unsigned OutputArchiveBase2::getID1(const void* p)
63 {
64  auto it = polyIdMap.find(p);
65  return it != end(polyIdMap) ? it->second : 0;
66 }
67 unsigned OutputArchiveBase2::getID2(
68  const void* p, const std::type_info& typeInfo)
69 {
70  auto it = idMap.find({p, std::type_index(typeInfo)});
71  return it != end(idMap) ? it->second : 0;
72 }
73 
74 
75 template<typename Derived>
77  const char* tag, const void* data, size_t len)
78 {
79  string encoding;
80  string tmp;
81  if (false) {
82  // useful for debugging
83  encoding = "hex";
84  tmp = HexDump::encode(data, len);
85  } else if (false) {
86  encoding = "base64";
87  tmp = Base64::encode(data, len);
88  } else {
89  encoding = "gz-base64";
90  // TODO check for overflow?
91  auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
92  MemBuffer<byte> buf(dstLen);
93  if (compress2(buf.data(), &dstLen,
94  reinterpret_cast<const Bytef*>(data),
95  uLong(len), 9)
96  != Z_OK) {
97  throw MSXException("Error while compressing blob.");
98  }
99  tmp = Base64::encode(buf.data(), dstLen);
100  }
101  this->self().beginTag(tag);
102  this->self().attribute("encoding", encoding);
103  Saver<string> saver;
104  saver(this->self(), tmp, false);
105  this->self().endTag(tag);
106 }
107 
110 
112 
114 {
115  auto it = idMap.find(id);
116  return it != end(idMap) ? it->second : nullptr;
117 }
118 
119 void InputArchiveBase2::addPointer(unsigned id, const void* p)
120 {
121  assert(idMap.find(id) == end(idMap));
122  idMap[id] = const_cast<void*>(p);
123 }
124 
125 template<typename Derived>
127  const char* tag, void* data, size_t len)
128 {
129  this->self().beginTag(tag);
130  string encoding;
131  this->self().attribute("encoding", encoding);
132 
133  string tmp;
134  Loader<string> loader;
135  loader(this->self(), tmp, std::make_tuple(), -1);
136  this->self().endTag(tag);
137 
138  if (encoding == "gz-base64") {
139  tmp = Base64::decode(tmp);
140  auto dstLen = uLongf(len); // TODO check for overflow?
141  if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
142  reinterpret_cast<const Bytef*>(tmp.data()), uLong(tmp.size()))
143  != Z_OK) ||
144  (dstLen != len)) {
145  throw MSXException("Error while decompressing blob.");
146  }
147  } else if ((encoding == "hex") || (encoding == "base64")) {
148  if (encoding == "hex") {
149  tmp = HexDump::decode(tmp);
150  } else {
151  tmp = Base64::decode(tmp);
152  }
153  if (tmp.size() != len) {
155  << "Length of decoded blob: " << tmp.size()
156  << " not identical to expected value: " << len);
157  }
158  memcpy(data, tmp.data(), len);
159  } else {
160  throw XMLException("Unsupported encoding \"" + encoding + "\" for blob");
161  }
162 }
163 
164 template class InputArchiveBase<MemInputArchive>;
165 template class InputArchiveBase<XmlInputArchive>;
166 
168 
169 void MemOutputArchive::save(const std::string& s)
170 {
171  auto size = s.size();
172  byte* buf = buffer.allocate(sizeof(size) + size);
173  memcpy(buf, &size, sizeof(size));
174  memcpy(buf + sizeof(size), s.data(), size);
175 }
176 
178 {
179  return buffer.release(size);
180 }
181 
183 
184 void MemInputArchive::load(std::string& s)
185 {
186  size_t length;
187  load(length);
188  s.resize(length);
189  if (length) {
190  get(&s[0], length);
191  }
192 }
193 
195 
196 // Too small inputs don't compress very well (often the compressed size is even
197 // bigger than the input). It also takes a relatively long time (because snappy
198 // has a relatively large setup time). I choose this value semi-arbitrary. I
199 // only made it >= 52 so that the (incompressible) RP5C01 registers won't be
200 // compressed.
201 static const size_t SMALL_SIZE = 100;
202 void MemOutputArchive::serialize_blob(const char*, const void* data, size_t len)
203 {
204  // Compress in-memory blobs:
205  //
206  // This is a bit slower than memcpy, but it uses a lot less memory.
207  // Memory usage is important for the reverse feature, where we keep a
208  // lot of savestates in memory.
209  //
210  // I compared 'gzip level=1' (fastest version with lowest compression
211  // ratio) with 'lzo'. lzo was considerably faster. Compression ratio
212  // was about the same (maybe lzo was slightly better (OTOH on higher
213  // levels gzip compresses better)). So I decided to go with lzo.
214  //
215  // Later I compared 'lzo' with 'snappy', lzo compresses 6-25% better,
216  // but 'snappy' is about twice as fast. So I switched to 'snappy'.
217  if (len >= SMALL_SIZE) {
218  size_t dstLen = snappy::maxCompressedLength(len);
219  byte* buf = buffer.allocate(sizeof(dstLen) + dstLen);
220  snappy::compress(static_cast<const char*>(data), len,
221  reinterpret_cast<char*>(&buf[sizeof(dstLen)]), dstLen);
222  memcpy(buf, &dstLen, sizeof(dstLen)); // fill-in actual size
223  buffer.deallocate(&buf[sizeof(dstLen) + dstLen]); // dealloc unused portion
224  } else {
225  byte* buf = buffer.allocate(len);
226  memcpy(buf, data, len);
227  }
228 
229 }
230 
231 void MemInputArchive::serialize_blob(const char*, void* data, size_t len)
232 {
233  if (len >= SMALL_SIZE) {
234  size_t srcLen; load(srcLen);
235  snappy::uncompress(reinterpret_cast<const char*>(buffer.getCurrentPos()),
236  srcLen, reinterpret_cast<char*>(data), len);
237  buffer.skip(srcLen);
238  } else {
239  memcpy(data, buffer.getCurrentPos(), len);
240  buffer.skip(len);
241  }
242 }
243 
245 
246 XmlOutputArchive::XmlOutputArchive(const string& filename)
247  : root("serial")
248 {
249  root.addAttribute("openmsx_version", Version::full());
250  root.addAttribute("date_time", Date::toString(time(nullptr)));
251  root.addAttribute("platform", TARGET_PLATFORM);
252  auto f = FileOperations::openFile(filename, "wb");
253  if (!f) {
254  throw XMLException("Could not open compressed file \"" + filename + "\"");
255  }
256  file = gzdopen(fileno(f.get()), "wb9");
257  if (!file) {
258  throw XMLException("Could not open compressed file \"" + filename + "\"");
259  }
260  f.release();
261  current.push_back(&root);
262 }
263 
265 {
266  assert(current.back() == &root);
267  const char* header =
268  "<?xml version=\"1.0\" ?>\n"
269  "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
270  gzwrite(file, const_cast<char*>(header), unsigned(strlen(header)));
271  string dump = root.dump();
272  gzwrite(file, const_cast<char*>(dump.data()), unsigned(dump.size()));
273  gzclose(file);
274 }
275 
277 {
278  save(string(1, c));
279 }
280 void XmlOutputArchive::save(const string& str)
281 {
282  assert(!current.empty());
283  assert(current.back()->getData().empty());
284  current.back()->setData(str);
285 }
287 {
288  assert(!current.empty());
289  assert(current.back()->getData().empty());
290  current.back()->setData(b ? "true" : "false");
291 }
292 void XmlOutputArchive::save(unsigned char b)
293 {
294  save(unsigned(b));
295 }
296 void XmlOutputArchive::save(signed char c)
297 {
298  save(int(c));
299 }
301 {
302  save(int(c));
303 }
305 {
306  saveImpl(i);
307 }
308 void XmlOutputArchive::save(unsigned u)
309 {
310  saveImpl(u);
311 }
312 void XmlOutputArchive::save(unsigned long long ull)
313 {
314  saveImpl(ull);
315 }
316 
317 void XmlOutputArchive::attribute(const char* name, const string& str)
318 {
319  assert(!current.empty());
320  assert(!current.back()->hasAttribute(name));
321  current.back()->addAttribute(name, str);
322 }
323 void XmlOutputArchive::attribute(const char* name, int i)
324 {
325  attributeImpl(name, i);
326 }
327 void XmlOutputArchive::attribute(const char* name, unsigned u)
328 {
329  attributeImpl(name, u);
330 }
331 
332 void XmlOutputArchive::beginTag(const char* tag)
333 {
334  assert(!current.empty());
335  auto& elem = current.back()->addChild(tag);
336  current.push_back(&elem);
337 }
338 void XmlOutputArchive::endTag(const char* tag)
339 {
340  assert(!current.empty());
341  assert(current.back()->getName() == tag); (void)tag;
342  current.pop_back();
343 }
344 
346 
347 XmlInputArchive::XmlInputArchive(const string& filename)
348  : elem(XMLLoader::load(filename, "openmsx-serialize.dtd"))
349 {
350  elems.emplace_back(&elem, 0);
351 }
352 
354 {
355  std::string str;
356  load(str);
357  std::istringstream is(str);
358  is >> c;
359 }
360 void XmlInputArchive::load(string& t)
361 {
362  if (!elems.back().first->getChildren().empty()) {
363  throw XMLException("No child tags expected for string types");
364  }
365  t = elems.back().first->getData();
366 }
368 {
369  if (!elems.back().first->getChildren().empty()) {
370  throw XMLException("No child tags expected for boolean types");
371  }
372  const auto& s = elems.back().first->getData();
373  if ((s == "true") || (s == "1")) {
374  b = true;
375  } else if ((s == "false") || (s == "0")) {
376  b = false;
377  } else {
378  throw XMLException("Bad value found for boolean: " + s);
379  }
380 }
381 void XmlInputArchive::load(unsigned char& b)
382 {
383  unsigned i;
384  load(i);
385  b = i;
386 }
387 void XmlInputArchive::load(signed char& c)
388 {
389  int i;
390  load(i);
391  c = i;
392 }
394 {
395  int i;
396  load(i);
397  c = i;
398 }
399 
400 // This function parses a number from a string. It's similar to the generic
401 // templatized XmlInputArchive::load() method, but _much_ faster. It does
402 // have some limitations though:
403 // - it can't handle leading whitespace
404 // - it can't handle extra characters at the end of the string
405 // - it can only handle one base (only decimal, not octal or hexadecimal)
406 // - it doesn't understand a leading '+' sign
407 // - it doesn't detect overflow or underflow (The generic implementation sets
408 // a 'bad' flag on the stream and clips the result to the min/max allowed
409 // value. Though this 'bad' flag was ignored by the openMSX code).
410 // This routine is only used to parse strings we've written ourselves (and the
411 // savestate/replay XML files are not meant to be manually edited). So the
412 // above limitations don't really matter. And we can use the speed gain.
413 template<bool IS_SIGNED> struct ConditionalNegate;
414 template<> struct ConditionalNegate<true> {
415  template<typename T> void operator()(bool negate, T& t) {
416  if (negate) t = -t; // ok to negate a signed type
417  }
418 };
419 template<> struct ConditionalNegate<false> {
420  template<typename T> void operator()(bool negate, T& /*t*/) {
421  assert(!negate); (void)negate; // can't negate unsigned type
422  }
423 };
424 template<typename T> static inline void fastAtoi(const string& str, T& t)
425 {
426  t = 0;
427  bool neg = false;
428  size_t i = 0;
429  size_t l = str.size();
430 
431  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
432  if (IS_SIGNED) {
433  if (l == 0) return;
434  if (str[0] == '-') {
435  neg = true;
436  i = 1;
437  }
438  }
439  for (; i < l; ++i) {
440  unsigned d = str[i] - '0';
441  if (unlikely(d > 9)) {
442  throw XMLException("Invalid integer: " + str);
443  }
444  t = 10 * t + d;
445  }
446  // The following stuff does the equivalent of:
447  // if (neg) t = -t;
448  // Though this expression triggers a warning on VC++ when T is an
449  // unsigned type. This complex template stuff avoids the warning.
450  ConditionalNegate<IS_SIGNED> negateFunctor;
451  negateFunctor(neg, t);
452 }
454 {
455  std::string str;
456  load(str);
457  fastAtoi(str, i);
458 }
459 void XmlInputArchive::load(unsigned& u)
460 {
461  std::string str;
462  load(str);
463  fastAtoi(str, u);
464 }
465 void XmlInputArchive::load(unsigned long long& ull)
466 {
467  std::string str;
468  load(str);
469  fastAtoi(str, ull);
470 }
471 
472 void XmlInputArchive::beginTag(const char* tag)
473 {
474  auto* child = elems.back().first->findNextChild(
475  tag, elems.back().second);
476  if (!child) {
477  string path;
478  for (auto& e : elems) {
479  path += e.first->getName() + '/';
480  }
482  "No child tag \"" << tag <<
483  "\" found at location \"" << path << '\"');
484  }
485  elems.emplace_back(child, 0);
486 }
487 void XmlInputArchive::endTag(const char* tag)
488 {
489  const auto& elem = *elems.back().first;
490  if (elem.getName() != tag) {
491  throw XMLException("End tag \"" + elem.getName() +
492  "\" not equal to begin tag \"" + tag + "\"");
493  }
494  auto& elem2 = const_cast<XMLElement&>(elem);
495  elem2.clearName(); // mark this elem for later beginTag() calls
496  elems.pop_back();
497 }
498 
499 void XmlInputArchive::attribute(const char* name, string& t)
500 {
501  try {
502  t = elems.back().first->getAttribute(name);
503  } catch (ConfigException& ex) {
504  throw XMLException(ex.getMessage());
505  }
506 }
507 void XmlInputArchive::attribute(const char* name, int& i)
508 {
509  attributeImpl(name, i);
510 }
511 void XmlInputArchive::attribute(const char* name, unsigned& u)
512 {
513  attributeImpl(name, u);
514 }
515 bool XmlInputArchive::hasAttribute(const char* name)
516 {
517  return elems.back().first->hasAttribute(name);
518 }
519 bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
520 {
521  return elems.back().first->findAttributeInt(name, value);
522 }
524 {
525  return int(elems.back().first->getChildren().size());
526 }
527 
528 } // namespace openmsx
void attribute(const char *name, T &t)
Definition: serialize.hh:785
T length(const vecN< N, T > &x)
Definition: gl_vec.hh:322
size_t maxCompressedLength(size_t inLen)
Definition: snappy.cc:616
bool findAttribute(const char *name, unsigned &value)
Definition: serialize.cc:519
void serialize_blob(const char *tag, void *data, size_t len)
Definition: serialize.cc:126
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:167
void operator()(bool negate, T &t)
Definition: serialize.cc:415
#define unlikely(x)
Definition: likely.hh:15
void loadChar(char &c)
Definition: serialize.cc:353
XmlInputArchive(const std::string &filename)
Definition: serialize.cc:347
const byte * getCurrentPos() const
Return a pointer to the current position in the buffer.
void * getPointer(unsigned id)
Definition: serialize.cc:113
void save(const T &t)
Definition: serialize.hh:588
void endTag(const char *tag)
Definition: serialize.cc:487
void serialize_blob(const char *, void *data, size_t len)
Definition: serialize.cc:231
std::string dump() const
Definition: XMLElement.cc:281
void addPointer(unsigned id, const void *p)
Definition: serialize.cc:119
MemBuffer< byte > releaseBuffer(size_t &size)
Definition: serialize.cc:177
XMLElement load(string_ref filename, string_ref systemID)
Definition: XMLLoader.cc:31
const std::string & getName() const
Definition: XMLElement.hh:30
bool hasAttribute(const char *name)
Definition: serialize.cc:515
void compress(const char *input, size_t inLen, char *output, size_t &outLen)
Definition: snappy.cc:603
void attribute(const char *name, const T &t)
Definition: serialize.hh:723
byte * allocate(size_t len)
Reserve space to insert the given number of bytes.
void endTag(const char *tag)
Definition: serialize.cc:338
void attributeImpl(const char *name, const T &t)
Definition: serialize.hh:719
void attribute(const char *name, T &t)
Load/store an attribute from/in the archive.
Definition: serialize.hh:205
void serialize_blob(const char *, const void *data, size_t len)
Definition: serialize.cc:202
void save(const T &t)
Definition: serialize.hh:694
const std::string & getMessage() const
Definition: MSXException.hh:14
void uncompress(const char *input, size_t inLen, char *output, size_t outLen)
Definition: snappy.cc:166
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
unsigned char byte
8 bit unsigned integer
Definition: openmsx.hh:25
void deallocate(byte *pos)
Free part of a previously allocated buffer.
void operator()(bool negate, T &)
Definition: serialize.cc:420
void beginTag(const char *tag)
Definition: serialize.cc:472
int countChildren() const
Definition: serialize.cc:523
uint8_t * data()
void addAttribute(string_ref name, string_ref value)
Definition: XMLElement.cc:57
size_t size() const
static std::string full()
Definition: Version.cc:7
FILE_t openFile(const std::string &filename, const std::string &mode)
Call fopen() in a platform-independent manner.
void saveImpl(const T &t)
Definition: serialize.hh:688
XmlOutputArchive(const std::string &filename)
Definition: serialize.cc:246
void beginTag(const char *tag)
Definition: serialize.cc:332
MemBuffer< byte > release(size_t &size)
Release ownership of the buffer.
void serialize_blob(const char *tag, const void *data, size_t len)
Definition: serialize.cc:76
const T * data() const
Returns pointer to the start of the memory buffer.
Definition: MemBuffer.hh:91
void skip(size_t len)
Skip the given number of bytes.
void attributeImpl(const char *name, T &t)
Definition: serialize.hh:778