openMSX
serialize.cc
Go to the documentation of this file.
1 #include "serialize.hh"
2 #include "Base64.hh"
3 #include "HexDump.hh"
4 #include "XMLLoader.hh"
5 #include "XMLElement.hh"
6 #include "ConfigException.hh"
7 #include "XMLException.hh"
8 #include "snappy.hh"
9 #include "MemBuffer.hh"
10 #include "StringOp.hh"
11 #include "FileOperations.hh"
12 #include "memory.hh"
13 #include <cstring>
14 #include <limits>
15 
16 using std::string;
17 
18 namespace openmsx {
19 
20 template<typename Derived>
21 void ArchiveBase<Derived>::attribute(const char* name, const char* value)
22 {
23  string valueStr(value);
24  self().attribute(name, valueStr);
25 }
26 template class ArchiveBase<MemOutputArchive>;
27 template class ArchiveBase<XmlOutputArchive>;
28 
30 
32  : lastId(0)
33 {
34 }
35 
36 unsigned OutputArchiveBase2::generateID1(const void* p)
37 {
38  #ifdef linux
39  assert("Can't serialize ID of object located on the stack" &&
40  !addressOnStack(p));
41  #endif
42  ++lastId;
43  assert(polyIdMap.find(p) == polyIdMap.end());
44  polyIdMap[p] = lastId;
45  return lastId;
46 }
47 unsigned OutputArchiveBase2::generateID2(
48  const void* p, const std::type_info& typeInfo)
49 {
50  #ifdef linux
51  assert("Can't serialize ID of object located on the stack" &&
52  !addressOnStack(p));
53  #endif
54  ++lastId;
55  auto key = std::make_pair(p, TypeInfo(typeInfo));
56  assert(idMap.find(key) == idMap.end());
57  idMap[key] = lastId;
58  return lastId;
59 }
60 
61 unsigned OutputArchiveBase2::getID1(const void* p)
62 {
63  auto it = polyIdMap.find(p);
64  return it != polyIdMap.end() ? it->second : 0;
65 }
66 unsigned OutputArchiveBase2::getID2(
67  const void* p, const std::type_info& typeInfo)
68 {
69  auto key = std::make_pair(p, TypeInfo(typeInfo));
70  auto it = idMap.find(key);
71  return it != idMap.end() ? it->second : 0;
72 }
73 
74 
75 template<typename Derived>
77  const char* tag, const void* data, size_t len)
78 {
79  string encoding;
80  string tmp;
81  if (false) {
82  // useful for debugging
83  encoding = "hex";
84  tmp = HexDump::encode(data, len);
85  } else if (false) {
86  encoding = "base64";
87  tmp = Base64::encode(data, len);
88  } else {
89  encoding = "gz-base64";
90  // TODO check for overflow?
91  auto dstLen = uLongf(len + len / 1000 + 12 + 1); // worst-case
92  MemBuffer<byte> buf(dstLen);
93  if (compress2(buf.data(), &dstLen,
94  reinterpret_cast<const Bytef*>(data),
95  uLong(len), 9)
96  != Z_OK) {
97  throw MSXException("Error while compressing blob.");
98  }
99  tmp = Base64::encode(buf.data(), dstLen);
100  }
101  this->self().beginTag(tag);
102  this->self().attribute("encoding", encoding);
103  Saver<string> saver;
104  saver(this->self(), tmp, false);
105  this->self().endTag(tag);
106 }
107 
110 
112 
114 {
115  auto it = idMap.find(id);
116  return it != idMap.end() ? it->second : nullptr;
117 }
118 
119 void InputArchiveBase2::addPointer(unsigned id, const void* p)
120 {
121  assert(idMap.find(id) == idMap.end());
122  idMap[id] = const_cast<void*>(p);
123 }
124 
125 template<typename Derived>
127  const char* tag, void* data, size_t len)
128 {
129  this->self().beginTag(tag);
130  string encoding;
131  this->self().attribute("encoding", encoding);
132 
133  string tmp;
134  Loader<string> loader;
135  loader(this->self(), tmp, std::make_tuple(), -1);
136  this->self().endTag(tag);
137 
138  if (encoding == "gz-base64") {
139  tmp = Base64::decode(tmp);
140  auto dstLen = uLongf(len); // TODO check for overflow?
141  if ((uncompress(reinterpret_cast<Bytef*>(data), &dstLen,
142  reinterpret_cast<const Bytef*>(tmp.data()), uLong(tmp.size()))
143  != Z_OK) ||
144  (dstLen != len)) {
145  throw MSXException("Error while decompressing blob.");
146  }
147  } else if ((encoding == "hex") || (encoding == "base64")) {
148  if (encoding == "hex") {
149  tmp = HexDump::decode(tmp);
150  } else {
151  tmp = Base64::decode(tmp);
152  }
153  if (tmp.size() != len) {
155  << "Length of decoded blob: " << tmp.size()
156  << " not identical to expected value: " << len);
157  }
158  memcpy(data, tmp.data(), len);
159  } else {
160  throw XMLException("Unsupported encoding \"" + encoding + "\" for blob");
161  }
162 }
163 
// Explicit instantiations of InputArchiveBase for the two input archive
// types.
template class InputArchiveBase<MemInputArchive>;
template class InputArchiveBase<XmlInputArchive>;
166 
168 
169 void MemOutputArchive::save(const std::string& s)
170 {
171  auto size = s.size();
172  byte* buf = buffer.allocate(sizeof(size) + size);
173  memcpy(buf, &size, sizeof(size));
174  memcpy(buf + sizeof(size), s.data(), size);
175 }
176 
178 {
179  size_t size;
180  byte* data = buffer.release(size);
181  return MemBuffer<byte>(data, size);
182 }
183 
185 
186 void MemInputArchive::load(std::string& s)
187 {
188  size_t length;
189  load(length);
190  s.resize(length);
191  if (length) {
192  get(&s[0], length);
193  }
194 }
195 
197 
198 // Too small inputs don't compress very well (often the compressed size is even
199 // bigger than the input). It also takes a relatively long time (because snappy
200 // has a relatively large setup time). I choose this value semi-arbitrary. I
201 // only made it >= 52 so that the (incompressible) RP5C01 registers won't be
202 // compressed.
203 static const size_t SMALL_SIZE = 100;
204 void MemOutputArchive::serialize_blob(const char*, const void* data, size_t len)
205 {
206  // Compress in-memory blobs:
207  //
208  // This is a bit slower than memcpy, but it uses a lot less memory.
209  // Memory usage is important for the reverse feature, where we keep a
210  // lot of savestates in memory.
211  //
212  // I compared 'gzip level=1' (fastest version with lowest compression
213  // ratio) with 'lzo'. lzo was considerably faster. Compression ratio
214  // was about the same (maybe lzo was slightly better (OTOH on higher
215  // levels gzip compresses better)). So I decided to go with lzo.
216  //
217  // Later I compared 'lzo' with 'snappy', lzo compresses 6-25% better,
218  // but 'snappy' is about twice as fast. So I switched to 'snappy'.
219  if (len >= SMALL_SIZE) {
220  size_t dstLen = snappy::maxCompressedLength(len);
221  byte* buf = buffer.allocate(sizeof(dstLen) + dstLen);
222  snappy::compress(static_cast<const char*>(data), len,
223  reinterpret_cast<char*>(&buf[sizeof(dstLen)]), dstLen);
224  memcpy(buf, &dstLen, sizeof(dstLen)); // fill-in actual size
225  buffer.deallocate(&buf[sizeof(dstLen) + dstLen]); // dealloc unused portion
226  } else {
227  byte* buf = buffer.allocate(len);
228  memcpy(buf, data, len);
229  }
230 
231 }
232 
233 void MemInputArchive::serialize_blob(const char*, void* data, size_t len)
234 {
235  if (len >= SMALL_SIZE) {
236  size_t srcLen; load(srcLen);
237  snappy::uncompress(reinterpret_cast<const char*>(buffer.getCurrentPos()),
238  srcLen, reinterpret_cast<char*>(data), len);
239  buffer.skip(srcLen);
240  } else {
241  memcpy(data, buffer.getCurrentPos(), len);
242  buffer.skip(len);
243  }
244 }
245 
247 
248 XmlOutputArchive::XmlOutputArchive(const string& filename)
249  : root("serial")
250 {
251  FILE* f = FileOperations::openFile(filename, "wb");
252  if (!f) {
253  throw XMLException("Could not open compressed file \"" + filename + "\"");
254  }
255  file = gzdopen(fileno(f), "wb9");
256  if (!file) {
257  fclose(f);
258  throw XMLException("Could not open compressed file \"" + filename + "\"");
259  }
260  current.push_back(&root);
261 }
262 
264 {
265  assert(current.back() == &root);
266  const char* header =
267  "<?xml version=\"1.0\" ?>\n"
268  "<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n";
269  gzwrite(file, const_cast<char*>(header), unsigned(strlen(header)));
270  string dump = root.dump();
271  gzwrite(file, const_cast<char*>(dump.data()), unsigned(dump.size()));
272  gzclose(file);
273 }
274 
276 {
277  save(string(1, c));
278 }
279 void XmlOutputArchive::save(const string& str)
280 {
281  assert(!current.empty());
282  assert(current.back()->getData().empty());
283  current.back()->setData(str);
284 }
286 {
287  assert(!current.empty());
288  assert(current.back()->getData().empty());
289  current.back()->setData(b ? "true" : "false");
290 }
291 void XmlOutputArchive::save(unsigned char b)
292 {
293  save(unsigned(b));
294 }
295 void XmlOutputArchive::save(signed char c)
296 {
297  save(int(c));
298 }
300 {
301  save(int(c));
302 }
304 {
305  saveImpl(i);
306 }
/** Save an unsigned value; the work is delegated to saveImpl(). */
void XmlOutputArchive::save(unsigned u)
{
    saveImpl(u);
}
/** Save an unsigned long long value; the work is delegated to saveImpl(). */
void XmlOutputArchive::save(unsigned long long ull)
{
    saveImpl(ull);
}
315 
316 void XmlOutputArchive::attribute(const char* name, const string& str)
317 {
318  assert(!current.empty());
319  assert(!current.back()->hasAttribute(name));
320  current.back()->addAttribute(name, str);
321 }
/** Add an int attribute; the work is delegated to attributeImpl(). */
void XmlOutputArchive::attribute(const char* name, int i)
{
    attributeImpl(name, i);
}
/** Add an unsigned attribute; the work is delegated to attributeImpl(). */
void XmlOutputArchive::attribute(const char* name, unsigned u)
{
    attributeImpl(name, u);
}
330 
331 void XmlOutputArchive::beginTag(const char* tag)
332 {
333  assert(!current.empty());
334  auto& elem = current.back()->addChild(XMLElement(tag));
335  current.push_back(&elem);
336 }
337 void XmlOutputArchive::endTag(const char* tag)
338 {
339  assert(!current.empty());
340  assert(current.back()->getName() == tag); (void)tag;
341  current.pop_back();
342 }
343 
345 
/** Load the XML document @p filename (checked against
  * "openmsx-serialize.dtd" by XMLLoader::load) and start reading at its
  * root element.
  */
XmlInputArchive::XmlInputArchive(const string& filename)
    : elem(XMLLoader::load(filename, "openmsx-serialize.dtd"))
{
    // The stack of open elements starts with the root; the second pair
    // member starts at 0 (it is passed to findNextChild() in beginTag()).
    elems.push_back(std::make_pair(&elem, 0));
}
351 
353 {
354  std::string str;
355  load(str);
356  std::istringstream is(str);
357  is >> c;
358 }
359 void XmlInputArchive::load(string& t)
360 {
361  if (!elems.back().first->getChildren().empty()) {
362  throw XMLException("No child tags expected for string types");
363  }
364  t = elems.back().first->getData();
365 }
367 {
368  if (!elems.back().first->getChildren().empty()) {
369  throw XMLException("No child tags expected for boolean types");
370  }
371  const auto& s = elems.back().first->getData();
372  if ((s == "true") || (s == "1")) {
373  b = true;
374  } else if ((s == "false") || (s == "0")) {
375  b = false;
376  } else {
377  throw XMLException("Bad value found for boolean: " + s);
378  }
379 }
380 void XmlInputArchive::load(unsigned char& b)
381 {
382  unsigned i;
383  load(i);
384  b = i;
385 }
386 void XmlInputArchive::load(signed char& c)
387 {
388  int i;
389  load(i);
390  c = i;
391 }
393 {
394  int i;
395  load(i);
396  c = i;
397 }
398 
399 // This function parses a number from a string. It's similar to the generic
400 // templatized XmlInputArchive::load() method, but _much_ faster. It does
401 // have some limitations though:
402 // - it can't handle leading whitespace
403 // - it can't handle extra characters at the end of the string
404 // - it can only handle one base (only decimal, not octal or hexadecimal)
405 // - it doesn't understand a leading '+' sign
406 // - it doesn't detect overflow or underflow (The generic implementation sets
407 // a 'bad' flag on the stream and clips the result to the min/max allowed
408 // value. Though this 'bad' flag was ignored by the openMSX code).
409 // This routine is only used to parse strings we've written ourselves (and the
410 // savestate/replay XML files are not meant to be manually edited). So the
411 // above limitations don't really matter. And we can use the speed gain.
/** Functor that negates a value on request, selected at compile time by the
  * signedness of the value's type. Spelling 'if (neg) t = -t;' directly
  * triggers a warning on VC++ when T is unsigned; dispatching through this
  * template avoids that.
  */
template<bool IS_SIGNED> struct ConditionalNegate;

/** Signed flavour: really performs the negation when asked to. */
template<> struct ConditionalNegate<true> {
    template<typename T> void operator()(bool negate, T& t) {
        if (!negate) return;
        t = -t; // ok to negate a signed type
    }
};

/** Unsigned flavour: never touches the value; a negate request is a bug. */
template<> struct ConditionalNegate<false> {
    template<typename T> void operator()(bool negate, T& /*t*/) {
        (void)negate; // only referenced by the assert
        assert(!negate); // can't negate unsigned type
    }
};
423 template<typename T> static inline void fastAtoi(const string& str, T& t)
424 {
425  t = 0;
426  bool neg = false;
427  size_t i = 0;
428  size_t l = str.size();
429 
430  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
431  if (IS_SIGNED) {
432  if (l == 0) return;
433  if (str[0] == '-') {
434  neg = true;
435  i = 1;
436  }
437  }
438  for (; i < l; ++i) {
439  unsigned d = str[i] - '0';
440  if (unlikely(d > 9)) {
441  throw XMLException("Invalid integer: " + str);
442  }
443  t = 10 * t + d;
444  }
445  // The following stuff does the equivalent of:
446  // if (neg) t = -t;
447  // Though this expression triggers a warning on VC++ when T is an
448  // unsigned type. This complex template stuff avoids the warning.
449  ConditionalNegate<IS_SIGNED> negateFunctor;
450  negateFunctor(neg, t);
451 }
453 {
454  std::string str;
455  load(str);
456  fastAtoi(str, i);
457 }
/** Read an unsigned value: fetch the element data as a string and parse it
  * with fastAtoi().
  */
void XmlInputArchive::load(unsigned& u)
{
    std::string str;
    load(str);
    fastAtoi(str, u);
}
/** Read an unsigned long long value: fetch the element data as a string and
  * parse it with fastAtoi().
  */
void XmlInputArchive::load(unsigned long long& ull)
{
    std::string str;
    load(str);
    fastAtoi(str, ull);
}
470 
471 void XmlInputArchive::beginTag(const char* tag)
472 {
473  auto* child = elems.back().first->findNextChild(
474  tag, elems.back().second);
475  if (!child) {
476  string path;
477  for (auto& e : elems) {
478  path += e.first->getName() + '/';
479  }
481  "No child tag \"" << tag <<
482  "\" found at location \"" << path << '\"');
483  }
484  elems.push_back(std::make_pair(child, 0));
485 }
486 void XmlInputArchive::endTag(const char* tag)
487 {
488  const auto& elem = *elems.back().first;
489  if (elem.getName() != tag) {
490  throw XMLException("End tag \"" + elem.getName() +
491  "\" not equal to begin tag \"" + tag + "\"");
492  }
493  auto& elem2 = const_cast<XMLElement&>(elem);
494  elem2.clearName(); // mark this elem for later beginTag() calls
495  elems.pop_back();
496 }
497 
/** Read string attribute @p name of the innermost open element.
  * @throws XMLException carrying the message of the ConfigException that
  *         getAttribute() raised.
  */
void XmlInputArchive::attribute(const char* name, string& t)
{
    try {
        t = elems.back().first->getAttribute(name);
    } catch (ConfigException& ex) {
        // Re-throw as XMLException, keeping the original message.
        throw XMLException(ex.getMessage());
    }
}
/** Read an int attribute; the work is delegated to attributeImpl(). */
void XmlInputArchive::attribute(const char* name, int& i)
{
    attributeImpl(name, i);
}
/** Read an unsigned attribute; the work is delegated to attributeImpl(). */
void XmlInputArchive::attribute(const char* name, unsigned& u)
{
    attributeImpl(name, u);
}
514 bool XmlInputArchive::hasAttribute(const char* name)
515 {
516  return elems.back().first->hasAttribute(name);
517 }
518 bool XmlInputArchive::findAttribute(const char* name, unsigned& value)
519 {
520  return elems.back().first->findAttributeInt(name, value);
521 }
523 {
524  return int(elems.back().first->getChildren().size());
525 }
526 
527 } // namespace openmsx