openMSX
FilePool.cc
Go to the documentation of this file.
1 #include "FilePool.hh"
2 #include "File.hh"
3 #include "FileException.hh"
4 #include "FileContext.hh"
5 #include "FileOperations.hh"
6 #include "TclObject.hh"
7 #include "ReadDir.hh"
8 #include "Date.hh"
9 #include "CommandController.hh"
10 #include "CommandException.hh"
11 #include "EventDistributor.hh"
12 #include "CliComm.hh"
13 #include "Timer.hh"
14 #include "StringOp.hh"
15 #include "memory.hh"
16 #include "sha1.hh"
17 #include "stl.hh"
18 #include <fstream>
19 #include <cassert>
20 
21 using std::ifstream;
22 using std::get;
23 using std::make_tuple;
24 using std::ofstream;
25 using std::pair;
26 using std::string;
27 using std::vector;
28 using std::unique_ptr;
29 
30 namespace openmsx {
31 
// Name of the sha1sum cache file, including the leading '/'. It is appended
// to the user data directory when the cache is read or written.
const char* const FILE_CACHE{"/.filecache"};
33 
34 static string initialFilePoolSettingValue()
35 {
36  TclObject result;
37 
38  for (auto& p : SystemFileContext().getPaths()) {
39  TclObject entry1;
40  entry1.addListElement("-path");
41  entry1.addListElement(FileOperations::join(p, "systemroms"));
42  entry1.addListElement("-types");
43  entry1.addListElement("system_rom");
44  result.addListElement(entry1);
45 
46  TclObject entry2;
47  entry2.addListElement("-path");
48  entry2.addListElement(FileOperations::join(p, "software"));
49  entry2.addListElement("-types");
50  entry2.addListElement("rom disk tape");
51  result.addListElement(entry2);
52  }
53  return result.getString().str();
54 }
55 
57  : filePoolSetting(
58  controller, "__filepool",
59  "This is an internal setting. Don't change this directly, "
60  "instead use the 'filepool' command.",
61  initialFilePoolSettingValue())
62  , distributor(distributor_)
63  , cliComm(controller.getCliComm())
64  , quit(false)
65 {
66  filePoolSetting.attach(*this);
67  distributor.registerEventListener(OPENMSX_QUIT_EVENT, *this);
68  readSha1sums();
69  needWrite = false;
70 }
71 
73 {
74  if (needWrite) {
75  writeSha1sums();
76  }
77  distributor.unregisterEventListener(OPENMSX_QUIT_EVENT, *this);
78  filePoolSetting.detach(*this);
79 }
80 
81 void FilePool::insert(const Sha1Sum& sum, time_t time, const string& filename)
82 {
83  auto it = upper_bound(begin(pool), end(pool), sum,
85  pool.insert(it, make_tuple(sum, time, filename));
86  needWrite = true;
87 }
88 
89 void FilePool::remove(Pool::iterator it)
90 {
91  pool.erase(it);
92  needWrite = true;
93 }
94 
95 // Change the sha1sum of the element pointed to by 'it' into 'newSum'.
96 // Also re-arrange the items so that pool remains sorted on sha1sum. Internally
97 // this method doesn't actually sort, it merely rotates the elements.
98 // Returns false if the new position is before (or at) the old position.
99 // Returns true if the new position is after the old position.
100 bool FilePool::adjust(Pool::iterator it, const Sha1Sum& newSum)
101 {
102  needWrite = true;
103  auto newIt = upper_bound(begin(pool), end(pool), newSum,
105  get<0>(*it) = newSum; // update sum
106  if (newIt > it) {
107  // move to back
108  rotate(it, it + 1, newIt);
109  return true;
110  } else {
111  if (newIt < it) {
112  // move to front
113  rotate(newIt, it, it + 1);
114  } else {
115  // (unlikely) sha1sum has changed, but after
116  // resorting item would remain in the same
117  // position
118  }
119  return false;
120  }
121 }
122 
123 static bool parse(const string& line, Sha1Sum& sha1, time_t& time, string& filename)
124 {
125  if (line.size() <= 68) return false;
126 
127  try {
128  sha1.parse40(line.data());
129  } catch (MSXException& /*e*/) {
130  return false;
131  }
132 
133  time = Date::fromString(line.data() + 42);
134  if (time == time_t(-1)) return false;
135 
136  filename.assign(line, 68, line.size());
137  return true;
138 }
139 
140 void FilePool::readSha1sums()
141 {
142  assert(pool.empty());
143 
144  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
145  ifstream file(cacheFile.c_str());
146  string line;
147  Sha1Sum sum;
148  string filename;
149  time_t time;
150  while (file.good()) {
151  getline(file, line);
152  if (parse(line, sum, time, filename)) {
153  pool.emplace_back(sum, time, filename);
154  }
155  }
156 
157  if (!std::is_sorted(begin(pool), end(pool), LessTupleElement<0>())) {
158  // This should _rarely_ happen. In fact it should only happen
159  // when .filecache was manually edited. Though because it's
160  // very important that pool is indeed sorted I've added this
161  // safety mechanism.
162  sort(begin(pool), end(pool), LessTupleElement<0>());
163  }
164 }
165 
166 void FilePool::writeSha1sums()
167 {
168  string cacheFile = FileOperations::getUserDataDir() + FILE_CACHE;
169  ofstream file;
170  FileOperations::openofstream(file, cacheFile);
171  if (!file.is_open()) {
172  return;
173  }
174  for (auto& p : pool) {
175  file << get<0>(p).toString() << " " // sum
176  << Date::toString(get<1>(p)) << " " // date
177  << get<2>(p) // filename
178  << '\n';
179  }
180 }
181 
182 static int parseTypes(Interpreter& interp, const TclObject& list)
183 {
184  int result = 0;
185  unsigned num = list.getListLength(interp);
186  for (unsigned i = 0; i < num; ++i) {
187  string_ref elem = list.getListIndex(interp, i).getString();
188  if (elem == "system_rom") {
189  result |= FilePool::SYSTEM_ROM;
190  } else if (elem == "rom") {
191  result |= FilePool::ROM;
192  } else if (elem == "disk") {
193  result |= FilePool::DISK;
194  } else if (elem == "tape") {
195  result |= FilePool::TAPE;
196  } else {
197  throw CommandException("Unknown type: " + elem);
198  }
199  }
200  return result;
201 }
202 
203 void FilePool::update(const Setting& setting)
204 {
205  assert(&setting == &filePoolSetting); (void)setting;
206  getDirectories(); // check for syntax errors
207 }
208 
209 FilePool::Directories FilePool::getDirectories() const
210 {
211  Directories result;
212  auto& interp = filePoolSetting.getInterpreter();
213  const TclObject& all = filePoolSetting.getValue();
214  unsigned numLines = all.getListLength(interp);
215  for (unsigned i = 0; i < numLines; ++i) {
216  Entry entry;
217  bool hasPath = false;
218  entry.types = 0;
219  TclObject line = all.getListIndex(interp, i);
220  unsigned numItems = line.getListLength(interp);
221  if (numItems & 1) {
222  throw CommandException(
223  "Expected a list with an even number "
224  "of elements, but got " + line.getString());
225  }
226  for (unsigned j = 0; j < numItems; j += 2) {
227  string_ref name = line.getListIndex(interp, j + 0).getString();
228  TclObject value = line.getListIndex(interp, j + 1);
229  if (name == "-path") {
230  entry.path = value.getString().str();
231  hasPath = true;
232  } else if (name == "-types") {
233  entry.types = parseTypes(interp, value);
234  } else {
235  throw CommandException(
236  "Unknown item: " + name);
237  }
238  }
239  if (!hasPath) {
240  throw CommandException(
241  "Missing -path item: " + line.getString());
242  }
243  if (entry.types == 0) {
244  throw CommandException(
245  "Missing -types item: " + line.getString());
246  }
247  result.push_back(entry);
248  }
249  return result;
250 }
251 
252 unique_ptr<File> FilePool::getFile(FileType fileType, const Sha1Sum& sha1sum)
253 {
254  unique_ptr<File> result;
255  result = getFromPool(sha1sum);
256  if (result) return result;
257 
258  // not found in cache, need to scan directories
259  ScanProgress progress;
260  progress.lastTime = Timer::getTime();
261  progress.amountScanned = 0;
262 
263  Directories directories;
264  try {
265  directories = getDirectories();
266  } catch (CommandException& e) {
267  cliComm.printWarning("Error while parsing '__filepool' setting" +
268  e.getMessage());
269  }
270  for (auto& d : directories) {
271  if (d.types & fileType) {
272  string path = FileOperations::expandTilde(d.path);
273  result = scanDirectory(sha1sum, path, d.path, progress);
274  if (result) return result;
275  }
276  }
277 
278  return result; // not found
279 }
280 
281 static void reportProgress(const string& filename, size_t percentage,
282  CliComm& cliComm, EventDistributor& distributor)
283 {
284  cliComm.printProgress(
285  "Calculating SHA1 sum for " + filename + "... " + StringOp::toString(percentage) + "%");
286  distributor.deliverEvents();
287 }
288 
289 static Sha1Sum calcSha1sum(File& file, CliComm& cliComm, EventDistributor& distributor)
290 {
291  size_t size;
292  const byte* data = file.mmap(size);
293 
294  if (size < 10*1024*1024) {
295  // for small files, don't show progress
296  return SHA1::calc(data, size);
297  }
298 
299  // Calculate sha1 in several steps so that we can show progress information
300  SHA1 sha1;
301  static const size_t NUMBER_OF_STEPS = 100;
302  // calculate in NUMBER_OF_STEPS steps and report progress every step
303  auto stepSize = size / NUMBER_OF_STEPS;
304  auto remainder = size % NUMBER_OF_STEPS;
305  size_t offset = 0;
306  string filename = file.getOriginalName();
307  reportProgress(filename, 0, cliComm, distributor);
308  for (size_t i = 0; i < (NUMBER_OF_STEPS - 1); ++i) {
309  sha1.update(&data[offset], stepSize);
310  offset += stepSize;
311  reportProgress(filename, i + 1, cliComm, distributor);
312  }
313  sha1.update(data + offset, stepSize + remainder);
314  reportProgress(filename, 100, cliComm, distributor);
315  return sha1.digest();
316 }
317 
318 unique_ptr<File> FilePool::getFromPool(const Sha1Sum& sha1sum)
319 {
320  auto bound = equal_range(begin(pool), end(pool), sha1sum,
322  // use indices instead of iterators
323  auto i = distance(begin(pool), bound.first);
324  auto last = distance(begin(pool), bound.second);
325  while (i != last) {
326  auto it = begin(pool) + i;
327  auto& time = get<1>(*it);
328  const auto& filename = get<2>(*it);
329  try {
330  auto file = make_unique<File>(filename);
331  auto newTime = file->getModificationDate();
332  if (time == newTime) {
333  // When modification time is unchanged, assume
334  // sha1sum is also unchanged. So avoid
335  // expensive sha1sum calculation.
336  return file;
337  }
338  time = newTime; // update timestamp
339  needWrite = true;
340  auto newSum = calcSha1sum(*file, cliComm, distributor);
341  if (newSum == sha1sum) {
342  // Modification time was changed, but
343  // (recalculated) sha1sum is still the same.
344  return file;
345  }
346  // Sha1sum has changed: update sha1sum, move entry to
347  // new position new sum and continue searching.
348  if (adjust(it, newSum)) {
349  // after
350  --last; // no ++i
351  } else {
352  // before (or at)
353  ++i;
354  }
355  } catch (FileException&) {
356  // Error reading file: remove from db and continue
357  // searching.
358  remove(it);
359  --last;
360  }
361  }
362  return nullptr; // not found
363 }
364 
365 unique_ptr<File> FilePool::scanDirectory(
366  const Sha1Sum& sha1sum, const string& directory, const string& poolPath,
367  ScanProgress& progress)
368 {
369  ReadDir dir(directory);
370  while (dirent* d = dir.getEntry()) {
371  if (quit) {
372  // Scanning can take a long time. Allow to exit
373  // openmsx when it takes too long. Stop scanning
374  // by pretending we didn't find the file.
375  return nullptr;
376  }
377  string file = d->d_name;
378  string path = directory + '/' + file;
380  if (FileOperations::getStat(path, st)) {
381  unique_ptr<File> result;
383  result = scanFile(sha1sum, path, st, poolPath, progress);
384  } else if (FileOperations::isDirectory(st)) {
385  if ((file != ".") && (file != "..")) {
386  result = scanDirectory(sha1sum, path, poolPath, progress);
387  }
388  }
389  if (result) return result;
390  }
391  }
392  return nullptr; // not found
393 }
394 
395 unique_ptr<File> FilePool::scanFile(const Sha1Sum& sha1sum, const string& filename,
396  const FileOperations::Stat& st, const string& poolPath,
397  ScanProgress& progress)
398 {
399  ++progress.amountScanned;
400  // Periodically send a progress message with the current filename
401  auto now = Timer::getTime();
402  if (now > (progress.lastTime + 250000)) { // 4Hz
403  progress.lastTime = now;
404  cliComm.printProgress("Searching for file with sha1sum " +
405  sha1sum.toString() + "...\nIndexing filepool " + poolPath +
406  ": [" + StringOp::toString(progress.amountScanned) + "]: " +
407  filename.substr(poolPath.size()));
408  }
409 
410  // deliverEvents() is relatively cheap when there are no events to
411  // deliver, so it's ok to call on each file.
412  distributor.deliverEvents();
413 
414  auto it = findInDatabase(filename);
415  if (it == end(pool)) {
416  // not in pool
417  try {
418  auto file = make_unique<File>(filename);
419  auto sum = calcSha1sum(*file, cliComm, distributor);
420  auto time = FileOperations::getModificationDate(st);
421  insert(sum, time, filename);
422  if (sum == sha1sum) {
423  return file;
424  }
425  } catch (FileException&) {
426  // ignore
427  }
428  } else {
429  // already in pool
430  assert(filename == get<2>(*it));
431  try {
432  auto time = FileOperations::getModificationDate(st);
433  if (time == get<1>(*it)) {
434  // db is still up to date
435  if (get<0>(*it) == sha1sum) {
436  return make_unique<File>(filename);
437  }
438  } else {
439  // db outdated
440  auto file = make_unique<File>(filename);
441  auto sum = calcSha1sum(*file, cliComm, distributor);
442  get<1>(*it) = time;
443  adjust(it, sum);
444  if (sum == sha1sum) {
445  return file;
446  }
447  }
448  } catch (FileException&) {
449  // error reading file, remove from db
450  remove(it);
451  }
452  }
453  return nullptr; // not found
454 }
455 
456 FilePool::Pool::iterator FilePool::findInDatabase(const string& filename)
457 {
458  // Linear search in pool for filename.
459  // Search from back to front because often, soon after this search, we
460  // will insert/remove an element from the vector. This requires
461  // shifting all elements in the vector starting from a certain
462  // position. Starting the search from the back increases the likelihood
463  // that the to-be-shifted elements are already in the memory cache.
464  for (auto it = pool.rbegin(); it != pool.rend(); ++it) {
465  if (get<2>(*it) == filename) {
466  return it.base() - 1;
467  }
468  }
469  return end(pool); // not found
470 }
471 
473 {
474  auto time = file.getModificationDate();
475  const auto& filename = file.getURL();
476 
477  auto it = findInDatabase(filename);
478  if ((it != end(pool)) && (get<1>(*it) == time)) {
479  // in database and modification time matches,
480  // assume sha1sum also matches
481  return get<0>(*it);
482  }
483 
484  // not in database or timestamp mismatch
485  auto sum = calcSha1sum(file, cliComm, distributor);
486  if (it == end(pool)) {
487  // was not yet in database, insert new entry
488  insert(sum, time, filename);
489  } else {
490  // was already in database, but with wrong timestamp (and sha1sum)
491  get<1>(*it) = time;
492  adjust(it, sum);
493  }
494  return sum;
495 }
496 
498 {
499  auto it = findInDatabase(file.getURL());
500  if (it != end(pool)) {
501  remove(it);
502  }
503 }
504 
505 int FilePool::signalEvent(const std::shared_ptr<const Event>& event)
506 {
507  (void)event; // avoid warning for non-assert compiles
508  assert(event->getType() == OPENMSX_QUIT_EVENT);
509  quit = true;
510  return 0;
511 }
512 
513 } // namespace openmsx
bool isRegularFile(const Stat &st)
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:150
std::string str() const
Definition: string_ref.cc:12
void removeSha1Sum(File &file)
Remove sha1sum for this file from the cache.
Definition: FilePool.cc:497
void registerEventListener(EventType type, EventListener &listener, Priority priority=OTHER)
Registers a given object to receive certain events.
string_ref getString() const
Definition: TclObject.cc:190
void unregisterEventListener(EventType type, EventListener &listener)
Unregisters a previously registered event listener.
string toString(long long a)
Definition: StringOp.cc:156
void openofstream(std::ofstream &stream, const std::string &filename)
Open an ofstream in a platform-independent manner.
std::unique_ptr< File > getFile(FileType fileType, const Sha1Sum &sha1sum)
Search file with the given sha1sum.
Definition: FilePool.cc:252
void printWarning(string_ref message)
Definition: CliComm.cc:28
string join(string_ref part1, string_ref part2)
Join two paths.
static Sha1Sum calc(const uint8_t *data, size_t len)
Easier to use interface, if you can pass all data in one go.
Definition: sha1.cc:281
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
T sum(const vecN< N, T > &x)
Definition: gl_vec.hh:248
void deliverEvents()
This actually delivers the events.
bool getStat(string_ref filename_, Stat &st)
Call stat() and return the stat structure.
void attach(Observer< T > &observer)
Definition: Subject.hh:52
This class represents the result of a sha1 calculation (a 160-bit value).
Definition: sha1.hh:19
unsigned getListLength(Interpreter &interp) const
Definition: TclObject.cc:203
const std::string & getMessage() const
Definition: MSXException.hh:14
string getUserDataDir()
Get the openMSX data dir in the user's home directory.
time_t getModificationDate()
Get the date/time of last modification.
Definition: File.cc:138
Interpreter & getInterpreter() const
Definition: Setting.cc:155
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:5
std::string toString(time_t time)
Definition: Date.cc:152
std::iterator_traits< octet_iterator >::difference_type distance(octet_iterator first, octet_iterator last)
unsigned char byte
8 bit unsigned integer
Definition: openmsx.hh:25
FilePool(CommandController &controler, EventDistributor &distributor)
Definition: FilePool.cc:56
void addListElement(string_ref element)
Definition: TclObject.cc:120
bool isDirectory(const Stat &st)
const char *const FILE_CACHE
Definition: FilePool.cc:32
void printProgress(string_ref message)
Definition: CliComm.cc:38
Sha1Sum getSha1Sum(File &file)
Calculate sha1sum for the given File object.
Definition: FilePool.cc:472
const std::string getURL() const
Returns the URL of this file object.
Definition: File.cc:117
time_t fromString(const char *p)
Definition: Date.cc:31
const TclObject & getValue() const final override
Gets the current value of this setting as a TclObject.
Definition: Setting.hh:115
void detach(Observer< T > &observer)
Definition: Subject.hh:58
string expandTilde(string_ref path)
Expand the '~' character to the users home directory.
uint64_t getTime()
Get current (real) time in us.
Definition: Timer.cc:24
mat4 rotate(float angle, const vec3 &axis)
Definition: gl_transform.hh:56
size_t size(string_ref utf8)
string_ref::const_iterator begin(const string_ref &x)
Definition: string_ref.hh:149
time_t getModificationDate(const Stat &st)
Get the date/time of last modification.