openMSX
StringOp.cc
Go to the documentation of this file.
1 #include "StringOp.hh"
2 #include "MSXException.hh"
3 #include <algorithm>
4 #include <limits>
5 #include <cassert>
6 #include <cstdlib>
7 #include <stdexcept>
8 
9 using std::advance;
10 using std::equal;
11 using std::string;
12 using std::transform;
13 using std::vector;
14 using std::set;
15 
16 namespace StringOp {
17 
18 // class Builder
19 
21 {
22 }
23 
25 {
26 }
27 
28 Builder& Builder::operator<<(const std::string& t)
29 {
30  buf += t; return *this;
31 }
33 {
34  buf.append(t.data(), t.size()); return *this;
35 }
37 {
38  buf += t; return *this;
39 }
40 Builder& Builder::operator<<(unsigned char t)
41 {
42  return operator<<(unsigned(t));
43 }
44 Builder& Builder::operator<<(unsigned short t)
45 {
46  buf += toString(t); return *this;
47 }
49 {
50  buf += toString(t); return *this;
51 }
52 Builder& Builder::operator<<(unsigned long t)
53 {
54  buf += toString(t); return *this;
55 }
56 Builder& Builder::operator<<(unsigned long long t)
57 {
58  buf += toString(t); return *this;
59 }
61 {
62  buf += t; return *this;
63 }
65 {
66  buf += toString(t); return *this;
67 }
69 {
70  buf += toString(t); return *this;
71 }
73 {
74  buf += toString(t); return *this;
75 }
77 {
78  buf += toString(t); return *this;
79 }
81 {
82  buf += toString(t); return *this;
83 }
85 {
86  buf += toString(t); return *this;
87 }
88 
89 
// Maps an integer type T to a fast unsigned type that is (at least) big enough
// to hold the absolute value of any value of type T: 'unsigned' by default,
// 'unsigned long' for (unsigned) long and 'unsigned long long' for (unsigned)
// long long.
template<typename T> struct FastUnsigned { typedef unsigned type; };
template<> struct FastUnsigned<long long> { typedef unsigned long long type; };
template<> struct FastUnsigned<unsigned long long> { typedef unsigned long long type; };
template<> struct FastUnsigned<long> { typedef unsigned long type; };
template<> struct FastUnsigned<unsigned long> { typedef unsigned long type; };

// Computes the absolute value of 't' as an unsigned number. Selecting the
// implementation on IS_SIGNED avoids compiler warnings on 't < 0' and on
// negation when 't' is actually an unsigned type.
template<bool IS_SIGNED> struct AbsHelper;
template<> struct AbsHelper<true> {
	template<typename T>
	inline typename FastUnsigned<T>::type operator()(T t) const {
		typedef typename FastUnsigned<T>::type U;
		// Convert to unsigned *before* negating. Negating the most
		// negative value in the signed domain overflows (undefined
		// behavior); unsigned arithmetic wraps and yields the correct
		// magnitude.
		const U u = static_cast<U>(t);
		return (t < 0) ? static_cast<U>(U(0) - u) : u;
	}
};
template<> struct AbsHelper<false> {
	template<typename T>
	inline typename FastUnsigned<T>::type operator()(T t) const {
		return t;
	}
};

// Does the equivalent of
//   if (t < 0) *--p = '-';
// but avoids a compiler warning on 't < 0' when 't' is an unsigned type.
template<bool IS_SIGNED> struct PutSign;
template<> struct PutSign<true> {
	template<typename T> inline void operator()(T t, char*& p) const {
		if (t < 0) *--p = '-';
	}
};
template<> struct PutSign<false> {
	template<typename T> inline void operator()(T /*t*/, char*& /*p*/) const {
		// nothing
	}
};

// Converts an integer to its decimal string representation.
//
// This routine is inspired by boost::lexical_cast. It's much faster than a
// generic version using std::stringstream. See this page for some numbers:
// http://www.boost.org/doc/libs/1_47_0/libs/conversion/lexical_cast.htm#performance
//
// The digits are written back-to-front into a stack buffer, starting with the
// least significant one; an optional '-' is put in front afterwards.
template<typename T> static inline std::string toStringImpl(T t)
{
	static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
	// 'digits10' is the number of decimal digits that can always be
	// represented, the largest values need one digit more. Signed types
	// additionally need room for the sign character.
	static const unsigned BUF_SIZE = 1 + std::numeric_limits<T>::digits10
	                               + (IS_SIGNED ? 1 : 0);

	char buf[BUF_SIZE];
	char* const bufEnd = buf + BUF_SIZE;
	char* p = bufEnd;

	typename FastUnsigned<T>::type a = AbsHelper<IS_SIGNED>()(t);
	do {
		*--p = static_cast<char>('0' + (a % 10));
		a /= 10;
	} while (a);

	PutSign<IS_SIGNED>()(t, p);

	return std::string(p, bufEnd - p);
}
156 string toString(long long a) { return toStringImpl(a); }
157 string toString(unsigned long long a) { return toStringImpl(a); }
158 string toString(long a) { return toStringImpl(a); }
159 string toString(unsigned long a) { return toStringImpl(a); }
160 string toString(int a) { return toStringImpl(a); }
161 string toString(unsigned a) { return toStringImpl(a); }
162 string toString(short a) { return toStringImpl(a); }
163 string toString(unsigned short a) { return toStringImpl(a); }
164 string toString(char a) { return string(1, a); }
165 string toString(signed char a) { return string(1, a); }
166 string toString(unsigned char a) { return string(1, a); }
167 string toString(bool a) { return string(1, '0' + a); }
168 
// Returns the lower-case hexadecimal digit for the low 4 bits of 'x'.
static inline char hexDigit(unsigned x)
{
	static const char digits[] = "0123456789abcdef";
	return digits[x & 15];
}

// Formats 'x' as a lower-case hexadecimal string of exactly 'width'
// characters. The value is zero-padded on the left; when it needs more than
// 'width' digits only the least significant ones are kept.
//
// 'width' is expected to be in the range [1..8] (checked with an assert). The
// digits are written straight into the result string instead of into a
// fixed-size stack buffer, so a violation of that contract in a build without
// assertions can no longer write outside of a buffer.
std::string toHexString(unsigned x, unsigned width)
{
	assert((0 < width) && (width <= 8));

	std::string result(width, '0');
	// Fill from the back: the least significant digit goes last.
	for (unsigned i = width; i-- > 0; ) {
		result[i] = hexDigit(x & 15);
		x >>= 4;
	}
	return result;
}
186 
// Parses 'str' with strtol() using base auto-detection (decimal, octal with
// a leading '0', hexadecimal with a leading '0x'). No error checking is done:
// unparsable input yields 0 and trailing garbage is ignored.
int stringToInt(const std::string& str)
{
	return static_cast<int>(std::strtol(str.c_str(), nullptr, 0));
}

// Same conversion as above, but reports whether it succeeded.
// 'result' is always overwritten with whatever strtol() produced.
// Returns true only when the string is non-empty and was consumed completely;
// an empty string or text after the number (including anything behind an
// embedded NUL character) counts as a failure.
bool stringToInt(const std::string& str, int& result)
{
	const char* const begin = str.c_str();
	char* endptr;
	result = static_cast<int>(std::strtol(begin, &endptr, 0));
	// 'endptr == begin' means that no conversion was performed at all.
	return (endptr != begin) && (endptr == begin + str.size());
}
197 
// Parses 'str' with strtoul() using base auto-detection (decimal, octal with
// a leading '0', hexadecimal with a leading '0x'). No error checking is done:
// unparsable input yields 0 and trailing garbage is ignored.
unsigned stringToUint(const std::string& str)
{
	return static_cast<unsigned>(std::strtoul(str.c_str(), nullptr, 0));
}

// Same conversion as above, but reports whether it succeeded.
// 'result' is always overwritten with whatever strtoul() produced.
// Returns true only when the string is non-empty and was consumed completely;
// an empty string or text after the number (including anything behind an
// embedded NUL character) counts as a failure.
bool stringToUint(const std::string& str, unsigned& result)
{
	const char* const begin = str.c_str();
	char* endptr;
	result = static_cast<unsigned>(std::strtoul(begin, &endptr, 0));
	// 'endptr == begin' means that no conversion was performed at all.
	return (endptr != begin) && (endptr == begin + str.size());
}
208 
// Parses 'str' with strtoull() using base auto-detection. No error checking
// is done on the input.
uint64_t stringToUint64(const std::string& str)
{
	const char* const text = str.c_str();
	return std::strtoull(text, nullptr, 0);
}
213 
215 {
216  if (str == "1") return true;
217  if ((str.size() == 4) && (strncasecmp(str.data(), "true", 4) == 0))
218  return true;
219  if ((str.size() == 3) && (strncasecmp(str.data(), "yes", 3) == 0))
220  return true;
221  return false;
222 }
223 
// Parses 'str' with strtod(). No error checking is done: unparsable input
// yields 0.0 and trailing garbage is ignored.
double stringToDouble(const std::string& str)
{
	return std::strtod(str.c_str(), nullptr);
}

// Same conversion as above, but reports whether it succeeded.
// 'result' is always overwritten with whatever strtod() produced.
// Returns true only when the string is non-empty and was consumed completely;
// an empty string or text after the number (including anything behind an
// embedded NUL character) counts as a failure.
bool stringToDouble(const std::string& str, double& result)
{
	const char* const begin = str.c_str();
	char* endptr;
	result = std::strtod(begin, &endptr);
	// 'endptr == begin' means that no conversion was performed at all.
	return (endptr != begin) && (endptr == begin + str.size());
}
234 
235 string toLower(string_ref str)
236 {
237  string result = str.str();
238  transform(begin(result), end(result), begin(result), ::tolower);
239  return result;
240 }
241 
243 {
244  return total.starts_with(part);
245 }
246 bool startsWith(string_ref total, char part)
247 {
248  return !total.empty() && (total.front() == part);
249 }
250 
251 bool endsWith(string_ref total, string_ref part)
252 {
253  return total.ends_with(part);
254 }
255 bool endsWith(string_ref total, char part)
256 {
257  return !total.empty() && (total.back() == part);
258 }
259 
// Removes, in place, all trailing characters of 'str' that occur in the
// zero-terminated set 'chars'.
void trimRight(std::string& str, const char* chars)
{
	const auto lastKept = str.find_last_not_of(chars);
	if (lastKept == std::string::npos) {
		// Nothing but characters from the set (or empty already).
		str.clear();
		return;
	}
	str.erase(lastKept + 1);
}
// Removes, in place, all trailing occurrences of the character 'chars'.
void trimRight(std::string& str, char chars)
{
	const auto lastKept = str.find_last_not_of(chars);
	if (lastKept == std::string::npos) {
		// Nothing but 'chars' (or empty already).
		str.clear();
		return;
	}
	str.erase(lastKept + 1);
}
278 void trimRight(string_ref& str, string_ref chars)
279 {
280  while (!str.empty() && (chars.find(str.back()) != string_ref::npos)) {
281  str.pop_back();
282  }
283 }
284 void trimRight(string_ref& str, char chars)
285 {
286  while (!str.empty() && (str.back() == chars)) {
287  str.pop_back();
288  }
289 }
290 
// Removes, in place, all leading characters of 'str' that occur in the
// zero-terminated set 'chars'.
void trimLeft(std::string& str, const char* chars)
{
	// When nothing is to be kept this is npos, which makes erase() remove
	// everything.
	const auto firstKept = str.find_first_not_of(chars);
	str.erase(0, firstKept);
}
// Removes, in place, all leading occurrences of the character 'chars'.
void trimLeft(std::string& str, char chars)
{
	// When nothing is to be kept this is npos, which makes erase() remove
	// everything.
	const auto firstKept = str.find_first_not_of(chars);
	str.erase(0, firstKept);
}
299 void trimLeft(string_ref& str, string_ref chars)
300 {
301  while (!str.empty() && (chars.find(str.front()) != string_ref::npos)) {
302  str.pop_front();
303  }
304 }
305 void trimLeft(string_ref& str, char chars)
306 {
307  while (!str.empty() && (str.front() == chars)) {
308  str.pop_front();
309  }
310 }
311 
312 void trim(string_ref& str, string_ref chars)
313 {
314  trimRight(str, chars);
315  trimLeft (str, chars);
316 }
317 
318 void trim(string_ref& str, char chars)
319 {
320  trimRight(str, chars);
321  trimLeft (str, chars);
322 }
323 
324 void splitOnFirst(string_ref str, string_ref chars, string_ref& first, string_ref& last)
325 {
326  auto pos = str.find_first_of(chars);
327  if (pos == string_ref::npos) {
328  first = str;
329  last.clear();
330  } else {
331  first = str.substr(0, pos);
332  last = str.substr(pos + 1);
333  }
334 }
335 void splitOnFirst(string_ref str, char chars, string_ref& first, string_ref& last)
336 {
337  auto pos = str.find_first_of(chars);
338  if (pos == string_ref::npos) {
339  first = str;
340  last.clear();
341  } else {
342  first = str.substr(0, pos);
343  last = str.substr(pos + 1);
344  }
345 }
346 
347 void splitOnLast(string_ref str, string_ref chars, string_ref& first, string_ref& last)
348 {
349  auto pos = str.find_last_of(chars);
350  if (pos == string_ref::npos) {
351  first.clear();
352  last = str;
353  } else {
354  first = str.substr(0, pos);
355  last = str.substr(pos + 1);
356  }
357 }
358 void splitOnLast(string_ref str, char chars, string_ref& first, string_ref& last)
359 {
360  auto pos = str.find_last_of(chars);
361  if (pos == string_ref::npos) {
362  first.clear();
363  last = str;
364  } else {
365  first = str.substr(0, pos);
366  last = str.substr(pos + 1);
367  }
368 }
369 
370 vector<string_ref> split(string_ref str, char chars)
371 {
372  vector<string_ref> result;
373  while (!str.empty()) {
374  string_ref first, last;
375  splitOnFirst(str, chars, first, last);
376  result.push_back(first);
377  str = last;
378  }
379  return result;
380 }
381 
382 string join(const vector<string_ref>& elems, char separator)
383 {
384  if (elems.empty()) return string();
385 
386  auto it = begin(elems);
387  Builder result;
388  result << *it;
389  for (++it; it != end(elems); ++it) {
390  result << separator;
391  result << *it;
392  }
393  return result;
394 }
395 
396 static unsigned parseNumber(string_ref str)
397 {
398  trim(str, " \t");
399  if (!str.empty()) {
400  try {
401  return fast_stou(str);
402  } catch (std::invalid_argument&) {
403  // parse error
404  }
405  }
406  throw openmsx::MSXException("Invalid integer: " + str);
407 }
408 
409 static void insert(unsigned x, set<unsigned>& result, unsigned min, unsigned max)
410 {
411  if ((x < min) || (x > max)) {
412  throw openmsx::MSXException("Out of range");
413  }
414  result.insert(x);
415 }
416 
417 static void parseRange2(string_ref str, set<unsigned>& result,
418  unsigned min, unsigned max)
419 {
420  // trimRight only: here we only care about all spaces
421  trimRight(str, " \t");
422  if (str.empty()) return;
423 
424  auto pos = str.find('-');
425  if (pos == string_ref::npos) {
426  insert(parseNumber(str), result, min, max);
427  } else {
428  unsigned begin = parseNumber(str.substr(0, pos));
429  unsigned end = parseNumber(str.substr(pos + 1));
430  if (end < begin) {
431  std::swap(begin, end);
432  }
433  for (unsigned i = begin; i <= end; ++i) {
434  insert(i, result, min, max);
435  }
436  }
437 }
438 
439 set<unsigned> parseRange(string_ref str, unsigned min, unsigned max)
440 {
441  set<unsigned> result;
442  while (true) {
443  auto next = str.find(',');
444  string_ref sub = (next == string_ref::npos)
445  ? str
446  : str.substr(0, next++);
447  parseRange2(sub, result, min, max);
448  if (next == string_ref::npos) break;
449  str = str.substr(next);
450  }
451  return result;
452 }
453 
454 #if defined(__APPLE__)
455 
456 std::string fromCFString(CFStringRef str)
457 {
458  // Try the quick route first.
459  const char *cstr = CFStringGetCStringPtr(str, kCFStringEncodingUTF8);
460  if (cstr) {
461  // String was already in UTF8 encoding.
462  return std::string(cstr);
463  }
464 
465  // Convert to UTF8 encoding.
466  CFIndex len = CFStringGetLength(str);
467  CFRange range = CFRangeMake(0, len);
468  CFIndex usedBufLen = 0;
469  CFStringGetBytes(
470  str, range, kCFStringEncodingUTF8, '?', false, nullptr, len, &usedBufLen);
471  UInt8 buffer[usedBufLen];
472  CFStringGetBytes(
473  str, range, kCFStringEncodingUTF8, '?', false, buffer, len, &usedBufLen);
474  return std::string(reinterpret_cast<const char *>(buffer), usedBufLen);
475 }
476 
477 #endif
478 
479 } // namespace StringOp
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:150
string toHexString(unsigned x, unsigned width)
Definition: StringOp.cc:173
bool stringToBool(string_ref str)
Definition: StringOp.cc:214
void pop_front()
Definition: string_ref.hh:79
std::string str() const
Definition: string_ref.cc:12
string toLower(string_ref str)
Definition: StringOp.cc:235
void splitOnFirst(string_ref str, string_ref chars, string_ref &first, string_ref &last)
Definition: StringOp.cc:324
void splitOnLast(string_ref str, string_ref chars, string_ref &first, string_ref &last)
Definition: StringOp.cc:347
string toString(long long a)
Definition: StringOp.cc:156
void operator()(T t, char *&p) const
Definition: StringOp.cc:122
char front() const
Definition: string_ref.hh:66
Builder & operator<<(const std::string &t)
Definition: StringOp.cc:28
void pop_back()
Definition: string_ref.hh:78
void trimLeft(string &str, const char *chars)
Definition: StringOp.cc:291
bool starts_with(string_ref x) const
Definition: string_ref.cc:138
size_type find_last_of(string_ref s) const
Definition: string_ref.cc:123
string join(const vector< string_ref > &elems, char separator)
Definition: StringOp.cc:382
size_type find(string_ref s) const
Definition: string_ref.cc:60
set< unsigned > parseRange(string_ref str, unsigned min, unsigned max)
Definition: StringOp.cc:439
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
size_type size() const
Definition: string_ref.hh:55
FastUnsigned< T >::type operator()(T t) const
Definition: StringOp.cc:113
const char * data() const
Definition: string_ref.hh:68
void advance(octet_iterator &it, distance_type n, octet_iterator end)
char back() const
Definition: string_ref.hh:67
void trimRight(string &str, const char *chars)
Definition: StringOp.cc:260
bool startsWith(string_ref total, string_ref part)
Definition: StringOp.cc:242
bool ends_with(string_ref x) const
Definition: string_ref.cc:148
FastUnsigned< T >::type operator()(T t) const
Definition: StringOp.cc:107
unsigned fast_stou(string_ref str)
Definition: string_ref.cc:173
static const size_type npos
Definition: string_ref.hh:26
void clear()
Definition: string_ref.hh:75
size_type find_first_of(string_ref s) const
Definition: string_ref.cc:109
void operator()(T, char *&) const
Definition: StringOp.cc:127
vector< string_ref > split(string_ref str, char chars)
Definition: StringOp.cc:370
int stringToInt(const string &str)
Definition: StringOp.cc:187
double stringToDouble(const string &str)
Definition: StringOp.cc:224
void trim(string_ref &str, string_ref chars)
Definition: StringOp.cc:312
string_ref substr(size_type pos, size_type n=npos) const
Definition: string_ref.cc:54
unsigned stringToUint(const string &str)
Definition: StringOp.cc:198
uint64_t stringToUint64(const string &str)
Definition: StringOp.cc:209
string_ref::const_iterator begin(const string_ref &x)
Definition: string_ref.hh:149
bool endsWith(string_ref total, string_ref part)
Definition: StringOp.cc:251
bool empty() const
Definition: string_ref.hh:56