openMSX
StringOp.cc
Go to the documentation of this file.
1 #include "StringOp.hh"
2 #include "MSXException.hh"
3 #include <algorithm>
4 #include <limits>
5 #include <cstdlib>
6 #include <cassert>
7 
8 using std::advance;
9 using std::equal;
10 using std::string;
11 using std::transform;
12 using std::vector;
13 using std::set;
14 
15 namespace StringOp {
16 
17 // class Builder
18 
20 {
21 }
22 
24 {
25 }
26 
27 Builder& Builder::operator<<(const std::string& t)
28 {
29  buf += t; return *this;
30 }
32 {
33  buf.append(t.data(), t.size()); return *this;
34 }
36 {
37  buf += t; return *this;
38 }
39 Builder& Builder::operator<<(unsigned char t)
40 {
41  return operator<<(unsigned(t));
42 }
43 Builder& Builder::operator<<(unsigned short t)
44 {
45  buf += toString(t); return *this;
46 }
48 {
49  buf += toString(t); return *this;
50 }
51 Builder& Builder::operator<<(unsigned long t)
52 {
53  buf += toString(t); return *this;
54 }
55 Builder& Builder::operator<<(unsigned long long t)
56 {
57  buf += toString(t); return *this;
58 }
60 {
61  buf += t; return *this;
62 }
64 {
65  buf += toString(t); return *this;
66 }
68 {
69  buf += toString(t); return *this;
70 }
72 {
73  buf += toString(t); return *this;
74 }
76 {
77  buf += toString(t); return *this;
78 }
80 {
81  buf += toString(t); return *this;
82 }
84 {
85  buf += toString(t); return *this;
86 }
87 
88 
// Maps an integer type to a fast unsigned type that is (at least) big enough
// to hold the absolute value of any value of the given type: 'unsigned' by
// default, 'unsigned long' for (unsigned) long and 'unsigned long long' for
// (unsigned) long long.
template<typename T> struct FastUnsigned { typedef unsigned type; };
template<> struct FastUnsigned<long long> { typedef unsigned long long type; };
template<> struct FastUnsigned<unsigned long long> { typedef unsigned long long type; };
template<> struct FastUnsigned<long> { typedef unsigned long type; };
template<> struct FastUnsigned<unsigned long> { typedef unsigned long type; };

// Returns the absolute value of 't' as a FastUnsigned<T>::type.
// This does the equivalent of
//     unsigned u = (t < 0) ? -t : t;
// but the specialization on IS_SIGNED avoids a compiler warning on the
// operations 't < 0' and '-t' when 't' is actually an unsigned type.
template<bool IS_SIGNED> struct AbsHelper;
template<> struct AbsHelper<true> {
    template<typename T>
    inline typename FastUnsigned<T>::type operator()(T t) const {
        typedef typename FastUnsigned<T>::type U;
        // Negate in the unsigned domain. Evaluating '-t' on the signed type
        // overflows (undefined behavior) for the most negative value of
        // int, long and long long; unsigned arithmetic wraps around and
        // yields the correct magnitude.
        return (t < 0) ? (U(0) - static_cast<U>(t)) : static_cast<U>(t);
    }
};
template<> struct AbsHelper<false> {
    template<typename T>
    inline typename FastUnsigned<T>::type operator()(T t) const {
        // Unsigned input: the value already is its own absolute value.
        return t;
    }
};
116 
// Writes a '-' in front of the text that starts at 'p' (moving 'p' one
// position back) when 't' is negative. Dispatching on IS_SIGNED means the
// comparison 't < 0' is never instantiated for unsigned types, which would
// otherwise trigger a compiler warning.
template<bool IS_SIGNED> struct PutSign;
template<> struct PutSign<false> {
    template<typename T> inline void operator()(T /*t*/, char*& /*p*/) const {
        // unsigned values never get a sign
    }
};
template<> struct PutSign<true> {
    template<typename T> inline void operator()(T t, char*& p) const {
        if (!(t < 0)) return;
        --p;
        *p = '-';
    }
};
130 
131 // This routine is inspired by boost::lexical_cast. It's much faster than a
132 // generic version using std::stringstream. See this page for some numbers:
133 // http://www.boost.org/doc/libs/1_47_0/libs/conversion/lexical_cast.htm#performance
134 template<typename T> static inline string toStringImpl(T t)
135 {
136  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
137  static const unsigned BUF_SIZE = 1 + std::numeric_limits<T>::digits10
138  + (IS_SIGNED ? 1 : 0);
139 
140  char buf[BUF_SIZE];
141  char* p = &buf[BUF_SIZE];
142 
143  AbsHelper<IS_SIGNED> absHelper;
144  typename FastUnsigned<T>::type a = absHelper(t);
145  do {
146  *--p = '0' + (a % 10);
147  a /= 10;
148  } while (a);
149 
150  PutSign<IS_SIGNED> putSign;
151  putSign(t, p);
152 
153  return string(p, &buf[BUF_SIZE] - p);
154 }
155 string toString(long long a) { return toStringImpl(a); }
156 string toString(unsigned long long a) { return toStringImpl(a); }
157 string toString(long a) { return toStringImpl(a); }
158 string toString(unsigned long a) { return toStringImpl(a); }
159 string toString(int a) { return toStringImpl(a); }
160 string toString(unsigned a) { return toStringImpl(a); }
161 string toString(short a) { return toStringImpl(a); }
162 string toString(unsigned short a) { return toStringImpl(a); }
// The character overloads return a one-character string holding the
// character itself (not its numeric value).
string toString(char a)
{
    return string(1, a);
}
string toString(signed char a)
{
    return string(1, static_cast<char>(a));
}
string toString(unsigned char a)
{
    return string(1, static_cast<char>(a));
}
// A bool is rendered as "1" (true) or "0" (false).
string toString(bool a)
{
    return string(1, a ? '1' : '0');
}
167 
// Returns the lower-case hexadecimal digit for 'x': '0'..'9' for values
// below 10, letters counting up from 'a' for 10 and above.
static inline char hexDigit(unsigned x)
{
    if (x < 10) {
        return '0' + x;
    }
    return 'a' + (x - 10);
}
// Formats the 'width' least significant nibbles of 'x' as lower-case
// hexadecimal digits, zero-padded on the left.
// Precondition (checked with assert): 1 <= width <= 8.
string toHexString(unsigned x, unsigned width)
{
    assert((0 < width) && (width <= 8));

    // Filled back to front: the lowest nibble becomes the last character.
    char digits[8];
    char* out = digits + 8;
    unsigned remaining = width;
    do {
        *--out = "0123456789abcdef"[x & 15];
        x >>= 4;
        --remaining;
    } while (remaining != 0);
    return string(out, width);
}
185 
// Parses 'str' with strtol() using base 0 (the base is derived from the
// prefix of the text) and returns the result converted to int. Parsing
// errors and trailing garbage are not reported.
int stringToInt(const string& str)
{
    const char* const text = str.c_str();
    return strtol(text, nullptr, 0);
}
// Parses 'str' with strtol() (base 0) and stores the value, converted to
// int, in 'result'. Returns true when parsing stopped at the end of the
// string, i.e. no unparsed characters are left. Note that an empty string
// therefore also yields true (with result 0).
bool stringToInt(const string& str, int& result)
{
    char* stop = nullptr;
    const long parsed = strtol(str.c_str(), &stop, 0);
    result = parsed;
    const bool consumedAll = (*stop == '\0');
    return consumedAll;
}
196 
// Parses 'str' with strtoul() using base 0 and returns the result converted
// to unsigned. Parsing errors and trailing garbage are not reported.
unsigned stringToUint(const string& str)
{
    const char* const text = str.c_str();
    return strtoul(text, nullptr, 0);
}
// Parses 'str' with strtoul() (base 0) and stores the value, converted to
// unsigned, in 'result'. Returns true when parsing stopped at the end of the
// string, i.e. no unparsed characters are left. Note that an empty string
// therefore also yields true (with result 0).
bool stringToUint(const string& str, unsigned& result)
{
    char* stop = nullptr;
    const unsigned long parsed = strtoul(str.c_str(), &stop, 0);
    result = parsed;
    const bool consumedAll = (*stop == '\0');
    return consumedAll;
}
207 
// Parses 'str' with strtoull() using base 0 and returns the result as a
// 64-bit unsigned value. Parsing errors and trailing garbage are not
// reported.
uint64_t stringToUint64(const string& str)
{
    const char* const text = str.c_str();
    return strtoull(text, nullptr, 0);
}
212 
214 {
215  if (str == "1") return true;
216  if ((str.size() == 4) && (strncasecmp(str.data(), "true", 4) == 0))
217  return true;
218  if ((str.size() == 3) && (strncasecmp(str.data(), "yes", 3) == 0))
219  return true;
220  return false;
221 }
222 
// Parses 'str' with strtod() and returns the value. Parsing errors and
// trailing garbage are not reported.
double stringToDouble(const string& str)
{
    const char* const text = str.c_str();
    return strtod(text, nullptr);
}
// Parses 'str' with strtod() and stores the value in 'result'. Returns true
// when parsing stopped at the end of the string, i.e. no unparsed characters
// are left. Note that an empty string therefore also yields true (with
// result 0).
bool stringToDouble(const string& str, double& result)
{
    char* stop = nullptr;
    result = strtod(str.c_str(), &stop);
    const bool consumedAll = (*stop == '\0');
    return consumedAll;
}
233 
234 string toLower(string_ref str)
235 {
236  string result = str.str();
237  transform(begin(result), end(result), begin(result), ::tolower);
238  return result;
239 }
240 
242 {
243  return total.starts_with(part);
244 }
245 bool startsWith(string_ref total, char part)
246 {
247  return !total.empty() && (total.front() == part);
248 }
249 
250 bool endsWith(string_ref total, string_ref part)
251 {
252  return total.ends_with(part);
253 }
254 bool endsWith(string_ref total, char part)
255 {
256  return !total.empty() && (total.back() == part);
257 }
258 
// Removes, in place, all trailing characters of 'str' that occur in the
// set 'chars'. A string consisting only of such characters becomes empty.
void trimRight(string& str, const char* chars)
{
    // find_last_not_of() returns npos when nothing is to be kept; npos + 1
    // wraps around to 0, so one erase() call handles both cases.
    const auto keep = str.find_last_not_of(chars) + 1;
    str.erase(keep);
}
// Removes, in place, all trailing occurrences of the character 'chars' from
// 'str'. A string consisting only of that character becomes empty.
void trimRight(string& str, char chars)
{
    // find_last_not_of() returns npos when nothing is to be kept; npos + 1
    // wraps around to 0, so one erase() call handles both cases.
    const auto keep = str.find_last_not_of(chars) + 1;
    str.erase(keep);
}
277 void trimRight(string_ref& str, string_ref chars)
278 {
279  while (!str.empty() && (chars.find(str.back()) != string_ref::npos)) {
280  str.pop_back();
281  }
282 }
283 void trimRight(string_ref& str, char chars)
284 {
285  while (!str.empty() && (str.back() == chars)) {
286  str.pop_back();
287  }
288 }
289 
// Removes, in place, all leading characters of 'str' that occur in the set
// 'chars'. A string consisting only of such characters becomes empty.
void trimLeft(string& str, const char* chars)
{
    // npos (nothing to keep) makes erase() remove the whole string.
    const auto firstKept = str.find_first_not_of(chars);
    str.erase(0, firstKept);
}
// Removes, in place, all leading occurrences of the character 'chars' from
// 'str'. A string consisting only of that character becomes empty.
void trimLeft(string& str, char chars)
{
    // npos (nothing to keep) makes erase() remove the whole string.
    const auto firstKept = str.find_first_not_of(chars);
    str.erase(0, firstKept);
}
298 void trimLeft(string_ref& str, string_ref chars)
299 {
300  while (!str.empty() && (chars.find(str.front()) != string_ref::npos)) {
301  str.pop_front();
302  }
303 }
304 void trimLeft(string_ref& str, char chars)
305 {
306  while (!str.empty() && (str.front() == chars)) {
307  str.pop_front();
308  }
309 }
310 
311 void splitOnFirst(string_ref str, string_ref chars, string_ref& first, string_ref& last)
312 {
313  auto pos = str.find_first_of(chars);
314  if (pos == string_ref::npos) {
315  first = str;
316  last.clear();
317  } else {
318  first = str.substr(0, pos);
319  last = str.substr(pos + 1);
320  }
321 }
322 void splitOnFirst(string_ref str, char chars, string_ref& first, string_ref& last)
323 {
324  auto pos = str.find_first_of(chars);
325  if (pos == string_ref::npos) {
326  first = str;
327  last.clear();
328  } else {
329  first = str.substr(0, pos);
330  last = str.substr(pos + 1);
331  }
332 }
333 
334 void splitOnLast(string_ref str, string_ref chars, string_ref& first, string_ref& last)
335 {
336  auto pos = str.find_last_of(chars);
337  if (pos == string_ref::npos) {
338  first.clear();
339  last = str;
340  } else {
341  first = str.substr(0, pos);
342  last = str.substr(pos + 1);
343  }
344 }
345 void splitOnLast(string_ref str, char chars, string_ref& first, string_ref& last)
346 {
347  auto pos = str.find_last_of(chars);
348  if (pos == string_ref::npos) {
349  first.clear();
350  last = str;
351  } else {
352  first = str.substr(0, pos);
353  last = str.substr(pos + 1);
354  }
355 }
356 
357 vector<string_ref> split(string_ref str, char chars)
358 {
359  vector<string_ref> result;
360  while (!str.empty()) {
361  string_ref first, last;
362  splitOnFirst(str, chars, first, last);
363  result.push_back(first);
364  str = last;
365  }
366  return result;
367 }
368 
369 string join(const vector<string_ref>& elems, char separator)
370 {
371  if (elems.empty()) return string();
372 
373  auto it = begin(elems);
374  Builder result;
375  result << *it;
376  for (++it; it != end(elems); ++it) {
377  result << separator;
378  result << *it;
379  }
380  return result;
381 }
382 
383 static unsigned parseNumber(string_ref str)
384 {
385  // trimRight only: strtoul can handle leading spaces
386  trimRight(str, " \t");
387  if (str.empty()) {
388  throw openmsx::MSXException("Invalid integer: empty string");
389  }
391  unsigned result = stoi(str, &idx);
392  if (idx != str.size()) {
393  throw openmsx::MSXException("Invalid integer: " + str);
394  }
395  return result;
396 }
397 
398 static void insert(unsigned x, set<unsigned>& result, unsigned min, unsigned max)
399 {
400  if ((x < min) || (x > max)) {
401  throw openmsx::MSXException("Out of range");
402  }
403  result.insert(x);
404 }
405 
406 static void parseRange2(string_ref str, set<unsigned>& result,
407  unsigned min, unsigned max)
408 {
409  // trimRight only: here we only care about all spaces
410  trimRight(str, " \t");
411  if (str.empty()) return;
412 
413  auto pos = str.find('-');
414  if (pos == string_ref::npos) {
415  insert(parseNumber(str), result, min, max);
416  } else {
417  unsigned begin = parseNumber(str.substr(0, pos));
418  unsigned end = parseNumber(str.substr(pos + 1));
419  if (end < begin) {
420  std::swap(begin, end);
421  }
422  for (unsigned i = begin; i <= end; ++i) {
423  insert(i, result, min, max);
424  }
425  }
426 }
427 
428 set<unsigned> parseRange(string_ref str, unsigned min, unsigned max)
429 {
430  set<unsigned> result;
431  while (true) {
432  auto next = str.find(',');
433  string_ref sub = (next == string_ref::npos)
434  ? str
435  : str.substr(0, next++);
436  parseRange2(sub, result, min, max);
437  if (next == string_ref::npos) break;
438  str = str.substr(next);
439  }
440  return result;
441 }
442 
443 #if defined(__APPLE__)
444 
445 std::string fromCFString(CFStringRef str)
446 {
447  // Try the quick route first.
448  const char *cstr = CFStringGetCStringPtr(str, kCFStringEncodingUTF8);
449  if (cstr) {
450  // String was already in UTF8 encoding.
451  return std::string(cstr);
452  }
453 
454  // Convert to UTF8 encoding.
455  CFIndex len = CFStringGetLength(str);
456  CFRange range = CFRangeMake(0, len);
457  CFIndex usedBufLen = 0;
458  CFStringGetBytes(
459  str, range, kCFStringEncodingUTF8, '?', false, nullptr, len, &usedBufLen);
460  UInt8 buffer[usedBufLen];
461  CFStringGetBytes(
462  str, range, kCFStringEncodingUTF8, '?', false, buffer, len, &usedBufLen);
463  return std::string(reinterpret_cast<const char *>(buffer), usedBufLen);
464 }
465 
466 #endif
467 
468 } // namespace StringOp
string_ref::const_iterator end(const string_ref &x)
Definition: string_ref.hh:135
string toHexString(unsigned x, unsigned width)
Definition: StringOp.cc:172
bool stringToBool(string_ref str)
Definition: StringOp.cc:213
void pop_front()
Definition: string_ref.hh:79
std::string str() const
Definition: string_ref.cc:10
string toLower(string_ref str)
Definition: StringOp.cc:234
void splitOnFirst(string_ref str, string_ref chars, string_ref &first, string_ref &last)
Definition: StringOp.cc:311
void splitOnLast(string_ref str, string_ref chars, string_ref &first, string_ref &last)
Definition: StringOp.cc:334
string toString(long long a)
Definition: StringOp.cc:155
void operator()(T t, char *&p) const
Definition: StringOp.cc:121
char front() const
Definition: string_ref.hh:66
Builder & operator<<(const std::string &t)
Definition: StringOp.cc:27
void pop_back()
Definition: string_ref.hh:78
void trimLeft(string &str, const char *chars)
Definition: StringOp.cc:290
bool starts_with(string_ref x) const
Definition: string_ref.cc:136
size_type find_last_of(string_ref s) const
Definition: string_ref.cc:121
string join(const vector< string_ref > &elems, char separator)
Definition: StringOp.cc:369
size_type find(string_ref s) const
Definition: string_ref.cc:58
set< unsigned > parseRange(string_ref str, unsigned min, unsigned max)
Definition: StringOp.cc:428
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
size_type size() const
Definition: string_ref.hh:55
FastUnsigned< T >::type operator()(T t) const
Definition: StringOp.cc:112
const char * data() const
Definition: string_ref.hh:68
void advance(octet_iterator &it, distance_type n, octet_iterator end)
size_t size_type
Definition: string_ref.hh:21
char back() const
Definition: string_ref.hh:67
void trimRight(string &str, const char *chars)
Definition: StringOp.cc:259
bool startsWith(string_ref total, string_ref part)
Definition: StringOp.cc:241
bool ends_with(string_ref x) const
Definition: string_ref.cc:142
FastUnsigned< T >::type operator()(T t) const
Definition: StringOp.cc:106
static const size_type npos
Definition: string_ref.hh:26
void clear()
Definition: string_ref.hh:75
size_type find_first_of(string_ref s) const
Definition: string_ref.cc:107
void operator()(T, char *&) const
Definition: StringOp.cc:126
vector< string_ref > split(string_ref str, char chars)
Definition: StringOp.cc:357
int stringToInt(const string &str)
Definition: StringOp.cc:186
double stringToDouble(const string &str)
Definition: StringOp.cc:223
int stoi(string_ref str, string_ref::size_type *idx, int base)
Definition: string_ref.cc:164
string_ref substr(size_type pos, size_type n=npos) const
Definition: string_ref.cc:52
unsigned stringToUint(const string &str)
Definition: StringOp.cc:197
uint64_t stringToUint64(const string &str)
Definition: StringOp.cc:208
string_ref::const_iterator begin(const string_ref &x)
Definition: string_ref.hh:134
bool endsWith(string_ref total, string_ref part)
Definition: StringOp.cc:250
bool empty() const
Definition: string_ref.hh:56