openMSX
StringOp.cc
Go to the documentation of this file.
1 #include "StringOp.hh"
2 #include "MSXException.hh"
3 #include <algorithm>
4 #include <limits>
5 #include <cstdlib>
6 #include <cassert>
7 
8 using std::advance;
9 using std::equal;
10 using std::string;
11 using std::transform;
12 using std::vector;
13 using std::set;
14 
15 namespace StringOp {
16 
17 // class Builder
18 
20 {
21 }
22 
24 {
25 }
26 
27 Builder& Builder::operator<<(const std::string& t)
28 {
29  buf += t; return *this;
30 }
32 {
33  buf.append(t.data(), t.size()); return *this;
34 }
36 {
37  buf += t; return *this;
38 }
39 Builder& Builder::operator<<(unsigned char t)
40 {
41  return operator<<(unsigned(t));
42 }
43 Builder& Builder::operator<<(unsigned short t)
44 {
45  buf += toString(t); return *this;
46 }
48 {
49  buf += toString(t); return *this;
50 }
51 Builder& Builder::operator<<(unsigned long t)
52 {
53  buf += toString(t); return *this;
54 }
55 Builder& Builder::operator<<(unsigned long long t)
56 {
57  buf += toString(t); return *this;
58 }
60 {
61  buf += t; return *this;
62 }
64 {
65  buf += toString(t); return *this;
66 }
68 {
69  buf += toString(t); return *this;
70 }
72 {
73  buf += toString(t); return *this;
74 }
76 {
77  buf += toString(t); return *this;
78 }
80 {
81  buf += toString(t); return *this;
82 }
84 {
85  buf += toString(t); return *this;
86 }
87 
88 
// Maps an integer type to a fast unsigned type that is (at least) big enough
// to hold the absolute value of any value of that type. The result is
// 'unsigned' for all types, except:
//   (unsigned) long       -> unsigned long
//   (unsigned) long long  -> unsigned long long
template<typename T> struct FastUnsigned { typedef unsigned type; };
template<> struct FastUnsigned<long long> { typedef unsigned long long type; };
template<> struct FastUnsigned<unsigned long long> { typedef unsigned long long type; };
template<> struct FastUnsigned<long> { typedef unsigned long type; };
template<> struct FastUnsigned<unsigned long> { typedef unsigned long type; };

// Computes the absolute value of 't', returned as FastUnsigned<T>::type.
// This does the equivalent of
//   unsigned u = (t < 0) ? -t : t;
// but it avoids a compiler warning on the operations
//   't < 0'  and  '-t'
// when 't' is actually an unsigned type (those take the <false> variant).
template<bool IS_SIGNED> struct AbsHelper;
template<> struct AbsHelper<true> {
	template<typename T>
	inline typename FastUnsigned<T>::type operator()(T t) const {
		typedef typename FastUnsigned<T>::type U;
		// Convert to the unsigned type *before* negating. Negating the
		// most negative value in the signed domain overflows (undefined
		// behavior); unsigned arithmetic wraps modulo 2^N and yields
		// the correct magnitude for every input.
		const U u = static_cast<U>(t);
		return (t < 0) ? (U(0) - u) : u;
	}
};
template<> struct AbsHelper<false> {
	template<typename T>
	inline typename FastUnsigned<T>::type operator()(T t) const {
		// Unsigned input: the value already is its own magnitude.
		return t;
	}
};
116 
// Writes a leading minus sign in front of an already formatted number:
// when 't' is negative, 'p' is moved one position back and '-' is stored
// there. Implemented via specialization so that the comparison 't < 0' is
// never compiled for unsigned types (which would trigger a warning).
template<bool IS_SIGNED> struct PutSign;
template<> struct PutSign<false> {
	template<typename T> inline void operator()(T /*t*/, char*& /*p*/) const {
		// unsigned values never get a sign
	}
};
template<> struct PutSign<true> {
	template<typename T> inline void operator()(T t, char*& p) const {
		if (!(t < 0)) return;
		--p;
		*p = '-';
	}
};
130 
131 // This routine is inspired by boost::lexical_cast. It's much faster than a
132 // generic version using std::stringstream. See this page for some numbers:
133 // http://www.boost.org/doc/libs/1_47_0/libs/conversion/lexical_cast.htm#performance
134 template<typename T> static inline string toStringImpl(T t)
135 {
136  static const bool IS_SIGNED = std::numeric_limits<T>::is_signed;
137  static const unsigned BUF_SIZE = 1 + std::numeric_limits<T>::digits10
138  + (IS_SIGNED ? 1 : 0);
139 
140  char buf[BUF_SIZE];
141  char* p = &buf[BUF_SIZE];
142 
143  AbsHelper<IS_SIGNED> absHelper;
144  typename FastUnsigned<T>::type a = absHelper(t);
145  do {
146  *--p = '0' + (a % 10);
147  a /= 10;
148  } while (a);
149 
150  PutSign<IS_SIGNED> putSign;
151  putSign(t, p);
152 
153  return string(p, &buf[BUF_SIZE] - p);
154 }
155 string toString(long long a) { return toStringImpl(a); }
156 string toString(unsigned long long a) { return toStringImpl(a); }
157 string toString(long a) { return toStringImpl(a); }
158 string toString(unsigned long a) { return toStringImpl(a); }
159 string toString(int a) { return toStringImpl(a); }
160 string toString(unsigned a) { return toStringImpl(a); }
161 string toString(short a) { return toStringImpl(a); }
162 string toString(unsigned short a) { return toStringImpl(a); }
// The character overloads yield a string of length one holding that
// character (not its numeric value).
string toString(char a)
{
	return string(1, a);
}
string toString(signed char a)
{
	return string(1, a);
}
string toString(unsigned char a)
{
	return string(1, a);
}
// A bool is rendered as "1" (true) or "0" (false).
string toString(bool a)
{
	return string(1, a ? '1' : '0');
}
167 
// Returns the lower-case hexadecimal digit for a value in the range [0, 15].
static inline char hexDigit(unsigned x)
{
	if (x < 10) {
		return '0' + x;
	}
	return 'a' + x - 10;
}

// Formats the 'width' least significant nibbles of 'x' as lower-case
// hexadecimal, left-padded with zeros. Higher nibbles are dropped.
// Precondition (checked by assert): 1 <= width <= 8.
string toHexString(unsigned x, unsigned width)
{
	assert((0 < width) && (width <= 8));

	char digits[8];
	char* out = digits + 8;
	// Emit from the least significant nibble, filling the buffer backwards.
	int remaining = width;
	do {
		--out;
		*out = hexDigit(x & 0xF);
		x >>= 4;
		--remaining;
	} while (remaining);
	return string(out, width);
}
185 
// Parses an integer using strtol() with base 0, so decimal, octal ("0"
// prefix) and hexadecimal ("0x" prefix) notations are accepted. Any
// characters after the number are ignored.
int stringToInt(const string& str)
{
	const long parsed = strtol(str.c_str(), nullptr, 0);
	return parsed;
}
// Same parsing as above, the value is stored in 'result'.
// Returns true when parsing stopped at the terminating '\0', i.e. there
// were no trailing characters. Note that this also holds for an empty
// string (result is 0 in that case).
bool stringToInt(const string& str, int& result)
{
	char* stop;
	const long parsed = strtol(str.c_str(), &stop, 0);
	result = parsed;
	return *stop == '\0';
}
196 
// Parses an unsigned integer using strtoul() with base 0 (decimal, octal
// and hexadecimal notations). Trailing characters are ignored.
unsigned stringToUint(const string& str)
{
	const unsigned long parsed = strtoul(str.c_str(), nullptr, 0);
	return parsed;
}
// Same parsing as above, the value is stored in 'result'.
// Returns true when parsing stopped at the terminating '\0' (no trailing
// characters); this includes the empty string.
bool stringToUint(const string& str, unsigned& result)
{
	char* stop;
	const unsigned long parsed = strtoul(str.c_str(), &stop, 0);
	result = parsed;
	return *stop == '\0';
}
207 
// Parses a 64-bit unsigned integer using strtoull() with base 0 (decimal,
// octal and hexadecimal notations). Trailing characters are ignored.
uint64_t stringToUint64(const string& str)
{
	const char* const text = str.c_str();
	return strtoull(text, nullptr, 0);
}
212 
214 {
215  if (str == "1") return true;
216  if ((str.size() == 4) && (strncasecmp(str.data(), "true", 4) == 0))
217  return true;
218  if ((str.size() == 3) && (strncasecmp(str.data(), "yes", 3) == 0))
219  return true;
220  return false;
221 }
222 
// Parses a floating point number using strtod(). Trailing characters are
// ignored.
double stringToDouble(const string& str)
{
	const char* const text = str.c_str();
	return strtod(text, nullptr);
}
// Same parsing as above, the value is stored in 'result'.
// Returns true when parsing stopped at the terminating '\0' (no trailing
// characters); this includes the empty string.
bool stringToDouble(const string& str, double& result)
{
	char* stop;
	const double parsed = strtod(str.c_str(), &stop);
	result = parsed;
	return *stop == '\0';
}
233 
234 string toLower(string_ref str)
235 {
236  string result = str.str();
237  transform(result.begin(), result.end(), result.begin(), ::tolower);
238  return result;
239 }
240 
242 {
243  return total.starts_with(part);
244 }
245 bool startsWith(string_ref total, char part)
246 {
247  return !total.empty() && (total.front() == part);
248 }
249 
250 bool endsWith(string_ref total, string_ref part)
251 {
252  return total.ends_with(part);
253 }
254 bool endsWith(string_ref total, char part)
255 {
256  return !total.empty() && (total.back() == part);
257 }
258 
// Removes, in place, all trailing characters of 'str' that occur in the
// nul-terminated set 'chars'. A string consisting only of such characters
// becomes empty.
void trimRight(string& str, const char* chars)
{
	const auto lastKept = str.find_last_not_of(chars);
	if (lastKept == string::npos) {
		// nothing but trimmable characters
		str.clear();
		return;
	}
	str.erase(lastKept + 1);
}
// Removes, in place, all trailing occurrences of the single character
// 'chars'. A string consisting only of that character becomes empty.
void trimRight(string& str, char chars)
{
	const auto lastKept = str.find_last_not_of(chars);
	if (lastKept == string::npos) {
		// nothing but trimmable characters
		str.clear();
		return;
	}
	str.erase(lastKept + 1);
}
277 void trimRight(string_ref& str, string_ref chars)
278 {
279  while (!str.empty() && (chars.find(str.back()) != string_ref::npos)) {
280  str.pop_back();
281  }
282 }
283 void trimRight(string_ref& str, char chars)
284 {
285  while (!str.empty() && (str.back() == chars)) {
286  str.pop_back();
287  }
288 }
// Removes, in place, all leading characters of 'str' that occur in the
// nul-terminated set 'chars'.
void trimLeft (string& str, const char* chars)
{
	// When every character is trimmable this is npos and erase() then
	// removes the whole string.
	const auto firstKept = str.find_first_not_of(chars);
	str.erase(0, firstKept);
}
293 void trimLeft (string_ref& str, string_ref chars)
294 {
295  while (!str.empty() && (chars.find(str[0]) != string_ref::npos)) {
296  str.pop_front();
297  }
298 }
299 
300 void splitOnFirst(string_ref str, string_ref chars, string_ref& first, string_ref& last)
301 {
302  auto pos = str.find_first_of(chars);
303  if (pos == string_ref::npos) {
304  first = str;
305  last.clear();
306  } else {
307  first = str.substr(0, pos);
308  last = str.substr(pos + 1);
309  }
310 }
311 void splitOnFirst(string_ref str, char chars, string_ref& first, string_ref& last)
312 {
313  auto pos = str.find_first_of(chars);
314  if (pos == string_ref::npos) {
315  first = str;
316  last.clear();
317  } else {
318  first = str.substr(0, pos);
319  last = str.substr(pos + 1);
320  }
321 }
322 
323 void splitOnLast(string_ref str, string_ref chars, string_ref& first, string_ref& last)
324 {
325  auto pos = str.find_last_of(chars);
326  if (pos == string_ref::npos) {
327  first.clear();
328  last = str;
329  } else {
330  first = str.substr(0, pos);
331  last = str.substr(pos + 1);
332  }
333 }
334 void splitOnLast(string_ref str, char chars, string_ref& first, string_ref& last)
335 {
336  auto pos = str.find_last_of(chars);
337  if (pos == string_ref::npos) {
338  first.clear();
339  last = str;
340  } else {
341  first = str.substr(0, pos);
342  last = str.substr(pos + 1);
343  }
344 }
345 
346 vector<string_ref> split(string_ref str, char chars)
347 {
348  vector<string_ref> result;
349  while (!str.empty()) {
350  string_ref first, last;
351  splitOnFirst(str, chars, first, last);
352  result.push_back(first);
353  str = last;
354  }
355  return result;
356 }
357 
358 string join(const vector<string_ref>& elems, char separator)
359 {
360  if (elems.empty()) return string();
361 
362  auto it = elems.begin();
363  Builder result;
364  result << *it;
365  for (++it; it != elems.end(); ++it) {
366  result << separator;
367  result << *it;
368  }
369  return result;
370 }
371 
372 static unsigned parseNumber(string_ref str)
373 {
374  // trimRight only: strtoul can handle leading spaces
375  trimRight(str, " \t");
376  if (str.empty()) {
377  throw openmsx::MSXException("Invalid integer: empty string");
378  }
380  unsigned result = stoi(str, &idx);
381  if (idx != str.size()) {
382  throw openmsx::MSXException("Invalid integer: " + str);
383  }
384  return result;
385 }
386 
387 static void insert(unsigned x, set<unsigned>& result, unsigned min, unsigned max)
388 {
389  if ((x < min) || (x > max)) {
390  throw openmsx::MSXException("Out of range");
391  }
392  result.insert(x);
393 }
394 
395 static void parseRange2(string_ref str, set<unsigned>& result,
396  unsigned min, unsigned max)
397 {
398  // trimRight only: here we only care about all spaces
399  trimRight(str, " \t");
400  if (str.empty()) return;
401 
402  auto pos = str.find('-');
403  if (pos == string_ref::npos) {
404  insert(parseNumber(str), result, min, max);
405  } else {
406  unsigned begin = parseNumber(str.substr(0, pos));
407  unsigned end = parseNumber(str.substr(pos + 1));
408  if (end < begin) {
409  std::swap(begin, end);
410  }
411  for (unsigned i = begin; i <= end; ++i) {
412  insert(i, result, min, max);
413  }
414  }
415 }
416 
417 set<unsigned> parseRange(string_ref str, unsigned min, unsigned max)
418 {
419  set<unsigned> result;
420  while (true) {
421  auto next = str.find(',');
422  string_ref sub = (next == string_ref::npos)
423  ? str
424  : str.substr(0, next++);
425  parseRange2(sub, result, min, max);
426  if (next == string_ref::npos) break;
427  str = str.substr(next);
428  }
429  return result;
430 }
431 
432 #if defined(__APPLE__)
433 
434 std::string fromCFString(CFStringRef str)
435 {
436  // Try the quick route first.
437  const char *cstr = CFStringGetCStringPtr(str, kCFStringEncodingUTF8);
438  if (cstr) {
439  // String was already in UTF8 encoding.
440  return std::string(cstr);
441  }
442 
443  // Convert to UTF8 encoding.
444  CFIndex len = CFStringGetLength(str);
445  CFRange range = CFRangeMake(0, len);
446  CFIndex usedBufLen = 0;
447  CFStringGetBytes(
448  str, range, kCFStringEncodingUTF8, '?', false, nullptr, len, &usedBufLen);
449  UInt8 buffer[usedBufLen];
450  CFStringGetBytes(
451  str, range, kCFStringEncodingUTF8, '?', false, buffer, len, &usedBufLen);
452  return std::string(reinterpret_cast<const char *>(buffer), usedBufLen);
453 }
454 
455 #endif
456 
457 } // namespace StringOp
string toHexString(unsigned x, unsigned width)
Definition: StringOp.cc:172
bool stringToBool(string_ref str)
Definition: StringOp.cc:213
void pop_front()
Definition: string_ref.hh:79
std::string str() const
Definition: string_ref.cc:10
string toLower(string_ref str)
Definition: StringOp.cc:234
void splitOnFirst(string_ref str, string_ref chars, string_ref &first, string_ref &last)
Definition: StringOp.cc:300
void splitOnLast(string_ref str, string_ref chars, string_ref &first, string_ref &last)
Definition: StringOp.cc:323
string toString(long long a)
Definition: StringOp.cc:155
void operator()(T t, char *&p) const
Definition: StringOp.cc:121
char front() const
Definition: string_ref.hh:66
Builder & operator<<(const std::string &t)
Definition: StringOp.cc:27
void pop_back()
Definition: string_ref.hh:78
void trimLeft(string &str, const char *chars)
Definition: StringOp.cc:289
bool starts_with(string_ref x) const
Definition: string_ref.cc:136
size_type find_last_of(string_ref s) const
Definition: string_ref.cc:121
string join(const vector< string_ref > &elems, char separator)
Definition: StringOp.cc:358
size_type find(string_ref s) const
Definition: string_ref.cc:58
set< unsigned > parseRange(string_ref str, unsigned min, unsigned max)
Definition: StringOp.cc:417
This class implements a subset of the proposal for std::string_ref (proposed for the next c++ standar...
Definition: string_ref.hh:18
size_type size() const
Definition: string_ref.hh:55
FastUnsigned< T >::type operator()(T t) const
Definition: StringOp.cc:112
const char * data() const
Definition: string_ref.hh:68
void advance(octet_iterator &it, distance_type n, octet_iterator end)
size_t size_type
Definition: string_ref.hh:21
char back() const
Definition: string_ref.hh:67
void trimRight(string &str, const char *chars)
Definition: StringOp.cc:259
bool startsWith(string_ref total, string_ref part)
Definition: StringOp.cc:241
bool ends_with(string_ref x) const
Definition: string_ref.cc:142
FastUnsigned< T >::type operator()(T t) const
Definition: StringOp.cc:106
static const size_type npos
Definition: string_ref.hh:26
void clear()
Definition: string_ref.hh:75
size_type find_first_of(string_ref s) const
Definition: string_ref.cc:107
void operator()(T, char *&) const
Definition: StringOp.cc:126
vector< string_ref > split(string_ref str, char chars)
Definition: StringOp.cc:346
int stringToInt(const string &str)
Definition: StringOp.cc:186
double stringToDouble(const string &str)
Definition: StringOp.cc:223
int stoi(string_ref str, string_ref::size_type *idx, int base)
Definition: string_ref.cc:164
string_ref substr(size_type pos, size_type n=npos) const
Definition: string_ref.cc:52
unsigned stringToUint(const string &str)
Definition: StringOp.cc:197
uint64_t stringToUint64(const string &str)
Definition: StringOp.cc:208
bool endsWith(string_ref total, string_ref part)
Definition: StringOp.cc:250
bool empty() const
Definition: string_ref.hh:56