30 #ifndef UTF8_CHECKED_HH
31 #define UTF8_CHECKED_HH
// Describes this error with the fixed C string "Invalid code point".
// The throw() specification promises that the call itself never throws.
// NOTE(review): throw() was removed in C++20; noexcept is the replacement spelling.
// NOTE(review): presumably overrides std::exception::what() - confirm against the enclosing class.
44 virtual const char*
what()
const throw() {
return "Invalid code point"; }
// Describes this error with the fixed C string "Invalid UTF-8".
// The throw() specification promises that the call itself never throws.
// NOTE(review): throw() was removed in C++20; noexcept is the replacement spelling.
// NOTE(review): presumably overrides std::exception::what() - confirm against the enclosing class.
53 virtual const char*
what()
const throw() {
return "Invalid UTF-8"; }
// Describes this error with the fixed C string "Invalid UTF-16".
// The throw() specification promises that the call itself never throws.
// NOTE(review): throw() was removed in C++20; noexcept is the replacement spelling.
// NOTE(review): presumably overrides std::exception::what() - confirm against the enclosing class.
62 virtual const char*
what()
const throw() {
return "Invalid UTF-16"; }
// Describes this error with the fixed C string "Not enough space".
// The throw() specification promises that the call itself never throws.
// NOTE(review): throw() was removed in C++20; noexcept is the replacement spelling.
// NOTE(review): presumably overrides std::exception::what() - confirm against the enclosing class.
69 virtual const char*
what()
const throw() {
return "Not enough space"; }
74 template <
typename octet_iterator,
typename output_iterator>
76 output_iterator out, uint32_t replacement)
78 while (start != end) {
79 auto sequence_start = start;
83 for (
auto it = sequence_start; it != start; ++it) {
108 template <
typename octet_iterator,
typename output_iterator>
// Writes the UTF-8 encoding of cp through the output iterator result, one
// octet per `*result++` assignment.
//
// cp      the value to encode.
// result  output position; post-incremented after each octet written.
115 template <
typename octet_iterator>
116 octet_iterator
append(uint32_t cp, octet_iterator result)
124 }
else if (cp < 0x800) {
// Two octets: lead 110xxxxx holding the bits above the low six, then one
// 10xxxxxx continuation octet holding the low six bits.
126 *result++ = ((cp >> 6) ) | 0xc0;
127 *result++ = ((cp >> 0) & 0x3f) | 0x80;
128 }
else if (cp < 0x10000) {
// Three octets: lead 1110xxxx, then two 10xxxxxx continuation octets with
// six bits each (bits 11..6, then bits 5..0).
130 *result++ = ((cp >> 12) ) | 0xe0;
131 *result++ = ((cp >> 6) & 0x3f) | 0x80;
132 *result++ = ((cp >> 0) & 0x3f) | 0x80;
// Four octets: lead is (cp >> 18) OR-ed with 0xf0, then three 10xxxxxx
// continuation octets with six bits each (bits 17..12, 11..6, 5..0).
135 *result++ = ((cp >> 18) ) | 0xf0;
136 *result++ = ((cp >> 12) & 0x3f) | 0x80;
137 *result++ = ((cp >> 6) & 0x3f) | 0x80;
138 *result++ = ((cp >> 0) & 0x3f) | 0x80;
// it is taken by non-const reference, so the caller's iterator can be
// modified; end is passed by value. The result type is uint32_t.
// NOTE(review): presumably decodes the UTF-8 sequence starting at it, returns
// its code point and leaves it just past that sequence - confirm against the body.
145 template <
typename octet_iterator>
146 uint32_t
next(octet_iterator& it, octet_iterator end)
// Returns whatever next(it, end) returns. Because it is passed by value here,
// the call to next() works on this function's own copy and the caller's
// iterator keeps its position.
165 template <
typename octet_iterator>
166 uint32_t
peek_next(octet_iterator it, octet_iterator end)
168 return next(it, end);
// it is taken by non-const reference and start by value; the result type is
// uint32_t.
// NOTE(review): presumably moves it back to the beginning of the preceding
// sequence without going before start - confirm against the body.
171 template <
typename octet_iterator>
172 uint32_t
prior(octet_iterator& it, octet_iterator start)
// The returned value is obtained by calling next() on temp rather than on it.
182 return next(temp, end);
// it is taken by non-const reference; n is the iteration count of the loop
// below; end is passed by value.
// NOTE(review): presumably each iteration steps it forward by one code point
// via next(it, end) - confirm against the loop body.
185 template <
typename octet_iterator,
typename distance_type>
186 void advance(octet_iterator& it, distance_type n, octet_iterator end)
// Runs the body n times; when n <= 0 the body never executes.
188 for (distance_type i = 0; i < n; ++i) {
// The return type is the iterator's difference_type, taken from
// std::iterator_traits.
// NOTE(review): presumably counts the code points between first and last -
// confirm against the loop body.
193 template <
typename octet_iterator>
194 typename std::iterator_traits<octet_iterator>::difference_type
195 distance(octet_iterator first, octet_iterator last)
// dist has no initializer here; the for-init below sets it to 0 before any use.
197 typename std::iterator_traits<octet_iterator>::difference_type dist;
// Loops while first < last, so octet_iterator must support operator<.
// dist is incremented once per iteration.
198 for (dist = 0; first < last; ++dist) {
// Reads units from [start, end) and writes their UTF-8 form through result
// by way of append().
204 template <
typename u16bit_iterator,
typename octet_iterator>
205 octet_iterator
utf16to8(u16bit_iterator start, u16bit_iterator end,
206 octet_iterator result)
208 while (start != end) {
// Read one unit, widened to uint32_t, and step past it.
209 uint32_t cp = *start++;
// Read the following unit as the trail surrogate and step past it.
215 uint32_t trail_surrogate = *start++;
// Emit cp as UTF-8; append()'s return value becomes the new result.
222 result =
append(cp, result);
// Walks the octet range [start, end), obtaining one uint32_t value per loop
// iteration from next(); result is the output position for the converted data.
227 template <
typename u16bit_iterator,
typename octet_iterator>
228 u16bit_iterator
utf8to16(octet_iterator start, octet_iterator end,
229 u16bit_iterator result)
231 while (start != end) {
// next() receives start itself (its first parameter is a reference).
232 uint32_t cp =
next(start, end);
// Converts the elements of [start, end) to UTF-8 written through result,
// delegating the encoding of each element to append().
243 template <
typename octet_iterator,
typename u32bit_iterator>
244 octet_iterator
utf32to8(u32bit_iterator start, u32bit_iterator end,
245 octet_iterator result)
247 while (start != end) {
// Pass the current element to append() (stepping start past it) and keep
// append()'s return value as the new result.
248 result =
append(*start++, result);
// Walks the octet range [start, end) and stores each value returned by
// next() through result.
253 template <
typename octet_iterator,
typename u32bit_iterator>
254 u32bit_iterator
utf8to32(octet_iterator start, octet_iterator end,
255 u32bit_iterator result)
// The condition is `start < end`, so octet_iterator must support operator<.
// NOTE(review): the sibling conversion loops in this file compare with `!=` - confirm whether `<` is intended.
257 while (start < end) {
// Store the value from next() at the output position, then post-increment result.
258 *result++ =
next(start, end);
// Iterator adaptor over an octet_iterator, declared with the bidirectional
// iterator tag and uint32_t as its value type. It remembers the range it was
// created over in range_start / range_end.
// NOTE(review): the std::iterator base class is deprecated since C++17.
264 template <
typename octet_iterator>
265 class iterator :
public std::iterator<std::bidirectional_iterator_tag, uint32_t>
// Bounds of the octet range this iterator was constructed over.
268 octet_iterator range_start;
269 octet_iterator range_end;
273 const octet_iterator& range_start,
274 const octet_iterator& range_end)
// The range parameters are copied into the members of the same name.
276 , range_start(range_start)
277 , range_end(range_end)
// Reject a position outside [range_start, range_end] with std::out_of_range.
// This check needs operator< and operator> on octet_iterator.
279 if (it < range_start || it > range_end) {
280 throw std::out_of_range(
"Invalid utf-8 iterator position");
// Exposes the wrapped octet iterator it, returned by value.
284 octet_iterator
base()
const {
return it; }
// The value is obtained from next() called on temp, with range_end as the bound.
288 return next(temp, range_end);
// Operands built over different ranges are treated as a usage error and
// raise std::logic_error.
292 if ((range_start != rhs.range_start) ||
293 (range_end != rhs.range_end)) {
294 throw std::logic_error(
295 "Comparing utf-8 iterators defined with different ranges");
// Move it backwards with prior(), passing range_start as the lower bound.
316 prior(it, range_start);
// Move it backwards with prior(), passing range_start as the lower bound.
322 prior(it, range_start);
// Non-template string helpers; only their declarations appear here.

// Takes a std::string by const reference and returns a std::string.
// NOTE(review): presumably converts text of unknown encoding to UTF-8 - confirm in the implementation.
328 std::string unknowntoutf8(
const std::string& unknown);
// Takes a std::string by const reference and returns a std::string.
// NOTE(review): presumably converts UTF-8 to the system ANSI code page - confirm in the implementation.
329 std::string utf8toansi(
const std::string& utf8);
// Overload working on whole strings: std::string in, std::wstring out.
// NOTE(review): wchar_t width is platform-dependent - confirm the result is meant to hold UTF-16 units everywhere.
330 std::wstring
utf8to16(
const std::string& utf8);
// Overload working on whole strings: std::wstring in, std::string out.
331 std::string
utf16to8(
const std::wstring& utf16);