encodings.h Source File

encodings.h Source File#

Composable Kernel: encodings.h Source File
encodings.h
Go to the documentation of this file.
1// Tencent is pleased to support the open source community by making RapidJSON available.
2//
3// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4//
5// Licensed under the MIT License (the "License"); you may not use this file except
6// in compliance with the License. You may obtain a copy of the License at
7//
8// http://opensource.org/licenses/MIT
9//
10// Unless required by applicable law or agreed to in writing, software distributed
11// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13// specific language governing permissions and limitations under the License.
14
15#ifndef RAPIDJSON_ENCODINGS_H_
16#define RAPIDJSON_ENCODINGS_H_
17
18#include "rapidjson.h"
19
20#if defined(_MSC_VER) && !defined(__clang__)
21RAPIDJSON_DIAG_PUSH
22RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
23RAPIDJSON_DIAG_OFF(4702) // unreachable code
24#elif defined(__GNUC__)
25RAPIDJSON_DIAG_PUSH
26RAPIDJSON_DIAG_OFF(effc++)
27RAPIDJSON_DIAG_OFF(overflow)
28#endif
29
31
33// Encoding
34
48 template<typename OutputStream>
49 static void Encode(OutputStream& os, unsigned codepoint);
50
55 template <typename InputStream>
56 static bool Decode(InputStream& is, unsigned* codepoint);
57
63 template <typename InputStream, typename OutputStream>
64 static bool Validate(InputStream& is, OutputStream& os);
65
66 // The following functions deal with byte streams.
67
69 template <typename InputByteStream>
70 static CharType TakeBOM(InputByteStream& is);
71
73 template <typename InputByteStream>
74 static Ch Take(InputByteStream& is);
75
77 template <typename OutputByteStream>
78 static void PutBOM(OutputByteStream& os);
79
81 template <typename OutputByteStream>
82 static void Put(OutputByteStream& os, Ch c);
83};
84\endcode
85*/
86
88// UTF8
89
91
96template <typename CharType = char>
97struct UTF8
98{
99 typedef CharType Ch;
100
101 enum
102 {
104 };
105
106 template <typename OutputStream>
107 static void Encode(OutputStream& os, unsigned codepoint)
108 {
109 if(codepoint <= 0x7F)
110 os.Put(static_cast<Ch>(codepoint & 0xFF));
111 else if(codepoint <= 0x7FF)
112 {
113 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
114 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
115 }
116 else if(codepoint <= 0xFFFF)
117 {
118 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
119 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
120 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
121 }
122 else
123 {
124 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
125 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
126 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
127 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
128 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
129 }
130 }
131
132 template <typename OutputStream>
133 static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
134 {
135 if(codepoint <= 0x7F)
136 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
137 else if(codepoint <= 0x7FF)
138 {
139 PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
140 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
141 }
142 else if(codepoint <= 0xFFFF)
143 {
144 PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
145 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
146 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
147 }
148 else
149 {
150 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
151 PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
152 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
153 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
154 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
155 }
156 }
157
// Decodes one UTF-8 sequence from `is` into `*codepoint`.
// Returns true for a unit with the high bit clear, false when the lead unit's
// GetRange() class has no case in the switch, and otherwise the accumulated
// `result` of the RAPIDJSON_TRANS checks.
// NOTE(review): this listing skips source line numbers inside the switch
// (e.g. 189-190, 193, 195), so the case bodies shown are incomplete; confirm
// against the original header before relying on them.
158 template <typename InputStream>
159 static bool Decode(InputStream& is, unsigned* codepoint)
160 {
// RAPIDJSON_COPY: take the next unit into `c` and shift its low six bits into *codepoint.
161#define RAPIDJSON_COPY() \
162 c = is.Take(); \
163 *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
// RAPIDJSON_TRANS: clear `result` unless the GetRange() class of `c` shares a bit with `mask`.
164#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
// RAPIDJSON_TAIL: COPY followed by a check against 0x70, i.e. any of the
// 0x10 / 0x20 / 0x40 classes that GetRange() assigns to 0x80-0xBF.
165#define RAPIDJSON_TAIL() \
166 RAPIDJSON_COPY(); \
167 RAPIDJSON_TRANS(0x70)
168 typename InputStream::Ch c = is.Take();
// High bit clear: the unit is the code point.
169 if(!(c & 0x80))
170 {
171 *codepoint = static_cast<unsigned char>(c);
172 return true;
173 }
174
175 unsigned char type = GetRange(static_cast<unsigned char>(c));
// Classes 0x20 and 0x40 are >= 32; presumably this guard avoids shifting 0xFFu
// by the full width of unsigned - TODO confirm.
176 if(type >= 32)
177 {
178 *codepoint = 0;
179 }
180 else
181 {
// Seed the code point with the lead unit's bits that survive (0xFFu >> type).
182 *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
183 }
184 bool result = true;
// Dispatch on the lead unit's class; classes without a case are rejected.
185 switch(type)
186 {
187 case 2: RAPIDJSON_TAIL(); return result;
188 case 3:
191 return result;
192 case 4:
// 0x50 == 0x10 | 0x40
194 RAPIDJSON_TRANS(0x50);
196 return result;
197 case 5:
199 RAPIDJSON_TRANS(0x10);
202 return result;
203 case 6:
207 return result;
208 case 10:
210 RAPIDJSON_TRANS(0x20);
212 return result;
213 case 11:
// 0x60 == 0x20 | 0x40
215 RAPIDJSON_TRANS(0x60);
218 return result;
219 default: return false;
220 }
// The helper macros are local to this function.
221#undef RAPIDJSON_COPY
222#undef RAPIDJSON_TRANS
223#undef RAPIDJSON_TAIL
224 }
225
// Validates one UTF-8 sequence, using the same GetRange() class dispatch as Decode().
// The RAPIDJSON_COPY macro defined here forwards each unit taken from `is` to `os`.
// Returns true for a unit with the high bit clear, false for a lead class with
// no case in the switch, otherwise the accumulated `result`.
// NOTE(review): this listing skips source line numbers (237 and several inside
// the switch), so the function body shown is incomplete; confirm against the
// original header.
226 template <typename InputStream, typename OutputStream>
227 static bool Validate(InputStream& is, OutputStream& os)
228 {
// RAPIDJSON_COPY: unless `c` is already '\0', take the next unit into `c` and put it to `os`.
229#define RAPIDJSON_COPY() \
230 if(c != '\0') \
231 os.Put(c = is.Take())
// RAPIDJSON_TRANS: clear `result` unless the GetRange() class of `c` shares a bit with `mask`.
232#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
// RAPIDJSON_TAIL: COPY followed by a check against the 0x10 / 0x20 / 0x40 classes.
233#define RAPIDJSON_TAIL() \
234 RAPIDJSON_COPY(); \
235 RAPIDJSON_TRANS(0x70)
// Initialised to a non-zero value so that the first RAPIDJSON_COPY() is not skipped.
236 Ch c = static_cast<Ch>(-1);
// NOTE(review): source line 237 is missing here; presumably it is the
// RAPIDJSON_COPY() that reads the lead unit - TODO confirm.
238 if(!(c & 0x80))
239 return true;
240
241 bool result = true;
242 switch(GetRange(static_cast<unsigned char>(c)))
243 {
244 case 2: RAPIDJSON_TAIL(); return result;
245 case 3:
248 return result;
249 case 4:
// 0x50 == 0x10 | 0x40
251 RAPIDJSON_TRANS(0x50);
253 return result;
254 case 5:
256 RAPIDJSON_TRANS(0x10);
259 return result;
260 case 6:
264 return result;
265 case 10:
267 RAPIDJSON_TRANS(0x20);
269 return result;
270 case 11:
// 0x60 == 0x20 | 0x40
272 RAPIDJSON_TRANS(0x60);
275 return result;
276 default: return false;
277 }
// The helper macros are local to this function.
278#undef RAPIDJSON_COPY
279#undef RAPIDJSON_TRANS
280#undef RAPIDJSON_TAIL
281 }
282
//! Maps a byte value to its class in the UTF-8 decoding table.
/*! Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
    With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation
    can test multiple types.
*/
static unsigned char GetRange(unsigned char c)
{
    // One entry per byte value, sixteen entries per row.
    static const unsigned char kByteClass[256] = {
        // 0x00 - 0x7F
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        // 0x80 - 0x8F
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
        // 0x90 - 0x9F
        0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
        0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
        // 0xA0 - 0xBF
        0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
        0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
        0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
        0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
        // 0xC0 - 0xDF
        8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        // 0xE0 - 0xEF
        10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
        // 0xF0 - 0xFF
        11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    };
    return kByteClass[c];
}
311
312 template <typename InputByteStream>
313 static CharType TakeBOM(InputByteStream& is)
314 {
315 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
316 typename InputByteStream::Ch c = Take(is);
317 if(static_cast<unsigned char>(c) != 0xEFu)
318 return c;
319 c = is.Take();
320 if(static_cast<unsigned char>(c) != 0xBBu)
321 return c;
322 c = is.Take();
323 if(static_cast<unsigned char>(c) != 0xBFu)
324 return c;
325 c = is.Take();
326 return c;
327 }
328
329 template <typename InputByteStream>
330 static Ch Take(InputByteStream& is)
331 {
332 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
333 return static_cast<Ch>(is.Take());
334 }
335
336 template <typename OutputByteStream>
337 static void PutBOM(OutputByteStream& os)
338 {
339 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
340 os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
341 os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
342 os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
343 }
344
345 template <typename OutputByteStream>
346 static void Put(OutputByteStream& os, Ch c)
347 {
348 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
349 os.Put(static_cast<typename OutputByteStream::Ch>(c));
350 }
351};
352
354// UTF16
355
357
365template <typename CharType = wchar_t>
366struct UTF16
367{
368 typedef CharType Ch;
370
371 enum
372 {
374 };
375
376 template <typename OutputStream>
377 static void Encode(OutputStream& os, unsigned codepoint)
378 {
379 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
380 if(codepoint <= 0xFFFF)
381 {
382 RAPIDJSON_ASSERT(codepoint < 0xD800 ||
383 codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
384 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
385 }
386 else
387 {
388 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
389 unsigned v = codepoint - 0x10000;
390 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
391 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
392 }
393 }
394
395 template <typename OutputStream>
396 static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
397 {
398 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
399 if(codepoint <= 0xFFFF)
400 {
401 RAPIDJSON_ASSERT(codepoint < 0xD800 ||
402 codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
403 PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
404 }
405 else
406 {
407 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
408 unsigned v = codepoint - 0x10000;
409 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
410 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
411 }
412 }
413
414 template <typename InputStream>
415 static bool Decode(InputStream& is, unsigned* codepoint)
416 {
417 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
418 typename InputStream::Ch c = is.Take();
419 if(c < 0xD800 || c > 0xDFFF)
420 {
421 *codepoint = static_cast<unsigned>(c);
422 return true;
423 }
424 else if(c <= 0xDBFF)
425 {
426 *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
427 c = is.Take();
428 *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
429 *codepoint += 0x10000;
430 return c >= 0xDC00 && c <= 0xDFFF;
431 }
432 return false;
433 }
434
435 template <typename InputStream, typename OutputStream>
436 static bool Validate(InputStream& is, OutputStream& os)
437 {
438 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
439 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
440 typename InputStream::Ch c;
441 os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
442 if(c < 0xD800 || c > 0xDFFF)
443 return true;
444 else if(c <= 0xDBFF)
445 {
446 os.Put(c = is.Take());
447 return c >= 0xDC00 && c <= 0xDFFF;
448 }
449 return false;
450 }
451};
452
454template <typename CharType = wchar_t>
455struct UTF16LE : UTF16<CharType>
456{
457 template <typename InputByteStream>
458 static CharType TakeBOM(InputByteStream& is)
459 {
460 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
461 CharType c = Take(is);
462 return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
463 }
464
465 template <typename InputByteStream>
466 static CharType Take(InputByteStream& is)
467 {
468 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
469 unsigned c = static_cast<uint8_t>(is.Take());
470 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
471 return static_cast<CharType>(c);
472 }
473
474 template <typename OutputByteStream>
475 static void PutBOM(OutputByteStream& os)
476 {
477 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
478 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
479 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
480 }
481
482 template <typename OutputByteStream>
483 static void Put(OutputByteStream& os, CharType c)
484 {
485 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
486 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
487 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
488 }
489};
490
492template <typename CharType = wchar_t>
493struct UTF16BE : UTF16<CharType>
494{
495 template <typename InputByteStream>
496 static CharType TakeBOM(InputByteStream& is)
497 {
498 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
499 CharType c = Take(is);
500 return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
501 }
502
503 template <typename InputByteStream>
504 static CharType Take(InputByteStream& is)
505 {
506 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
507 unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
508 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
509 return static_cast<CharType>(c);
510 }
511
512 template <typename OutputByteStream>
513 static void PutBOM(OutputByteStream& os)
514 {
515 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
516 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
517 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
518 }
519
520 template <typename OutputByteStream>
521 static void Put(OutputByteStream& os, CharType c)
522 {
523 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
524 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
525 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
526 }
527};
528
530// UTF32
531
533
540template <typename CharType = unsigned>
541struct UTF32
542{
543 typedef CharType Ch;
545
546 enum
547 {
549 };
550
551 template <typename OutputStream>
552 static void Encode(OutputStream& os, unsigned codepoint)
553 {
554 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
555 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
556 os.Put(codepoint);
557 }
558
559 template <typename OutputStream>
560 static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
561 {
562 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
563 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
564 PutUnsafe(os, codepoint);
565 }
566
567 template <typename InputStream>
568 static bool Decode(InputStream& is, unsigned* codepoint)
569 {
570 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
571 Ch c = is.Take();
572 *codepoint = c;
573 return c <= 0x10FFFF;
574 }
575
576 template <typename InputStream, typename OutputStream>
577 static bool Validate(InputStream& is, OutputStream& os)
578 {
579 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
580 Ch c;
581 os.Put(c = is.Take());
582 return c <= 0x10FFFF;
583 }
584};
585
587template <typename CharType = unsigned>
588struct UTF32LE : UTF32<CharType>
589{
590 template <typename InputByteStream>
591 static CharType TakeBOM(InputByteStream& is)
592 {
593 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
594 CharType c = Take(is);
595 return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
596 }
597
598 template <typename InputByteStream>
599 static CharType Take(InputByteStream& is)
600 {
601 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
602 unsigned c = static_cast<uint8_t>(is.Take());
603 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
604 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
605 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
606 return static_cast<CharType>(c);
607 }
608
609 template <typename OutputByteStream>
610 static void PutBOM(OutputByteStream& os)
611 {
612 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
613 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
614 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
615 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
616 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
617 }
618
619 template <typename OutputByteStream>
620 static void Put(OutputByteStream& os, CharType c)
621 {
622 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
623 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
624 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
625 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
626 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
627 }
628};
629
631template <typename CharType = unsigned>
632struct UTF32BE : UTF32<CharType>
633{
634 template <typename InputByteStream>
635 static CharType TakeBOM(InputByteStream& is)
636 {
637 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
638 CharType c = Take(is);
639 return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
640 }
641
642 template <typename InputByteStream>
643 static CharType Take(InputByteStream& is)
644 {
645 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
646 unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
647 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
648 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
649 c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
650 return static_cast<CharType>(c);
651 }
652
653 template <typename OutputByteStream>
654 static void PutBOM(OutputByteStream& os)
655 {
656 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
657 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
658 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
659 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
660 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
661 }
662
663 template <typename OutputByteStream>
664 static void Put(OutputByteStream& os, CharType c)
665 {
666 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
667 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
668 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
669 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
670 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
671 }
672};
673
675// ASCII
676
678
682template <typename CharType = char>
683struct ASCII
684{
685 typedef CharType Ch;
686
687 enum
688 {
690 };
691
692 template <typename OutputStream>
693 static void Encode(OutputStream& os, unsigned codepoint)
694 {
695 RAPIDJSON_ASSERT(codepoint <= 0x7F);
696 os.Put(static_cast<Ch>(codepoint & 0xFF));
697 }
698
699 template <typename OutputStream>
700 static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
701 {
702 RAPIDJSON_ASSERT(codepoint <= 0x7F);
703 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
704 }
705
//! Takes one unit from \c is, stores its low eight bits in \c *codepoint.
/*! \return true when that byte is at most 0x7F (high bit clear). */
template <typename InputStream>
static bool Decode(InputStream& is, unsigned* codepoint)
{
    const uint8_t byte = static_cast<uint8_t>(is.Take());
    *codepoint         = byte;
    return (byte & 0x80) == 0;
}
713
//! Takes one unit from \c is, writes its low eight bits to \c os.
/*! The byte is forwarded even when it is out of range.
    \return true when that byte is at most 0x7F.
*/
template <typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os)
{
    typedef typename OutputStream::Ch OutCh;
    const uint8_t byte = static_cast<uint8_t>(is.Take());
    os.Put(static_cast<OutCh>(byte));
    return byte < 0x80;
}
721
722 template <typename InputByteStream>
723 static CharType TakeBOM(InputByteStream& is)
724 {
725 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
726 uint8_t c = static_cast<uint8_t>(Take(is));
727 return static_cast<Ch>(c);
728 }
729
730 template <typename InputByteStream>
731 static Ch Take(InputByteStream& is)
732 {
733 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
734 return static_cast<Ch>(is.Take());
735 }
736
737 template <typename OutputByteStream>
738 static void PutBOM(OutputByteStream& os)
739 {
740 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
741 (void)os;
742 }
743
744 template <typename OutputByteStream>
745 static void Put(OutputByteStream& os, Ch c)
746 {
747 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
748 os.Put(static_cast<typename OutputByteStream::Ch>(c));
749 }
750};
751
753// AutoUTF
754
757{
758 kUTF8 = 0,
763};
764
766
769template <typename CharType>
771{
772 typedef CharType Ch;
773
774 enum
775 {
777 };
778
779#define RAPIDJSON_ENCODINGS_FUNC(x) \
780 UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
781
782 template <typename OutputStream>
783 static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint)
784 {
785 typedef void (*EncodeFunc)(OutputStream&, unsigned);
786 static const EncodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Encode)};
787 (*f[os.GetType()])(os, codepoint);
788 }
789
790 template <typename OutputStream>
791 static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint)
792 {
793 typedef void (*EncodeFunc)(OutputStream&, unsigned);
794 static const EncodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe)};
795 (*f[os.GetType()])(os, codepoint);
796 }
797
798 template <typename InputStream>
799 static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint)
800 {
801 typedef bool (*DecodeFunc)(InputStream&, unsigned*);
802 static const DecodeFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Decode)};
803 return (*f[is.GetType()])(is, codepoint);
804 }
805
806 template <typename InputStream, typename OutputStream>
807 static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
808 {
809 typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
810 static const ValidateFunc f[] = {RAPIDJSON_ENCODINGS_FUNC(Validate)};
811 return (*f[is.GetType()])(is, os);
812 }
813
814#undef RAPIDJSON_ENCODINGS_FUNC
815};
816
818// Transcoder
819
821template <typename SourceEncoding, typename TargetEncoding>
823{
826 template <typename InputStream, typename OutputStream>
827 static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os)
828 {
829 unsigned codepoint;
830 if(!SourceEncoding::Decode(is, &codepoint))
831 return false;
832 TargetEncoding::Encode(os, codepoint);
833 return true;
834 }
835
836 template <typename InputStream, typename OutputStream>
837 static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os)
838 {
839 unsigned codepoint;
840 if(!SourceEncoding::Decode(is, &codepoint))
841 return false;
842 TargetEncoding::EncodeUnsafe(os, codepoint);
843 return true;
844 }
845
847 template <typename InputStream, typename OutputStream>
848 static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
849 {
850 return Transcode(is, os); // Since source/target encoding is different, must transcode.
851 }
852};
853
854// Forward declaration.
855template <typename Stream>
856inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
857
859template <typename Encoding>
861{
862 template <typename InputStream, typename OutputStream>
863 static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os)
864 {
865 os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary
866 // template class.
867 return true;
868 }
869
870 template <typename InputStream, typename OutputStream>
871 static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os)
872 {
873 PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from
874 // primary template class.
875 return true;
876 }
877
878 template <typename InputStream, typename OutputStream>
879 static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
880 {
881 return Encoding::Validate(is, os); // source/target encoding are the same
882 }
883};
884
886
887#if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__))
888RAPIDJSON_DIAG_POP
889#endif
890
891#endif // RAPIDJSON_ENCODINGS_H_
Concept for encoding of Unicode characters.
Concept for reading and writing characters.
#define RAPIDJSON_COPY()
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition encodings.h:779
#define RAPIDJSON_TAIL()
UTFType
Runtime-specified UTF encoding type of a stream.
Definition encodings.h:757
@ kUTF32BE
UTF-32 big endian.
Definition encodings.h:762
@ kUTF16BE
UTF-16 big endian.
Definition encodings.h:760
@ kUTF8
UTF-8.
Definition encodings.h:758
@ kUTF32LE
UTF-32 little endian.
Definition encodings.h:761
@ kUTF16LE
UTF-16 little endian.
Definition encodings.h:759
void PutUnsafe(Stream &stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
Definition stream.h:96
#define RAPIDJSON_TRANS(mask)
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:451
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:121
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:124
common definitions and configuration
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition rapidjson.h:500
unsigned short uint16_t
Definition stdint.h:125
unsigned int uint32_t
Definition stdint.h:126
unsigned char uint8_t
Definition stdint.h:124
ASCII encoding.
Definition encodings.h:684
CharType Ch
Definition encodings.h:685
static void Put(OutputByteStream &os, Ch c)
Definition encodings.h:745
static void PutBOM(OutputByteStream &os)
Definition encodings.h:738
static bool Validate(InputStream &is, OutputStream &os)
Definition encodings.h:715
static bool Decode(InputStream &is, unsigned *codepoint)
Definition encodings.h:707
@ supportUnicode
Definition encodings.h:689
static CharType TakeBOM(InputByteStream &is)
Definition encodings.h:723
static Ch Take(InputByteStream &is)
Definition encodings.h:731
static void Encode(OutputStream &os, unsigned codepoint)
Definition encodings.h:693
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition encodings.h:700
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
Definition encodings.h:771
static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition encodings.h:791
CharType Ch
Definition encodings.h:772
@ supportUnicode
Definition encodings.h:776
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Definition encodings.h:807
static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os, unsigned codepoint)
Definition encodings.h:783
static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is, unsigned *codepoint)
Definition encodings.h:799
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Definition encodings.h:879
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Definition encodings.h:863
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
Definition encodings.h:871
Encoding conversion.
Definition encodings.h:823
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Definition encodings.h:827
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
Definition encodings.h:837
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
Definition encodings.h:848
UTF-16 big endian encoding.
Definition encodings.h:494
static CharType TakeBOM(InputByteStream &is)
Definition encodings.h:496
static CharType Take(InputByteStream &is)
Definition encodings.h:504
static void Put(OutputByteStream &os, CharType c)
Definition encodings.h:521
static void PutBOM(OutputByteStream &os)
Definition encodings.h:513
UTF-16 encoding.
Definition encodings.h:367
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >=2)
static bool Decode(InputStream &is, unsigned *codepoint)
Definition encodings.h:415
static bool Validate(InputStream &is, OutputStream &os)
Definition encodings.h:436
CharType Ch
Definition encodings.h:368
@ supportUnicode
Definition encodings.h:373
static void Encode(OutputStream &os, unsigned codepoint)
Definition encodings.h:377
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition encodings.h:396
UTF-16 little endian encoding.
Definition encodings.h:456
static CharType Take(InputByteStream &is)
Definition encodings.h:466
static void PutBOM(OutputByteStream &os)
Definition encodings.h:475
static CharType TakeBOM(InputByteStream &is)
Definition encodings.h:458
static void Put(OutputByteStream &os, CharType c)
Definition encodings.h:483
UTF-32 big endian encoding.
Definition encodings.h:633
static CharType TakeBOM(InputByteStream &is)
Definition encodings.h:635
static void PutBOM(OutputByteStream &os)
Definition encodings.h:654
static CharType Take(InputByteStream &is)
Definition encodings.h:643
static void Put(OutputByteStream &os, CharType c)
Definition encodings.h:664
UTF-32 encoding.
Definition encodings.h:542
static void Encode(OutputStream &os, unsigned codepoint)
Definition encodings.h:552
static bool Decode(InputStream &is, unsigned *codepoint)
Definition encodings.h:568
static bool Validate(InputStream &is, OutputStream &os)
Definition encodings.h:577
@ supportUnicode
Definition encodings.h:548
RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >=4)
CharType Ch
Definition encodings.h:543
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition encodings.h:560
UTF-32 little endian encoding.
Definition encodings.h:589
static void Put(OutputByteStream &os, CharType c)
Definition encodings.h:620
static CharType TakeBOM(InputByteStream &is)
Definition encodings.h:591
static void PutBOM(OutputByteStream &os)
Definition encodings.h:610
static CharType Take(InputByteStream &is)
Definition encodings.h:599
UTF-8 encoding.
Definition encodings.h:98
static bool Decode(InputStream &is, unsigned *codepoint)
Definition encodings.h:159
static CharType TakeBOM(InputByteStream &is)
Definition encodings.h:313
@ supportUnicode
Definition encodings.h:103
static Ch Take(InputByteStream &is)
Definition encodings.h:330
static void PutBOM(OutputByteStream &os)
Definition encodings.h:337
CharType Ch
Definition encodings.h:99
static bool Validate(InputStream &is, OutputStream &os)
Definition encodings.h:227
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition encodings.h:133
static void Put(OutputByteStream &os, Ch c)
Definition encodings.h:346
static unsigned char GetRange(unsigned char c)
Definition encodings.h:283
static void Encode(OutputStream &os, unsigned codepoint)
Definition encodings.h:107