from-utf32.i (4959B)
1 /* -*- C++ -*- vim: set syntax=cpp: 2 * PURPOSE: 3 * 4 * Provide the implementation of character and string converter functions 5 * FROM utf32 to utf8, utf16, utf32, char, and wchar_t. 6 * 7 * STEPS: 8 * 9 * (1) Include the implementation of the character converters from utf32 10 * to utf8, utf16, utf32, char, and wchar_t. 11 * 12 * Use: "character-converter/from-utf32.i" 13 * --> implementation for utf32 14 * 15 * "../generator/character-converter-char-wchar_t.gi" 16 * --> route 'char' and 'wchar_t' conversion to 17 * one of the converters defined before. 18 * 19 * (2) Generate the implementation of the string converters in terms 20 * of those character converters. 21 * 22 * Use: "../generator/implementation-string-converters.gi" 23 * 24 * which uses 25 * 26 * "../generator/string-converter.gi" 27 * 28 * to implement each string converter from the given 29 * character converters. 30 * 31 * All functions are placed in the analyzer's namespace. 32 * 33 * 2010 (C) Frank-Rene Schaefer; 34 * ABSOLUTELY NO WARRANTY */ 35 #if ! defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF32_I) \ 36 || defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__TMP_DISABLED) 37 #if ! defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__TMP_DISABLED) 38 # define __QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF32_I 39 #endif 40 41 #include "from-utf32" 42 43 #define __QUEX_FROM utf32 44 #define __QUEX_FROM_TYPE uint32_t 45 46 QUEX_NAMESPACE_MAIN_OPEN 47 48 /* (1) Implement the character converters utf8, utf16, utf32. 49 * (Note, that character converters are generated into namespace 'quex'.)*/ 50 QUEX_INLINE void 51 QUEX_CONVERTER_CHAR_DEF(utf32, utf8)(const uint32_t** input_pp, 52 uint8_t** output_pp) 53 { 54 /* PURPOSE: This function converts the specified unicode character 55 * into its utf8 representation. The result is stored 56 * at the location where utf8_result points to. Thus, the 57 * user has to make sure, that enough space is allocated! 58 * 59 * NOTE: For general applicability let utf8_result point to a space 60 * of 7 bytes! This way you can store always a terminating 61 * zero after the last byte of the representation. 62 * 63 * RETURNS: Pointer to the fist position after the last character. */ 64 uint32_t Unicode = **input_pp; 65 /**/ 66 67 if (Unicode <= 0x0000007f) { 68 *((*output_pp)++) = (uint8_t)Unicode; 69 } else if (Unicode <= 0x000007ff) { 70 *((*output_pp)++) = (uint8_t)(0xC0 | (Unicode >> 6)); 71 *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3f)); 72 } else if (Unicode <= 0x0000ffff) { 73 *((*output_pp)++) = (uint8_t)(0xE0 | Unicode >> 12); 74 *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0xFFF) >> 6); 75 *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3F)); 76 } else { 77 /* Assume that only character appear, that are defined in unicode. */ 78 __quex_assert(Unicode <= (uint32_t)0x1FFFFF); 79 /* No surrogate pairs (They are reserved even in non-utf16). */ 80 __quex_assert(! (Unicode >= 0xd800 && Unicode <= 0xdfff) ); 81 82 *((*output_pp)++) = (uint8_t)(0xF0 | Unicode >> 18); 83 *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3FFFF) >> 12); 84 *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0xFFF) >> 6); 85 *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3F)); 86 } 87 /* NOTE: Do not check here for forbitten UTF-8 characters. 88 * They cannot appear here because we do proper conversion. */ 89 ++(*input_pp); 90 } 91 92 QUEX_INLINE void 93 QUEX_CONVERTER_CHAR_DEF(utf32, utf16)(const uint32_t** input_pp, 94 uint16_t** output_pp) 95 { 96 uint32_t tmp = 0; 97 98 if( **input_pp < 0x10000 ) { 99 *((*output_pp)++) = (uint16_t)**input_pp; 100 } else { 101 tmp = (uint32_t)(**input_pp - (uint32_t)0x10000); 102 103 *(((*output_pp)++)) = (uint16_t)((tmp >> 10) | (uint16_t)0xD800); 104 *(((*output_pp)++)) = (uint16_t)((tmp & (uint32_t)0x3FF) | (uint16_t)0xDC00); 105 } 106 ++(*input_pp); 107 } 108 109 QUEX_INLINE void 110 QUEX_CONVERTER_CHAR_DEF(utf32, utf32)(const uint32_t** input_pp, 111 uint32_t** output_pp) 112 { 113 *((*output_pp)++) = (uint32_t)(*(*input_pp)++); 114 } 115 116 /* (1b) Derive converters to char and wchar_t from the given set 117 * of converters. (Generator uses __QUEX_FROM and QUEX_FROM_TYPE) */ 118 #include "character-converter-to-char-wchar_t.gi" 119 120 /* (2) Generate string converters to utf8, utf16, utf32 based on the 121 * definitions of the character converters. */ 122 #include "implementations.gi" 123 124 QUEX_NAMESPACE_MAIN_CLOSE 125 126 #endif /* __QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF32_I */