string-converter.gi (4766B)
1 /* -*- C++ -*- vim: set syntax=cpp: 2 * PURPOSE: 3 * 4 * Generate string converter functions which convert a string from one 5 * character codec into another. The conversion is implemented by means 6 * of a character converter function given by: 7 * 8 * QUEX_CONVERTER_CHAR(FROM, TO)(in, out); 9 * 10 * which converts only a single character. The converter function must 11 * be defined before the inclusion of this file. 12 * 13 * PARAMETERS (must be defined macros): 14 * 15 * __QUEX_FROM -- Name of the source character encoding. 16 * __QUEX_FROM_TYPE -- Type of characters that carry the source. 17 * __QUEX_TO_MAX_LENGTH -- Maximum number of 'chunks' which a character 18 * may occupy. A 'chunk' is a 'unit' in which 19 * a codec is interpreted. See below for 20 * the definition of chunks per codec. 21 * 22 * of a code element (== sizeof(__QUEX_FROM_TYPE)). 23 * This is important to maintain a safety margin. 24 * __QUEX_TO -- Name of the target encoding. 25 * __QUEX_TO_TYPE -- Type of characters that carry the drain. 26 * 27 * (C) 2010-2012 Frank-Rene Schaefer 28 * ABSOLUTELY NO WARRANTY */ 29 30 #if ! defined(__QUEX_FROM) 31 # error "__QUEX_FROM must be defined!" 32 #elif ! defined(__QUEX_FROM_TYPE) 33 # error "__QUEX_FROM_TYPE must be defined!" 34 #elif ! defined(__QUEX_TO_TYPE) 35 # error "__QUEX_TO_TYPE must be defined!" 36 #elif ! defined(__QUEX_TO) 37 # error "__QUEX_TO must be defined!" 38 #endif 39 40 /* UTF8 element = 1 byte. UCS character range => max. 4 chunks / char. */ 41 #define __QUEX_TO_MAX_LENGTH_utf8 4 42 /* UTF16 element = 2 bytes. Max. 2 chunks per character. */ 43 #define __QUEX_TO_MAX_LENGTH_utf16 2 44 /* UTF32 element = 4 bytes. Always 1 chunk per character. */ 45 #define __QUEX_TO_MAX_LENGTH_utf32 1 46 /* Assume the worst case for 'char' and 'wchar_t': Both are encoded in 47 * UTF8 (!?). Thus, we would need 4 elements per character for UCS. */ 48 #define __QUEX_TO_MAX_LENGTH_char __QUEX_TO_MAX_LENGTH_utf8 49 #define __QUEX_TO_MAX_LENGTH_wchar __QUEX_TO_MAX_LENGTH_utf8 50 51 /* Define max. length in terms of the given output codec. */ 52 #define ____QUEX_TO_MAX_LENGTH(X) __QUEX_TO_MAX_LENGTH_ ## X 53 #define __QUEX_TO_MAX_LENGTH(X) ____QUEX_TO_MAX_LENGTH(X) 54 55 QUEX_INLINE void 56 QUEX_CONVERTER_STRING_DEF(__QUEX_FROM, __QUEX_TO)(const __QUEX_FROM_TYPE** source_pp, 57 const __QUEX_FROM_TYPE* SourceEnd, 58 __QUEX_TO_TYPE** drain_pp, 59 const __QUEX_TO_TYPE* DrainEnd) 60 { 61 const __QUEX_FROM_TYPE* source_iterator; 62 __QUEX_TO_TYPE* drain_iterator; 63 64 __quex_assert(source_pp != 0x0); 65 __quex_assert(*source_pp != 0x0); 66 __quex_assert(drain_pp != 0x0); 67 __quex_assert(*drain_pp != 0x0); 68 69 drain_iterator = *drain_pp; 70 source_iterator = *source_pp; 71 72 while( 1 + 1 == 2 ) { 73 if( source_iterator == SourceEnd ) break; 74 if( DrainEnd - drain_iterator < (ptrdiff_t)__QUEX_TO_MAX_LENGTH(__QUEX_TO) ) break; 75 QUEX_CONVERTER_CHAR(__QUEX_FROM, __QUEX_TO)(&source_iterator, &drain_iterator); 76 __quex_assert(source_iterator > *source_pp); 77 __quex_assert(source_iterator <= SourceEnd); 78 } 79 80 *drain_pp = drain_iterator; 81 *source_pp = source_iterator; 82 } 83 84 #if ! defined(__QUEX_OPTION_PLAIN_C) 85 QUEX_INLINE std::basic_string<__QUEX_TO_TYPE> 86 QUEX_CONVERTER_STRING_DEF(__QUEX_FROM, __QUEX_TO)(const std::basic_string<__QUEX_FROM_TYPE>& Source) 87 { 88 const __QUEX_FROM_TYPE* source_iterator = (__QUEX_FROM_TYPE*)Source.c_str(); 89 const __QUEX_FROM_TYPE* source_end = source_iterator + Source.length(); 90 __QUEX_TO_TYPE drain[__QUEX_TO_MAX_LENGTH(__QUEX_TO) + 1]; 91 __QUEX_TO_TYPE* drain_iterator = 0; 92 std::basic_string<__QUEX_TO_TYPE> result; 93 94 while( source_iterator != source_end ) { 95 drain_iterator = drain; 96 QUEX_CONVERTER_CHAR(__QUEX_FROM, __QUEX_TO)(&source_iterator, &drain_iterator); 97 __quex_assert(source_iterator > (__QUEX_FROM_TYPE*)Source.c_str()); 98 __quex_assert(source_iterator <= source_end); 99 result.append((__QUEX_TO_TYPE*)drain, (size_t)(drain_iterator - drain)); 100 } 101 return result; 102 } 103 #endif 104 105 #undef __QUEX_TO 106 #undef __QUEX_TO_TYPE 107 #undef __QUEX_TO_MAX_LENGTH_utf8 108 #undef __QUEX_TO_MAX_LENGTH_utf16 109 #undef __QUEX_TO_MAX_LENGTH_utf32 110 #undef ____QUEX_TO_MAX_LENGTH 111 #undef __QUEX_TO_MAX_LENGTH