sheepy

build system (sheepy) and package manager (spm) for C
git clone https://spartatek.se/git/sheepy.git
Log | Files | Refs | README | LICENSE

from-utf16.i (5067B)


      1 /* -*- C++ -*- vim: set syntax=cpp:
      2  * PURPOSE: 
      3  *
      4  * Provide the implementation of character and string converter functions
      5  * FROM utf16 to utf8, utf16, utf32, char, and wchar_t.
      6  *
      7  * STEPS:
      8  *
      9  * (1) Include the implementation of the character converters from utf16 
     10  *     to utf8, utf16, utf32, char, and wchar_t.
     11  *
     12  *     Use: "character-converter/from-utf16.i"
     13  *             --> implementation for utf16
     14  *
     15  *          "../generator/character-converter-char-wchar_t.gi"
     16  *             --> route 'char' and 'wchar_t' conversion to
     17  *                 one of the converters defined before.
     18  *
     19  * (2) Generate the implementation of the string converters in terms
     20  *     of those character converters.
     21  *
     22  *     Use: "../generator/implementation-string-converters.gi"
     23  *
     24  *          which uses
     25  *
     26  *              "../generator/string-converter.gi"
     27  *
     28  *          to implement each string converter from the given 
     29  *          character converters. 
     30  *
     31  * All functions are placed in the analyzer's namespace.
     32  *
     33  * 2010 (C) Frank-Rene Schaefer; 
     34  * ABSOLUTELY NO WARRANTY                                                    */
     35 #if    ! defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF16_I) \
     36     ||   defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__TMP_DISABLED)
     37 #if    ! defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__TMP_DISABLED)
     38 #        define  __QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF16_I
     39 #endif
     40 
     41 #include "from-utf16"
     42 
     43 #define __QUEX_FROM       utf16
     44 #define __QUEX_FROM_TYPE  uint16_t
     45 
     46 /* (1) Implement the character converters utf8, utf16, utf32.
     47  *     (Note, that character converters are generated into namespace 'quex'.)*/
     48 QUEX_NAMESPACE_MAIN_OPEN
     49 
     50 QUEX_INLINE void
     51 QUEX_CONVERTER_CHAR_DEF(utf16, utf8)(const uint16_t** input_pp, uint8_t** output_pp)
     52 {
     53     uint32_t  x0      = (uint16_t)0;
     54     uint32_t  x1      = (uint16_t)0;
     55     uint32_t  unicode = (uint32_t)0;
     56 
     57     if ( **input_pp <= (uint16_t)0x7f ) {
     58         *((*output_pp)++) = (uint8_t)*(*input_pp);
     59         ++(*input_pp);
     60 
     61     } else if ( **input_pp <= (uint16_t)0x7ff ) {
     62         *((*output_pp)++) = (uint8_t)(0xC0 | (*(*input_pp) >> 6)); 
     63         *((*output_pp)++) = (uint8_t)(0x80 | (*(*input_pp) & (uint16_t)0x3F));
     64         ++(*input_pp);
     65 
     66     } else if ( **input_pp < (uint16_t)0xD800 ) { 
     67         *((*output_pp)++) = (uint8_t)(0xE0 |  *(*input_pp)                    >> 12);
     68         *((*output_pp)++) = (uint8_t)(0x80 | (*(*input_pp) & (uint16_t)0xFFF) >> 6);
     69         *((*output_pp)++) = (uint8_t)(0x80 | (*(*input_pp) & (uint16_t)0x3F));
     70         ++(*input_pp);
     71 
     72     } else if ( **input_pp < (uint16_t)0xE000 ) { 
     73         /* Characters > 0xFFFF need to be coded in two bytes by means of surrogates. */
     74         x0 = (uint32_t)(*(*input_pp)++ - (uint32_t)0xD800);
     75         x1 = (uint32_t)(*(*input_pp)++ - (uint32_t)0xDC00);
     76         unicode = (x0 << 10) + x1 + 0x10000;
     77 
     78         /* Assume that only character appear, that are defined in unicode. */
     79         __quex_assert(unicode <= (uint16_t)0x1FFFFF);
     80 
     81         *((*output_pp)++) = (uint8_t)(0xF0 | unicode                       >> 18);
     82         *((*output_pp)++) = (uint8_t)(0x80 | (unicode & (uint32_t)0x3FFFF) >> 12);
     83         *((*output_pp)++) = (uint8_t)(0x80 | (unicode & (uint32_t)0xFFF)   >> 6);
     84         *((*output_pp)++) = (uint8_t)(0x80 | (unicode & (uint32_t)0x3F));
     85 
     86     } else { 
     87         /* Always true: **input_pp <= 0xFFFF */
     88         *((*output_pp)++) = (uint8_t)(0xE0 |  *(*input_pp)                    >> 12);
     89         *((*output_pp)++) = (uint8_t)(0x80 | (*(*input_pp) & (uint16_t)0xFFF) >> 6);
     90         *((*output_pp)++) = (uint8_t)(0x80 | (*(*input_pp) & (uint16_t)0x3F));
     91         ++(*input_pp);
     92     } 
     93 }
     94 
     95 QUEX_INLINE void
     96 QUEX_CONVERTER_CHAR_DEF(utf16, utf16)(const uint16_t**  input_pp, 
     97                                       uint16_t**        output_pp)
     98 {
     99     if( **input_pp < (uint16_t)0xD800 || **input_pp >= (uint16_t)0xE000 ) {
    100         *((*output_pp)++) = *(*input_pp)++;
    101     } else { 
    102         *((*output_pp)++) = *(*input_pp)++;
    103         *((*output_pp)++) = *(*input_pp)++;
    104     }
    105 }
    106 
    107 QUEX_INLINE void
    108 QUEX_CONVERTER_CHAR_DEF(utf16, utf32)(const uint16_t**  input_pp, 
    109                                       uint32_t**        output_pp)
    110 {
    111     uint32_t  x0 = (uint32_t)0;
    112     uint32_t  x1 = (uint32_t)0;
    113 
    114     if( **input_pp < (uint16_t)0xD800 || **input_pp >= (uint16_t)0xE000 ) {
    115         *((*output_pp)++) = *(*input_pp)++;
    116     } else { 
    117         x0 = (uint32_t)(*(*input_pp)++) - (uint32_t)0xD800;
    118         x1 = (uint32_t)(*(*input_pp)++) - (uint32_t)0xDC00;
    119         *((*output_pp)++) = (x0 << 10) + x1 + (uint32_t)0x10000;
    120     }
    121 }
    122 
    123 /* (1b) Derive converters to char and wchar_t from the given set 
    124  *      of converters. (Generator uses __QUEX_FROM and __QUEX_FROM_TYPE)    */
    125 #include "character-converter-to-char-wchar_t.gi"
    126 
    127 /* (2) Generate string converters to utf8, utf16, utf32 based on the
    128  *     definitions of the character converters.                             */
    129 #include "implementations.gi"
    130 
    131 QUEX_NAMESPACE_MAIN_CLOSE
    132 
    133 #endif /* __QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF16_I */