sheepy

build system (sheepy) and package manager (spm) for C
git clone https://spartatek.se/git/sheepy.git
Log | Files | Refs | README | LICENSE

from-utf32.i (4959B)


      1 /* -*- C++ -*- vim: set syntax=cpp:
      2  * PURPOSE: 
      3  *
      4  * Provide the implementation of character and string converter functions
      5  * FROM utf32 to utf8, utf16, utf32, char, and wchar_t.
      6  *
      7  * STEPS:
      8  *
      9  * (1) Include the implementation of the character converters from utf32 
     10  *     to utf8, utf16, utf32, char, and wchar_t.
     11  *
     12  *     Use: "character-converter/from-utf32.i"
     13  *             --> implementation for utf32
     14  *
     15  *          "../generator/character-converter-char-wchar_t.gi"
     16  *             --> route 'char' and 'wchar_t' conversion to
     17  *                 one of the converters defined before.
     18  *
     19  * (2) Generate the implementation of the string converters in terms
     20  *     of those character converters.
     21  *
     22  *     Use: "../generator/implementation-string-converters.gi"
     23  *
     24  *          which uses
     25  *
     26  *              "../generator/string-converter.gi"
     27  *
     28  *          to implement each string converter from the given 
     29  *          character converters. 
     30  *
     31  * All functions are placed in the analyzer's namespace.
     32  *
     33  * 2010 (C) Frank-Rene Schaefer; 
     34  * ABSOLUTELY NO WARRANTY                                                    */
     35 #if    ! defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF32_I) \
     36     ||   defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__TMP_DISABLED)
     37 #if    ! defined(__QUEX_INCLUDE_GUARD__CONVERTER_HELPER__TMP_DISABLED)
     38 #        define  __QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF32_I
     39 #endif
     40 
     41 #include "from-utf32"
     42 
     43 #define __QUEX_FROM       utf32
     44 #define __QUEX_FROM_TYPE  uint32_t
     45 
     46 QUEX_NAMESPACE_MAIN_OPEN
     47 
     48 /* (1) Implement the character converters utf8, utf16, utf32.
     49  *     (Note, that character converters are generated into namespace 'quex'.)*/
     50 QUEX_INLINE void
     51 QUEX_CONVERTER_CHAR_DEF(utf32, utf8)(const uint32_t**  input_pp, 
     52                                      uint8_t**         output_pp)
     53 {
     54     /* PURPOSE: This function converts the specified unicode character
     55      *          into its utf8 representation. The result is stored
     56      *          at the location where utf8_result points to. Thus, the
     57      *          user has to make sure, that enough space is allocated!
     58      *
     59      * NOTE:    For general applicability let utf8_result point to a space
     60      *          of 7 bytes! This way you can store always a terminating
     61      *          zero after the last byte of the representation.
     62      *
     63      * RETURNS: Pointer to the fist position after the last character.      */
     64     uint32_t  Unicode = **input_pp;
     65     /**/
     66 
     67     if (Unicode <= 0x0000007f) {
     68         *((*output_pp)++) = (uint8_t)Unicode;
     69     } else if (Unicode <= 0x000007ff) {
     70         *((*output_pp)++) = (uint8_t)(0xC0 | (Unicode >> 6)); 
     71         *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3f));
     72     } else if (Unicode <= 0x0000ffff) {
     73         *((*output_pp)++) = (uint8_t)(0xE0 | Unicode           >> 12);
     74         *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0xFFF) >> 6);
     75         *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3F));
     76     } else { 
     77         /* Assume that only character appear, that are defined in unicode. */
     78         __quex_assert(Unicode <= (uint32_t)0x1FFFFF);
     79         /* No surrogate pairs (They are reserved even in non-utf16).       */
     80         __quex_assert(! (Unicode >= 0xd800 && Unicode <= 0xdfff) );
     81 
     82         *((*output_pp)++) = (uint8_t)(0xF0 | Unicode >> 18);
     83         *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3FFFF) >> 12);
     84         *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0xFFF)   >> 6);
     85         *((*output_pp)++) = (uint8_t)(0x80 | (Unicode & (uint32_t)0x3F));
     86     }
     87     /* NOTE: Do not check here for forbitten UTF-8 characters.
     88      * They cannot appear here because we do proper conversion. */
     89     ++(*input_pp);
     90 }
     91 
     92 QUEX_INLINE void
     93 QUEX_CONVERTER_CHAR_DEF(utf32, utf16)(const uint32_t**  input_pp, 
     94                                       uint16_t**        output_pp)
     95 {
     96     uint32_t   tmp = 0;
     97 
     98     if( **input_pp < 0x10000 ) {
     99         *((*output_pp)++) = (uint16_t)**input_pp;
    100     } else { 
    101         tmp             = (uint32_t)(**input_pp - (uint32_t)0x10000);
    102 
    103         *(((*output_pp)++)) = (uint16_t)((tmp >> 10)             | (uint16_t)0xD800);
    104         *(((*output_pp)++)) = (uint16_t)((tmp & (uint32_t)0x3FF) | (uint16_t)0xDC00);
    105     }
    106     ++(*input_pp);
    107 }
    108 
    109 QUEX_INLINE void
    110 QUEX_CONVERTER_CHAR_DEF(utf32, utf32)(const uint32_t**  input_pp, 
    111                                       uint32_t**        output_pp)
    112 {
    113     *((*output_pp)++) = (uint32_t)(*(*input_pp)++);
    114 }
    115 
    116 /* (1b) Derive converters to char and wchar_t from the given set 
    117  *      of converters. (Generator uses __QUEX_FROM and __QUEX_FROM_TYPE)    */
    118 #include "character-converter-to-char-wchar_t.gi"
    119 
    120 /* (2) Generate string converters to utf8, utf16, utf32 based on the
    121  *     definitions of the character converters.                             */
    122 #include "implementations.gi"
    123 
    124 QUEX_NAMESPACE_MAIN_CLOSE
    125 
    126 #endif /* __QUEX_INCLUDE_GUARD__CONVERTER_HELPER__FROM_UTF32_I */