sheepy

build system (sheepy) and package manager (spm) for C
git clone https://spartatek.se/git/sheepy.git
Log | Files | Refs | README | LICENSE

bom (3963B)


      1 /* -*- C++ -*- vim:set syntax=cpp: 
      2  *
      3  * Byte Order Mark (BOM) Handling.
      4  *
      5  * The byte order mark (BOM) is a Unicode character used to signal 
      6  * the endianness (byte order) of a text file or stream. Its code 
      7  * point is U+FEFF. 
      8  * [Source: <http://en.wikipedia.org/wiki/Byte_order_mark>]
      9  *
     10  * This file declares functions that cut the BOM from a data stream
     11  * and report the encoding of that stream.
     12  *
     13  * (C) 2010 Frank-Rene Schaefer    
     14 
     15  * ABSOLUTELY NO WARRANTY                                                      */
     16 #ifndef __QUEX_INCLUDE_GUARD__BOM
     17 #define __QUEX_INCLUDE_GUARD__BOM
     18 
     19 #include "definitions"
     20 
     21 typedef enum {  /* 'Dn' below means bit n of the value; each encoding family owns one bit. */
     22     QUEX_BOM_NONE            = 0x200,  /* D9 --> NONE/NOT SURE */
     23     QUEX_BOM_UTF_8           = 0x001,  /* D0 --> UTF 8         */
     24     QUEX_BOM_UTF_1           = 0x002,  /* D1 --> UTF 1         */
     25     QUEX_BOM_UTF_EBCDIC      = 0x004,  /* D2 --> UTF EBCDIC    */
     26     QUEX_BOM_BOCU_1          = 0x008,  /* D3 --> BOCU 1        */
     27     QUEX_BOM_GB_18030        = 0x010,  /* D4 --> GB_18030      */
     28     QUEX_BOM_UTF_7           = 0x220,  /* D5 --> UTF 7;        
     29                                         * D9 --> but not certain (shares the NOT SURE bit). */
     30     QUEX_BOM_UTF_16          = 0x040,  /* D6 --> UTF 16        */         
     31     QUEX_BOM_UTF_16_LE       = 0x041,  /* D6 + sub-code 1: little endian */
     32     QUEX_BOM_UTF_16_BE       = 0x042,  /* D6 + sub-code 2: big endian    */
     33     QUEX_BOM_UTF_32          = 0x080,  /* D7 --> UTF 32        */
     34     QUEX_BOM_UTF_32_LE       = 0x081,  /* D7 + sub-code 1: little endian */
     35     QUEX_BOM_UTF_32_BE       = 0x082,  /* D7 + sub-code 2: big endian    */
     36     QUEX_BOM_SCSU            = 0x100,  /* D8 --> SCSU          */
     37     QUEX_BOM_SCSU_TO_UCS     = 0x101,  /* D8 + sub-code 1 */
     38     QUEX_BOM_SCSU_W0_TO_FE80 = 0x102,  /* D8 + sub-code 2 */
     39     QUEX_BOM_SCSU_W1_TO_FE80 = 0x103,  /* D8 + sub-code 3 */
     40     QUEX_BOM_SCSU_W2_TO_FE80 = 0x104,  /* D8 + sub-code 4 */
     41     QUEX_BOM_SCSU_W3_TO_FE80 = 0x105,  /* D8 + sub-code 5 */
     42     QUEX_BOM_SCSU_W4_TO_FE80 = 0x106,  /* D8 + sub-code 6 */
     43     QUEX_BOM_SCSU_W5_TO_FE80 = 0x107,  /* D8 + sub-code 7 */
     44     QUEX_BOM_SCSU_W6_TO_FE80 = 0x108,  /* D8 + sub-code 8 */
     45     QUEX_BOM_SCSU_W7_TO_FE80 = 0x109   /* D8 + sub-code 9 */
     46 } QUEX_TYPE_BOM;  /* NOTE: sub-codes reuse bits D0..D3, e.g. (QUEX_BOM_UTF_16_LE & QUEX_BOM_UTF_8) != 0. */
     47 
     48 /* Table of (known) BOMs _____________________________________________________
     49  *
     50  *         UTF_8            { 0xEF, 0xBB, 0xBF }
     51  *         UTF_16_BE        { 0xFE, 0xFF }
     52  *         UTF_16_LE        { 0xFF, 0xFE }
     53  *         UTF_32_BE        { 0x00, 0x00, 0xFE, 0xFF }
     54  *         UTF_32_LE        { 0xFF, 0xFE, 0x00, 0x00 }
     55  *         UTF_7_38         { 0x2B, 0x2F, 0x76, 0x38 }
     56  *         UTF_7_39         { 0x2B, 0x2F, 0x76, 0x39 }
     57  *         UTF_7_2B         { 0x2B, 0x2F, 0x76, 0x2B }
     58  *         UTF_7_2F         { 0x2B, 0x2F, 0x76, 0x2F }
     59  *         UTF_1            { 0xF7, 0x64, 0x4C }
     60  *         UTF_EBCDIC       { 0xDD, 0x73, 0x66, 0x73 }
     61  *         SCSU             { 0x0E, 0xFE, 0xFF }
     62  *         SCSU_TO_UCS      { 0x0F, 0xFE, 0xFF }
     63  *         SCSU_W0_TO_FE80  { 0x18, 0xA5, 0xFF }
     64  *         SCSU_W1_TO_FE80  { 0x19, 0xA5, 0xFF }
     65  *         SCSU_W2_TO_FE80  { 0x1A, 0xA5, 0xFF }
     66  *         SCSU_W3_TO_FE80  { 0x1B, 0xA5, 0xFF }
     67  *         SCSU_W4_TO_FE80  { 0x1C, 0xA5, 0xFF }
     68  *         SCSU_W5_TO_FE80  { 0x1D, 0xA5, 0xFF }
     69  *         SCSU_W6_TO_FE80  { 0x1E, 0xA5, 0xFF }
     70  *         SCSU_W7_TO_FE80  { 0x1F, 0xA5, 0xFF }
     71  *         BOCU_1_x         { 0xFB, 0xEE, 0x28, 0xFF }
     72  *         BOCU_1           { 0xFB, 0xEE, 0x28 }
     73  *         GB_18030         { 0x84, 0x31, 0x95, 0x33 }                         
     74  *_____________________________________________________________________________*/
     75 
     76 QUEX_NAMESPACE_QUEX_OPEN
     77 /* Presumably cuts a leading BOM from 'InputHandle' and returns its type -- defined elsewhere; confirm. */
     78 extern QUEX_TYPE_BOM
     79 QUEXED_DEF(bom_snap)(__QUEX_STD_FILE* InputHandle);
     80 /* Not declared when __QUEX_OPTION_PLAIN_C is defined: bom_snap() templated on the input stream type. */
     81 #if ! defined(__QUEX_OPTION_PLAIN_C)
     82 template <class InputStream> QUEX_INLINE QUEX_TYPE_BOM
     83 QUEXED_DEF(bom_snap)(InputStream* p_input_stream);
     84 #endif
     85 /* NOTE(review): 'read_n' looks like the number of valid bytes in 'buffer', '*byte_n' the BOM length -- confirm. */
     86 extern QUEX_TYPE_BOM
     87 QUEXED_DEF(__bom_snap_core)(uint8_t buffer[4], size_t read_n, size_t* byte_n);
     88 /* NOTE(review): presumably classifies the BOM at the start of 'Buffer'; '*n' likely gets its byte count -- confirm. */
     89 extern QUEX_TYPE_BOM
     90 QUEXED_DEF(bom_identify)(const uint8_t* const Buffer, size_t* n);
     91 /* Returns a string for 'BOM'; presumably its human-readable name -- confirm against the definition. */
     92 extern const char*
     93 QUEXED_DEF(bom_name)(QUEX_TYPE_BOM BOM);
     94 
     95 QUEX_NAMESPACE_QUEX_CLOSE
     96 
     97 #endif /* __QUEX_INCLUDE_GUARD__BOM */
     98