sheepy

build system (sheepy) and package manager (spm) for C
git clone https://spartatek.se/git/sheepy.git
Log | Files | Refs | README | LICENSE

LexatomLoader_Converter.i (20470B)


      1 /* -*- C++ -*-  vim: set syntax=cpp:
      2  * (C) 2007-2015 Frank-Rene Schaefer  */
      3 #ifndef  __QUEX_INCLUDE_GUARD__BUFFER__LEXATOMS__LEXATOM_LOADER_CONVERTER_I
      4 #define  __QUEX_INCLUDE_GUARD__BUFFER__LEXATOMS__LEXATOM_LOADER_CONVERTER_I
      5 
      6 #include "MemoryManager"
      7 #include "LexatomLoader"
      8 #include "LexatomLoader_Converter"
      9 #include "iconv-argument-types.h"
     10 
     11 
     12 QUEX_NAMESPACE_MAIN_OPEN
     13 
     14 QUEX_INLINE void
     15 QUEX_NAME(LexatomLoader_Converter_construct)(QUEX_NAME(LexatomLoader_Converter)* me, 
     16                                              QUEX_NAME(ByteLoader)*              byte_loader,
     17                                              QUEX_NAME(Converter)*               converter,
     18                                              size_t                              RawMemorySize);
     19 
     20 QUEX_INLINE void   
     21 QUEX_NAME(LexatomLoader_Converter_stomach_clear)(QUEX_NAME(LexatomLoader)* alter_ego);
     22 
     23 QUEX_INLINE void   
     24 QUEX_NAME(LexatomLoader_Converter_destruct_self)(QUEX_NAME(LexatomLoader)* alter_ego);
     25 
     26 QUEX_INLINE ptrdiff_t   
     27 QUEX_NAME(LexatomLoader_Converter_stomach_byte_n)(QUEX_NAME(LexatomLoader)* alter_ego);
     28 
     29 QUEX_INLINE size_t 
     30 QUEX_NAME(LexatomLoader_Converter_load_lexatoms)(QUEX_NAME(LexatomLoader)*  alter_ego,
     31                                                  QUEX_TYPE_LEXATOM*         RegionBeginP, 
     32                                                  const size_t               N,
     33                                                  bool*                      end_of_stream_f,
     34                                                  bool*                      encoding_error_f);
     35 QUEX_INLINE void 
     36 QUEX_NAME(LexatomLoader_Converter_fill_prepare)(QUEX_NAME(LexatomLoader)*  alter_ego,
     37                                                 QUEX_NAME(Buffer)*         buffer,
     38                                                 void**                     begin_p,
     39                                                 const void**               end_p);
     40 
     41 QUEX_INLINE void 
     42 QUEX_NAME(LexatomLoader_Converter_get_fill_boundaries)(QUEX_NAME(LexatomLoader)*  alter_ego,
     43                                                        QUEX_NAME(Buffer)*         buffer,
     44                                                        void**       begin_p, 
     45                                                        const void** end_p);
     46 
     47 QUEX_INLINE ptrdiff_t 
     48 QUEX_NAME(LexatomLoader_Converter_fill_finish)(QUEX_NAME(LexatomLoader)*   alter_ego,
     49                                               QUEX_TYPE_LEXATOM*       insertion_p,
     50                                               const QUEX_TYPE_LEXATOM* BufferEnd,
     51                                               const void*                ContentEnd);
     52 
     53 QUEX_INLINE E_LoadResult
     54 QUEX_NAME(LexatomLoader_call_converter)(QUEX_NAME(LexatomLoader_Converter)* me,
     55                                         QUEX_TYPE_LEXATOM**                 insertion_p,
     56                                         QUEX_TYPE_LEXATOM*                  RegionBeginP,
     57                                         const QUEX_TYPE_LEXATOM*            RegionEndP);
     58 QUEX_INLINE void
     59 QUEX_NAME(LexatomLoader_remove_spurious_BOM)(QUEX_NAME(LexatomLoader_Converter)* me,
     60                                             QUEX_TYPE_LEXATOM**                  buffer_insertion_p,
     61                                             QUEX_TYPE_LEXATOM*                   RegionBeginP);
     62 
     63 QUEX_INLINE void   
     64 QUEX_NAME(RawBuffer_init)(QUEX_NAME(RawBuffer)* me, 
     65                           uint8_t* Begin, size_t SizeInBytes);
     66 QUEX_INLINE void 
     67 QUEX_NAME(RawBuffer_move_away_passed_content)(QUEX_NAME(RawBuffer)*  me);
     68 
     69 QUEX_INLINE bool 
     70 QUEX_NAME(RawBuffer_load)(QUEX_NAME(RawBuffer)*  me,
     71                           QUEX_NAME(ByteLoader)*            byte_loader, 
     72                           bool*                  end_of_stream_f);
     73 
     74 QUEX_INLINE QUEX_NAME(LexatomLoader)*
     75 QUEX_NAME(LexatomLoader_Converter_new)(QUEX_NAME(ByteLoader)* byte_loader,
     76                                       QUEX_NAME(Converter)*  converter,
     77                                       size_t                 RawMemorySize)
     78 { 
     79     QUEX_NAME(LexatomLoader_Converter)*  me;
     80     __quex_assert(RawMemorySize >= 6);  /* UTF-8 char can be 6 bytes long    */
     81 
     82     if( ! converter ) {
     83         return (QUEX_NAME(LexatomLoader)*)0;
     84     }
     85 #   if 0 /* No longer a constraint:                                          */
     86     else if( byte_loader && ! byte_loader->binary_mode_f ) {
     87         /* Binary mode is ABSOLUTELY REQUIRED for converters, otherwise the 
     88          * positioning with respect to the raw buffer becomes unreliable.    */
     89         __QUEX_STD_printf("! LexatomLoader_Converter_new: QUEX_NAME(ByteLoader )is not in binary mode. !\n");
     90         __QUEX_STD_printf("! Has file been opened in binary mode?                          !\n");
     91         return (QUEX_NAME(LexatomLoader)*)0;
     92     }
     93 #   endif
     94 
     95     /* The 'LexatomLoader_Converter' is the same host for all converters.
     96      * Converters are pointed to by 'converter',                             */
     97     me = (QUEX_NAME(LexatomLoader_Converter)*) \
     98           QUEXED(MemoryManager_allocate)(sizeof(QUEX_NAME(LexatomLoader_Converter)),
     99                                          E_MemoryObjectType_BUFFER_FILLER);
    100     if( ! me) return (QUEX_NAME(LexatomLoader)*)0;
    101 
    102     QUEX_NAME(LexatomLoader_Converter_construct)(me, byte_loader, converter, RawMemorySize);
    103 
    104     return &me->base;
    105 
    106 }
    107 
    108 QUEX_INLINE void
    109 QUEX_NAME(LexatomLoader_Converter_construct)(QUEX_NAME(LexatomLoader_Converter)* me, 
    110                                             QUEX_NAME(ByteLoader)*             byte_loader,
    111                                             QUEX_NAME(Converter)*              converter,
    112                                             size_t                             RawMemorySize)
    113 {
    114     /* A linear relationship between stream position and lexatom index 
    115      * requires that: (1) The input stream is in 'binary mode'. That is, the 
    116      * stream position is proportional to the number of bytes that lie 
    117      * behind. (2) The input codec is of fixed size, i.e. 
    118      * converter->byte_n_per_lexatom != -1.                                */ 
    119     ptrdiff_t   byte_n_per_lexatom = byte_loader && byte_loader->binary_mode_f ? 
    120                                        converter->byte_n_per_lexatom : -1;
    121     uint8_t*    raw_memory;
    122 
    123     /* NO LONGER VALID: "Binary mode is absolutely required for converters,
    124      *                   otherwise the positioning with respect to the raw 
    125      *                   buffer becomes unreliable."                         */
    126     /* __quex_assert( ! byte_loader || byte_loader->binary_mode_f );         */
    127 
    128     QUEX_NAME(LexatomLoader_setup)(&me->base,
    129                                   QUEX_NAME(LexatomLoader_Converter_load_lexatoms),
    130                                   QUEX_NAME(LexatomLoader_Converter_stomach_byte_n),
    131                                   QUEX_NAME(LexatomLoader_Converter_stomach_clear),
    132                                   QUEX_NAME(LexatomLoader_Converter_destruct_self),
    133                                   QUEX_NAME(LexatomLoader_Converter_fill_prepare),
    134                                   QUEX_NAME(LexatomLoader_Converter_fill_finish),
    135                                   QUEX_NAME(LexatomLoader_Converter_get_fill_boundaries),
    136                                   byte_loader,
    137                                   byte_n_per_lexatom);
    138 
    139     /* Initialize the conversion operations                                  */
    140     me->converter = converter;
    141     me->converter->virginity_f = true;
    142 
    143     /* Initialize the raw buffer that holds the plain bytes of the input file
    144      * (setup to trigger initial reload)                                     */
    145     raw_memory = QUEXED(MemoryManager_allocate)(RawMemorySize, 
    146                                                 E_MemoryObjectType_BUFFER_RAW);
    147     QUEX_NAME(RawBuffer_init)(&me->raw_buffer, raw_memory, RawMemorySize);
    148 
    149     QUEX_ASSERT_RAW_BUFFER(&me->raw_buffer);
    150 }
    151 
    152 QUEX_INLINE ptrdiff_t   
    153 QUEX_NAME(LexatomLoader_Converter_stomach_byte_n)(QUEX_NAME(LexatomLoader)*  alter_ego)
    154 /* RETURNS: Number of bytes that were read from the input stream, but remained
    155  *                 inside the 'stomach' without being filled into the drain.
    156  *          '-1'   reports that the LexatomLoader cannot tell how many bytes are
    157  *                 in the stomach.                                           */
    158 {
    159     QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    160     ptrdiff_t  byte_n;
    161     ptrdiff_t  converter_byte_n;
    162 
    163     byte_n = me->raw_buffer.fill_end_p - me->raw_buffer.next_to_convert_p;
    164 
    165     /* me->converter->stomach_byte_n   == 0   => converter does NOT keep any 
    166      *                                           bytes in stomach.
    167      * me->converter->stomach_byte_n() == -1  => converter CANNOT tell how 
    168      *                                           many bytes in stomach.      */
    169     if( me->converter->stomach_byte_n ) {
    170         converter_byte_n = me->converter->stomach_byte_n(me->converter);
    171         if( converter_byte_n == - 1) return (ptrdiff_t)-1;
    172         byte_n += converter_byte_n;
    173     }
    174 
    175     return byte_n;
    176 }
    177 
    178 QUEX_INLINE void   
    179 QUEX_NAME(LexatomLoader_Converter_stomach_clear)(QUEX_NAME(LexatomLoader)* alter_ego)
    180 {
    181     QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    182     QUEX_NAME(RawBuffer_init)(&me->raw_buffer, 0, 0);
    183     if( me->converter->stomach_clear ) me->converter->stomach_clear(me->converter);
    184 }
    185 
    186 QUEX_INLINE void   
    187 QUEX_NAME(LexatomLoader_Converter_destruct_self)(QUEX_NAME(LexatomLoader)* alter_ego)
    188 /* destruct_self: Free resources occupied by 'me' BUT NOT 'myself'.
    189  * delete_self:   Free resources occupied by 'me' AND 'myself'.              */
    190 { 
    191     QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    192 
    193     if( ! me ) return;
    194 
    195     QUEX_ASSERT_RAW_BUFFER(&me->raw_buffer);
    196 
    197     if( me->converter && me->converter->ownership == E_Ownership_LEXICAL_ANALYZER ) {
    198         me->converter->delete_self(me->converter); 
    199     }
    200 
    201     QUEXED(MemoryManager_free)((void*)me->raw_buffer.begin,
    202                                E_MemoryObjectType_BUFFER_RAW); 
    203 }
    204 
    205 QUEX_INLINE size_t 
    206 QUEX_NAME(LexatomLoader_Converter_load_lexatoms)(QUEX_NAME(LexatomLoader)* alter_ego,
    207                                                  QUEX_TYPE_LEXATOM*        RegionBeginP, 
    208                                                  const size_t              N,
    209                                                  bool*                     end_of_stream_f,
    210                                                  bool*                     encoding_error_f)
    211 /* Loads content into the raw buffer, convert it and write it to the engine's
    212  * buffer. The region where to write into the engine's buffer expands from
    213  * 'RegionBeginP' to 'N' lexatoms after it.                                
    214  *
    215  * RETURNS: Number of loaded lexatoms into the given region.               */
    216 {
    217     QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    218     QUEX_NAME(RawBuffer)*               raw = &me->raw_buffer;
    219     QUEX_TYPE_LEXATOM*                  buffer_insertion_p = RegionBeginP;
    220     const QUEX_TYPE_LEXATOM*            BufferRegionEnd    = &RegionBeginP[N];
    221     E_LoadResult                        load_result;
    222     bool                                raw_load_complete_f;
    223     bool                                raw_end_of_stream_f;
    224 #   if 0
    225     int                              i;
    226     QUEX_TYPE_LEXATOM*               buffer_insertion_begin_p;
    227 #   endif
    228     (void)encoding_error_f;
    229     (void)raw;
    230 
    231     __quex_assert(me->converter);
    232     __quex_assert(alter_ego); 
    233     __quex_assert(RegionBeginP); 
    234     QUEX_ASSERT_RAW_BUFFER(raw);
    235 
    236     /* NOT: QUEX_IF_ASSERTS_poison(RegionBeginP, &RegionBeginP[N]);
    237      * The buffer must remain intact, in case that not all is loaded.        */
    238 
    239     /* Some converters keep some content internally. So, it is a more general
    240      * solution to convert first and reload new bytes upon need.             */
    241     load_result = QUEX_NAME(LexatomLoader_call_converter)(me, &buffer_insertion_p, 
    242                                                           RegionBeginP,
    243                                                           BufferRegionEnd);
    244 
    245     /* Convert, as long as the following two hold:
    246      *  (i)  Drain is not totally filled.
    247      *  (ii) Loading of 'to-be-converted bytes' received enough to fill the
    248      *       raw buffer to its limits. The contrary indicates that there is 
    249      *       an transmission interuption, or even and end-of-stream. In both
    250      *       cases, the analyser may continue, before the next try.          */
    251     raw_end_of_stream_f = false;
    252     raw_load_complete_f = true;
    253     while( load_result == E_LoadResult_INCOMPLETE && raw_load_complete_f ) {
    254         __quex_assert(buffer_insertion_p < BufferRegionEnd);  /* '==' break  */
    255 
    256         if( ! raw_end_of_stream_f ) {
    257             raw_load_complete_f = QUEX_NAME(RawBuffer_load)(&me->raw_buffer, 
    258                                                             me->base.byte_loader,
    259                                                             &raw_end_of_stream_f);
    260         } else  {
    261             raw_load_complete_f = false;
    262         }
    263 
    264         /* next_to_convert_p == raw->fill_end_p => nothing happens.          */
    265         load_result = QUEX_NAME(LexatomLoader_call_converter)(me, &buffer_insertion_p, 
    266                                                               RegionBeginP,
    267                                                               BufferRegionEnd);
    268     }
    269 
    270     __quex_assert(BufferRegionEnd >= buffer_insertion_p);
    271     /* NOT: QUEX_IF_ASSERTS_poison(buffer_insertion_p, BufferRegionEnd);
    272      *      Buffer MUST be left as is, in case of ERROR!                     */
    273 
    274     switch( load_result ) {
    275         case E_LoadResult_COMPLETE:
    276             break;
    277         case E_LoadResult_INCOMPLETE:
    278             /* Some ByteLoader-s (socket based ones, for example) may not be
    279              * able to fill the whole raw buffer, but still the end of stream
    280              * is not reached. Only, if the raw buffer detected end of stream
    281              * the end of stream can be claimed.                             */
    282             if( raw_end_of_stream_f ) {
    283                 *end_of_stream_f = true;
    284             }
    285             /* __quex_assert(raw->next_to_convert_p == raw->fill_end_p);     */
    286             /* Nothing can be loaded; Everything is converted.               */
    287 
    288             break;
    289         case E_LoadResult_BAD_LEXATOM:
    290             *encoding_error_f = true;
    291             break;
    292 
    293         case E_LoadResult_NO_MORE_DATA:
    294             /* A converter does not load--when called, there should be data.
    295              * => Cannot complain 'NO_MORE_DATA' (end of stream).            */
    296         default:
    297             __quex_assert(false);
    298     }
    299 
    300     /* 'buffer_insertion_p' was updated by 'convert' and points behind the 
    301      * last byte that was converted.                                         */ 
    302     return (size_t)(buffer_insertion_p - RegionBeginP);
    303 }
    304 
    305 QUEX_INLINE void 
    306 QUEX_NAME(LexatomLoader_Converter_fill_prepare)(QUEX_NAME(LexatomLoader)*  alter_ego,
    307                                                 QUEX_NAME(Buffer)*         buffer,
    308                                                 void**                     begin_p,
    309                                                 const void**               end_p)
    310 {
    311     (void)buffer;
    312     QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    313 
    314     QUEX_NAME(RawBuffer_move_away_passed_content)(&me->raw_buffer);
    315 
    316     alter_ego->derived.get_fill_boundaries(alter_ego, buffer, begin_p, end_p);
    317 }
    318 
    319 QUEX_INLINE void 
    320 QUEX_NAME(LexatomLoader_Converter_get_fill_boundaries)(QUEX_NAME(LexatomLoader)*  alter_ego,
    321                                                        QUEX_NAME(Buffer)*         buffer,
    322                                                        void**                     begin_p, 
    323                                                        const void**               end_p)
    324 {
    325     QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    326     (void)buffer;
    327 
    328     *begin_p = (void*)me->raw_buffer.fill_end_p; 
    329     *end_p   = (void*)me->raw_buffer.memory_end;
    330 }
    331 
    332 QUEX_INLINE ptrdiff_t 
    333 QUEX_NAME(LexatomLoader_Converter_fill_finish)(QUEX_NAME(LexatomLoader)* alter_ego,
    334                                               QUEX_TYPE_LEXATOM*         RegionBeginP,
    335                                               const QUEX_TYPE_LEXATOM*   RegionEndP,
    336                                               const void*                FilledEndP_raw)
    337 /* Converts what has been filled into the 'raw_buffer' until 'FilledEndP
    338  * and stores it into the buffer.                                            */
    339 {
    340     QUEX_NAME(LexatomLoader_Converter)* me  = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego;
    341     QUEX_NAME(RawBuffer)*               raw = &me->raw_buffer;
    342     QUEX_TYPE_LEXATOM*                  insertion_p = RegionBeginP;
    343     uint8_t*                            FilledEndP = (uint8_t*)FilledEndP_raw;
    344     E_LoadResult                        load_result;
    345 
    346     __quex_assert(FilledEndP >= raw->next_to_convert_p);
    347     __quex_assert(FilledEndP <= raw->memory_end);
    348 
    349     /* Assert triggers => FilledEndP points WRONGLY BEHIND terminating zero. 
    350      * (FilledEndP, may point to it, at max.)                                */
    351     __quex_assert(   FilledEndP     <= raw->next_to_convert_p 
    352                   || FilledEndP[-1] != QUEX_SETTING_BUFFER_LIMIT_CODE);
    353 
    354     raw->fill_end_p = FilledEndP;   
    355     QUEX_ASSERT_RAW_BUFFER(raw);
    356 
    357     load_result = QUEX_NAME(LexatomLoader_call_converter)(me, &insertion_p, 
    358                                                           RegionBeginP,
    359                                                           RegionEndP);
    360     (void)load_result;
    361     
    362     QUEX_ASSERT_RAW_BUFFER(raw);
    363     return insertion_p - RegionBeginP;
    364 }
    365 
    366 QUEX_INLINE E_LoadResult
    367 QUEX_NAME(LexatomLoader_call_converter)(QUEX_NAME(LexatomLoader_Converter)* me,
    368                                         QUEX_TYPE_LEXATOM**                 insertion_p,
    369                                         QUEX_TYPE_LEXATOM*                  RegionBeginP,
    370                                         const QUEX_TYPE_LEXATOM*            RegionEndP)
    371 {
    372     QUEX_NAME(RawBuffer)*  raw = &me->raw_buffer;
    373     E_LoadResult           load_result;
    374     (void)load_result;
    375 
    376     load_result = me->converter->convert(me->converter, 
    377                                          &raw->next_to_convert_p, raw->fill_end_p,
    378                                          insertion_p, RegionEndP);
    379     
    380     QUEX_NAME(LexatomLoader_remove_spurious_BOM)(me, insertion_p, RegionBeginP);
    381     me->converter->virginity_f = false;
    382 
    383     /* A converter does not load => It cannot report 'end of stream'     */
    384     __quex_assert(   load_result == E_LoadResult_COMPLETE
    385                   || load_result == E_LoadResult_INCOMPLETE
    386                   || load_result == E_LoadResult_BAD_LEXATOM);
    387     return load_result;
    388 }
    389 
    390 QUEX_INLINE void
    391 QUEX_NAME(LexatomLoader_remove_spurious_BOM)(QUEX_NAME(LexatomLoader_Converter)* me,
    392                                              QUEX_TYPE_LEXATOM**                 buffer_insertion_p,
    393                                              QUEX_TYPE_LEXATOM*                  RegionBeginP)
    394 {
    395     uint32_t  first_lexatom;
    396 
    397     if( *buffer_insertion_p == RegionBeginP ) return;
    398 
    399     first_lexatom = (uint32_t)RegionBeginP[0];    /* avoid warning */
    400     if( first_lexatom != 0xFEFF ) return;
    401 
    402     if( ! me->converter->virginity_f ) {
    403         QUEX_ERROR_EXIT("Converter produced BOM upon not-first call to 'convert'\n"
    404                         "Better make sure that converter NEVER produces BOM.\n"
    405                         "(May be, by specifiying the endianness of 'FromCoding' or 'ToCoding')\n");
    406     }
    407 
    408     __QUEX_STD_memmove(RegionBeginP, &RegionBeginP[1], 
    409                        (size_t)(*buffer_insertion_p - &RegionBeginP[1]) * sizeof(QUEX_TYPE_LEXATOM)); 
    410 
    411     *buffer_insertion_p = &(*buffer_insertion_p)[-1];
    412 }
    413 
    414 QUEX_NAMESPACE_MAIN_CLOSE
    415 
    416 #include "LexatomLoader.i"
    417 #include "LexatomLoader_Converter_RawBuffer.i"
    418 #include "Converter.i"
    419 
    420 #ifdef QUEX_OPTION_CONVERTER_ICONV
    421 #   include <quex/code_base/buffer/lexatoms/converter/iconv/Converter_IConv.i>
    422 #endif
    423 #ifdef QUEX_OPTION_CONVERTER_ICU
    424 #   include <quex/code_base/buffer/lexatoms/converter/icu/Converter_ICU.i>
    425 #endif
    426 
    427 
    428 #endif /*  __QUEX_INCLUDE_GUARD__BUFFER__LEXATOMS__LEXATOM_LOADER_CONVERTER_I */