LexatomLoader_Converter.i (20470B)
1 /* -*- C++ -*- vim: set syntax=cpp: 2 * (C) 2007-2015 Frank-Rene Schaefer */ 3 #ifndef __QUEX_INCLUDE_GUARD__BUFFER__LEXATOMS__LEXATOM_LOADER_CONVERTER_I 4 #define __QUEX_INCLUDE_GUARD__BUFFER__LEXATOMS__LEXATOM_LOADER_CONVERTER_I 5 6 #include "MemoryManager" 7 #include "LexatomLoader" 8 #include "LexatomLoader_Converter" 9 #include "iconv-argument-types.h" 10 11 12 QUEX_NAMESPACE_MAIN_OPEN 13 14 QUEX_INLINE void 15 QUEX_NAME(LexatomLoader_Converter_construct)(QUEX_NAME(LexatomLoader_Converter)* me, 16 QUEX_NAME(ByteLoader)* byte_loader, 17 QUEX_NAME(Converter)* converter, 18 size_t RawMemorySize); 19 20 QUEX_INLINE void 21 QUEX_NAME(LexatomLoader_Converter_stomach_clear)(QUEX_NAME(LexatomLoader)* alter_ego); 22 23 QUEX_INLINE void 24 QUEX_NAME(LexatomLoader_Converter_destruct_self)(QUEX_NAME(LexatomLoader)* alter_ego); 25 26 QUEX_INLINE ptrdiff_t 27 QUEX_NAME(LexatomLoader_Converter_stomach_byte_n)(QUEX_NAME(LexatomLoader)* alter_ego); 28 29 QUEX_INLINE size_t 30 QUEX_NAME(LexatomLoader_Converter_load_lexatoms)(QUEX_NAME(LexatomLoader)* alter_ego, 31 QUEX_TYPE_LEXATOM* RegionBeginP, 32 const size_t N, 33 bool* end_of_stream_f, 34 bool* encoding_error_f); 35 QUEX_INLINE void 36 QUEX_NAME(LexatomLoader_Converter_fill_prepare)(QUEX_NAME(LexatomLoader)* alter_ego, 37 QUEX_NAME(Buffer)* buffer, 38 void** begin_p, 39 const void** end_p); 40 41 QUEX_INLINE void 42 QUEX_NAME(LexatomLoader_Converter_get_fill_boundaries)(QUEX_NAME(LexatomLoader)* alter_ego, 43 QUEX_NAME(Buffer)* buffer, 44 void** begin_p, 45 const void** end_p); 46 47 QUEX_INLINE ptrdiff_t 48 QUEX_NAME(LexatomLoader_Converter_fill_finish)(QUEX_NAME(LexatomLoader)* alter_ego, 49 QUEX_TYPE_LEXATOM* insertion_p, 50 const QUEX_TYPE_LEXATOM* BufferEnd, 51 const void* ContentEnd); 52 53 QUEX_INLINE E_LoadResult 54 QUEX_NAME(LexatomLoader_call_converter)(QUEX_NAME(LexatomLoader_Converter)* me, 55 QUEX_TYPE_LEXATOM** insertion_p, 56 QUEX_TYPE_LEXATOM* RegionBeginP, 57 const QUEX_TYPE_LEXATOM* RegionEndP); 58 QUEX_INLINE void 59 QUEX_NAME(LexatomLoader_remove_spurious_BOM)(QUEX_NAME(LexatomLoader_Converter)* me, 60 QUEX_TYPE_LEXATOM** buffer_insertion_p, 61 QUEX_TYPE_LEXATOM* RegionBeginP); 62 63 QUEX_INLINE void 64 QUEX_NAME(RawBuffer_init)(QUEX_NAME(RawBuffer)* me, 65 uint8_t* Begin, size_t SizeInBytes); 66 QUEX_INLINE void 67 QUEX_NAME(RawBuffer_move_away_passed_content)(QUEX_NAME(RawBuffer)* me); 68 69 QUEX_INLINE bool 70 QUEX_NAME(RawBuffer_load)(QUEX_NAME(RawBuffer)* me, 71 QUEX_NAME(ByteLoader)* byte_loader, 72 bool* end_of_stream_f); 73 74 QUEX_INLINE QUEX_NAME(LexatomLoader)* 75 QUEX_NAME(LexatomLoader_Converter_new)(QUEX_NAME(ByteLoader)* byte_loader, 76 QUEX_NAME(Converter)* converter, 77 size_t RawMemorySize) 78 { 79 QUEX_NAME(LexatomLoader_Converter)* me; 80 __quex_assert(RawMemorySize >= 6); /* UTF-8 char can be 6 bytes long */ 81 82 if( ! converter ) { 83 return (QUEX_NAME(LexatomLoader)*)0; 84 } 85 # if 0 /* No longer a constraint: */ 86 else if( byte_loader && ! byte_loader->binary_mode_f ) { 87 /* Binary mode is ABSOLUTELY REQUIRED for converters, otherwise the 88 * positioning with respect to the raw buffer becomes unreliable. */ 89 __QUEX_STD_printf("! LexatomLoader_Converter_new: QUEX_NAME(ByteLoader )is not in binary mode. !\n"); 90 __QUEX_STD_printf("! Has file been opened in binary mode? !\n"); 91 return (QUEX_NAME(LexatomLoader)*)0; 92 } 93 # endif 94 95 /* The 'LexatomLoader_Converter' is the same host for all converters. 96 * Converters are pointed to by 'converter', */ 97 me = (QUEX_NAME(LexatomLoader_Converter)*) \ 98 QUEXED(MemoryManager_allocate)(sizeof(QUEX_NAME(LexatomLoader_Converter)), 99 E_MemoryObjectType_BUFFER_FILLER); 100 if( ! me) return (QUEX_NAME(LexatomLoader)*)0; 101 102 QUEX_NAME(LexatomLoader_Converter_construct)(me, byte_loader, converter, RawMemorySize); 103 104 return &me->base; 105 106 } 107 108 QUEX_INLINE void 109 QUEX_NAME(LexatomLoader_Converter_construct)(QUEX_NAME(LexatomLoader_Converter)* me, 110 QUEX_NAME(ByteLoader)* byte_loader, 111 QUEX_NAME(Converter)* converter, 112 size_t RawMemorySize) 113 { 114 /* A linear relationship between stream position and lexatom index 115 * requires that: (1) The input stream is in 'binary mode'. That is, the 116 * stream position is proportional to the number of bytes that lie 117 * behind. (2) The input codec is of fixed size, i.e. 118 * converter->byte_n_per_lexatom != -1. */ 119 ptrdiff_t byte_n_per_lexatom = byte_loader && byte_loader->binary_mode_f ? 120 converter->byte_n_per_lexatom : -1; 121 uint8_t* raw_memory; 122 123 /* NO LONGER VALID: "Binary mode is absolutely required for converters, 124 * otherwise the positioning with respect to the raw 125 * buffer becomes unreliable." */ 126 /* __quex_assert( ! byte_loader || byte_loader->binary_mode_f ); */ 127 128 QUEX_NAME(LexatomLoader_setup)(&me->base, 129 QUEX_NAME(LexatomLoader_Converter_load_lexatoms), 130 QUEX_NAME(LexatomLoader_Converter_stomach_byte_n), 131 QUEX_NAME(LexatomLoader_Converter_stomach_clear), 132 QUEX_NAME(LexatomLoader_Converter_destruct_self), 133 QUEX_NAME(LexatomLoader_Converter_fill_prepare), 134 QUEX_NAME(LexatomLoader_Converter_fill_finish), 135 QUEX_NAME(LexatomLoader_Converter_get_fill_boundaries), 136 byte_loader, 137 byte_n_per_lexatom); 138 139 /* Initialize the conversion operations */ 140 me->converter = converter; 141 me->converter->virginity_f = true; 142 143 /* Initialize the raw buffer that holds the plain bytes of the input file 144 * (setup to trigger initial reload) */ 145 raw_memory = QUEXED(MemoryManager_allocate)(RawMemorySize, 146 E_MemoryObjectType_BUFFER_RAW); 147 QUEX_NAME(RawBuffer_init)(&me->raw_buffer, raw_memory, RawMemorySize); 148 149 QUEX_ASSERT_RAW_BUFFER(&me->raw_buffer); 150 } 151 152 QUEX_INLINE ptrdiff_t 153 QUEX_NAME(LexatomLoader_Converter_stomach_byte_n)(QUEX_NAME(LexatomLoader)* alter_ego) 154 /* RETURNS: Number of bytes that were read from the input stream, but remained 155 * inside the 'stomach' without being filled into the drain. 156 * '-1' reports that the LexatomLoader cannot tell how many bytes are 157 * in the stomach. */ 158 { 159 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 160 ptrdiff_t byte_n; 161 ptrdiff_t converter_byte_n; 162 163 byte_n = me->raw_buffer.fill_end_p - me->raw_buffer.next_to_convert_p; 164 165 /* me->converter->stomach_byte_n == 0 => converter does NOT keep any 166 * bytes in stomach. 167 * me->converter->stomach_byte_n() == -1 => converter CANNOT tell how 168 * many bytes in stomach. */ 169 if( me->converter->stomach_byte_n ) { 170 converter_byte_n = me->converter->stomach_byte_n(me->converter); 171 if( converter_byte_n == - 1) return (ptrdiff_t)-1; 172 byte_n += converter_byte_n; 173 } 174 175 return byte_n; 176 } 177 178 QUEX_INLINE void 179 QUEX_NAME(LexatomLoader_Converter_stomach_clear)(QUEX_NAME(LexatomLoader)* alter_ego) 180 { 181 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 182 QUEX_NAME(RawBuffer_init)(&me->raw_buffer, 0, 0); 183 if( me->converter->stomach_clear ) me->converter->stomach_clear(me->converter); 184 } 185 186 QUEX_INLINE void 187 QUEX_NAME(LexatomLoader_Converter_destruct_self)(QUEX_NAME(LexatomLoader)* alter_ego) 188 /* destruct_self: Free resources occupied by 'me' BUT NOT 'myself'. 189 * delete_self: Free resources occupied by 'me' AND 'myself'. */ 190 { 191 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 192 193 if( ! me ) return; 194 195 QUEX_ASSERT_RAW_BUFFER(&me->raw_buffer); 196 197 if( me->converter && me->converter->ownership == E_Ownership_LEXICAL_ANALYZER ) { 198 me->converter->delete_self(me->converter); 199 } 200 201 QUEXED(MemoryManager_free)((void*)me->raw_buffer.begin, 202 E_MemoryObjectType_BUFFER_RAW); 203 } 204 205 QUEX_INLINE size_t 206 QUEX_NAME(LexatomLoader_Converter_load_lexatoms)(QUEX_NAME(LexatomLoader)* alter_ego, 207 QUEX_TYPE_LEXATOM* RegionBeginP, 208 const size_t N, 209 bool* end_of_stream_f, 210 bool* encoding_error_f) 211 /* Loads content into the raw buffer, convert it and write it to the engine's 212 * buffer. The region where to write into the engine's buffer expands from 213 * 'RegionBeginP' to 'N' lexatoms after it. 214 * 215 * RETURNS: Number of loaded lexatoms into the given region. */ 216 { 217 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 218 QUEX_NAME(RawBuffer)* raw = &me->raw_buffer; 219 QUEX_TYPE_LEXATOM* buffer_insertion_p = RegionBeginP; 220 const QUEX_TYPE_LEXATOM* BufferRegionEnd = &RegionBeginP[N]; 221 E_LoadResult load_result; 222 bool raw_load_complete_f; 223 bool raw_end_of_stream_f; 224 # if 0 225 int i; 226 QUEX_TYPE_LEXATOM* buffer_insertion_begin_p; 227 # endif 228 (void)encoding_error_f; 229 (void)raw; 230 231 __quex_assert(me->converter); 232 __quex_assert(alter_ego); 233 __quex_assert(RegionBeginP); 234 QUEX_ASSERT_RAW_BUFFER(raw); 235 236 /* NOT: QUEX_IF_ASSERTS_poison(RegionBeginP, &RegionBeginP[N]); 237 * The buffer must remain intact, in case that not all is loaded. */ 238 239 /* Some converters keep some content internally. So, it is a more general 240 * solution to convert first and reload new bytes upon need. */ 241 load_result = QUEX_NAME(LexatomLoader_call_converter)(me, &buffer_insertion_p, 242 RegionBeginP, 243 BufferRegionEnd); 244 245 /* Convert, as long as the following two hold: 246 * (i) Drain is not totally filled. 247 * (ii) Loading of 'to-be-converted bytes' received enough to fill the 248 * raw buffer to its limits. The contrary indicates that there is 249 * an transmission interuption, or even and end-of-stream. In both 250 * cases, the analyser may continue, before the next try. */ 251 raw_end_of_stream_f = false; 252 raw_load_complete_f = true; 253 while( load_result == E_LoadResult_INCOMPLETE && raw_load_complete_f ) { 254 __quex_assert(buffer_insertion_p < BufferRegionEnd); /* '==' break */ 255 256 if( ! raw_end_of_stream_f ) { 257 raw_load_complete_f = QUEX_NAME(RawBuffer_load)(&me->raw_buffer, 258 me->base.byte_loader, 259 &raw_end_of_stream_f); 260 } else { 261 raw_load_complete_f = false; 262 } 263 264 /* next_to_convert_p == raw->fill_end_p => nothing happens. */ 265 load_result = QUEX_NAME(LexatomLoader_call_converter)(me, &buffer_insertion_p, 266 RegionBeginP, 267 BufferRegionEnd); 268 } 269 270 __quex_assert(BufferRegionEnd >= buffer_insertion_p); 271 /* NOT: QUEX_IF_ASSERTS_poison(buffer_insertion_p, BufferRegionEnd); 272 * Buffer MUST be left as is, in case of ERROR! */ 273 274 switch( load_result ) { 275 case E_LoadResult_COMPLETE: 276 break; 277 case E_LoadResult_INCOMPLETE: 278 /* Some ByteLoader-s (socket based ones, for example) may not be 279 * able to fill the whole raw buffer, but still the end of stream 280 * is not reached. Only, if the raw buffer detected end of stream 281 * the end of stream can be claimed. */ 282 if( raw_end_of_stream_f ) { 283 *end_of_stream_f = true; 284 } 285 /* __quex_assert(raw->next_to_convert_p == raw->fill_end_p); */ 286 /* Nothing can be loaded; Everything is converted. */ 287 288 break; 289 case E_LoadResult_BAD_LEXATOM: 290 *encoding_error_f = true; 291 break; 292 293 case E_LoadResult_NO_MORE_DATA: 294 /* A converter does not load--when called, there should be data. 295 * => Cannot complain 'NO_MORE_DATA' (end of stream). */ 296 default: 297 __quex_assert(false); 298 } 299 300 /* 'buffer_insertion_p' was updated by 'convert' and points behind the 301 * last byte that was converted. */ 302 return (size_t)(buffer_insertion_p - RegionBeginP); 303 } 304 305 QUEX_INLINE void 306 QUEX_NAME(LexatomLoader_Converter_fill_prepare)(QUEX_NAME(LexatomLoader)* alter_ego, 307 QUEX_NAME(Buffer)* buffer, 308 void** begin_p, 309 const void** end_p) 310 { 311 (void)buffer; 312 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 313 314 QUEX_NAME(RawBuffer_move_away_passed_content)(&me->raw_buffer); 315 316 alter_ego->derived.get_fill_boundaries(alter_ego, buffer, begin_p, end_p); 317 } 318 319 QUEX_INLINE void 320 QUEX_NAME(LexatomLoader_Converter_get_fill_boundaries)(QUEX_NAME(LexatomLoader)* alter_ego, 321 QUEX_NAME(Buffer)* buffer, 322 void** begin_p, 323 const void** end_p) 324 { 325 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 326 (void)buffer; 327 328 *begin_p = (void*)me->raw_buffer.fill_end_p; 329 *end_p = (void*)me->raw_buffer.memory_end; 330 } 331 332 QUEX_INLINE ptrdiff_t 333 QUEX_NAME(LexatomLoader_Converter_fill_finish)(QUEX_NAME(LexatomLoader)* alter_ego, 334 QUEX_TYPE_LEXATOM* RegionBeginP, 335 const QUEX_TYPE_LEXATOM* RegionEndP, 336 const void* FilledEndP_raw) 337 /* Converts what has been filled into the 'raw_buffer' until 'FilledEndP 338 * and stores it into the buffer. */ 339 { 340 QUEX_NAME(LexatomLoader_Converter)* me = (QUEX_NAME(LexatomLoader_Converter)*)alter_ego; 341 QUEX_NAME(RawBuffer)* raw = &me->raw_buffer; 342 QUEX_TYPE_LEXATOM* insertion_p = RegionBeginP; 343 uint8_t* FilledEndP = (uint8_t*)FilledEndP_raw; 344 E_LoadResult load_result; 345 346 __quex_assert(FilledEndP >= raw->next_to_convert_p); 347 __quex_assert(FilledEndP <= raw->memory_end); 348 349 /* Assert triggers => FilledEndP points WRONGLY BEHIND terminating zero. 350 * (FilledEndP, may point to it, at max.) */ 351 __quex_assert( FilledEndP <= raw->next_to_convert_p 352 || FilledEndP[-1] != QUEX_SETTING_BUFFER_LIMIT_CODE); 353 354 raw->fill_end_p = FilledEndP; 355 QUEX_ASSERT_RAW_BUFFER(raw); 356 357 load_result = QUEX_NAME(LexatomLoader_call_converter)(me, &insertion_p, 358 RegionBeginP, 359 RegionEndP); 360 (void)load_result; 361 362 QUEX_ASSERT_RAW_BUFFER(raw); 363 return insertion_p - RegionBeginP; 364 } 365 366 QUEX_INLINE E_LoadResult 367 QUEX_NAME(LexatomLoader_call_converter)(QUEX_NAME(LexatomLoader_Converter)* me, 368 QUEX_TYPE_LEXATOM** insertion_p, 369 QUEX_TYPE_LEXATOM* RegionBeginP, 370 const QUEX_TYPE_LEXATOM* RegionEndP) 371 { 372 QUEX_NAME(RawBuffer)* raw = &me->raw_buffer; 373 E_LoadResult load_result; 374 (void)load_result; 375 376 load_result = me->converter->convert(me->converter, 377 &raw->next_to_convert_p, raw->fill_end_p, 378 insertion_p, RegionEndP); 379 380 QUEX_NAME(LexatomLoader_remove_spurious_BOM)(me, insertion_p, RegionBeginP); 381 me->converter->virginity_f = false; 382 383 /* A converter does not load => It cannot report 'end of stream' */ 384 __quex_assert( load_result == E_LoadResult_COMPLETE 385 || load_result == E_LoadResult_INCOMPLETE 386 || load_result == E_LoadResult_BAD_LEXATOM); 387 return load_result; 388 } 389 390 QUEX_INLINE void 391 QUEX_NAME(LexatomLoader_remove_spurious_BOM)(QUEX_NAME(LexatomLoader_Converter)* me, 392 QUEX_TYPE_LEXATOM** buffer_insertion_p, 393 QUEX_TYPE_LEXATOM* RegionBeginP) 394 { 395 uint32_t first_lexatom; 396 397 if( *buffer_insertion_p == RegionBeginP ) return; 398 399 first_lexatom = (uint32_t)RegionBeginP[0]; /* avoid warning */ 400 if( first_lexatom != 0xFEFF ) return; 401 402 if( ! me->converter->virginity_f ) { 403 QUEX_ERROR_EXIT("Converter produced BOM upon not-first call to 'convert'\n" 404 "Better make sure that converter NEVER produces BOM.\n" 405 "(May be, by specifiying the endianness of 'FromCoding' or 'ToCoding')\n"); 406 } 407 408 __QUEX_STD_memmove(RegionBeginP, &RegionBeginP[1], 409 (size_t)(*buffer_insertion_p - &RegionBeginP[1]) * sizeof(QUEX_TYPE_LEXATOM)); 410 411 *buffer_insertion_p = &(*buffer_insertion_p)[-1]; 412 } 413 414 QUEX_NAMESPACE_MAIN_CLOSE 415 416 #include "LexatomLoader.i" 417 #include "LexatomLoader_Converter_RawBuffer.i" 418 #include "Converter.i" 419 420 #ifdef QUEX_OPTION_CONVERTER_ICONV 421 # include <quex/code_base/buffer/lexatoms/converter/iconv/Converter_IConv.i> 422 #endif 423 #ifdef QUEX_OPTION_CONVERTER_ICU 424 # include <quex/code_base/buffer/lexatoms/converter/icu/Converter_ICU.i> 425 #endif 426 427 428 #endif /* __QUEX_INCLUDE_GUARD__BUFFER__LEXATOMS__LEXATOM_LOADER_CONVERTER_I */