// contig_sequence_conv.hh // This file is part of libpbe; see http://anyterm.org/ // (C) 2008 Philip Endecott // Distributed under the Boost Software License, Version 1.0: // // Permission is hereby granted, free of charge, to any person or organization // obtaining a copy of the software and accompanying documentation covered by // this license (the "Software") to use, reproduce, display, distribute, // execute, and transmit the Software, and to prepare derivative works of the // Software, and to permit third-parties to whom the Software is furnished to // do so, all subject to the following: // // The copyright notices in the Software and this entire statement, including // the above license grant, this restriction and the following disclaimer, // must be included in all copies of the Software, in whole or in part, and // all derivative works of the Software, unless such copies or derivative // works are solely in the form of machine-executable object code generated by // a source language processor. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. #ifndef libpbe_charset_contig_sequence_conv_hh #define libpbe_charset_contig_sequence_conv_hh #include "charset_t.hh" #include "sequence_conv.hh" #include "const_character_iterator.hh" #include "charset_traits.hh" #include namespace pbe { // Conversion of strings in contiguous memory // ------------------------------------------ // // This file provides a specialisation of sequence_conv for data in // contiguous memory, for which an optimised algorithm is possible. // Some background can be found at // http://gcc.gnu.org/ml/libstdc++/2005-11/msg00275.html // There are at least these cases where the elements are contiguous: // - raw memory, i.e. the iterator is a pointer (guaranteed) // - std::vector (guaranteed) // - std::valarray (guaranteed, I think) // - tr1::array / boost::array (guaranteed, I think) // - std::basic_string (not formally guaranteed, but true in practice) // It would be good to be able to automatically detect other cases, // e.g. user-defined array-like classes, but I don't know how that can be // done. // This file includes the optimisations for raw memory (pointers), vectors // and strings. Adding the others would be straightforward; the only // issue is that it would bring in their headers. template static uint8_t byte_n(unsigned int i) { return (i>>(8*n))&0xff; } // First we specialise for a character_iterator over a unit_t*: template< charset_t from_cs, charset_t to_cs, typename error_policy, typename OutputIterator > struct sequence_conv< from_cs, to_cs, error_policy, const_character_iterator::unit_t*>, OutputIterator, typename boost::enable_if_c< charset_traits::is_ascii_superset && charset_traits::is_ascii_superset >::type > { typedef const_character_iterator::unit_t*> InputIterator; void operator() ( InputIterator first, const InputIterator& last, OutputIterator result, // ?? modify in place, return new version, or what? typename charset_traits::state_t& from_state, typename charset_traits::state_t& to_state) { std::cout << "specialisation!\n"; uintptr_t last_addr = reinterpret_cast(last.base()); while (first != last) { uintptr_t first_addr = reinterpret_cast(first.base()); if (((first_addr & (sizeof(int)-1))==0) && ((last_addr-first_addr) >= sizeof(int))) { const unsigned* uptr = static_cast(static_cast(first.base())); unsigned u = *uptr; if (!(u&0x80808080)) { // FIXME 64 bits *(result++) = byte_n<0>(u); // FIXME enable_if both cs ASCII supersets // FIXME result is ptr too *(result++) = byte_n<1>(u); *(result++) = byte_n<2>(u); *(result++) = byte_n<3>(u); first = InputIterator(first.base() + 4); continue; } } *(result++) = char_conv()(*first,from_state,to_state); ++first; } } }; // Now specialise for std::basic_string and std::vector by calling the above // pointer specialisation: template struct sequence_conv< from_cs, to_cs, error_policy, const_character_iterator::unit_t> ::const_iterator>, OutputIterator, typename boost::enable_if_c< charset_traits::is_ascii_superset && charset_traits::is_ascii_superset >::type > { typedef const_character_iterator::unit_t> ::const_iterator> InputIterator; typedef const_character_iterator::unit_t*> InputPointer; void operator() ( InputIterator first, const InputIterator& last, OutputIterator result, // ?? modify in place, return new version, or what? typename charset_traits::state_t& from_state, typename charset_traits::state_t& to_state) { sequence_conv< from_cs,to_cs,error_policy, InputPointer, OutputIterator > sc; InputPointer first_p(&(*(first.base()))); InputPointer last_p(&(*(last.base()))); sc(first_p,last_p,result,from_state,to_state); } }; }; #endif