149 lines
6.3 KiB
C++
149 lines
6.3 KiB
C++
// contig_sequence_conv.hh
|
|
// This file is part of libpbe; see http://anyterm.org/
|
|
// (C) 2008 Philip Endecott
|
|
|
|
// Distributed under the Boost Software License, Version 1.0:
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person or organization
|
|
// obtaining a copy of the software and accompanying documentation covered by
|
|
// this license (the "Software") to use, reproduce, display, distribute,
|
|
// execute, and transmit the Software, and to prepare derivative works of the
|
|
// Software, and to permit third-parties to whom the Software is furnished to
|
|
// do so, all subject to the following:
|
|
//
|
|
// The copyright notices in the Software and this entire statement, including
|
|
// the above license grant, this restriction and the following disclaimer,
|
|
// must be included in all copies of the Software, in whole or in part, and
|
|
// all derivative works of the Software, unless such copies or derivative
|
|
// works are solely in the form of machine-executable object code generated by
|
|
// a source language processor.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
|
|
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
|
|
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
// DEALINGS IN THE SOFTWARE.
|
|
|
|
#ifndef libpbe_charset_contig_sequence_conv_hh
|
|
#define libpbe_charset_contig_sequence_conv_hh
|
|
|
|
#include "charset_t.hh"
#include "sequence_conv.hh"
#include "const_character_iterator.hh"
#include "charset_traits.hh"

#include <boost/utility/enable_if.hpp>

#include <stdint.h>
#include <cstring>
|
|
|
|
|
|
namespace pbe {
|
|
|
|
// Conversion of strings in contiguous memory
|
|
// ------------------------------------------
|
|
//
|
|
// This file provides a specialisation of sequence_conv for data in
|
|
// contiguous memory, for which an optimised algorithm is possible.
|
|
|
|
// Some background can be found at
|
|
// http://gcc.gnu.org/ml/libstdc++/2005-11/msg00275.html
|
|
|
|
// There are at least these cases where the elements are contiguous:
|
|
// - raw memory, i.e. the iterator is a pointer (guaranteed)
|
|
// - std::vector (guaranteed)
|
|
// - std::valarray (guaranteed, I think)
|
|
// - tr1::array / boost::array (guaranteed, I think)
|
|
// - std::basic_string (guaranteed since C++11; true in practice before that)
|
|
// It would be good to be able to automatically detect other cases,
|
|
// e.g. user-defined array-like classes, but I don't know how that can be
|
|
// done.
|
|
// This file includes the optimisations for raw memory (pointers) and
// std::basic_string.  Adding the others (e.g. std::vector) would be
// straightforward; the only issue is that it would bring in their headers.
|
|
|
|
|
|
// Extract byte number n of i, counting from the least significant end:
// byte_n<0> is bits 0-7, byte_n<1> is bits 8-15, and so on.  The result
// depends only on the numeric value of i, not on how i is stored in memory.
//
// Declared inline rather than static: this lives in a header, and a function
// template does not need internal linkage to be defined in several
// translation units.
template <int n>
inline uint8_t byte_n(unsigned int i) {
  // C++03-style compile-time assertion: shifting by 8*n is undefined unless
  // 0 <= n < sizeof(unsigned int), so reject any other n when instantiated.
  typedef char byte_index_must_be_in_range
    [(n >= 0 && n < static_cast<int>(sizeof(unsigned int))) ? 1 : -1];
  (void)sizeof(byte_index_must_be_in_range);  // silence unused-typedef warnings

  return static_cast<uint8_t>((i >> (8*n)) & 0xff);
}
|
|
|
|
|
|
// First we specialise for a character_iterator over a unit_t*:
|
|
|
|
template< charset_t from_cs, charset_t to_cs, typename error_policy, typename OutputIterator >
|
|
struct sequence_conv< from_cs, to_cs, error_policy,
|
|
const_character_iterator<from_cs, const typename charset_traits<from_cs>::unit_t*>,
|
|
OutputIterator,
|
|
typename boost::enable_if_c< charset_traits<from_cs>::is_ascii_superset
|
|
&& charset_traits<to_cs>::is_ascii_superset >::type
|
|
>
|
|
{
|
|
typedef const_character_iterator<from_cs, const typename charset_traits<from_cs>::unit_t*> InputIterator;
|
|
void
|
|
operator() ( InputIterator first, const InputIterator& last,
|
|
OutputIterator result, // ?? modify in place, return new version, or what?
|
|
typename charset_traits<from_cs>::state_t& from_state,
|
|
typename charset_traits<to_cs>::state_t& to_state)
|
|
{
|
|
std::cout << "specialisation!\n";
|
|
uintptr_t last_addr = reinterpret_cast<intptr_t>(last.base());
|
|
while (first != last) {
|
|
uintptr_t first_addr = reinterpret_cast<intptr_t>(first.base());
|
|
if (((first_addr & (sizeof(int)-1))==0)
|
|
&& ((last_addr-first_addr) >= sizeof(int))) {
|
|
const unsigned* uptr = static_cast<const unsigned*>(static_cast<const void*>(first.base()));
|
|
unsigned u = *uptr;
|
|
if (!(u&0x80808080)) { // FIXME 64 bits
|
|
*(result++) = byte_n<0>(u); // FIXME enable_if both cs ASCII supersets // FIXME result is ptr too
|
|
*(result++) = byte_n<1>(u);
|
|
*(result++) = byte_n<2>(u);
|
|
*(result++) = byte_n<3>(u);
|
|
first = InputIterator(first.base() + 4);
|
|
continue;
|
|
}
|
|
}
|
|
*(result++) = char_conv<from_cs,to_cs,error_policy>()(*first,from_state,to_state);
|
|
++first;
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
// Now specialise for std::basic_string by calling the above pointer
// specialisation (a std::vector version would follow the same pattern):
|
|
|
|
template <charset_t from_cs, charset_t to_cs, typename error_policy, typename OutputIterator>
|
|
struct sequence_conv< from_cs, to_cs, error_policy,
|
|
const_character_iterator<from_cs,
|
|
typename std::basic_string<typename charset_traits<from_cs>::unit_t>
|
|
::const_iterator>,
|
|
OutputIterator,
|
|
typename boost::enable_if_c< charset_traits<from_cs>::is_ascii_superset
|
|
&& charset_traits<to_cs>::is_ascii_superset >::type
|
|
>
|
|
{
|
|
typedef const_character_iterator<from_cs,
|
|
typename std::basic_string<typename charset_traits<from_cs>::unit_t>
|
|
::const_iterator> InputIterator;
|
|
typedef const_character_iterator<from_cs, const typename charset_traits<from_cs>::unit_t*> InputPointer;
|
|
void
|
|
operator() ( InputIterator first, const InputIterator& last,
|
|
OutputIterator result, // ?? modify in place, return new version, or what?
|
|
typename charset_traits<from_cs>::state_t& from_state,
|
|
typename charset_traits<to_cs>::state_t& to_state)
|
|
{
|
|
sequence_conv< from_cs,to_cs,error_policy,
|
|
InputPointer, OutputIterator > sc;
|
|
InputPointer first_p(&(*(first.base())));
|
|
InputPointer last_p(&(*(last.base())));
|
|
sc(first_p,last_p,result,from_state,to_state);
|
|
}
|
|
};
|
|
|
|
|
|
|
|
|
|
};
|
|
|
|
#endif
|