pandorafms/extras/anytermd/libpbe/include/charset/conv/iso8859.hh

// iso8859.hh
// This file is part of libpbe; see http://anyterm.org/
// (C) 2008 Philip Endecott

// Distributed under the Boost Software License, Version 1.0:
//
// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:
// 
// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

#ifndef libpbe_charset_conv_iso8859_hh
#define libpbe_charset_conv_iso8859_hh

#include "charset/charset_t.hh"
#include "charset/char_t.hh"
#include "charset/charset_traits.hh"
#include "charset/char_conv.hh"

#include <boost/cstdint.hpp>

#include <algorithm>


namespace pbe {

// Conversion of iso8859 characters
// --------------------------------
//
// This file implements character conversions to and from the iso8859 character
// sets.


// Conversions to Unicode characters
// ---------------------------------

// All iso8859 characters correspond to "basic multilingual plane" unicode
// characters, i.e. they fit in a 16-bit ucs2 character.

// iso8859-1 is a special case: the first 256 Unicode characters are
// the iso8859-1 characters.

// NOTE(review): IDENTITY_CHAR_CONV is not defined in this file (presumably it
// comes from charset/char_conv.hh).  It is assumed to define a char_conv
// specialisation that passes the character value through unchanged - confirm.
IDENTITY_CHAR_CONV(cs::iso8859_1,cs::ucs2)
IDENTITY_CHAR_CONV(cs::iso8859_1,cs::ucs4)


// For the other character sets, characters 0 to 160 inclusive map directly to
// Unicode.  Tables are used to map characters 161 to 255:

// iso8859_table_t holds the mapping for one character set: 95 entries covering
// codes 161..255, indexed by (code - 161).
// iso8859_tables holds one such table per character set number 2..16, indexed
// by (set number - 2).  It is only declared here; the definition is expected
// to be provided by another translation unit.
typedef char16_t iso8859_table_t[95];      // character n in [n-161].
extern iso8859_table_t iso8859_tables[15]; // iso8859-n in [n-2].

// These tables are automatically generated from data from unicode.org.

// Some character sets don't use some codes.  The sentinel value -1 (i.e.
// 0xffff once stored in a 16-bit table entry) is stored in the tables for
// these cases.  (0xffff is not a valid ucs2 character.)

// In the following, template parameter n is the character set number 2-16.

// Maps the iso8859-n character c to its Unicode value.
// Codes below 161 are returned unchanged; codes 161..255 are looked up in
// iso8859_tables[n-2], so the result is whatever that table holds for the
// code (including the sentinel for codes the character set does not use).
template <int n>
static inline int iso8859_to_ucs ( char8_t c )
{
  // Go via uint8_t so that a signed char8_t cannot yield a negative code.
  const int code = static_cast<int>(static_cast<uint8_t>(c));
  if (code >= 161) {
    return iso8859_tables[n-2][code-161];
  }
  return code;
}

// CONV_ISO8859_TO_UCS(N) defines the two char_conv specialisations that
// convert from iso8859-N to ucs2 and to ucs4.  Both simply forward to
// iso8859_to_ucs<N>(); the two state arguments are unnamed and unused.
// NOTE(review): error_policy is never consulted here, so a code that the
// character set leaves unassigned is returned as the table's sentinel value
// rather than being reported - confirm that callers expect this.
#define CONV_ISO8859_TO_UCS(N)                                     \
template <typename error_policy>                                   \
struct char_conv<cs::iso8859_##N, cs::ucs2, error_policy> {        \
  char16_t operator() ( char8_t c,                                 \
                        charset_traits<cs::iso8859_##N>::state_t&, \
                        charset_traits<cs::ucs2>::state_t&  )      \
  {                                                                \
    return iso8859_to_ucs<N>(c);                                   \
  }                                                                \
};                                                                 \
template <typename error_policy>                                   \
struct char_conv<cs::iso8859_##N, cs::ucs4, error_policy> {        \
  char32_t operator() ( char8_t c,                                 \
                        charset_traits<cs::iso8859_##N>::state_t&, \
                        charset_traits<cs::ucs4>::state_t&  )      \
  {                                                                \
    return iso8859_to_ucs<N>(c);                                   \
  }                                                                \
};

// Instantiate the conversions for every supported character set.
CONV_ISO8859_TO_UCS(2)
CONV_ISO8859_TO_UCS(3)
CONV_ISO8859_TO_UCS(4)
CONV_ISO8859_TO_UCS(5)
CONV_ISO8859_TO_UCS(6)
CONV_ISO8859_TO_UCS(7)
CONV_ISO8859_TO_UCS(8)
CONV_ISO8859_TO_UCS(9)
CONV_ISO8859_TO_UCS(10)
//CONV_ISO8859_TO_UCS(11)  // This is missing from the IANA file
                           // -11 should be Thai.
                           // -12 is supposed to be missing; it's the abandoned Devanagari
CONV_ISO8859_TO_UCS(13)
CONV_ISO8859_TO_UCS(14)
CONV_ISO8859_TO_UCS(15)
CONV_ISO8859_TO_UCS(16)

// The macro is only a local code generator; do not leak it to includers.
#undef CONV_ISO8859_TO_UCS


// Conversion from Unicode characters
// ----------------------------------

// iso8859-1 is a special case again:

// Converts a ucs2 character to iso8859-1.
// Values 0..0xff are the iso8859-1 characters themselves and are returned
// unchanged.  Anything larger has no iso8859-1 equivalent, in which case the
// result of error_policy::no_equivalent() is returned (previously that result
// was discarded and the truncated low byte of c was returned instead, unlike
// the table-driven conversions below).  The state arguments are unused.
template <typename error_policy>
struct char_conv<cs::ucs2, cs::iso8859_1, error_policy> {
  char8_t operator() ( char16_t c,
                       charset_traits<cs::ucs2>::state_t&,
                       charset_traits<cs::iso8859_1>::state_t& )
  {
    if (c>0xff) {
      return error_policy::no_equivalent();
    }
    // c is known to fit in 8 bits here, so the narrowing is lossless.
    return static_cast<char8_t>(c);
  }
};

// Converts a ucs4 character to iso8859-1.
// Values 0..0xff are the iso8859-1 characters themselves and are returned
// unchanged.  Anything larger has no iso8859-1 equivalent, in which case the
// result of error_policy::no_equivalent() is returned (previously that result
// was discarded and the truncated low byte of c was returned instead, unlike
// the table-driven conversions below).  The state arguments are unused.
template <typename error_policy>
struct char_conv<cs::ucs4, cs::iso8859_1, error_policy> {
  char8_t operator() ( char32_t c,
                       charset_traits<cs::ucs4>::state_t&,
                       charset_traits<cs::iso8859_1>::state_t& )
  {
    if (c>0xff) {
      return error_policy::no_equivalent();
    }
    // c is known to fit in 8 bits here, so the narrowing is lossless.
    return static_cast<char8_t>(c);
  }
};


// For the other cases we use tables.
// A single-level table-driven conversion would require large, sparse
// tables; instead we break the unicode space into pages and have one table
// for each combination of ucs page and iso8859 character set.
// These tables are generated dynamically only as needed by invoking
// the reverse functions above.


// Builds the reverse lookup table for one (character set, Unicode page) pair.
// The result has 256 slots indexed by the low byte of the Unicode value; a
// slot holds the iso8859-n code (161..255) whose Unicode value lies on the
// given page, or 0 when no such code exists.  The array is heap allocated
// and ownership is never released.
template <int n, int page>
static inline const char8_t* mk_ucs_to_iso8859_page_table() {
  // Value-initialisation zeroes every slot; 0 = no equivalent.
  char8_t* const reverse = new char8_t[256]();  // never deleted
  int code = 161;
  while (code < 256) {
    const int ucs = iso8859_to_ucs<n>(code);
    const bool on_this_page = ((ucs>>8) == page);
    if (on_this_page) {
      reverse[ucs&0xff] = code;
    }
    ++code;
  }
  return reverse;
}

// Looks up the iso8859-n character for the Unicode value (page<<8 | point).
// The page table is built on the first call for each (n, page, error_policy)
// combination and kept for the rest of the program.  A zero table entry means
// there is no equivalent, and error_policy::no_equivalent() supplies the
// result instead.
template <int n, int page, typename error_policy>
static inline char8_t char_conv_ucs_to_iso8859_lookup ( uint8_t point )
{
  // FIXME: initialisation of this function-local static is flagged as not
  // (necessarily) thread safe.
  static const char8_t* page_table = mk_ucs_to_iso8859_page_table<n,page>();
  const char8_t found = page_table[point];
  if (found != 0) {
    return found;
  }
  return error_policy::no_equivalent();
}

// Converts the Unicode value c to the corresponding iso8859-n character.
// Values 0..160 are identical in both and are returned unchanged.  Other
// values are split into a page (c>>8) and a point (c&0xff) and resolved
// through the per-page reverse table.  Values with no equivalent, including
// negative values, yield error_policy::no_equivalent().
template <int n, typename error_policy>
static inline char8_t ucs_to_iso8859 ( int c )
{
  // A negative c is not a valid character (it can arise, for example, when a
  // 32-bit ucs4 value with the top bit set is converted to int).  Without
  // this guard it would satisfy c<=160 below and be returned truncated as if
  // it were a valid character.
  if (c<0) {
    return error_policy::no_equivalent();
  }
  if (c<=160) {
    return c;
  } else {
    int page = c>>8;
    uint8_t point = c&0xff;
    switch (page) {
      // These are the only pages that have any characters in any iso8859 character sets.
      // FIXME we could use specialisation to consider only those pages that apply to
      // a particular character set.
      case 0x00: return char_conv_ucs_to_iso8859_lookup<n,0x00,error_policy>(point);
      case 0x01: return char_conv_ucs_to_iso8859_lookup<n,0x01,error_policy>(point);
      case 0x02: return char_conv_ucs_to_iso8859_lookup<n,0x02,error_policy>(point);
      case 0x03: return char_conv_ucs_to_iso8859_lookup<n,0x03,error_policy>(point);
      case 0x04: return char_conv_ucs_to_iso8859_lookup<n,0x04,error_policy>(point);
      case 0x05: return char_conv_ucs_to_iso8859_lookup<n,0x05,error_policy>(point);
      case 0x06: return char_conv_ucs_to_iso8859_lookup<n,0x06,error_policy>(point);
      case 0x0e: return char_conv_ucs_to_iso8859_lookup<n,0x0e,error_policy>(point);
      case 0x1e: return char_conv_ucs_to_iso8859_lookup<n,0x1e,error_policy>(point);
      case 0x20: return char_conv_ucs_to_iso8859_lookup<n,0x20,error_policy>(point);
      case 0x21: return char_conv_ucs_to_iso8859_lookup<n,0x21,error_policy>(point);
      default:   return error_policy::no_equivalent();
    }
  }
}


// CONV_UCS_TO_ISO8859(N) defines the two char_conv specialisations that
// convert from ucs2 and from ucs4 to iso8859-N.  Both forward to
// ucs_to_iso8859<N,error_policy>(), passing the character as an int; the two
// state arguments are unnamed and unused.
#define CONV_UCS_TO_ISO8859(N)                                        \
template <typename error_policy>                                      \
struct char_conv<cs::ucs2, cs::iso8859_##N, error_policy> {           \
  char8_t operator() ( char16_t c,                                    \
                       charset_traits<cs::ucs2>::state_t&,            \
                       charset_traits<cs::iso8859_##N>::state_t& )    \
  {                                                                   \
    return ucs_to_iso8859<N,error_policy>(c);                         \
  }                                                                   \
};                                                                    \
template <typename error_policy>                                      \
struct char_conv<cs::ucs4, cs::iso8859_##N, error_policy> {           \
  char8_t operator() ( char32_t c,                                    \
                       charset_traits<cs::ucs4>::state_t&,            \
                       charset_traits<cs::iso8859_##N>::state_t& )    \
  {                                                                   \
    return ucs_to_iso8859<N,error_policy>(c);                         \
  }                                                                   \
};

// Instantiate the conversions for every supported character set
// (11 and 12 are not provided).
CONV_UCS_TO_ISO8859(2)
CONV_UCS_TO_ISO8859(3)
CONV_UCS_TO_ISO8859(4)
CONV_UCS_TO_ISO8859(5)
CONV_UCS_TO_ISO8859(6)
CONV_UCS_TO_ISO8859(7)
CONV_UCS_TO_ISO8859(8)
CONV_UCS_TO_ISO8859(9)
CONV_UCS_TO_ISO8859(10)
CONV_UCS_TO_ISO8859(13)
CONV_UCS_TO_ISO8859(14)
CONV_UCS_TO_ISO8859(15)
CONV_UCS_TO_ISO8859(16)

// The macro is only a local code generator; do not leak it to includers.
#undef CONV_UCS_TO_ISO8859

};

#endif
012-05-05 Sancho Lerena <slerena@artica.es> * anytermd: Added anyterm to extras. Included modifications on original anytermd project source code. Added a new spec file for centos/fedora/rhel. Tested on FC16/i386 and centos6/x86_84. git-svn-id: https://svn.code.sf.net/p/pandora/code/trunk@6257 c3f86ba8-e40f-0410-aaad-9ba5e7f4b01f 2012-05-04 21:14:27 +02:00			`// iso8859.hh`
			`// This file is part of libpbe; see http://anyterm.org/`
			`// (C) 2008 Philip Endecott`

			`// Distributed under the Boost Software License, Version 1.0:`
			`//`
			`// Permission is hereby granted, free of charge, to any person or organization`
			`// obtaining a copy of the software and accompanying documentation covered by`
			`// this license (the "Software") to use, reproduce, display, distribute,`
			`// execute, and transmit the Software, and to prepare derivative works of the`
			`// Software, and to permit third-parties to whom the Software is furnished to`
			`// do so, all subject to the following:`
			`//`
			`// The copyright notices in the Software and this entire statement, including`
			`// the above license grant, this restriction and the following disclaimer,`
			`// must be included in all copies of the Software, in whole or in part, and`
			`// all derivative works of the Software, unless such copies or derivative`
			`// works are solely in the form of machine-executable object code generated by`
			`// a source language processor.`
			`//`
			`// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT`
			`// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE`
			`// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,`
			`// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER`
			`// DEALINGS IN THE SOFTWARE.`

			`#ifndef libpbe_charset_conv_iso8859_hh`
			`#define libpbe_charset_conv_iso8859_hh`

			`#include "charset/charset_t.hh"`
			`#include "charset/char_t.hh"`
			`#include "charset/charset_traits.hh"`
			`#include "charset/char_conv.hh"`

			`#include <boost/cstdint.hpp>`

			`#include <algorithm>`


			`namespace pbe {`

			`// Conversion of iso8859 characters`
			`// --------------------------------`
			`//`
			`// This file implements character conversions to and from the iso8859 character`
			`// sets.`


			`// Conversions to Unicode characters`
			`// ---------------------------------`

			`// All iso8859 characters correspond to "basic multilingual plane" unicode`
			`// characters, i.e. they fit in a 16-bit ucs2 character.`

			`// iso8859-1 is a special case: the first 256 Unicode characters are`
			`// the iso8859-1 characters.`

			`IDENTITY_CHAR_CONV(cs::iso8859_1,cs::ucs2)`
			`IDENTITY_CHAR_CONV(cs::iso8859_1,cs::ucs4)`


			`// For the other character sets, characters 0 to 160 inclusivve map directly to`
			`// Unicode. Tables are used to map characters 161 to 255:`

			`typedef char16_t iso8859_table_t[95]; // character n in [n-161].`
			`extern iso8859_table_t iso8859_tables[15]; // iso8859-n in [n-2].`

			`// These tables are automatically generated from data from unicode.org.`

			`// Some character sets don't use some codes. The sentinel value -1 is stored`
			`// in the tables for these cases. (0xffff is not a valid ucs2 character.)`

			`// In the following, template parameter n is the character set number 2-16.`

			`template <int n>`
			`static inline int iso8859_to_ucs ( char8_t c )`
			`{`
			`int i = static_cast<int>(static_cast<uint8_t>(c));`
			`return (i<161) ? i : iso8859_tables[n-2][i-161];`
			`}`

			`#define CONV_ISO8859_TO_UCS(N) \`
			`template <typename error_policy> \`
			`struct char_conv<cs::iso8859_##N, cs::ucs2, error_policy> { \`
			`char16_t operator() ( char8_t c, \`
			`charset_traits<cs::iso8859_##N>::state_t&, \`
			`charset_traits<cs::ucs2>::state_t& ) \`
			`{ \`
			`return iso8859_to_ucs<N>(c); \`
			`} \`
			`}; \`
			`template <typename error_policy> \`
			`struct char_conv<cs::iso8859_##N, cs::ucs4, error_policy> { \`
			`char32_t operator() ( char8_t c, \`
			`charset_traits<cs::iso8859_##N>::state_t&, \`
			`charset_traits<cs::ucs4>::state_t& ) \`
			`{ \`
			`return iso8859_to_ucs<N>(c); \`
			`} \`
			`};`

			`CONV_ISO8859_TO_UCS(2)`
			`CONV_ISO8859_TO_UCS(3)`
			`CONV_ISO8859_TO_UCS(4)`
			`CONV_ISO8859_TO_UCS(5)`
			`CONV_ISO8859_TO_UCS(6)`
			`CONV_ISO8859_TO_UCS(7)`
			`CONV_ISO8859_TO_UCS(8)`
			`CONV_ISO8859_TO_UCS(9)`
			`CONV_ISO8859_TO_UCS(10)`
			`//CONV_ISO8859_TO_UCS(11) // This is missing from the IANA file`
			`// -11 should be Thai.`
			`// -12 is supposed to be missing; it's the abandoned Devanagari`
			`CONV_ISO8859_TO_UCS(13)`
			`CONV_ISO8859_TO_UCS(14)`
			`CONV_ISO8859_TO_UCS(15)`
			`CONV_ISO8859_TO_UCS(16)`

			`#undef CONV_ISO8859_TO_UCS`


			`// Conversion from Unicode characters`
			`// ----------------------------------`

			`// iso8859-1 is a special case again:`

			`template <typename error_policy>`
			`struct char_conv<cs::ucs2, cs::iso8859_1, error_policy> {`
			`char8_t operator() ( char16_t c,`
			`charset_traits<cs::ucs2>::state_t&,`
			`charset_traits<cs::iso8859_1>::state_t& )`
			`{`
			`if (c>0xff) {`
			`error_policy::no_equivalent();`
			`}`
			`return c;`
			`}`
			`};`

			`template <typename error_policy>`
			`struct char_conv<cs::ucs4, cs::iso8859_1, error_policy> {`
			`char8_t operator() ( char32_t c,`
			`charset_traits<cs::ucs4>::state_t&,`
			`charset_traits<cs::iso8859_1>::state_t& )`
			`{`
			`if (c>0xff) {`
			`error_policy::no_equivalent();`
			`}`
			`return c;`
			`}`
			`};`


			`// For the other cases we use tables.`
			`// A single-level table-driven conversion would require large, sparse`
			`// tables; instead we break the unicode space into pages and have one table`
			`// for each combination of ucs page and iso8859 character set.`
			`// These tables are generated dynamically only as needed by invoking`
			`// the reverse functions above.`


			`template <int n, int page>`
			`static inline const char8_t* mk_ucs_to_iso8859_page_table() {`
			`char8_t* table = new char8_t[256]; // never deleted`
			`std::fill(table,table+256,0); // 0 = no equivalent`
			`for (int c=161; c<256; ++c) {`
			`int unichar = iso8859_to_ucs<n>(c);`
			`if ((unichar>>8) == page) {`
			`table[unichar&0xff] = c;`
			`}`
			`}`
			`return table;`
			`}`

			`template <int n, int page, typename error_policy>`
			`static inline char8_t char_conv_ucs_to_iso8859_lookup ( uint8_t point )`
			`{`
			`/FIXME THREAD SAFE/ static const char8_t* table_p = mk_ucs_to_iso8859_page_table<n,page>();`
			`char8_t c = table_p[point];`
			`if (c==0) {`
			`return error_policy::no_equivalent();`
			`}`
			`return c;`
			`}`

			`template <int n, typename error_policy>`
			`static inline char8_t ucs_to_iso8859 ( int c )`
			`{`
			`if (c<=160) {`
			`return c;`
			`} else {`
			`int page = c>>8;`
			`uint8_t point = c&0xff;`
			`switch (page) {`
			`// These are the only pages that have any characters in any iso8859 character sets.`
			`// FIXME we could use specialisation to consider only those pages that apply to`
			`// a partiuclar character set.`
			`case 0x00: return char_conv_ucs_to_iso8859_lookup<n,0x00,error_policy>(point);`
			`case 0x01: return char_conv_ucs_to_iso8859_lookup<n,0x01,error_policy>(point);`
			`case 0x02: return char_conv_ucs_to_iso8859_lookup<n,0x02,error_policy>(point);`
			`case 0x03: return char_conv_ucs_to_iso8859_lookup<n,0x03,error_policy>(point);`
			`case 0x04: return char_conv_ucs_to_iso8859_lookup<n,0x04,error_policy>(point);`
			`case 0x05: return char_conv_ucs_to_iso8859_lookup<n,0x05,error_policy>(point);`
			`case 0x06: return char_conv_ucs_to_iso8859_lookup<n,0x06,error_policy>(point);`
			`case 0x0e: return char_conv_ucs_to_iso8859_lookup<n,0x0e,error_policy>(point);`
			`case 0x1e: return char_conv_ucs_to_iso8859_lookup<n,0x1e,error_policy>(point);`
			`case 0x20: return char_conv_ucs_to_iso8859_lookup<n,0x20,error_policy>(point);`
			`case 0x21: return char_conv_ucs_to_iso8859_lookup<n,0x21,error_policy>(point);`
			`default: return error_policy::no_equivalent();`
			`}`
			`}`
			`}`


			`#define CONV_UCS_TO_ISO8859(N) \`
			`template <typename error_policy> \`
			`struct char_conv<cs::ucs2, cs::iso8859_##N, error_policy> { \`
			`char8_t operator() ( char16_t c, \`
			`charset_traits<cs::ucs2>::state_t&, \`
			`charset_traits<cs::iso8859_##N>::state_t& ) \`
			`{ \`
			`return ucs_to_iso8859<N,error_policy>(c); \`
			`} \`
			`}; \`
			`template <typename error_policy> \`
			`struct char_conv<cs::ucs4, cs::iso8859_##N, error_policy> { \`
			`char8_t operator() ( char32_t c, \`
			`charset_traits<cs::ucs4>::state_t&, \`
			`charset_traits<cs::iso8859_##N>::state_t& ) \`
			`{ \`
			`return ucs_to_iso8859<N,error_policy>(c); \`
			`} \`
			`};`

			`CONV_UCS_TO_ISO8859(2)`
			`CONV_UCS_TO_ISO8859(3)`
			`CONV_UCS_TO_ISO8859(4)`
			`CONV_UCS_TO_ISO8859(5)`
			`CONV_UCS_TO_ISO8859(6)`
			`CONV_UCS_TO_ISO8859(7)`
			`CONV_UCS_TO_ISO8859(8)`
			`CONV_UCS_TO_ISO8859(9)`
			`CONV_UCS_TO_ISO8859(10)`
			`CONV_UCS_TO_ISO8859(13)`
			`CONV_UCS_TO_ISO8859(14)`
			`CONV_UCS_TO_ISO8859(15)`
			`CONV_UCS_TO_ISO8859(16)`

			`#undef CONV_UCS_TO_ISO8859`

			`};`

			`#endif`