// src/Iconvert.hh // This file is part of libpbe; see http://decimail.org and http://anyterm.org // (C) 2006 Philip Endecott // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // C++ wrapper around iconv. // This is not thread-safe in the sense that an Iconver object cannot // be used safely from multiple threads; each thread must have its own // object. #ifndef libpbe_Iconver_hh #define libpbe_Iconver_hh #include #include #include #include #include "Exception.hh" #if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__sun__) // Previously __APPLE__ was included in this list; presumably they have // changed their headers. If you have an older system you may need to put // it back. #define ICONV_ARG2_IS_CONST #endif namespace pbe { enum iconv_errmode { reversible, // Throw if an input character cannot be reversibly converted. complete, // Throw if the input is not complete. valid, // Throw if the input is not valid. permissive // Don't throw. }; // TODO there are combinations of these.... // The template parameter char types don't need to be the actual types of the character sets, // since iconv just deals with lumps of bytes; they just need to correspond to the types of the // input and output to operator(). template class Iconver { public: Iconver(std::string from_charset, std::string to_charset) { iconverter = iconv_open(to_charset.c_str(), from_charset.c_str()); if (iconverter==reinterpret_cast(-1)) { throw InvalidCharset(); } } ~Iconver() { int rc = iconv_close(iconverter); if (rc==-1) { // pbe::throw_ErrnoException("iconv_close()"); // Don't throw an exception from a destructor, in case it has been invoked // during exception processing. // (TODO is there a better solution to this?) } } std::basic_string operator()(std::basic_string i) { return operator()(i.data(),i.size()); } std::basic_string operator()(const from_char* i, size_t l) { if (carry.size()) { std::basic_string s = carry; s.append(i,l); carry.clear(); i = s.data(); l = s.size(); } const size_t bytes_in = sizeof(from_char) * l; const size_t obuf_sz = l * 2; // do multiple chunks if necessary const size_t buf_bytes = obuf_sz * sizeof(to_char); boost::scoped_array obuf (new to_char[obuf_sz]); std::basic_string o; #ifdef ICONV_ARG2_IS_CONST const char* ip = reinterpret_cast(i); #else char* ip = reinterpret_cast(const_cast(i)); #endif size_t in = bytes_in; do { char* op = reinterpret_cast(obuf.get()); size_t on = buf_bytes; int rc = iconv(iconverter, &ip, &in, &op, &on); if (rc==-1) { if (errno==E2BIG) { // Output buffer is full. We'll go around the loop again. } else if (errno==EILSEQ) { // An invalid multibyte sequence has been found. if (errmode==permissive) { // Skip the offending character and continue. // (iconv stores any valid converted data from before the error and updates // the pointers correctly in this case.) ip += sizeof(from_char); in -= sizeof(from_char); } else { throw InvalidInput(); } } else if (errno==EINVAL) { // An incomplete multibyte sequence has been found at the end of the input. if (errmode==complete) { throw IncompleteInput(); } else { carry = std::basic_string(reinterpret_cast(ip),in/sizeof(from_char)); in = 0; } } else { pbe::throw_ErrnoException("iconv()"); } } else if (rc>0) { if (errmode==reversible) { throw NotReversible(); } } o += std::basic_string(obuf.get(), (buf_bytes - on)/sizeof(to_char)); } while (in>0); return o; } void flush() { // Caller believes that the input is complete; throws if a multi-byte character is outstanding. if (errmode==permissive) { reset(); } else if (carry.size()) { throw IncompleteInput(); } } void reset() { // Clear any outstanding partial multi-byte character. carry.clear(); } class InvalidCharset: public pbe::StrException { public: InvalidCharset(): pbe::StrException("Invalid character set or unsupported conversion") {} }; class InvalidInput: public pbe::StrException { public: InvalidInput(): pbe::StrException("Invalid input to Iconv") {} }; class IncompleteInput: public pbe::StrException { public: IncompleteInput(): pbe::StrException("Incomplete multi-byte input to Iconv") {} }; class NotReversible: public pbe::StrException { public: NotReversible(): pbe::StrException("Non-reversible input to Iconv") {} }; private: iconv_t iconverter; std::basic_string carry; }; }; #endif