From fe53a4100a553a17f08a0f6f83af90c481877833 Mon Sep 17 00:00:00 2001
From: Manoj Ampalam
Date: Sun, 9 Oct 2016 23:29:52 -0700
Subject: [PATCH] fopen Windows wrapper to support reading UTF-8 encoded files

---
 contrib/win32/win32compat/inc/unistd.h   |  2 +
 contrib/win32/win32compat/inc/w32posix.h |  1 +
 contrib/win32/win32compat/misc.c         | 67 ++++++++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/contrib/win32/win32compat/inc/unistd.h b/contrib/win32/win32compat/inc/unistd.h
index 3e1a055..d65150c 100644
--- a/contrib/win32/win32compat/inc/unistd.h
+++ b/contrib/win32/win32compat/inc/unistd.h
@@ -35,6 +35,8 @@
 
 #define getdtablesize() MAX_FDS
 
+#define fopen w32_fopen_utf8
+
 /* Compatibility header to avoid lots of #ifdefs in includes.h on Win32 */
 #include
 
diff --git a/contrib/win32/win32compat/inc/w32posix.h b/contrib/win32/win32compat/inc/w32posix.h
index 4fc91df..78a31fd 100644
--- a/contrib/win32/win32compat/inc/w32posix.h
+++ b/contrib/win32/win32compat/inc/w32posix.h
@@ -69,6 +69,7 @@ sighandler_t w32_signal(int signum, sighandler_t handler);
 int w32_sigprocmask(int how, const sigset_t *set, sigset_t *oldset);
 int w32_raise(int sig);
 int w32_kill(int pid, int sig);
+FILE* w32_fopen_utf8(const char *, const char *);
 
 /* Shutdown constants */
 
diff --git a/contrib/win32/win32compat/misc.c b/contrib/win32/win32compat/misc.c
index 038bf80..ec6c109 100644
--- a/contrib/win32/win32compat/misc.c
+++ b/contrib/win32/win32compat/misc.c
@@ -1,4 +1,5 @@
 #include
+#include
 #include "inc\defs.h"
 #include "inc\sys\statvfs.h"
 
@@ -69,4 +70,70 @@ int dlclose(HMODULE handle) {
 
 FARPROC dlsym(HMODULE handle, const char *symbol) {
 	return GetProcAddress(handle, symbol);
+}
+
+
+/*
+ * fopen() replacement for Windows that takes a UTF-8 encoded path.
+ * Mimics https://linux.die.net/man/3/fopen; only the modes "r", "w" and
+ * "a" are supported for now.
+ *
+ * For "r" a leading UTF-8 byte order mark, if present, is consumed so the
+ * caller starts reading at the first byte of the actual content. No byte
+ * order mark is written for "w".
+ *
+ * Returns the opened stream, or NULL with errno set: EINVAL for a NULL
+ * argument, ENOTSUP for an unsupported mode, EFAULT when the UTF-8 to
+ * UTF-16 conversion fails, otherwise whatever _wfopen() left in errno.
+ */
+FILE*
+w32_fopen_utf8(const char *path, const char *mode) {
+	wchar_t wpath[MAX_PATH], wmode[5];
+	FILE* f;
+	/* unsigned: 0xEF, 0xBB and 0xBF do not fit in a signed char */
+	const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF };
+	unsigned char first3_bytes[sizeof(utf8_bom)];
+	int saved_errno;
+
+	if (path == NULL || mode == NULL) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* "" fails the first test, so mode[1] is only read when it exists */
+	if ((mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') ||
+	    mode[1] != '\0') {
+		errno = ENOTSUP;
+		return NULL;
+	}
+
+	if (MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, MAX_PATH) == 0 ||
+	    MultiByteToWideChar(CP_UTF8, 0, mode, -1, wmode, 5) == 0) {
+		errno = EFAULT;
+		debug("MultiByteToWideChar failed for %s - ERROR:%d", path, GetLastError());
+		return NULL;
+	}
+
+	f = _wfopen(wpath, wmode);
+	if (f == NULL) {
+		return NULL;
+	}
+
+	/*
+	 * Skip the UTF-8 BOM if present. fseek() returns 0 on success, so only
+	 * streams that can be rewound are probed.
+	 */
+	if (mode[0] == 'r' && fseek(f, 0, SEEK_SET) == 0) {
+		if ((fread(first3_bytes, sizeof(first3_bytes), 1, f) != 1 ||
+		    memcmp(first3_bytes, utf8_bom, sizeof(utf8_bom)) != 0) &&
+		    fseek(f, 0, SEEK_SET) != 0) {
+			/* no BOM, and the bytes just read cannot be given back */
+			saved_errno = errno;
+			fclose(f);
+			errno = saved_errno;
+			return NULL;
+		}
+	}
+
+	return f;
 }
\ No newline at end of file