/*	$NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $ */
/*-
 * Copyright (c) 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */

#include "config.h"

#include <sys/cdefs.h>
#if 0
#ifndef lint
static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp  (Berkeley) Date: 2001/08/18 21:41:41 ";
#endif /* not lint */
#else
__RCSID("$NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $");
#endif

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <bitstring.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "common.h"

#if defined(USE_WIDECHAR) && defined(USE_ICONV)
#include <langinfo.h>
#include <iconv.h>

#define LANGCODESET	nl_langinfo(CODESET)
#else
#define LANGCODESET	""
#endif

#include <locale.h>

#ifdef USE_WIDECHAR
#ifdef USE_ICONV
/*
 * raw2int --
 *	Widen each of the len input bytes to one CHAR_T, with no character
 *	set decoding at all.
 *
 * The result is built in cw->bp1 (grown through BINC_RETW to hold len
 * elements); *dst is pointed at that buffer and *tolen is set to len.
 * Returns 0.  BINC_RETW presumably returns from this function itself when
 * the buffer cannot be grown -- confirm against its definition.
 */
static int 
raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
	const CHAR_T **dst)
{
    /* Same width as len, so long inputs cannot overflow the counter. */
    ssize_t pos;
    CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
    size_t  *blen = &cw->blen1;

    BINC_RETW(NULL, *tostr, *blen, len);

    *tolen = len;
    for (pos = 0; pos < len; ++pos) {
	/* Go through u_char so that bytes >= 0x80 are not sign-extended. */
	CHAR_T w = (u_char)str[pos];
	memcpy((*tostr) + pos, &w, sizeof(**tostr));
    }

    *dst = cw->bp1;

    return 0;
}
#endif

/*
 * Recovery policy for input that cannot be converted.
 *
 * Default build: keep going by passing the offending unit through as is.
 *   HANDLE_ICONV_ERROR(o, i, ol, il) copies one byte from *i to *o,
 *	advances both pointers and decrements both "bytes left" counters.
 *   HANDLE_MBR_ERROR(n, mbs, d, s) stores s into d, clears the mbstate_t
 *	mbs and sets n to 1 (one unit consumed/produced).
 *
 * ERROR_ON_CONVERT build: jump to the `err' label of the enclosing
 * function.  Both forms are function-like macros taking the same
 * arguments, so that the call sites expand to a plain `goto err;'.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
		*(o)++ = *(i)++;					\
		(ol)--; (il)--;						\
	} while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
		(d) = (s);						\
		MEMSET(&(mbs), 0, 1); 					\
		(n) = 1; 						\
	} while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define	HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif

/* Size in bytes of the on-stack staging buffer used around iconv(). */
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT(str, left, src, len) --
 *	Run iconv() once over the input at str, writing into the
 *	CONV_BUFFER_SIZE-byte array `buffer'.
 *
 * str and left are passed to iconv() by address and are therefore advanced
 * past / reduced by whatever was consumed.  len receives the number of
 * bytes placed in the buffer and src is pointed at the buffer.
 *
 * A failure other than E2BIG is handed to HANDLE_ICONV_ERROR.  If no
 * output at all was produced, `error' is set to -left and control jumps
 * to `err'.
 *
 * The expanding function must provide `id' (the iconv descriptor),
 * `buffer', `error' and an `err' label.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)				    	\
    do {								\
	size_t outleft;							\
	char *bp = buffer;						\
	outleft = CONV_BUFFER_SIZE;					\
	errno = 0;							\
	if (iconv(id, (char **)(void *)&str, &left, &bp, &outleft) 	\
	    == (size_t)-1 && errno != E2BIG)				\
		HANDLE_ICONV_ERROR(bp, str, outleft, left);		\
	if ((len = CONV_BUFFER_SIZE - outleft) == 0) {			\
	    error = -left;						\
	    goto err;							\
	}				    				\
	src = buffer;							\
    } while (0)
#endif

/*
 * default_char2int --
 *	Convert len bytes at str, encoded as `enc', into CHAR_Ts.
 *
 * The CHAR_Ts are stored in cw->bp1, which is grown as needed; on return
 * *dst points at that buffer and *tolen holds the number of CHAR_Ts
 * stored (on the error path as well).
 *
 * With USE_ICONV, when enc differs from nl_langinfo(CODESET) the input is
 * first run through iconv() into a CONV_BUFFER_SIZE-byte stack buffer, one
 * chunk at a time (see CONVERT), and mbrtowc() decodes each chunk.
 * Otherwise mbrtowc() decodes str directly.
 *
 * Returns 0 on success.  On the err/alloc_err path it returns `error',
 * which starts out as 1 and may have been replaced by a negated count of
 * unconsumed bytes (by CONVERT, or when mbrtowc() reported an incomplete
 * sequence).
 */
static int 
default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
		size_t *tolen, const CHAR_T **dst, const char *enc)
{
    int j;				/* byte offset into src */
    size_t i = 0;			/* CHAR_Ts stored so far */
    CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
    size_t  *blen = &cw->blen1;
    mbstate_t mbs;
    size_t   n;
    ssize_t  nlen = len;
    const char *src = (const char *)str;
    int		error = 1;
#ifdef USE_ICONV
    iconv_t	id = (iconv_t)-1;	/* stays -1 when no iconv pass is needed */
    char	buffer[CONV_BUFFER_SIZE];
    size_t	left = len;		/* bytes of str not yet fed to iconv */
#endif

    MEMSET(&mbs, 0, 1);
    BINC_RETW(NULL, *tostr, *blen, nlen);

#ifdef USE_ICONV
    if (strcmp(nl_langinfo(CODESET), enc)) {
	id = iconv_open(nl_langinfo(CODESET), enc);
	if (id == (iconv_t)-1)
	    goto err;
	/* From here on src/len describe the converted chunk, not str. */
	CONVERT(str, left, src, len);
    }
#endif

    for (i = 0, j = 0; j < len; ) {
	CHAR_T w;
	n = mbrtowc(&w, src + j, len - j, &mbs);
	memcpy((*tostr) + i, &w, sizeof(**tostr));
	/* Incomplete sequence: remember how many bytes were left over. */
	if (n == (size_t)-2) error = -(len - j);
	if (n == (size_t)-1 || n == (size_t)-2) {
	    HANDLE_MBR_ERROR(n, mbs, w, src[j]); 
	    memcpy((*tostr) + i, &w, sizeof(**tostr));
	}
	/* NUL character converted: it still occupies one input byte. */
	if (n == 0) n = 1;
	j += n;
	if (++i >= *blen) {
	    nlen += 256;
	    BINC_GOTOW(NULL, *tostr, *blen, nlen);
	}
#ifdef USE_ICONV
	/* Chunk exhausted but input remains: convert the next chunk. */
	if (id != (iconv_t)-1 && j == len && left) {
	    CONVERT(str, left, src, len);
	    j = 0;
	}
#endif
    }
    *tolen = i;

#ifdef USE_ICONV
    if (id != (iconv_t)-1)
	iconv_close(id);
#endif

    *dst = cw->bp1;

    return 0;
alloc_err:
#ifdef USE_ICONV
err:
    if (id != (iconv_t)-1)
	iconv_close(id);
#endif
    /* Hand back whatever was converted before the failure. */
    *tolen = i;
    *dst = cw->bp1;

    return error;
}

/*
 * fe_char2int --
 *	default_char2int() with the source encoding taken from the
 *	O_FILEENCODING option string of sp.
 */
static int 
fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
	    size_t *tolen, const CHAR_T **dst)
{
    const char *encoding = O_STR(sp, O_FILEENCODING);

    return default_char2int(sp, str, len, cw, tolen, dst, encoding);
}

/*
 * ie_char2int --
 *	default_char2int() with the source encoding taken from the
 *	O_INPUTENCODING option string of sp.
 */
static int 
ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
	    size_t *tolen, const CHAR_T **dst)
{
    const char *encoding = O_STR(sp, O_INPUTENCODING);

    return default_char2int(sp, str, len, cw, tolen, dst, encoding);
}

/*
 * cs_char2int --
 *	default_char2int() with LANGCODESET as the source encoding.
 */
static int 
cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
	    size_t *tolen, const CHAR_T **dst)
{
    const char *encoding = LANGCODESET;

    return default_char2int(sp, str, len, cw, tolen, dst, encoding);
}

#ifdef USE_ICONV
/*
 * CHAR_T_int2char --
 *	Pass-through "conversion": expose the CHAR_T array itself as a
 *	byte string.  Nothing is copied and cw is not used; *dst aliases
 *	str and *tolen is the size of the len elements in bytes.
 *	Always returns 0.
 */
static int 
CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
	size_t *tolen, const char **dst)
{
    const void *raw = str;

    *dst = (const char *)raw;
    *tolen = len * sizeof(CHAR_T);

    return 0;
}

/*
 * CHAR_T_char2int --
 *	Pass-through "conversion": treat the input bytes as an array of
 *	CHAR_T.  Nothing is copied and cw is not used; *dst aliases str
 *	and *tolen is len divided by the size of a CHAR_T.
 *	Always returns 0.
 */
static int 
CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 
	size_t *tolen, const CHAR_T **dst)
{
    *dst = (const CHAR_T *)str;
    *tolen = len / sizeof(CHAR_T);

    return 0;
}

/*
 * int2raw --
 *	Narrow each of the len CHAR_Ts to a single char, with no character
 *	set encoding at all (the inverse of raw2int for values that fit in
 *	a char).
 *
 * The result is built in cw->bp1 (grown through BINC_RETC to hold len
 * bytes); *dst is pointed at that buffer and *tolen is set to len.
 * Returns 0.  BINC_RETC presumably returns from this function itself when
 * the buffer cannot be grown -- confirm against its definition.
 */
static int 
int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
	const char **dst)
{
    /* Same width as len, so long inputs cannot overflow the counter. */
    ssize_t pos;
    char **tostr = (char **)(void *)&cw->bp1;
    size_t  *blen = &cw->blen1;

    BINC_RETC(NULL, *tostr, *blen, len);

    *tolen = len;
    for (pos = 0; pos < len; ++pos) {
	CHAR_T w;
	memcpy(&w, str + pos, sizeof(w));
	/* Implicit conversion to char: only the low-order byte survives. */
	(*tostr)[pos] = w;
    }

    *dst = cw->bp1;

    return 0;
}
#endif

/*
 * default_int2char --
 *	Convert len CHAR_Ts at str into a byte string encoded as `enc'.
 *
 * Each CHAR_T is encoded with wcrtomb().  With USE_ICONV, when enc differs
 * from nl_langinfo(CODESET) the wcrtomb() output is staged in a
 * CONV_BUFFER_SIZE-byte stack buffer and pushed through iconv() into
 * cw->bp1 (see CONVERT2); otherwise wcrtomb() writes straight into
 * cw->bp1, which is grown as needed.
 *
 * On return *pdst points at cw->bp1 and *tolen holds the number of bytes
 * produced; the terminating NUL written at the end is not counted.
 * Returns 0 on success and 1 on the err/alloc_err path (iconv_open()
 * failure or failure to grow the buffer inside CONVERT2).
 */
static int 
default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
		size_t *tolen, const char **pdst, const char *enc)
{
    size_t i, j = 0;
    char **tostr = (char **)(void *)&cw->bp1;
    size_t  *blen = &cw->blen1;
    mbstate_t mbs;
    size_t n;
    ssize_t  nlen = len + MB_CUR_MAX;
    char *dst;
    size_t buflen;
#ifdef USE_ICONV
    int		offset = 0;
    char	buffer[CONV_BUFFER_SIZE];
    iconv_t	id = (iconv_t)-1;
#endif

/*
 * CONVERT2(_buffer, lenp, cw, offset) --
 *	Run the first *lenp bytes of _buffer through iconv() and append the
 *	result to cw->bp1 at byte `offset'.  iconv() reduces *lenp as input
 *	is consumed and the loop repeats until iconv() returns 0; offset is
 *	advanced past the bytes written; cw->bp1 is grown as required.
 *
 * The output pointer and the space left are computed only AFTER the
 * buffer has been (possibly) grown: growing may move cw->bp1 and changes
 * cw->blen1, so values taken beforehand would be stale.
 *
 * NOTE(review): the error handler is handed `len' (the CHAR_T count of
 * the enclosing function) as its "input left" counter rather than *lenp;
 * that looks unintended -- confirm before relying on the recovery path.
 */
#ifdef USE_ICONV
#define CONVERT2(_buffer, lenp, cw, offset)				\
    do {								\
	const char *bp = _buffer;					\
	size_t ret;							\
	do {								\
	    size_t outleft;						\
	    char *obp;							\
	    if (cw->blen1 < offset + MB_CUR_MAX) {		    	\
		nlen += 256;						\
		BINC_GOTOC(NULL, cw->bp1, cw->blen1, nlen);		\
	    }						    		\
	    outleft = cw->blen1 - offset;				\
	    obp = (char *)cw->bp1 + offset;				\
	    errno = 0;						    	\
	    ret = iconv(id, (char **)(void *)&bp, lenp, &obp, &outleft);\
	    if (ret == (size_t)-1 && errno != E2BIG) 			\
		    HANDLE_ICONV_ERROR(obp, bp, outleft, len);		\
	    offset = cw->blen1 - outleft;			        \
	} while (ret != 0);					        \
    } while (0)
#endif

    MEMSET(&mbs, 0, 1);
    BINC_RETC(NULL, *tostr, *blen, nlen);
    dst = *tostr; buflen = *blen;

#ifdef USE_ICONV
    if (strcmp(nl_langinfo(CODESET), enc)) {
	id = iconv_open(enc, nl_langinfo(CODESET));
	if (id == (iconv_t)-1)
	    goto err;
	/* wcrtomb() output is staged here and then fed to iconv(). */
	dst = buffer; buflen = CONV_BUFFER_SIZE;
    }
#endif

    for (i = 0, j = 0; i < (size_t)len; ++i) {
	CHAR_T w;
	memcpy(&w, str + i, sizeof(w));
	n = wcrtomb(dst + j, w, &mbs);
	if (n == (size_t)-1) 
	   HANDLE_MBR_ERROR(n, mbs, dst[j], w);
	j += n;
	/* Keep room for at least one more multibyte character. */
	if (buflen < j + MB_CUR_MAX) {
#ifdef USE_ICONV
	    if (id != (iconv_t)-1) {
		/* Drain the staging buffer; iconv() counts j down. */
		CONVERT2(buffer, &j, cw, offset);
	    } else
#endif
	    {
		nlen += 256;
		BINC_RETC(NULL, *tostr, *blen, nlen);
		dst = *tostr; buflen = *blen;
	    }
	}
    }

    n = wcrtomb(dst + j, L'\0', &mbs);
    j += n - 1;				/* don't count NUL at the end */
    *tolen = j;

#ifdef USE_ICONV
    if (id != (iconv_t)-1) {
	CONVERT2(buffer, &j, cw, offset);
	CONVERT2(NULL, NULL, cw, offset);  /* back to the initial state */
	*tolen = offset;
	iconv_close(id);
    }
#endif

    *pdst = cw->bp1;

    return 0;
#ifdef USE_ICONV
alloc_err:
err:
    if (id != (iconv_t)-1)
	iconv_close(id);
    *tolen = j;
    *pdst = cw->bp1;

    return 1;
#endif
}

/*
 * fe_int2char --
 *	default_int2char() with the target encoding taken from the
 *	O_FILEENCODING option string of sp.
 */
static int 
fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
	    size_t *tolen, const char **dst)
{
    const char *encoding = O_STR(sp, O_FILEENCODING);

    return default_int2char(sp, str, len, cw, tolen, dst, encoding);
}

/*
 * cs_int2char --
 *	default_int2char() with LANGCODESET as the target encoding.
 */
static int 
cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 
	    size_t *tolen, const char **dst)
{
    const char *encoding = LANGCODESET;

    return default_int2char(sp, str, len, cw, tolen, dst, encoding);
}

#endif


/*
 * conv_init --
 *	Set up the conversion table of screen sp.
 *
 * When orig is non-NULL its table is copied into sp and nothing else is
 * done.  Otherwise the locale is taken from the environment and, in
 * USE_WIDECHAR builds, the default converters are installed; with
 * USE_ICONV the O_FILEENCODING and O_INPUTENCODING options are also set
 * to the name returned by nl_langinfo(CODESET).
 */
void
conv_init (SCR *orig, SCR *sp)
{
    if (orig != NULL) {
	MEMCPY(&sp->conv, &orig->conv, 1);
	return;
    }

    setlocale(LC_ALL, "");
#ifdef USE_WIDECHAR
    /* System (locale) codeset. */
    sp->conv.sys2int = cs_char2int;
    sp->conv.int2sys = cs_int2char;
    /* File contents. */
    sp->conv.file2int = fe_char2int;
    sp->conv.int2file = fe_int2char;
    /* Typed input. */
    sp->conv.input2int = ie_char2int;
#ifdef USE_ICONV
    o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
    o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
#endif
#endif
}

/*
 * conv_enc --
 *	Select the converters for `option' (O_FILEENCODING or
 *	O_INPUTENCODING) according to the encoding name `enc'.
 *
 * An empty name installs the raw byte converters and the name "WCHAR_T"
 * installs the pass-through converters; both cases return 0 immediately.
 * Any other name must be accepted by iconv_open() in both directions
 * against nl_langinfo(CODESET).  If it is, the iconv-backed converters
 * are installed, SC_CONV_ERROR is cleared, SC_SCR_REFORMAT is set and 0
 * is returned.  If it is not, a message is queued for the two known
 * options and 1 is returned.
 *
 * Builds without both USE_WIDECHAR and USE_ICONV always return 1.
 */
int
conv_enc (SCR *sp, int option, const char *enc)
{
#if defined(USE_WIDECHAR) && defined(USE_ICONV)
    char2wchar_t    *to_int = NULL;
    wchar2char_t    *from_int = NULL;
    iconv_t	     probe;

    /* Pick the converter slots that belong to this option, if any. */
    if (option == O_FILEENCODING) {
	to_int = &sp->conv.file2int;
	from_int = &sp->conv.int2file;
    } else if (option == O_INPUTENCODING) {
	to_int = &sp->conv.input2int;
    }

    if (*enc == '\0') {
	if (to_int != NULL) {
	    *to_int = raw2int;
	}
	if (from_int != NULL) {
	    *from_int = int2raw;
	}
	return 0;
    }

    if (strcmp(enc, "WCHAR_T") == 0) {
	if (to_int != NULL) {
	    *to_int = CHAR_T_char2int;
	}
	if (from_int != NULL) {
	    *from_int = CHAR_T_int2char;
	}
	return 0;
    }

    /*
     * Check that iconv can convert both to and from the encoding; the
     * descriptors are only probes and are closed straight away.
     */
    probe = iconv_open(enc, nl_langinfo(CODESET));
    if (probe != (iconv_t)-1) {
	iconv_close(probe);
	probe = iconv_open(nl_langinfo(CODESET), enc);
    }

    if (probe != (iconv_t)-1) {
	iconv_close(probe);

	if (option == O_FILEENCODING) {
	    *to_int = fe_char2int;
	    *from_int = fe_int2char;
	} else if (option == O_INPUTENCODING) {
	    *to_int = ie_char2int;
	}

	F_CLR(sp, SC_CONV_ERROR);
	F_SET(sp, SC_SCR_REFORMAT);

	return 0;
    }

    /* Unsupported encoding: report it and fall through to the failure. */
    if (option == O_FILEENCODING) {
	msgq(sp, M_ERR,
	    "321|File encoding conversion not supported");
    } else if (option == O_INPUTENCODING) {
	msgq(sp, M_ERR,
	    "322|Input encoding conversion not supported");
    }
#endif
    return 1;
}