//**************************************************************************
//*                     This file is part of the                           *
//*                      Mpxplay - audio player.                           *
//*                  The source code of Mpxplay is                         *
//*        (C) copyright 1998-2020 by PDSoft (Attila Padar)                *
//*                http://mpxplay.sourceforge.net                          *
//*                  email: mpxplay@freemail.hu                            *
//**************************************************************************
//*  This program is distributed in the hope that it will be useful,       *
//*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
//*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
//*  Please contact with the author (with me) if you want to use           *
//*  or modify this source.                                                *
//**************************************************************************
//function:text (tag) conversion : codepage to codepage and UTF-8/16 decoding

//#define MPXPLAY_USE_DEBUGF 1
#define MPXPLAY_DEBUG_OUTPUT stdout

#include <malloc.h>
#include "mpxplay.h"
#include "charmaps.h"

#ifdef MPXPLAY_GUI_CONSOLE
extern unsigned int id3textconv,textscreen_console_codepage;
extern char cp_winchars[256],cp_doschars[256];
#endif

// Many elements of these tables are identical to US-ASCII, so each table
// starts at the first differing element (textconv_codepage_maps[].begin)

#pragma pack(push,1)
// Table of the known 8-bit codepages, closed by an all-zero entry.
// Entry [0] is the fallback source codepage and entry [1] the fallback target
// codepage of textconv_cpmaps_init().
static const struct cp_map_s{
  const char *cp_name;            // codepage name, compared with pds_stricmp() in textconv_select_mapping_by_name()
  const unsigned short cp_id_num; // numeric codepage id, compared with GetACP()/GetOEMCP() (probably win32 only)
  const unsigned short *map;      // unicode values of the character codes begin..255 (NULL at entries where begin is 256)
  unsigned short begin;           // first character code covered by map; lower codes are used unchanged
} textconv_codepage_maps[] = {
 {"ISO-8859-2", 28592, mapping_iso_8859_2  , 161 }, // the default source cp
 {"CP437",        437, mapping_unicode_cp437,  0 }, // the default target cp (in non UTF8 version)
 {"ISO-8859-1", 28591, NULL                , 256 },
 {"ISO-8859-3", 28593, mapping_iso_8859_3  , 161 },
 {"ISO-8859-4", 28594, mapping_iso_8859_4  , 161 },
 {"ISO-8859-5", 28595, mapping_iso_8859_5  , 161 },
 {"ISO-8859-6", 28596, mapping_iso_8859_6  , 172 },
 {"ISO-8859-7", 28597, mapping_iso_8859_7  , 161 },
 {"ISO-8859-8", 28598, mapping_iso_8859_8  , 170 },
 {"ISO-8859-9", 28599, mapping_iso_8859_9  , 208 },
 {"ISO-8859-10",28600, mapping_iso_8859_10 , 161 },
 {"ISO-8859-11",28601, mapping_iso_8859_11 , 161 },
 {"ISO-8859-13",28603, mapping_iso_8859_13 , 161 },
 {"ISO-8859-14",28604, mapping_iso_8859_14 , 161 },
 {"ISO-8859-15",28605, mapping_iso_8859_15 , 164 },
 {"ISO-8859-16",28606, mapping_iso_8859_16 , 161 },
 {"CP424",      20424, mapping_unicode_cp424 ,   4 },
 {"CP720",        720, mapping_unicode_cp720 , 130 },
 {"CP737",        737, mapping_unicode_cp737 , 128 },
 {"CP775",        775, mapping_unicode_cp775 , 128 },
 {"CP850",        850, mapping_unicode_cp850 , 128 },
 {"CP852",        852, mapping_unicode_cp852 , 128 },
 {"CP855",        855, mapping_unicode_cp855 , 128 },
 {"CP856",        856, mapping_unicode_cp856 , 128 }, // ???
 {"CP857",        857, mapping_unicode_cp857 , 128 },
 {"CP858",        858, mapping_unicode_cp858 , 128 },
 {"CP860",        860, mapping_unicode_cp860 , 128 },
 {"CP861",        861, mapping_unicode_cp861 , 128 },
 {"CP862",        862, mapping_unicode_cp862 , 128 },
 {"CP863",        863, mapping_unicode_cp863 , 128 },
 {"CP864",        864, mapping_unicode_cp864 ,  37 },
 {"CP865",        865, mapping_unicode_cp865 , 128 },
 {"CP866",        866, mapping_unicode_cp866 , 128 },
 {"CP869",        869, mapping_unicode_cp869 , 128 },
 {"CP874",        874, mapping_unicode_cp874 , 128 },
 {"CP903",        903, NULL                  , 256 },
 {"CP932",        932, mapping_unicode_cp932 , 161 },
 {"CP936",        936, NULL                  , 256 }, // TODO: this codepage is not complete on this way, it contains 2 byte codes too
 {"CP949",        949, NULL                  , 256 }, // TODO: this codepage is not complete on this way, it contains 2 byte codes too
 {"CP950",        950, NULL                  , 256 },
 {"CP1006",      1006, mapping_unicode_cp1006, 161 }, // ???
 {"CP1250",      1250, mapping_unicode_cp1250, 128 },
 {"CP1251",      1251, mapping_unicode_cp1251, 128 },
 {"CP1252",      1252, mapping_unicode_cp1252, 128 },
 {"CP1253",      1253, mapping_unicode_cp1253, 128 },
 {"CP1254",      1254, mapping_unicode_cp1254, 128 },
 {"CP1255",      1255, mapping_unicode_cp1255, 128 },
 {"CP1256",      1256, mapping_unicode_cp1256, 128 },
 {"CP1257",      1257, mapping_unicode_cp1257, 128 },
 {"CP1258",      1258, mapping_unicode_cp1258, 128 },
 {NULL,0,NULL,0} // end of table (the lookups stop at a NULL cp_name / zero cp_id_num)
};

// Encoding names that select a unicode text type instead of an 8-bit codepage map;
// searched by textconv_select_textconvtype_by_name(). The list is closed by a NULL name.
static const struct textconv_name_to_type_s{
	const char *textconv_name;        // encoding name, compared with pds_stricmp()
	const mpxp_uint8_t textconv_type; // MPXPLAY_TEXTCONV_TYPE_ value used as the source text type
}textconv_name_to_types[] = {
 {"UTF-8",    MPXPLAY_TEXTCONV_TYPE_UTF8},
 {"UCS-2LE",  MPXPLAY_TEXTCONV_TYPE_UTF16LE},
 {"UCS-2BE",  MPXPLAY_TEXTCONV_TYPE_UTF16BE},
 {"UTF-16LE", MPXPLAY_TEXTCONV_TYPE_UTF16LE}, // TODO: MPXPLAY_TEXTCONV_TYPE_UTF16 is handled as UCS-2 only (Byte Order Mark and Surrogate Pairs are not handled)
 {"UTF-16BE", MPXPLAY_TEXTCONV_TYPE_UTF16BE},
 {NULL, 0} // end of list
};

// Named HTML character entities recognized by playlist_textconv_by_sourcemap().
// The strings are stored without the leading '&' (the parser skips it before comparing)
// but with the closing ';'. The list is closed by a NULL string.
static const struct html_to_unicode_s{
 const char *html_str;        // entity text after the '&'
 const mpxp_uint16_t unicode; // character code the entity is replaced with
}html_to_unicodes[] = {
 {"amp;",  '&' },
 {"apos;", '\'' },
 {"gt;",   '>' },
 {"lt;",   '<' },
 {"quot;", '\"' },
 {NULL,0} // end of list
};

// Size in bytes of one code unit, indexed by the text type (0..MPXPLAY_TEXTCONV_TYPE_MAX).
// playlist_textconv_by_sourcemap() uses it as the default input/output step and to choose
// between a 1 byte and a 2 byte string terminator.
// NOTE(review): the element order has to follow the MPXPLAY_TEXTCONV_TYPE_ numbering, which
// is declared outside this file - confirm when adding a type.
static const unsigned char textconv_convtypes_datasizes[MPXPLAY_TEXTCONV_TYPE_MAX+1]=
{1,1,2,2
#ifndef MPXPLAY_UTF8
,1,1 // two more 1 byte types exist in the non UTF8 build only
#endif
};

#pragma pack(pop)

// names of the source and target codepages, looked up in textconv_codepage_maps by textconv_cpmaps_init()
// (never assigned in this file; NOTE(review): presumably filled from the program configuration - confirm)
char *textconv_codepage_sourcename,*textconv_codepage_targetname;
// codepage maps selected by textconv_cpmaps_init()
static const struct cp_map_s *textconv_cpsource_map,*textconv_cptarget_map;
// unicode (0..65535) to 8-bit character table, allocated by playlist_textconv_init_unicode_IN()
static unsigned char *textconv_table_unicode_to_char;
#ifdef MPXPLAY_UTF8
// map entry and map data generated from the Windows ANSI codepage by textconv_cpmaps_init()
// if that codepage is not in textconv_codepage_maps
static struct cp_map_s textconv_cpmap_generated;
static mpxp_uint16_t *mapping_unicode_generated;
#else
// 256 byte character translation tables of the decoding (IN) and encoding (OUT) side,
// allocated by playlist_textconv_init_codepage_IN() / playlist_textconv_init_codepage_OUT()
static unsigned char *textconv_table_cp_to_cp_IN;
static unsigned char *textconv_table_cp_to_cp_OUT;
#endif

// Search textconv_codepage_maps for an entry named codepage_name (compared with pds_stricmp).
// Returns the matching entry, or NULL if the name is NULL, empty or unknown.
static const struct cp_map_s *textconv_select_mapping_by_name(char *codepage_name)
{
	const struct cp_map_s *entry;

	if(!codepage_name || !codepage_name[0])
		return NULL;

	// the table is closed by an entry with a NULL name
	for(entry = &textconv_codepage_maps[0]; entry->cp_name; entry++)
		if(pds_stricmp((char *)entry->cp_name, codepage_name) == 0)
			return entry;

	return NULL;
}

// Build the conversion type (source type packed with the MPXPLAY destination type) for an encoding name.
// Names listed in textconv_name_to_types select their unicode source type;
// a NULL, empty or unlisted name selects the 8-bit CHAR source type.
static mpxp_uint8_t textconv_select_textconvtype_by_name(char *conv_name)
{
	const struct textconv_name_to_type_s *entry;

	if(conv_name && conv_name[0]){
		// the list is closed by an entry with a NULL name
		for(entry = &textconv_name_to_types[0]; entry->textconv_name; entry++)
			if(pds_stricmp((char *)entry->textconv_name, conv_name) == 0)
				return MPXPLAY_TEXTCONV_TYPES_PUT(entry->textconv_type, MPXPLAY_TEXTCONV_TYPE_MPXPLAY);
	}

	return MPXPLAY_TEXTCONV_TYPES_PUT(MPXPLAY_TEXTCONV_TYPE_CHAR, MPXPLAY_TEXTCONV_TYPE_MPXPLAY);
}

#ifdef MPXPLAY_WIN32

#define WIN32_LEAN_AND_MEAN 1
#include <windows.h>

// set to 1 by the first textconv_cpmaps_init() call; later calls return immediately
static unsigned int textconv_cpmaps_ok;

// Search textconv_codepage_maps for an entry with the numeric codepage id.
// Returns the matching entry, or NULL if id is 0 or not in the table.
static const struct cp_map_s *textconv_select_mapping_by_id(unsigned int id)
{
 const struct cp_map_s *entry;

 if(!id)
  return NULL;

 // the table is closed by an entry with a zero id
 for(entry=&textconv_codepage_maps[0]; entry->cp_id_num; entry++)
  if(entry->cp_id_num==id)
   return entry;

 return NULL;
}

static void textconv_cpmaps_init(void)
{
 if(textconv_cpmaps_ok)
  return;
 textconv_cpmaps_ok=1;

 textconv_cpsource_map=textconv_select_mapping_by_name(textconv_codepage_sourcename);
 textconv_cptarget_map=textconv_select_mapping_by_name(textconv_codepage_targetname);
#ifdef MPXPLAY_UTF8
 if(!textconv_cpsource_map){
  textconv_cpsource_map=textconv_select_mapping_by_id(GetACP());
  if(!textconv_cpsource_map){
   mpxp_uint16_t *mug;
   char asciistr[260];
   mapping_unicode_generated=mug=(mpxp_uint16_t*)pds_calloc(1,(256-32+8)*sizeof(mpxp_uint16_t));
   if(mug){
    unsigned int i;
    for(i=32;i<256;i++)
     asciistr[i]=i;
    asciistr[256]=0;
    for(i=32;i<256;){
     unsigned int len=MultiByteToWideChar(CP_ACP,0,(const char *)&asciistr[i],-1,(wchar_t *)mug,256+4-i);
     //fprintf(stdout,"%d len:%d   ",i,len);
     if(len){
      mug+=len;
      i+=len;
     }else{
      *mug++=(mpxp_uint16_t)('_');
      i++;
     }
    }
    //mug=mapping_unicode_generated;
    //for(i=32;i<256;i++,mug++)
    // fprintf(stdout,"| %3d %c %4.4X    ",i,i,mug[0]);
    textconv_cpmap_generated.map=mapping_unicode_generated;
    textconv_cpmap_generated.begin=32;
    textconv_cpsource_map=&textconv_cpmap_generated;
   }else
    textconv_cpsource_map=&textconv_codepage_maps[0];
  }
 }
 #ifdef MPXPLAY_GUI_CONSOLE
 if(textconv_cptarget_map)
  textscreen_console_codepage=textconv_cptarget_map->cp_id_num;
 #endif
#else
 if(!textconv_cpsource_map){
  if(funcbit_test(id3textconv,ID3TEXTCONV_GET_WINCP))
   textconv_cpsource_map=textconv_select_mapping_by_id(GetACP());
  if(!textconv_cpsource_map)
   textconv_cpsource_map=&cp_maps[0];
 }
 if(!textconv_cptarget_map){
  if(funcbit_test(id3textconv,ID3TEXTCONV_GET_WINCP))
   textconv_cptarget_map=textconv_select_mapping_by_id(GetOEMCP());
  if(!textconv_cptarget_map)
   textconv_cptarget_map=&cp_maps[1];
 }
 textscreen_console_codepage=textconv_cptarget_map->cp_id_num;
 if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE)) // at -8
  textconv_cptarget_map=NULL; // WinChars/DosChars are used
 else if(textconv_cptarget_map==textconv_cpsource_map)
  textconv_cptarget_map=NULL;
#endif
}

#else

// Select the source and target codepage maps (non Win32 version).
// Does nothing if a source map is already selected. Unknown or missing names fall back
// to table entry [0] (source) and [1] (target); the target map is not selected
// while ID3TEXTCONV_CODEPAGE is set.
static void textconv_cpmaps_init(void)
{
 const struct cp_map_s *selected;

 if(textconv_cpsource_map) // already initialized
  return;

 selected=textconv_select_mapping_by_name(textconv_codepage_sourcename);
 textconv_cpsource_map=(selected)? selected : &textconv_codepage_maps[0];

 if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE))
  return;

 selected=textconv_select_mapping_by_name(textconv_codepage_targetname);
 textconv_cptarget_map=(selected)? selected : &textconv_codepage_maps[1];
}

#endif

//---------------------------------------------------------------------
// text decoding side

#ifndef MPXPLAY_UTF8
// Init the codepage conversion table of the decoding side (WinChars to DosChars or -8ucp to -8ccp).
// Builds the 256 byte table textconv_table_cp_to_cp_IN:
//  - codes below 32 are replaced by a space, all other codes map to themselves by default
//  - if ID3TEXTCONV_CODEPAGE is set, cp_winchars[i] is mapped to cp_doschars[i]
//  - else every source codepage character that has the same unicode value in the target
//    codepage is mapped to that target character, and ID3TEXTCONV_CODEPAGE gets enabled
// Returns 1 if the table is available, 0 if it cannot be built (no memory or no codepage maps).
static unsigned int playlist_textconv_init_codepage_IN(void)
{
 unsigned int i,j;
 const struct cp_map_s *sourcemap=textconv_cpsource_map,*targetmap=textconv_cptarget_map;
 const unsigned short *srmap;
 const unsigned int use_charlists=funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE);

 if(textconv_table_cp_to_cp_IN)
  return 1;

 // check the maps before the allocation (like the OUT side does), else a failed call
 // would leave an unfinished table behind and the next call would report success
 if(!use_charlists && (!sourcemap || !targetmap))
  return 0;

 textconv_table_cp_to_cp_IN=pds_malloc(256);
 if(!textconv_table_cp_to_cp_IN)
  return 0;
 for(i=0;i<32;i++)
  textconv_table_cp_to_cp_IN[i]=32;
 for(i=32;i<256;i++)
  textconv_table_cp_to_cp_IN[i]=i;

 if(use_charlists){ // WinChars/DosChars based codepage conversion
  const unsigned int pairs=pds_strlen(cp_winchars);
  for(i=0;i<pairs;i++) // index as unsigned char: a plain char above 127 may be negative
   textconv_table_cp_to_cp_IN[(unsigned char)cp_winchars[i]]=cp_doschars[i];
 }else{      // -8ucp to -8ccp codepage conversion
  srmap=sourcemap->map;
  for(i=0;i<(256-sourcemap->begin);i++,srmap++){
   const unsigned short *tm=targetmap->map,srcunicode=*srmap;
   for(j=0;j<(256-targetmap->begin);j++,tm++){
    if(*tm==srcunicode){
     textconv_table_cp_to_cp_IN[sourcemap->begin+i]=targetmap->begin+j;
     break;
    }
   }
  }
  funcbit_enable(id3textconv,ID3TEXTCONV_CODEPAGE);
 }

 return 1;
}
#endif

// Init the unicode (UTF16,UTF8) to CPNNN conversion.
// Builds the 65536 byte table textconv_table_unicode_to_char from the source codepage map:
// every unicode value defaults to '_', the codes below the begin of the map are used unchanged
// and each mapped code (begin..255) is stored at the index of its unicode value.
// Returns 1 if the table is available, 0 if it cannot be allocated.
static unsigned int playlist_textconv_init_unicode_IN(void)
{
 const unsigned short *cpmap;
 unsigned int code,first_mapped;

 if(textconv_table_unicode_to_char) // already built
  return 1;

 textconv_cpmaps_init();

 textconv_table_unicode_to_char=pds_malloc(65536*sizeof(*textconv_table_unicode_to_char));
 if(!textconv_table_unicode_to_char)
  return 0;

 pds_memset(textconv_table_unicode_to_char,'_',65536*sizeof(*textconv_table_unicode_to_char));

 cpmap=textconv_cpsource_map->map;
 first_mapped=textconv_cpsource_map->begin;

 for(code=0;code<first_mapped;code++)
  textconv_table_unicode_to_char[code]=code;

 for(code=first_mapped;code<256;code++){
#ifndef MPXPLAY_UTF8
  // to avoid invalid UTF-8 decodings: at validation only the characters of cp_winchars are accepted
  if(funcbit_test(id3textconv,ID3TEXTCONV_VALIDATE) && !pds_strchr(cp_winchars,(int)code))
   continue;
#endif
  textconv_table_unicode_to_char[cpmap[code-first_mapped]]=code;
 }

 return 1;
}

// Select the codepage maps and build the conversion tables of the decoding side.
// In the non UTF8 build the conversion flag of a table that cannot be built is switched off.
void mpxplay_playlist_textconv_init(void)
{
 textconv_cpmaps_init();
#ifndef MPXPLAY_UTF8
 if(playlist_textconv_init_codepage_IN()==0)
  funcbit_disable(id3textconv,ID3TEXTCONV_CODEPAGE);
 if(playlist_textconv_init_unicode_IN()==0)
  funcbit_disable(id3textconv,ID3TEXTCONV_UTF_ALL);
#else
 playlist_textconv_init_unicode_IN(); // the result is not used in the UTF8 build
#endif
}

// Free the conversion tables allocated by the init functions of this file.
// The pointers are cleared, so the tables can be built again by a later init or conversion call.
void mpxplay_playlist_textconv_close(void)
{
 if(textconv_table_unicode_to_char){
  pds_free(textconv_table_unicode_to_char);
  textconv_table_unicode_to_char=NULL;
 }
#ifdef MPXPLAY_UTF8
 if(mapping_unicode_generated){
  // drop every reference to the generated map before it is freed,
  // else a conversion after the close would read freed memory
  textconv_cpmap_generated.map=NULL;
  if(textconv_cpsource_map==&textconv_cpmap_generated){
   textconv_cpsource_map=NULL;
#ifdef MPXPLAY_WIN32
   textconv_cpmaps_ok=0; // let textconv_cpmaps_init() select (generate) the source map again
#endif
  }
  pds_free(mapping_unicode_generated);
  mapping_unicode_generated=NULL;
 }
#else
 if(textconv_table_cp_to_cp_IN){
  pds_free(textconv_table_cp_to_cp_IN);
  textconv_table_cp_to_cp_IN=NULL;
 }
 if(textconv_table_cp_to_cp_OUT){ // close of encoding side
  pds_free(textconv_table_cp_to_cp_OUT);
  textconv_table_cp_to_cp_OUT=NULL;
 }
#endif
}

#ifndef MPXPLAY_UTF8
// Init the codepage conversion table of the encoding side (the reverse of the IN table).
// Builds the 256 byte table textconv_table_cp_to_cp_OUT:
//  - codes below 32 are replaced by a space, all other codes map to themselves by default
//  - if ID3TEXTCONV_CODEPAGE is set, cp_doschars[i] is mapped back to cp_winchars[i],
//    except the ASCII letters and digits
//  - else every target codepage character that has the same unicode value in the source
//    codepage is mapped to that source character
// Returns 1 if the table is available, 0 if it cannot be built (no memory or no codepage maps).
static unsigned int playlist_textconv_init_codepage_OUT(void)
{
 unsigned int i,j;
 const struct cp_map_s *sourcemap,*targetmap;
 const unsigned short *tgmap;

 if(textconv_table_cp_to_cp_OUT)
  return 1;
 if((!textconv_cpsource_map || !textconv_cptarget_map) && !funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE))
  return 0;
 textconv_table_cp_to_cp_OUT=pds_malloc(256);
 if(!textconv_table_cp_to_cp_OUT)
  return 0;
 for(i=0;i<32;i++)
  textconv_table_cp_to_cp_OUT[i]=32;
 for(i=32;i<256;i++)
  textconv_table_cp_to_cp_OUT[i]=i;

 if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE)){
  const unsigned int pairs=pds_strlen(cp_doschars);
  for(i=0;i<pairs;i++){
   const unsigned char d=cp_doschars[i];
   if(!(d>='a' && d<='z') && !(d>='A' && d<='Z') && !(d>='0' && d<='9')) // ??? (else the converting back may be wrong)
    textconv_table_cp_to_cp_OUT[d]=cp_winchars[i]; // index by the unsigned value: a plain char above 127 may be negative
  }
 }else{
  sourcemap=textconv_cpsource_map;
  targetmap=textconv_cptarget_map;
  tgmap=targetmap->map;
  for(i=0;i<(256-targetmap->begin);i++,tgmap++){
   const unsigned short *sm=sourcemap->map,tgunicode=*tgmap;
   for(j=0;j<(256-sourcemap->begin);j++,sm++){
    if(*sm==tgunicode){
     textconv_table_cp_to_cp_OUT[targetmap->begin+i]=sourcemap->begin+j;
     break;
    }
   }
  }
 }
 return 1;
}
#endif

// Convert a text from the source text type to the destination text type of convtype.
//  sourcemap   : codepage map used to convert the 8-bit source characters to unicode at
//                unicode output (NULL: the 8-bit characters are used without mapping)
//  convtype    : source and destination text types (MPXPLAY_TEXTCONV_TYPES_PUT), optionally
//                with MPXPLAY_TEXTCONV_TYPEFLAG_HTML to decode the &name; &#num; &#xhex; codes
//                (at CHAR and UTF8 input only); the MPXPLAY type selects the native text type
//  src_string  : input text; it may be the same pointer as dest_string (in-place conversion,
//                the input is copied to the stack first if the output can be longer than the input)
//  src_len     : input length in bytes; 0 is handled as an empty input, at a negative value
//                the input is read to its terminating zero
//  dest_string : output buffer
//  dest_buflen : max number of text bytes to write (at least 2). The terminating zero
//                (1 byte, or 2 bytes at the 2-byte text types and at an empty input)
//                is stored after the text, so the buffer needs room for it too.
// Returns the number of text bytes written to dest_string (without the terminating zero).
static int playlist_textconv_by_sourcemap(const struct cp_map_s *sourcemap, mpxp_uint32_t convtype,char *src_string,int src_len,char *dest_string,unsigned int dest_buflen)
{
 unsigned int src_texttype=MPXPLAY_TEXTCONV_TYPES_GET_SRC(convtype);
 unsigned int dest_texttype=MPXPLAY_TEXTCONV_TYPES_GET_DEST(convtype);
 unsigned int src_htmltype;
 unsigned int index_in=0,index_out=0,src_datasize,dest_datasize=2,i;
 unsigned int src_utf=1,dest_utf=0;
 unsigned char *tmpbuf=NULL;
 const unsigned char *src_bytes; // unsigned view of src_string (a plain char may be signed)

 if(!convtype || !dest_string || (dest_buflen<2))
  return index_out;

 if(!src_string || (src_len==0)){
  if(src_string==dest_string)
   return index_out;
  goto conv_out_finish;
 }

 switch(src_texttype){
  case MPXPLAY_TEXTCONV_TYPE_MPXPLAY:
#ifdef MPXPLAY_UTF8
   src_texttype=MPXPLAY_TEXTCONV_TYPE_UTF8;
#else
   if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE))
    src_texttype=MPXPLAY_TEXTCONV_TYPE_CP_OUT;
   else
    src_texttype=MPXPLAY_TEXTCONV_TYPE_CHAR;
#endif
   break;
 }

 switch(dest_texttype){
  case MPXPLAY_TEXTCONV_TYPE_MPXPLAY:
#ifdef MPXPLAY_UTF8
   dest_texttype=MPXPLAY_TEXTCONV_TYPE_UTF8;
#else
   if(funcbit_test(id3textconv,ID3TEXTCONV_CODEPAGE))
    dest_texttype=MPXPLAY_TEXTCONV_TYPE_CP_IN;
   else
    dest_texttype=MPXPLAY_TEXTCONV_TYPE_CHAR;
#endif
   break;
 }

 if((src_texttype>MPXPLAY_TEXTCONV_TYPE_MAX) || (dest_texttype>MPXPLAY_TEXTCONV_TYPE_MAX))
  return index_out;

 src_datasize=textconv_convtypes_datasizes[src_texttype];
 dest_datasize=textconv_convtypes_datasizes[dest_texttype];

 // in-place conversion with a possibly longer output: work from a copy of the input
 if((src_string==dest_string) && ((dest_datasize>src_datasize) || (dest_texttype==MPXPLAY_TEXTCONV_TYPE_UTF8))){
  switch(src_texttype){
   case MPXPLAY_TEXTCONV_TYPE_UTF16LE:
   case MPXPLAY_TEXTCONV_TYPE_UTF16BE:
    if(src_len>0)
     i=src_len+2; // src_len is a byte count (see the end of the loop), +2 for the terminator
    else
     i=(pds_UTF16_strlen((mpxp_uint16_t *)src_string)+1)*2;
    break;
   default:
    if(src_len>0)
     i=src_len;
    else
     i=pds_strlen(src_string);
    i++;
    break;
  }
  if(!i)
   return 0;
  tmpbuf=(unsigned char *)alloca(i+8);
  if(!tmpbuf)
   return 0;
  pds_memcpy(tmpbuf,src_string,i);
  src_string=(char *)tmpbuf;
 }

 src_bytes=(const unsigned char *)src_string;

 switch(src_texttype){
  case MPXPLAY_TEXTCONV_TYPE_UTF8: // skip the UTF-8 byte order mark
   if((src_bytes[0]==0xef) && (src_bytes[1]==0xbb) && (src_bytes[2]==0xbf))
    index_in+=3;
   break;
  case MPXPLAY_TEXTCONV_TYPE_UTF16LE:
  case MPXPLAY_TEXTCONV_TYPE_UTF16BE: // skip the UTF-16 byte order mark
   i=PDS_GETB_LEU16(src_string);
   if(i==0xfffe || i==0xfeff)
    index_in+=src_datasize;
   break;
#ifndef MPXPLAY_UTF8
  case MPXPLAY_TEXTCONV_TYPE_CP_OUT:
   if(!playlist_textconv_init_codepage_OUT())
    src_texttype=MPXPLAY_TEXTCONV_TYPE_CHAR; // no OUT table: read the input as plain chars
   src_utf=0;
   break;
#endif
  default: src_utf=0; break;
 }

 switch(dest_texttype){
  case MPXPLAY_TEXTCONV_TYPE_UTF8:
  case MPXPLAY_TEXTCONV_TYPE_UTF16LE:
  case MPXPLAY_TEXTCONV_TYPE_UTF16BE:dest_utf=1;break;
#ifndef MPXPLAY_UTF8
  case MPXPLAY_TEXTCONV_TYPE_CP_IN:
   if(!playlist_textconv_init_codepage_IN())
    dest_texttype=MPXPLAY_TEXTCONV_TYPE_CHAR;
   break;
#endif
 }

 if(src_utf && !dest_utf)
  if(!playlist_textconv_init_unicode_IN())
   return 0;

 if(!sourcemap) // no map: the 8-bit source characters are not converted to unicode
  dest_utf=0;

 src_htmltype=((convtype&MPXPLAY_TEXTCONV_TYPEFLAG_HTML) && ((src_texttype==MPXPLAY_TEXTCONV_TYPE_CHAR) || (src_texttype==MPXPLAY_TEXTCONV_TYPE_UTF8)))? 1 : 0;

 do{
  unsigned short c,unicode=0;
  unsigned int src_codesize,dest_codesize;

  src_codesize = 0;

  if(src_htmltype){ // detect html codes only at ascii and utf8 input
   char *srcp = &src_string[index_in];
   if(src_string[index_in] == '&'){
    srcp++;
    if(*srcp == '#'){ // numeric code: &#num; or &#xhex;
     srcp++;
     src_codesize = 2;
     if((*srcp == 'x') || (*srcp == 'X')){
      srcp++; src_codesize++;
      unicode = pds_atol16(srcp);
     }else{
      unicode = pds_atol(srcp); // FIXME: limited to UTF-16 codes
     }
     while(*srcp && (*srcp != ';')){
      srcp++; src_codesize++;
     }
     src_codesize = (*srcp)? (src_codesize + 1) : 0;  // +1 == ';' (without ';' it's not a html code)
     if(unicode < 32)
      unicode = 32;
    }else{            // named code
     const struct html_to_unicode_s *htu = &html_to_unicodes[0];
     do{
      unsigned int len = pds_strlen((char *)htu->html_str);
      if(pds_strncmp(srcp, (char *)htu->html_str, len) == 0){
       unicode = htu->unicode;
       // FIXME: embedded codes are not supported (like: "S&amp;#xe9;pia" -> "Sepia")
       src_codesize = len + 1; // +1 == '&'
       break;
      }
      htu++;
     }while(htu->html_str);
    }
   }
  }

  if(!src_codesize){ // not a html code: read one character of the source type
   src_codesize=src_datasize;

   switch(src_texttype){
    case MPXPLAY_TEXTCONV_TYPE_CHAR:
#ifndef MPXPLAY_UTF8
    case MPXPLAY_TEXTCONV_TYPE_CP_OUT:
#endif
     c=src_bytes[index_in];
     if(!c)
      goto conv_out_finish;
#ifndef MPXPLAY_UTF8
     if(src_texttype==MPXPLAY_TEXTCONV_TYPE_CP_OUT)
      c=textconv_table_cp_to_cp_OUT[c];
#endif
     if(dest_utf && (c>=sourcemap->begin))
      unicode=sourcemap->map[c-sourcemap->begin];
     else
      unicode=c;
     break;
    case MPXPLAY_TEXTCONV_TYPE_UTF8:
     c=src_bytes[index_in];
     if(!c)
      goto conv_out_finish;
     src_codesize=0;
     if(c&0x80){
      if((c&0xe0)==0xe0){ // 3 bytes code
       unicode = (c&0x0F) << 12;
       c = src_bytes[index_in+1];
       if(c&0x80){
        unicode |= (c&0x3F) << 6;
        c = src_bytes[index_in+2];
        if(c&0x80){
         unicode |= (c&0x3F);
         src_codesize=3;
        }
       }
      }else{              // 2 bytes code
       unicode = (c&0x3F) << 6;
       c = src_bytes[index_in+1];
       if(c&0x80){
        unicode |= (c&0x3F);
        src_codesize=2;
       }
      }
     }
     if(!src_codesize){ // invalid or non-UTF8 code (like old Ogg)
      unicode=src_bytes[index_in];
      if(dest_utf && (unicode>=sourcemap->begin))
       unicode=sourcemap->map[unicode-sourcemap->begin];
      src_codesize=1;
     }
     break;
    case MPXPLAY_TEXTCONV_TYPE_UTF16LE:
     unicode=PDS_GETB_LEU16(&src_string[index_in]);
     if(!unicode)
      goto conv_out_finish;
     break;
    case MPXPLAY_TEXTCONV_TYPE_UTF16BE:
     unicode=PDS_GETB_BE16(&src_string[index_in]);
     if(!unicode)
      goto conv_out_finish;
     break;
    default: // a source type without reader (nothing to convert)
     goto conv_out_finish;
   }
  }

  index_in+=src_codesize;

  if(src_utf){
   switch(dest_texttype){
    case MPXPLAY_TEXTCONV_TYPE_CHAR:
#ifndef MPXPLAY_UTF8
    case MPXPLAY_TEXTCONV_TYPE_CP_IN:
#endif
     if(unicode!=0xfffe && unicode!=0xfeff && unicode!=0xffff)
      unicode=textconv_table_unicode_to_char[unicode];
     else
      unicode=0x20;
     break;
   }
  }

  // size of the output code (only UTF-8 has variable size)
  dest_codesize=dest_datasize;
  if(dest_texttype==MPXPLAY_TEXTCONV_TYPE_UTF8){
   if(unicode >= (1<<11))
    dest_codesize=3;
   else if(unicode >= (1<<7))
    dest_codesize=2;
  }
  if((index_out+dest_codesize)>dest_buflen) // the code doesn't fit into the rest of the output
   break;

  switch(dest_texttype){
   case MPXPLAY_TEXTCONV_TYPE_CHAR:
    PDS_PUTB_8U(dest_string,unicode);
    break;
   case MPXPLAY_TEXTCONV_TYPE_UTF8:
    if(dest_codesize==1){
     PDS_PUTB_8U(dest_string,unicode);
    }else if(dest_codesize==2){
     PDS_PUTB_8U(&dest_string[0], 0xc0 | (unicode >> 6));
     PDS_PUTB_8U(&dest_string[1], 0x80 | (unicode & 0x3f));
    }else{
     PDS_PUTB_8U(&dest_string[0], 0xe0 | (unicode >> 12));
     PDS_PUTB_8U(&dest_string[1], 0x80 | ((unicode >> 6) & 0x3f));
     PDS_PUTB_8U(&dest_string[2], 0x80 | (unicode & 0x3f));
    }
    break;
   case MPXPLAY_TEXTCONV_TYPE_UTF16LE:PDS_PUTB_LEU16(dest_string,unicode);break;
   case MPXPLAY_TEXTCONV_TYPE_UTF16BE:PDS_PUTB_BEU16(dest_string,unicode);break;
#ifndef MPXPLAY_UTF8
   case MPXPLAY_TEXTCONV_TYPE_CP_IN: // the table has 256 elements only (a html code of an 8-bit input can be larger)
    PDS_PUTB_8U(dest_string,((unicode<256)? textconv_table_cp_to_cp_IN[unicode] : '_'));
    break;
#endif
  }

  dest_string+=dest_codesize;
  index_out+=dest_codesize;
  if((src_len>0) && (index_in>=src_len))
   break;
 }while(index_out<dest_buflen);

conv_out_finish:

 if(dest_datasize==1)
  *dest_string=0;
 else
  PDS_PUTB_LEU16(dest_string,0);

 return index_out;
}

// Convert a text of the named encoding to Mpxplay's native string format (eg. UTF8 in Win32 version).
// cp_src_name is either a unicode encoding name (like "UTF-8") or a codepage name (like "CP1250");
// the codepage map is looked up only if the name doesn't select a unicode encoding.
// Returns the result of playlist_textconv_by_sourcemap().
int mpxplay_playlist_textconv_by_cpsrcname(char *cp_src_name,char *src_string,int src_len,char *dest_string,unsigned int dest_buflen)
{
	const mpxp_uint32_t chartype_conv = MPXPLAY_TEXTCONV_TYPES_PUT(MPXPLAY_TEXTCONV_TYPE_CHAR, MPXPLAY_TEXTCONV_TYPE_MPXPLAY);
	const mpxp_uint32_t convtype = textconv_select_textconvtype_by_name(cp_src_name);
	const struct cp_map_s *sourcemap;

	if(convtype == chartype_conv)
		sourcemap = textconv_select_mapping_by_name(cp_src_name);
	else
		sourcemap = NULL;

	return playlist_textconv_by_sourcemap(sourcemap, convtype, src_string, src_len, dest_string, dest_buflen);
}

// Convert a text between the text types of convtype, using the globally selected source codepage map.
// Returns the result of playlist_textconv_by_sourcemap().
int mpxplay_playlist_textconv_by_texttypes(mpxp_uint32_t convtype,char *src_string,int src_len,char *dest_string,unsigned int dest_buflen)
{
	const struct cp_map_s *global_sourcemap = textconv_cpsource_map;

	return playlist_textconv_by_sourcemap(global_sourcemap, convtype, src_string, src_len, dest_string, dest_buflen);
}
