unrealircd

- supernets unrealircd source & configuration
git clone git://git.acid.vegas/unrealircd.git
Log | Files | Refs | Archive | README | LICENSE

charsys.c (41744B)

      1 /*
      2  * Unreal Internet Relay Chat Daemon, src/charsys.c
      3  * (C) Copyright 2005-2017 Bram Matthys and The UnrealIRCd Team.
      4  *
      5  * Character system: This subsystem deals with finding out whether a
      6  * character should be allowed or not in nicks (nicks only for now).
      7  *
      8  * This program is free software; you can redistribute it and/or modify
      9  * it under the terms of the GNU General Public License as published by
     10  * the Free Software Foundation; either version 1, or (at your option)
     11  * any later version.
     12  *
     13  * This program is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16  * GNU General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU General Public License
     19  * along with this program; if not, write to the Free Software
     20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     21  */
     22 
     23 #include "unrealircd.h"
     24 
     25 #ifndef ARRAY_SIZEOF
     26  #define ARRAY_SIZEOF(x) (sizeof((x))/sizeof((x)[0]))
     27 #endif
     28 
/* Module descriptor for the charsys module. */
ModuleHeader MOD_HEADER
= {
	"charsys",	/* Name of module */
	"5.0", /* Version */
	"Character System (set::allowed-nickchars)", /* Short description of module */
	"UnrealIRCd Team", /* presumably the author field -- TODO confirm against ModuleHeader */
	"unrealircd-6", /* presumably the module API/version tag -- TODO confirm against ModuleHeader */
};
     37 
     38 /* NOTE: it is guaranteed that char is unsigned by compiling options
     39  *       (-funsigned-char @ gcc, /J @ MSVC)
     40  * NOTE2: Original credit for supplying the correct chinese
     41  *        coderanges goes to: RexHsu, Mr.WebBar and Xuefer
     42  */
     43 
/** Our multibyte structure: one allowed two-byte range, kept in a
 * singly linked list that isvalidmbyte() walks from the head.
 */
typedef struct MBList MBList;
struct MBList
{
	MBList *next; /* next range in the list */
	char s1, e1, s2, e2; /* 1st byte must be within s1..e1 and 2nd byte within s2..e2 (both inclusive) */
};
MBList *mblist = NULL, *mblist_tail = NULL; /* list head and tail; charsys_addmultibyterange() appends at the tail */
     52 
/* Use this to prevent mixing of certain combinations
 * (such as GBK & high-ascii, etc)
 */
static int langav = 0; /* OR of the LANGAV_* flags of all languages passed to charsys_test_language() */
char langsinuse[4096]; /* comma separated language codes, (re)built by charsys_finish() */
     58 
/* bitmasks: (stored in LangList.setflags and accumulated in langav;
 * 0x000400 and 0x000800 are not assigned)
 */
#define LANGAV_ASCII			0x000001 /* 8 bit ascii */
#define LANGAV_LATIN1			0x000002 /* latin1 (western europe) */
#define LANGAV_LATIN2			0x000004 /* latin2 (eastern europe, eg: polish) */
#define LANGAV_ISO8859_7		0x000008 /* greek */
#define LANGAV_ISO8859_8I		0x000010 /* hebrew */
#define LANGAV_ISO8859_9		0x000020 /* turkish */
#define LANGAV_W1250			0x000040 /* windows-1250 (eg: polish-w1250) */
#define LANGAV_W1251			0x000080 /* windows-1251 (eg: russian) */
#define LANGAV_LATIN2W1250		0x000100 /* Compatible with both latin2 AND windows-1250 (eg: hungarian) */
#define LANGAV_ISO8859_6		0x000200 /* arabic */
#define LANGAV_GBK			0x001000 /* (Chinese) GBK encoding */
#define LANGAV_UTF8			0x002000 /* any UTF8 encoding */
#define LANGAV_LATIN_UTF8		0x004000 /* UTF8: latin script */
#define LANGAV_CYRILLIC_UTF8		0x008000 /* UTF8: cyrillic script */
#define LANGAV_GREEK_UTF8		0x010000 /* UTF8: greek script */
#define LANGAV_HEBREW_UTF8		0x020000 /* UTF8: hebrew script */
#define LANGAV_ARABIC_UTF8		0x040000 /* UTF8: arabic script */
/** One selectable language / charset group. */
typedef struct LangList LangList;
struct LangList
{
	char *directive; /* name that is matched against the items of set::allowed-nickchars */
	char *code; /* comma separated short code(s); these are what ends up in langsinuse */
	int setflags; /* LANGAV_* bits that get OR'ed into langav when the language is tested */
};
     84 
/* MUST be alphabetized (first column): charsys_find_language() does a
 * binary search on the directive. The { NULL, NULL, 0 } entry terminates
 * the table for the linear scan in charsys_find_language_code().
 */
static LangList langlist[] = {
	{ "arabic-utf8", "ara-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_ARABIC_UTF8 },
	{ "belarussian-utf8", "blr-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "belarussian-w1251", "blr", LANGAV_ASCII|LANGAV_W1251 },
	{ "catalan",      "cat", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "catalan-utf8", "cat-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "chinese",      "chi-j,chi-s,chi-t", LANGAV_GBK },
	{ "chinese-ja",   "chi-j", LANGAV_GBK },
	{ "chinese-simp", "chi-s", LANGAV_GBK },
	{ "chinese-trad", "chi-t", LANGAV_GBK },
	{ "cyrillic-utf8", "blr-utf8,rus-utf8,ukr-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "czech",        "cze-m", LANGAV_ASCII|LANGAV_W1250 },
	{ "czech-utf8",   "cze-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "danish",       "dan", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "danish-utf8",  "dan-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "dutch",        "dut", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "dutch-utf8",   "dut-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "estonian-utf8","est-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "french",       "fre", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "french-utf8",  "fre-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "gbk",          "chi-s,chi-t,chi-j", LANGAV_GBK },
	{ "german",       "ger", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "german-utf8",  "ger-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "greek",        "gre", LANGAV_ASCII|LANGAV_ISO8859_7 },
	{ "greek-utf8",   "gre-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_GREEK_UTF8 },
	{ "hebrew",       "heb", LANGAV_ASCII|LANGAV_ISO8859_8I },
	{ "hebrew-utf8",  "heb-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_HEBREW_UTF8 },
	{ "hungarian",    "hun", LANGAV_ASCII|LANGAV_LATIN2W1250 },
	{ "hungarian-utf8","hun-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "icelandic",    "ice", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "icelandic-utf8","ice-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "italian",      "ita", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "italian-utf8", "ita-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "latin-utf8",   "cat-utf8,cze-utf8,dan-utf8,dut-utf8,fre-utf8,ger-utf8,hun-utf8,ice-utf8,ita-utf8,pol-utf8,rum-utf8,slo-utf8,spa-utf8,swe-utf8,tur-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "latin1",       "cat,dut,fre,ger,ita,spa,swe", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "latin2",       "hun,pol,rum", LANGAV_ASCII|LANGAV_LATIN2 },
	{ "latvian-utf8", "lav-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "lithuanian-utf8","lit-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "polish",       "pol", LANGAV_ASCII|LANGAV_LATIN2 },
	{ "polish-utf8",  "pol-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "polish-w1250", "pol-m", LANGAV_ASCII|LANGAV_W1250 },
	{ "romanian",     "rum", LANGAV_ASCII|LANGAV_LATIN2W1250 },
	{ "romanian-utf8","rum-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "russian-utf8", "rus-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "russian-w1251","rus", LANGAV_ASCII|LANGAV_W1251 },
	{ "slovak",       "slo-m", LANGAV_ASCII|LANGAV_W1250 },
	{ "slovak-utf8",  "slo-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "spanish",      "spa", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "spanish-utf8", "spa-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "swedish",      "swe", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "swedish-utf8", "swe-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "swiss-german", "swg", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "swiss-german-utf8", "swg-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "turkish",      "tur", LANGAV_ASCII|LANGAV_ISO8859_9 },
	{ "turkish-utf8", "tur-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "ukrainian-utf8", "ukr-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "ukrainian-w1251", "ukr", LANGAV_ASCII|LANGAV_W1251 },
	{ "windows-1250", "cze-m,pol-m,rum,slo-m,hun",  LANGAV_ASCII|LANGAV_W1250 },
	{ "windows-1251", "rus,ukr,blr", LANGAV_ASCII|LANGAV_W1251 },
	{ NULL, NULL, 0 }
};
    147 
/* For temporary use during config_run: the language codes collected by
 * charsys_doadd_language(), which charsys_finish() turns into langsinuse
 * and then frees again.
 */
typedef struct ILangList ILangList;
struct ILangList
{
	ILangList *prev, *next;
	char *name; /* one language code (a single element of LangList.code) */
};
ILangList *ilanglist = NULL;
    156 
/* These characters are ALWAYS disallowed... from remote, in
 * multibyte, etc.. even though this might mean a certain
 * (legit) character cannot be used (eg: in chinese GBK).
 * - ! (nick!user separator)
 * - prefix chars: +, %, @, &, ~
 * - channel chars: #
 * - scary chars: $, :, ', ", ?, *, ',', '.'
 * NOTE: the caller should also check for ascii <= 32.
 * [CHANGING THIS WILL CAUSE SECURITY/SYNCH PROBLEMS AND WILL
 *  VIOLATE YOUR ""RIGHT"" ON SUPPORT IMMEDIATELY]
 */
const char *illegalnickchars = "!+%@&~#$:'\"?*,.";
    169 
/* Forward declarations.
 * _do_nick_name, _do_remote_nick_name and _charsys_get_current_languages
 * are the functions that MOD_TEST registers as efunctions.
 */
int _do_nick_name(char *nick);
int _do_remote_nick_name(char *nick);
static int do_nick_name_multibyte(char *nick);
static int do_nick_name_standard(char *nick);
void charsys_reset(void);
void charsys_reset_pretest(void);
void charsys_finish(void);
void charsys_addmultibyterange(char s1, char e1, char s2, char e2);
void charsys_addallowed(char *s);
int charsys_test_language(char *name);
void charsys_add_language(char *name);
static void charsys_doadd_language(char *name);
int charsys_config_test(ConfigFile *cf, ConfigEntry *ce, int type, int *errs);
int charsys_config_run(ConfigFile *cf, ConfigEntry *ce, int type);
int charsys_config_posttest(int *errs);
char *_charsys_get_current_languages(void);
    187 
/* Module test phase: registers the nick checking efunctions, resets the
 * character tables and the pretest state, and hooks the handlers for
 * config testing and post-testing.
 */
MOD_TEST()
{
	MARK_AS_OFFICIAL_MODULE(modinfo);
	EfunctionAdd(modinfo->handle, EFUNC_DO_NICK_NAME, _do_nick_name);
	EfunctionAdd(modinfo->handle, EFUNC_DO_REMOTE_NICK_NAME, _do_remote_nick_name);
	EfunctionAddString(modinfo->handle, EFUNC_CHARSYS_GET_CURRENT_LANGUAGES, _charsys_get_current_languages);
	charsys_reset();
	charsys_reset_pretest();
	HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, charsys_config_test);
	HookAdd(modinfo->handle, HOOKTYPE_CONFIGPOSTTEST, 0, charsys_config_posttest);
	return MOD_SUCCESS;
}
    200 
/* Module init phase: hooks charsys_config_run() so that the languages
 * listed in set::allowed-nickchars get added when the config is run.
 */
MOD_INIT()
{
	MARK_AS_OFFICIAL_MODULE(modinfo);
	HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, charsys_config_run);
	return MOD_SUCCESS;
}
    207 
/* Is first run when server is 100% ready.
 * Finalizes the language list (builds langsinuse) via charsys_finish().
 */
MOD_LOAD()
{
	charsys_finish();
	return MOD_SUCCESS;
}
    214 
/* Called when module is unloaded. Nothing is released here. */
MOD_UNLOAD()
{
	return MOD_SUCCESS;
}
    220 
    221 int charsys_config_test(ConfigFile *cf, ConfigEntry *ce, int type, int *errs)
    222 {
    223 	int errors = 0;
    224 	ConfigEntry *cep;
    225 
    226 	if (type != CONFIG_SET)
    227 		return 0;
    228 
    229 	/* We are only interrested in set::allowed-nickchars... */
    230 	if (!ce || !ce->name || strcmp(ce->name, "allowed-nickchars"))
    231 		return 0;
    232 
    233 	if (ce->value)
    234 	{
    235 		config_error("%s:%i: set::allowed-nickchars: please use 'allowed-nickchars { name; };' "
    236 					 "and not 'allowed-nickchars name;'",
    237 					 ce->file->filename, ce->line_number);
    238 		/* Give up immediately. Don't bother the user with any other errors. */
    239 		errors++;
    240 		*errs = errors;
    241 		return -1;
    242 	}
    243 
    244 	for (cep = ce->items; cep; cep=cep->next)
    245 	{
    246 		if (!charsys_test_language(cep->name))
    247 		{
    248 			config_error("%s:%i: set::allowed-nickchars: Unknown (sub)language '%s'",
    249 				ce->file->filename, ce->line_number, cep->name);
    250 			errors++;
    251 		}
    252 	}
    253 
    254 	*errs = errors;
    255 	return errors ? -1 : 1;
    256 }
    257 
    258 int charsys_config_run(ConfigFile *cf, ConfigEntry *ce, int type)
    259 {
    260 	ConfigEntry *cep;
    261 
    262 	if (type != CONFIG_SET)
    263 		return 0;
    264 
    265 	/* We are only interrested in set::allowed-nickchars... */
    266 	if (!ce || !ce->name || strcmp(ce->name, "allowed-nickchars"))
    267 		return 0;
    268 
    269 	for (cep = ce->items; cep; cep = cep->next)
    270 		charsys_add_language(cep->name);
    271 
    272 	return 1;
    273 }
    274 
    275 /** Check if the specified charsets during the TESTING phase can be
    276  * premitted without getting into problems.
    277  * RETURNS: -1 in case of failure, 1 if ok
    278  */
    279 int charsys_config_posttest(int *errs)
    280 {
    281 	int errors = 0;
    282 	int x=0;
    283 
    284 	if ((langav & LANGAV_ASCII) && (langav & LANGAV_GBK))
    285 	{
    286 		config_error("ERROR: set::allowed-nickchars specifies incorrect combination "
    287 		             "of languages: high-ascii languages (such as german, french, etc) "
    288 		             "cannot be mixed with chinese/..");
    289 		return -1;
    290 	}
    291 	if (langav & LANGAV_LATIN_UTF8)
    292 		x++;
    293 	if (langav & LANGAV_GREEK_UTF8)
    294 		x++;
    295 	if (langav & LANGAV_CYRILLIC_UTF8)
    296 		x++;
    297 	if (langav & LANGAV_HEBREW_UTF8)
    298 		x++;
    299 	if (langav & LANGAV_LATIN1)
    300 		x++;
    301 	if (langav & LANGAV_LATIN2)
    302 		x++;
    303 	if (langav & LANGAV_ISO8859_6)
    304 		x++;
    305 	if (langav & LANGAV_ISO8859_7)
    306 		x++;
    307 	if (langav & LANGAV_ISO8859_9)
    308 		x++;
    309 	if (langav & LANGAV_W1250)
    310 		x++;
    311 	if (langav & LANGAV_W1251)
    312 		x++;
    313 	if ((langav & LANGAV_LATIN2W1250) && !(langav & LANGAV_LATIN2) && !(langav & LANGAV_W1250))
    314 	    x++;
    315 	if (x > 1)
    316 	{
    317 #if 0
    318 // I don't think this should be hard error, right? Some combinations may be problematic, but not all.
    319 		if (langav & LANGAV_LATIN_UTF8)
    320 		{
    321 			config_error("ERROR: set::allowed-nickchars: you cannot combine 'latin-utf8' with any other character set");
    322 			errors++;
    323 		}
    324 		if (langav & LANGAV_GREEK_UTF8)
    325 		{
    326 			config_error("ERROR: set::allowed-nickchars: you cannot combine 'greek-utf8' with any other character set");
    327 			errors++;
    328 		}
    329 		if (langav & LANGAV_CYRILLIC_UTF8)
    330 		{
    331 			config_error("ERROR: set::allowed-nickchars: you cannot combine 'cyrillic-utf8' with any other character set");
    332 			errors++;
    333 		}
    334 		if (langav & LANGAV_HEBREW_UTF8)
    335 		{
    336 			config_error("ERROR: set::allowed-nickchars: you cannot combine 'hebrew-utf8' with any other character set");
    337 			errors++;
    338 		}
    339 		if (langav & LANGAV_ARABIC_UTF8)
    340 		{
    341 			config_error("ERROR: set::allowed-nickchars: you cannot combine 'arabic-utf8' with any other character set");
    342 			errors++;
    343 		}
    344 #endif
    345 		config_status("WARNING: set::allowed-nickchars: Mixing of charsets (eg: latin1+latin2) may cause display problems");
    346 	}
    347 
    348 	*errs = errors;
    349 	return errors ? -1 : 1;
    350 }
    351 
    352 /** Called on boot and just before config run */
    353 void charsys_reset(void)
    354 {
    355 	int i;
    356 	MBList *m, *m_next;
    357 
    358 	/* First, reset everything */
    359 	for (i=0; i < 256; i++)
    360 		char_atribs[i] &= ~ALLOWN;
    361 	for (m=mblist; m; m=m_next)
    362 	{
    363 		m_next = m->next;
    364 		safe_free(m);
    365 	}
    366 	mblist=mblist_tail=NULL;
    367 	/* Then add the default which will always be allowed */
    368 	charsys_addallowed("0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyzy{|}");
    369 	langav = 0;
    370 	langsinuse[0] = '\0';
    371 #ifdef DEBUGMODE
    372 	if (ilanglist)
    373 		abort();
    374 #endif
    375 }
    376 
    377 void charsys_reset_pretest(void)
    378 {
    379 	langav = 0;
    380 	non_utf8_nick_chars_in_use = 0;
    381 }
    382 
    383 static inline void ilang_swap(ILangList *one, ILangList *two)
    384 {
    385 	char *tmp = one->name;
    386 	one->name = two->name;
    387 	two->name = tmp;
    388 }
    389 
    390 static void ilang_sort(void)
    391 {
    392 	ILangList *outer, *inner;
    393 
    394 	/* Selection sort -- perhaps optimize to qsort/whatever if
    395      * possible? ;)
    396      */
    397 	for (outer=ilanglist; outer; outer=outer->next)
    398 	{
    399 		for (inner=outer->next; inner; inner=inner->next)
    400 		{
    401 			if (strcmp(outer->name, inner->name) > 0)
    402 				ilang_swap(outer, inner);
    403 		}
    404 	}
    405 }
    406 
    407 void charsys_finish(void)
    408 {
    409 	ILangList *e, *e_next;
    410 
    411 	/* Sort alphabetically */
    412 	ilang_sort();
    413 
    414 	/* [note: this can be optimized] */
    415 	langsinuse[0] = '\0';
    416 	for (e=ilanglist; e; e=e->next)
    417 	{
    418 		strlcat(langsinuse, e->name, sizeof(langsinuse));
    419 		if (e->next)
    420 			strlcat(langsinuse, ",", sizeof(langsinuse));
    421 	}
    422 
    423 	/* Free everything */
    424 	for (e=ilanglist; e; e=e_next)
    425 	{
    426 		e_next=e->next;
    427 		safe_free(e->name);
    428 		safe_free(e);
    429 	}
    430 	ilanglist = NULL;
    431 #ifdef DEBUGMODE
    432 	if (strlen(langsinuse) > 490)
    433 		abort();
    434 #endif
    435 	charsys_check_for_changes();
    436 }
    437 
    438 /** Add a character range to the multibyte list.
    439  * Eg: charsys_addmultibyterange(0xaa, 0xbb, 0x00, 0xff) for 0xaa00-0xbbff.
    440  * @param s1 Start of highest byte
    441  * @param e1 End of highest byte
    442  * @param s2 Start of lowest byte
    443  * @param e2 End of lowest byte
    444  */
    445 void charsys_addmultibyterange(char s1, char e1, char s2, char e2)
    446 {
    447 MBList *m = safe_alloc(sizeof(MBList));
    448 
    449 	m->s1 = s1;
    450 	m->e1 = e1;
    451 	m->s2 = s2;
    452 	m->e2 = e2;
    453 
    454 	if (mblist_tail)
    455 		mblist_tail->next = m;
    456 	else
    457 		mblist = m;
    458 	mblist_tail = m;
    459 }
    460 
    461 /** Adds all characters in the specified string to the allowed list. */
    462 void charsys_addallowed(char *s)
    463 {
    464 	for (; *s; s++)
    465 	{
    466 		if ((*s <= 32) || strchr(illegalnickchars, *s))
    467 		{
    468 			config_error("INTERNAL ERROR: charsys_addallowed() called for illegal characters: %s", s);
    469 #ifdef DEBUGMODE
    470 			abort();
    471 #endif
    472 		}
    473 		char_atribs[(unsigned char)*s] |= ALLOWN;
    474 	}
    475 }
    476 
    477 void charsys_addallowed_range(unsigned char from, unsigned char to)
    478 {
    479 	unsigned char i;
    480 
    481 	for (i = from; i != to; i++)
    482 		char_atribs[i] |= ALLOWN;
    483 }
    484 
    485 int _do_nick_name(char *nick)
    486 {
    487 	if (mblist)
    488 		return do_nick_name_multibyte(nick);
    489 	else
    490 		return do_nick_name_standard(nick);
    491 }
    492 
    493 static int do_nick_name_standard(char *nick)
    494 {
    495 	int len;
    496 	char *ch;
    497 
    498 	if ((*nick == '-') || isdigit(*nick))
    499 		return 0;
    500 
    501 	for (ch=nick,len=0; *ch && len <= NICKLEN; ch++, len++)
    502 		if (!isvalid(*ch))
    503 			return 0; /* reject the full nick */
    504 	*ch = '\0';
    505 	return len;
    506 }
    507 
    508 static int isvalidmbyte(unsigned char c1, unsigned char c2)
    509 {
    510 	MBList *m;
    511 
    512 	for (m=mblist; m; m=m->next)
    513 	{
    514 		if ((c1 >= m->s1) && (c1 <= m->e1) &&
    515 		    (c2 >= m->s2) && (c2 <= m->e2))
    516 		    return 1;
    517 	}
    518 	return 0;
    519 }
    520 
    521 /* hmmm.. there must be some problems with multibyte &
    522  * other high ascii characters I think (such as german etc).
    523  * Not sure if this can be solved? I don't think so... -- Syzop.
    524  */
    525 static int do_nick_name_multibyte(char *nick)
    526 {
    527 	int len;
    528 	char *ch;
    529 	int firstmbchar = 0;
    530 
    531 	if ((*nick == '-') || isdigit(*nick))
    532 		return 0;
    533 
    534 	for (ch=nick,len=0; *ch && len <= NICKLEN; ch++, len++)
    535 	{
    536 		/* Some characters are ALWAYS illegal, so they have to be disallowed here */
    537 		if ((*ch <= 32) || strchr(illegalnickchars, *ch))
    538 			return 0;
    539 		if (firstmbchar)
    540 		{
    541 			if (!isvalidmbyte(ch[-1], *ch))
    542 				return 0;
    543 			firstmbchar = 0;
    544 		} else if ((*ch) & 0x80)
    545 			firstmbchar = 1;
    546 		else if (!isvalid(*ch))
    547 			return 0;
    548 	}
    549 	if (firstmbchar)
    550 	{
    551 		ch--;
    552 		len--;
    553 	}
    554 	*ch = '\0';
    555 	return len;
    556 }
    557 
    558 /** Does some very basic checking on remote nickname.
    559  * It's only purpose is not to cause the whole network
    560  * to fall down in pieces, that's all. Display problems
    561  * are not really handled here. They are assumed to have been
    562  * checked by PROTOCTL NICKCHARS= -- Syzop.
    563  */
    564 int _do_remote_nick_name(char *nick)
    565 {
    566 	char *c;
    567 
    568 	/* Don't allow nicks to start with a digit, ever. */
    569 	if ((*nick == '-') || isdigit(*nick))
    570 		return 0;
    571 
    572 	/* Now the other, more relaxed checks.. */
    573 	for (c=nick; *c; c++)
    574 		if ((*c <= 32) || strchr(illegalnickchars, *c))
    575 			return 0;
    576 
    577 	return (c - nick);
    578 }
    579 
    580 static LangList *charsys_find_language(char *name)
    581 {
    582 	int start = 0;
    583 	int stop = ARRAY_SIZEOF(langlist)-1;
    584 	int mid;
    585 
    586 	while (start <= stop)
    587 	{
    588 		mid = (start+stop)/2;
    589 		if (!langlist[mid].directive || smycmp(name, langlist[mid].directive) < 0)
    590 			stop = mid-1;
    591 		else if (strcmp(name, langlist[mid].directive) == 0)
    592 			return &langlist[mid];
    593 		else
    594 			start = mid+1;
    595 	}
    596 	return NULL;
    597 }
    598 
    599 static LangList *charsys_find_language_code(char *code)
    600 {
    601 	int i;
    602 	for (i = 0; langlist[i].code; i++)
    603 		if (!strcasecmp(langlist[i].code, code))
    604 			return &langlist[i];
    605 	return NULL;
    606 }
    607 
    608 /** Check if language is available. */
    609 int charsys_test_language(char *name)
    610 {
    611 	LangList *l = charsys_find_language(name);
    612 
    613 	if (l)
    614 	{
    615 		langav |= l->setflags;
    616 		if (!(l->setflags & LANGAV_UTF8))
    617 			non_utf8_nick_chars_in_use = 1;
    618 		return 1;
    619 	}
    620 	if (!strcmp(name, "euro-west"))
    621 	{
    622 		config_error("set::allowed-nickchars: ERROR: 'euro-west' got renamed to 'latin1'");
    623 		return 0;
    624 	}
    625 	return 0;
    626 }
    627 
    628 static void charsys_doadd_language(char *name)
    629 {
    630 LangList *l;
    631 ILangList *li;
    632 int found;
    633 char tmp[512], *lang, *p;
    634 
    635 	l = charsys_find_language(name);
    636 	if (!l)
    637 	{
    638 #ifdef DEBUGMODE
    639 		abort();
    640 #endif
    641 		return;
    642 	}
    643 
    644 	strlcpy(tmp, l->code, sizeof(tmp));
    645 	for (lang = strtoken(&p, tmp, ","); lang; lang = strtoken(&p, NULL, ","))
    646 	{
    647 		/* Check if present... */
    648 		found=0;
    649 		for (li=ilanglist; li; li=li->next)
    650 			if (!strcmp(li->name, lang))
    651 			{
    652 				found = 1;
    653 				break;
    654 			}
    655 		if (!found)
    656 		{
    657 			/* Add... */
    658 			li = safe_alloc(sizeof(ILangList));
    659 			safe_strdup(li->name, lang);
    660 			AddListItem(li, ilanglist);
    661 		}
    662 	}
    663 }
    664 
    665 void charsys_add_language(char *name)
    666 {
    667 	char latin1=0, latin2=0, w1250=0, w1251=0, chinese=0;
    668 	char latin_utf8=0, cyrillic_utf8=0;
    669 
    670 	/** Note: there could well be some characters missing in the lists below.
    671 	 *        While I've seen other altnernatives that just allow pretty much
    672 	 *        every accent that exists even for dutch (where we rarely use
    673 	 *        accents except for like 3 types), I rather prefer to use a bit more
    674 	 *        reasonable aproach ;). That said, anyone is welcome to make
    675 	 *        suggestions about characters that should be added (or removed)
    676 	 *        of course. -- Syzop
    677 	 */
    678 
    679 	/* Add our language to our list */
    680 	charsys_doadd_language(name);
    681 
    682 	/* GROUPS */
    683 	if (!strcmp(name, "latin-utf8"))
    684 		latin_utf8 = 1;
    685 	else if (!strcmp(name, "cyrillic-utf8"))
    686 		cyrillic_utf8 = 1;
    687 	else if (!strcmp(name, "latin1"))
    688 		latin1 = 1;
    689 	else if (!strcmp(name, "latin2"))
    690 		latin2 = 1;
    691 	else if (!strcmp(name, "windows-1250"))
    692 		w1250 = 1;
    693 	else if (!strcmp(name, "windows-1251"))
    694 		w1251 = 1;
    695 	else if (!strcmp(name, "chinese") || !strcmp(name, "gbk"))
    696 		chinese = 1;
    697 
    698 	/* INDIVIDUAL CHARSETS */
    699 
    700 	/* [LATIN1] and [LATIN-UTF8] */
    701 	if (latin1 || !strcmp(name, "german"))
    702 	{
    703 		/* a", A", o", O", u", U" and es-zett */
    704 		charsys_addallowed("");
    705 	}
    706 	if (latin_utf8 || !strcmp(name, "german-utf8"))
    707 	{
    708 		charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84);
    709 		charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96);
    710 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
    711 		charsys_addmultibyterange(0xc3, 0xc3, 0x9f, 0x9f);
    712 		charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4);
    713 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
    714 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
    715 	}
    716 	if (latin1 || !strcmp(name, "swiss-german"))
    717 	{
    718 		/* a", A", o", O", u", U"  */
    719 		charsys_addallowed("");
    720 	}
    721 	if (latin_utf8 || !strcmp(name, "swiss-german-utf8"))
    722 	{
    723 		charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84);
    724 		charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96);
    725 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
    726 		charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4);
    727 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
    728 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
    729 	}
    730 	if (latin1 || !strcmp(name, "dutch"))
    731 	{
    732 		/* Ok, even though I'm Dutch myself, I've trouble getting
    733 		 * a proper list of this ;). I think I got them all now, but
    734 		 * I did not include "borrow-words" like words we use in Dutch
    735 		 * that are literal French. So if you really want to use them all,
    736 		 * I suggest you to use just latin1 :P.
    737 		 */
    738 		/* e', e", o", i", u", e`. */
    739 		charsys_addallowed("");
    740 	}
    741 	if (latin_utf8 || !strcmp(name, "dutch-utf8"))
    742 	{
    743 		charsys_addmultibyterange(0xc3, 0xc3, 0xa8, 0xa9);
    744 		charsys_addmultibyterange(0xc3, 0xc3, 0xab, 0xab);
    745 		charsys_addmultibyterange(0xc3, 0xc3, 0xaf, 0xaf);
    746 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
    747 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
    748 	}
    749 	if (latin1 || !strcmp(name, "danish"))
    750 	{
    751 		/* supplied by klaus:
    752 		 * <ae>, <AE>, ao, Ao, o/, O/ */
    753 		charsys_addallowed("");
    754 	}
    755 	if (latin_utf8 || !strcmp(name, "danish-utf8"))
    756 	{
    757 		charsys_addmultibyterange(0xc3, 0xc3, 0x85, 0x86);
    758 		charsys_addmultibyterange(0xc3, 0xc3, 0x98, 0x98);
    759 		charsys_addmultibyterange(0xc3, 0xc3, 0xa5, 0xa6);
    760 		charsys_addmultibyterange(0xc3, 0xc3, 0xb8, 0xb8);
    761 	}
    762 	if (latin1 || !strcmp(name, "french"))
    763 	{
    764 		/* A`, A^, a`, a^, weird-C, weird-c, E`, E', E^, E", e`, e', e^, e",
    765 		 * I^, I", i^, i", O^, o^, U`, U^, U", u`, u", u`, y" [not in that order, sry]
    766 		 * Hmm.. there might be more, but I'm not sure how common they are
    767 		 * and I don't think they are always displayed correctly (?).
    768 		 */
    769 		charsys_addallowed("");
    770 	}
    771 	if (latin_utf8 || !strcmp(name, "french-utf8"))
    772 	{
    773 		charsys_addmultibyterange(0xc3, 0xc3, 0x80, 0x80);
    774 		charsys_addmultibyterange(0xc3, 0xc3, 0x82, 0x82);
    775 		charsys_addmultibyterange(0xc3, 0xc3, 0x87, 0x8b);
    776 		charsys_addmultibyterange(0xc3, 0xc3, 0x8e, 0x8f);
    777 		charsys_addmultibyterange(0xc3, 0xc3, 0x94, 0x94);
    778 		charsys_addmultibyterange(0xc3, 0xc3, 0x99, 0x99);
    779 		charsys_addmultibyterange(0xc3, 0xc3, 0x9b, 0x9c);
    780 		charsys_addmultibyterange(0xc3, 0xc3, 0xa0, 0xa0);
    781 		charsys_addmultibyterange(0xc3, 0xc3, 0xa2, 0xa2);
    782 		charsys_addmultibyterange(0xc3, 0xc3, 0xa7, 0xab);
    783 		charsys_addmultibyterange(0xc3, 0xc3, 0xae, 0xaf);
    784 		charsys_addmultibyterange(0xc3, 0xc3, 0xb4, 0xb4);
    785 		charsys_addmultibyterange(0xc3, 0xc3, 0xb9, 0xb9);
    786 		charsys_addmultibyterange(0xc3, 0xc3, 0xbb, 0xbc);
    787 		charsys_addmultibyterange(0xc3, 0xc3, 0xbf, 0xbf);
    788 	}
    789 	if (latin1 || !strcmp(name, "spanish"))
    790 	{
    791 		/* a', A', e', E', i', I', o', O', u', U', u", U", n~, N~ */
    792 		charsys_addallowed("");
    793 	}
    794 	if (latin_utf8 || !strcmp(name, "spanish-utf8"))
    795 	{
    796 		charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81);
    797 		charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89);
    798 		charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d);
    799 		charsys_addmultibyterange(0xc3, 0xc3, 0x91, 0x91);
    800 		charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93);
    801 		charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a);
    802 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
    803 		charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1);
    804 		charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9);
    805 		charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad);
    806 		charsys_addmultibyterange(0xc3, 0xc3, 0xb1, 0xb1);
    807 		charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3);
    808 		charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba);
    809 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
    810 	}
    811 	if (latin1 || !strcmp(name, "italian"))
    812 	{
    813 		/* A`, E`, E', I`, I', O`, O', U`, U', a`, e`, e', i`, i', o`, o', u`, u' */
    814 		charsys_addallowed("");
    815 	}
    816 	if (latin_utf8 || !strcmp(name, "italian-utf8"))
    817 	{
    818 		charsys_addmultibyterange(0xc3, 0xc3, 0x80, 0x80);
    819 		charsys_addmultibyterange(0xc3, 0xc3, 0x88, 0x89);
    820 		charsys_addmultibyterange(0xc3, 0xc3, 0x8c, 0x8d);
    821 		charsys_addmultibyterange(0xc3, 0xc3, 0x92, 0x93);
    822 		charsys_addmultibyterange(0xc3, 0xc3, 0x99, 0x9a);
    823 		charsys_addmultibyterange(0xc3, 0xc3, 0xa0, 0xa0);
    824 		charsys_addmultibyterange(0xc3, 0xc3, 0xa8, 0xa9);
    825 		charsys_addmultibyterange(0xc3, 0xc3, 0xac, 0xad);
    826 		charsys_addmultibyterange(0xc3, 0xc3, 0xb2, 0xb3);
    827 		charsys_addmultibyterange(0xc3, 0xc3, 0xb9, 0xba);
    828 	}
    829 	if (latin1 || !strcmp(name, "catalan"))
    830 	{
    831 		/* supplied by Trocotronic */
    832 		/* a`, A`, e`, weird-c, weird-C, E`, e', E', i', I', o`, O`, o', O', u', U', i", I", u", U", weird-dot */
    833 		charsys_addallowed("");
    834 	}
    835 	if (latin_utf8 || !strcmp(name, "catalan-utf8"))
    836 	{
    837 		charsys_addmultibyterange(0xc3, 0xc3, 0x80, 0x80);
    838 		charsys_addmultibyterange(0xc3, 0xc3, 0x87, 0x89);
    839 		charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d);
    840 		charsys_addmultibyterange(0xc3, 0xc3, 0x8f, 0x8f);
    841 		charsys_addmultibyterange(0xc3, 0xc3, 0x92, 0x93);
    842 		charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a);
    843 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
    844 		charsys_addmultibyterange(0xc3, 0xc3, 0xa0, 0xa0);
    845 		charsys_addmultibyterange(0xc3, 0xc3, 0xa7, 0xa9);
    846 		charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad);
    847 		charsys_addmultibyterange(0xc3, 0xc3, 0xaf, 0xaf);
    848 		charsys_addmultibyterange(0xc3, 0xc3, 0xb2, 0xb3);
    849 		charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba);
    850 	}
    851 	if (latin1 || !strcmp(name, "swedish"))
    852 	{
    853 		/* supplied by Tank */
    854 		/* ao, Ao, a", A", o", O" */
    855 		charsys_addallowed("");
    856 	}
    857 	if (latin_utf8 || !strcmp(name, "swedish-utf8"))
    858 	{
    859 		charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x85);
    860 		charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96);
    861 		charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa5);
    862 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
    863 	}
    864 	if (latin1 || !strcmp(name, "icelandic"))
    865 	{
    866 		/* supplied by Saevar */
    867 		charsys_addallowed("");
    868 	}
    869 	if (latin_utf8 || !strcmp(name, "icelandic-utf8"))
    870 	{
    871 		charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81);
    872 		charsys_addmultibyterange(0xc3, 0xc3, 0x86, 0x86);
    873 		charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d);
    874 		charsys_addmultibyterange(0xc3, 0xc3, 0x90, 0x90);
    875 		charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93);
    876 		charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96);
    877 		charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a);
    878 		charsys_addmultibyterange(0xc3, 0xc3, 0x9d, 0x9e);
    879 		charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1);
    880 		charsys_addmultibyterange(0xc3, 0xc3, 0xa6, 0xa6);
    881 		charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad);
    882 		charsys_addmultibyterange(0xc3, 0xc3, 0xb0, 0xb0);
    883 		charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3);
    884 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
    885 		charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba);
    886 		charsys_addmultibyterange(0xc3, 0xc3, 0xbd, 0xbe);
    887 	}
    888 
    889 	/* [LATIN2] and rest of [LATIN-UTF8] */
    890 	/* actually hungarian is a special case, include it in both w1250 and latin2 ;p */
    891 	if (latin2 || w1250 || !strcmp(name, "hungarian"))
    892 	{
    893 		/* supplied by AngryWolf */
    894 		/* a', e', i', o', o", o~, u', u", u~, A', E', I', O', O", O~, U', U", U~ */
    895 		charsys_addallowed("");
    896 	}
    897 	if (latin_utf8 || !strcmp(name, "hungarian-utf8"))
    898 	{
    899 		charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81);
    900 		charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89);
    901 		charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d);
    902 		charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93);
    903 		charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96);
    904 		charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a);
    905 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
    906 		charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1);
    907 		charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9);
    908 		charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad);
    909 		charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3);
    910 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
    911 		charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba);
    912 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
    913 		charsys_addmultibyterange(0xc5, 0xc5, 0x90, 0x91);
    914 		charsys_addmultibyterange(0xc5, 0xc5, 0xb0, 0xb1);
    915 	}
    916 	/* same is true for romanian: latin2 & w1250 compatible */
    917 	if (latin2 || w1250 || !strcmp(name, "romanian"))
    918 	{
    919 		/* With some help from crazytoon */
    920 		/* 'S,' 's,' 'A^' 'A<' 'I^' 'T,' 'a^' 'a<' 'i^' 't,' */
    921 		charsys_addallowed("");
    922 	}
    923 	if (latin_utf8 || !strcmp(name, "romanian-utf8"))
    924 	{
    925 		charsys_addmultibyterange(0xc3, 0xc3, 0x82, 0x82);
    926 		charsys_addmultibyterange(0xc3, 0xc3, 0x8e, 0x8e);
    927 		charsys_addmultibyterange(0xc3, 0xc3, 0xa2, 0xa2);
    928 		charsys_addmultibyterange(0xc3, 0xc3, 0xae, 0xae);
    929 		charsys_addmultibyterange(0xc4, 0xc4, 0x82, 0x83);
    930 		charsys_addmultibyterange(0xc5, 0xc5, 0x9e, 0x9f);
    931 		charsys_addmultibyterange(0xc5, 0xc5, 0xa2, 0xa3);
    932 	}
    933 
    934 	if (latin2 || !strcmp(name, "polish"))
    935 	{
    936 		/* supplied by k4be */
    937 		charsys_addallowed("󶿼ʣӦ");
    938 	}
    939 	if (latin_utf8 || !strcmp(name, "polish-utf8"))
    940 	{
    941 		charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93);
    942 		charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3);
    943 		charsys_addmultibyterange(0xc4, 0xc4, 0x84, 0x87);
    944 		charsys_addmultibyterange(0xc4, 0xc4, 0x98, 0x99);
    945 		charsys_addmultibyterange(0xc5, 0xc5, 0x81, 0x84);
    946 		charsys_addmultibyterange(0xc5, 0xc5, 0x9a, 0x9b);
    947 		charsys_addmultibyterange(0xc5, 0xc5, 0xb9, 0xbc);
    948 	}
    949 	/* [windows 1250] */
    950 	if (w1250 || !strcmp(name, "polish-w1250"))
    951 	{
    952 		/* supplied by k4be */
    953 		charsys_addallowed("󜿟ʣӌ");
    954 	}
    955 	if (w1250 || !strcmp(name, "czech-w1250"))
    956 	{
    957 		/* Syzop [probably incomplete] */
    958 		charsys_addallowed("");
    959 	}
    960 	if (latin_utf8 || !strcmp(name, "czech-utf8"))
    961 	{
    962 		charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81);
    963 		charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89);
    964 		charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d);
    965 		charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93);
    966 		charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a);
    967 		charsys_addmultibyterange(0xc3, 0xc3, 0x9d, 0x9d);
    968 		charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1);
    969 		charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9);
    970 		charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad);
    971 		charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3);
    972 		charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba);
    973 		charsys_addmultibyterange(0xc3, 0xc3, 0xbd, 0xbd);
    974 		charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8f);
    975 		charsys_addmultibyterange(0xc4, 0xc4, 0x9a, 0x9b);
    976 		charsys_addmultibyterange(0xc5, 0xc5, 0x87, 0x88);
    977 		charsys_addmultibyterange(0xc5, 0xc5, 0x98, 0x99);
    978 		charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1);
    979 		charsys_addmultibyterange(0xc5, 0xc5, 0xa4, 0xa5);
    980 		charsys_addmultibyterange(0xc5, 0xc5, 0xae, 0xaf);
    981 		charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe);
    982 	}
    983 	if (w1250 || !strcmp(name, "slovak-w1250"))
    984 	{
    985 		/* Syzop [probably incomplete] */
    986 		charsys_addallowed("");
    987 	}
    988 	if (latin_utf8 || !strcmp(name, "slovak-utf8"))
    989 	{
    990 		charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81);
    991 		charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84);
    992 		charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89);
    993 		charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d);
    994 		charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1);
    995 		charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4);
    996 		charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9);
    997 		charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad);
    998 		charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb4);
    999 		charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba);
   1000 		charsys_addmultibyterange(0xc3, 0xc3, 0xbd, 0xbd);
   1001 		charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8f);
   1002 		charsys_addmultibyterange(0xc4, 0xc4, 0xb9, 0xba);
   1003 		charsys_addmultibyterange(0xc4, 0xc4, 0xbd, 0xbe);
   1004 		charsys_addmultibyterange(0xc5, 0xc5, 0x88, 0x88);
   1005 		charsys_addmultibyterange(0xc5, 0xc5, 0x94, 0x95);
   1006 		charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1);
   1007 		charsys_addmultibyterange(0xc5, 0xc5, 0xa4, 0xa5);
   1008 		charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe);
   1009 	}
   1010 
   1011 	/* [windows 1251] */
   1012 	if (w1251 || !strcmp(name, "russian-w1251"))
   1013 	{
   1014 		/* supplied by Roman Parkin:
   1015 		 * 128-159 and 223-254
   1016 		 */
   1017 		charsys_addallowed("");
   1018 	}
   1019 	if (cyrillic_utf8 || !strcmp(name, "russian-utf8"))
   1020 	{
   1021 		charsys_addmultibyterange(0xd0, 0xd0, 0x81, 0x81);
   1022 		charsys_addmultibyterange(0xd0, 0xd0, 0x90, 0xbf);
   1023 		charsys_addmultibyterange(0xd1, 0xd1, 0x80, 0x8f);
   1024 		charsys_addmultibyterange(0xd1, 0xd1, 0x91, 0x91);
   1025 	}
   1026 
   1027 	if (w1251 || !strcmp(name, "belarussian-w1251"))
   1028 	{
   1029 		/* supplied by Bock (Samets Anton) & ss:
   1030 		 * 128-159, 161, 162, 178, 179 and 223-254
   1031 		 * Corrected 01.11.2006 to more "correct" behavior by Bock
   1032 		 */
   1033 		charsys_addallowed("ŨDzӡ");
   1034 	}
   1035 	if (cyrillic_utf8 || !strcmp(name, "belarussian-utf8"))
   1036 	{
   1037 		charsys_addmultibyterange(0xd0, 0xd0, 0x81, 0x81);
   1038 		charsys_addmultibyterange(0xd0, 0xd0, 0x86, 0x86);
   1039 		charsys_addmultibyterange(0xd0, 0xd0, 0x8e, 0x8e);
   1040 		charsys_addmultibyterange(0xd0, 0xd0, 0x90, 0x97);
   1041 		charsys_addmultibyterange(0xd0, 0xd0, 0x99, 0xa8);
   1042 		charsys_addmultibyterange(0xd0, 0xd0, 0xab, 0xb7);
   1043 		charsys_addmultibyterange(0xd0, 0xd0, 0xb9, 0xbf);
   1044 		charsys_addmultibyterange(0xd1, 0xd1, 0x80, 0x88);
   1045 		charsys_addmultibyterange(0xd1, 0xd1, 0x8b, 0x8f);
   1046 		charsys_addmultibyterange(0xd1, 0xd1, 0x91, 0x91);
   1047 		charsys_addmultibyterange(0xd1, 0xd1, 0x96, 0x96);
   1048 		charsys_addmultibyterange(0xd1, 0xd1, 0x9e, 0x9e);
   1049 	}
   1050 
   1051 	if (w1251 || !strcmp(name, "ukrainian-w1251"))
   1052 	{
   1053 		/* supplied by Anton Samets & ss:
   1054 		 * 128-159, 170, 175, 178, 179, 186, 191 and 223-254
   1055 		 * Corrected 01.11.2006 to more "correct" behavior by core
   1056 		 */
   1057 		charsys_addallowed("åŪȲ賿");
   1058 	}
   1059 	if (cyrillic_utf8 || !strcmp(name, "ukrainian-utf8"))
   1060 	{
   1061 		charsys_addmultibyterange(0xd0, 0xd0, 0x84, 0x84);
   1062 		charsys_addmultibyterange(0xd0, 0xd0, 0x86, 0x87);
   1063 		charsys_addmultibyterange(0xd0, 0xd0, 0x90, 0xa9);
   1064 		charsys_addmultibyterange(0xd0, 0xd0, 0xac, 0xac);
   1065 		charsys_addmultibyterange(0xd0, 0xd0, 0xae, 0xbf);
   1066 		charsys_addmultibyterange(0xd1, 0xd1, 0x80, 0x89);
   1067 		charsys_addmultibyterange(0xd1, 0xd1, 0x8c, 0x8c);
   1068 		charsys_addmultibyterange(0xd1, 0xd1, 0x8e, 0x8f);
   1069 		charsys_addmultibyterange(0xd1, 0xd1, 0x94, 0x94);
   1070 		charsys_addmultibyterange(0xd1, 0xd1, 0x96, 0x97);
   1071 		charsys_addmultibyterange(0xd2, 0xd2, 0x90, 0x91);
   1072 	}
   1073 
   1074 	/* [GREEK] */
   1075 	if (!strcmp(name, "greek"))
   1076 	{
   1077 		/* supplied by GSF */
   1078 		/* ranges from rfc1947 / iso 8859-7 */
   1079 		charsys_addallowed("");
   1080 	}
   1081 	if (!strcmp(name, "greek-utf8"))
   1082 	{
   1083 		charsys_addmultibyterange(0xce, 0xce, 0x86, 0x86);
   1084 		charsys_addmultibyterange(0xce, 0xce, 0x88, 0x8a);
   1085 		charsys_addmultibyterange(0xce, 0xce, 0x8c, 0x8c);
   1086 		charsys_addmultibyterange(0xce, 0xce, 0x8e, 0xa1);
   1087 		charsys_addmultibyterange(0xce, 0xce, 0xa3, 0xbf);
   1088 		charsys_addmultibyterange(0xcf, 0xcf, 0x80, 0x84);
   1089 	}
   1090 
   1091 	/* [TURKISH] */
   1092 	if (!strcmp(name, "turkish"))
   1093 	{
   1094 		/* Supplied by Ayberk Yancatoral */
   1095 		charsys_addallowed("");
   1096 	}
   1097 	if (!strcmp(name, "turkish-utf8"))
   1098 	{
   1099 		charsys_addmultibyterange(0xc3, 0xc3, 0x87, 0x87);
   1100 		charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96);
   1101 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
   1102 		charsys_addmultibyterange(0xc3, 0xc3, 0xa7, 0xa7);
   1103 		charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6);
   1104 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
   1105 		charsys_addmultibyterange(0xc4, 0xc4, 0x9e, 0x9f);
   1106 		charsys_addmultibyterange(0xc4, 0xc4, 0xb1, 0xb1);
   1107 		charsys_addmultibyterange(0xc5, 0xc5, 0x9e, 0x9f);
   1108 	}
   1109 
   1110 	/* [HEBREW] */
   1111 	if (!strcmp(name, "hebrew"))
   1112 	{
   1113 		/* Supplied by PHANTOm. */
   1114 		/* 0xE0 - 0xFE */
   1115 		charsys_addallowed("");
   1116 	}
   1117 	if (!strcmp(name, "hebrew-utf8"))
   1118 	{
   1119 		/* Supplied by Lion-O */
   1120 		charsys_addmultibyterange(0xd7, 0xd7, 0x90, 0xaa);
   1121 	}
   1122 
   1123 	/* [CHINESE] */
   1124 	if (chinese || !strcmp(name, "chinese-ja"))
   1125 	{
   1126 		charsys_addmultibyterange(0xa4, 0xa4, 0xa1, 0xf3); /* JIS_PIN */
   1127 		charsys_addmultibyterange(0xa5, 0xa5, 0xa1, 0xf6); /* JIS_PIN */
   1128 	}
   1129 	if (chinese || !strcmp(name, "chinese-simp"))
   1130 	{
   1131 		charsys_addmultibyterange(0xb0, 0xd6, 0xa1, 0xfe); /* GBK/2 BC with GB2312 */
   1132 		charsys_addmultibyterange(0xd7, 0xd7, 0xa1, 0xf9); /* GBK/2 BC with GB2312 */
   1133 		charsys_addmultibyterange(0xd8, 0xf7, 0xa1, 0xfe); /* GBK/2 BC with GB2312 */
   1134 	}
   1135 	if (chinese || !strcmp(name, "chinese-trad"))
   1136 	{
   1137 		charsys_addmultibyterange(0x81, 0xa0, 0x40, 0x7e); /* GBK/3 - lower half */
   1138 		charsys_addmultibyterange(0x81, 0xa0, 0x80, 0xfe); /* GBK/3 - upper half */
   1139 		charsys_addmultibyterange(0xaa, 0xfe, 0x40, 0x7e); /* GBK/4 - lower half */
   1140 		charsys_addmultibyterange(0xaa, 0xfe, 0x80, 0xa0); /* GBK/4 - upper half */
   1141 	}
   1142 
   1143 	/* [LATVIAN] */
   1144 	if (latin_utf8 || !strcmp(name, "latvian-utf8"))
   1145 	{
   1146 		/* A a, C c, E e, G g, I i, K k,  , U u,   */
   1147 		charsys_addmultibyterange(0xc4, 0xc4, 0x80, 0x81);
   1148 		charsys_addmultibyterange(0xc4, 0xc4, 0x92, 0x93);
   1149 		charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8d);
   1150 		charsys_addmultibyterange(0xc4, 0xc4, 0x92, 0x93);
   1151 		charsys_addmultibyterange(0xc4, 0xc4, 0xa2, 0xa3);
   1152 		charsys_addmultibyterange(0xc4, 0xc4, 0xaa, 0xab);
   1153 		charsys_addmultibyterange(0xc4, 0xc4, 0xb6, 0xb7);
   1154 		charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1);
   1155 		charsys_addmultibyterange(0xc5, 0xc5, 0xaa, 0xab);
   1156 		charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe);
   1157 	}
   1158 
   1159 	/* [ESTONIAN] */
   1160 	if (latin_utf8 || !strcmp(name, "estonian-utf8"))
   1161 	{
   1162 		/* , , , ,  , , ,  */
   1163 		charsys_addmultibyterange(0xc3, 0xc3, 0xb5, 0xb6);
   1164 		charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4);
   1165 		charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc);
   1166 		charsys_addmultibyterange(0xc3, 0xc3, 0x95, 0x96);
   1167 		charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84);
   1168 		charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c);
   1169 	}
   1170 
   1171 	/* [LITHUANIAN] */
   1172 	if (latin_utf8 || !strcmp(name, "lithuanian-utf8"))
   1173 	{
   1174 		/* a, c, e, e, i, , u, u, , A, C, E, E, I, , U, U,  */
   1175 		charsys_addmultibyterange(0xc4, 0xc4, 0x84, 0x85);
   1176 		charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8d);
   1177 		charsys_addmultibyterange(0xc4, 0xc4, 0x96, 0x99);
   1178 		charsys_addmultibyterange(0xc4, 0xc4, 0xae, 0xaf);
   1179 		charsys_addmultibyterange(0xc4, 0xc4, 0xae, 0xaf);
   1180 		charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1);
   1181 		charsys_addmultibyterange(0xc5, 0xc5, 0xb2, 0xb3);
   1182 		charsys_addmultibyterange(0xc5, 0xc5, 0xaa, 0xab);
   1183 		charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe);
   1184 	}
   1185 
   1186 	/* [ARABIC] */
   1187 	if (latin_utf8 || !strcmp(name, "arabic-utf8"))
   1188 	{
   1189 		/* Supplied by Sensiva */
   1190 		/*charsys_addallowed("اأإآءبتثجحخدذرزسشصضطظعغفقكلمنهؤةويىئ");*/
   1191 		/*- From U+0621 to U+063A (Regex: [\u0621-\u063A])*/
   1192 		/* 0xd8a1 - 0xd8ba */
   1193 		charsys_addmultibyterange(0xd8, 0xd8, 0xa1, 0xba);
   1194 		/*- From U+0641 to U+064A (Regex: [\u0641-\u064A])*/
   1195 		/* 0xd981 - 0xd98a */
   1196 		charsys_addmultibyterange(0xd9, 0xd9, 0x81, 0x8a);
   1197 	}
   1198 }
   1199 
   1200 /** This displays all the nick characters that are permitted */
   1201 char *charsys_displaychars(void)
   1202 {
   1203 #if 0
   1204 	MBList *m;
   1205 	unsigned char hibyte, lobyte;
   1206 #endif
   1207 	static char buf[512];
   1208 	int n = 0;
   1209 	int i, j;
   1210 
   1211 	// 		char_atribs[(unsigned char)*s] |= ALLOWN;
   1212 	for (i = 0; i <= 255; i++)
   1213 	{
   1214 		if (char_atribs[i] & ALLOWN)
   1215 			buf[n++] = i;
   1216 		/* (no bounds checking: first 255 characters always fit a 512 byte buffer) */
   1217 	}
   1218 
   1219 #if 0
   1220 	for (m=mblist; m; m=m->next)
   1221 	{
   1222 		for (hibyte = m->s1; hibyte <= m->e1; hibyte++)
   1223 		{
   1224 			for (lobyte = m->s2; lobyte <= m->e2; lobyte++)
   1225 			{
   1226 				if (n >= sizeof(buf) - 3)
   1227 					break; // break, or an attempt anyway
   1228 				buf[n++] = hibyte;
   1229 				buf[n++] = lobyte;
   1230 			}
   1231 		}
   1232 	}
   1233 #endif
   1234 	/* above didn't work due to multiple overlapping ranges permitted.
   1235 	 * try this instead (lazy).. this is only used in DEBUGMODE
   1236 	 * via a command line option anyway:
   1237 	 */
   1238 	for (i=0; i <= 255; i++)
   1239 	{
   1240 		for (j=0; j <= 255; j++)
   1241 		{
   1242 			if (isvalidmbyte(i, j))
   1243 			{
   1244 				if (n >= sizeof(buf) - 3)
   1245 					break; // break, or an attempt anyway
   1246 				buf[n++] = i;
   1247 				buf[n++] = j;
   1248 			}
   1249 		}
   1250 	}
   1251 
   1252 	buf[n] = '\0'; /* there's always room for a NUL */
   1253 
   1254 	return buf;
   1255 }
   1256 
   1257 char *charsys_group(int v)
   1258 {
   1259 	if (v & LANGAV_LATIN_UTF8)
   1260 		return "Latin script";
   1261 	if (v & LANGAV_CYRILLIC_UTF8)
   1262 		return "Cyrillic script";
   1263 	if (v & LANGAV_GREEK_UTF8)
   1264 		return "Greek script";
   1265 	if (v & LANGAV_HEBREW_UTF8)
   1266 		return "Hebrew script";
   1267 	if (v & LANGAV_ARABIC_UTF8)
   1268 		return "Arabic script";
   1269 
   1270 	return "Other";
   1271 }
   1272 
   1273 void charsys_dump_table(char *filter)
   1274 {
   1275 	int i = 0;
   1276 
   1277 	for (i = 0; langlist[i].directive; i++)
   1278 	{
   1279 		char *charset = langlist[i].directive;
   1280 
   1281 		if (!match_simple(filter, charset))
   1282 			continue; /* skip */
   1283 
   1284 		charsys_reset();
   1285 		charsys_add_language(charset);
   1286 		charsys_finish();
   1287 		printf("%s;%s;%s\n", charset, charsys_group(langlist[i].setflags), charsys_displaychars());
   1288 	}
   1289 }
   1290 
   1291 /** Get current languages (the 'langsinuse' variable) */
   1292 char *_charsys_get_current_languages(void)
   1293 {
   1294 	return langsinuse;
   1295 }