unrealircd- supernets unrealircd source & configuration |
git clone git://git.acid.vegas/unrealircd.git |
Log | Files | Refs | Archive | README | LICENSE |
charsys.c (41744B)
/*
 *   Unreal Internet Relay Chat Daemon, src/charsys.c
 *   (C) Copyright 2005-2017 Bram Matthys and The UnrealIRCd Team.
 *
 *   Character system: This subsystem deals with finding out whether a
 *   character should be allowed or not in nicks (nicks only for now).
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 1, or (at your option)
 *   any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "unrealircd.h"

/* Number of elements in a true array (not valid for pointers). */
#ifndef ARRAY_SIZEOF
#define ARRAY_SIZEOF(x) (sizeof((x))/sizeof((x)[0]))
#endif

ModuleHeader MOD_HEADER
  = {
	"charsys", /* Name of module */
	"5.0", /* Version */
	"Character System (set::allowed-nickchars)", /* Short description of module */
	"UnrealIRCd Team",
	"unrealircd-6",
    };

/* NOTE: it is guaranteed that char is unsigned by compiling options
 *       (-funsigned-char @ gcc, /J @ MSVC)
 * NOTE2: Original credit for supplying the correct chinese
 *        coderanges goes to: RexHsu, Mr.WebBar and Xuefer
 */

/** Our multibyte structure: one allowed two-byte range.
 * A byte pair (c1,c2) matches when s1 <= c1 <= e1 and s2 <= c2 <= e2
 * (see isvalidmbyte()).
 */
typedef struct MBList MBList;
struct MBList
{
	MBList *next;        /* next range in the singly linked list */
	char s1, e1, s2, e2; /* start/end of first byte, start/end of second byte */
};
/* Head and tail of the list of allowed multibyte ranges (appended at the tail). */
MBList *mblist = NULL, *mblist_tail = NULL;

/* Use this to prevent mixing of certain combinations
 * (such as GBK & high-ascii, etc).
 * Holds the OR of the LANGAV_* flags of every language seen so far.
 */
static int langav = 0;
/* Comma separated list of language codes in use, built by charsys_finish(). */
char langsinuse[4096];

/* bitmasks: */
#define LANGAV_ASCII		0x000001 /* 8 bit ascii */
#define LANGAV_LATIN1		0x000002 /* latin1 (western europe) */
#define LANGAV_LATIN2		0x000004 /* latin2 (eastern europe, eg: polish) */
#define LANGAV_ISO8859_7	0x000008 /* greek */
#define LANGAV_ISO8859_8I	0x000010 /* hebrew */
#define LANGAV_ISO8859_9	0x000020 /* turkish */
#define LANGAV_W1250		0x000040 /* windows-1250 (eg: polish-w1250) */
#define LANGAV_W1251		0x000080 /* windows-1251 (eg: russian) */
#define LANGAV_LATIN2W1250	0x000100 /* Compatible with both latin2 AND windows-1250 (eg: hungarian) */
#define LANGAV_ISO8859_6	0x000200 /* arabic */
#define LANGAV_GBK		0x001000 /* (Chinese) GBK encoding */
#define LANGAV_UTF8		0x002000 /* any UTF8 encoding */
#define LANGAV_LATIN_UTF8	0x004000 /* UTF8: latin script */
#define LANGAV_CYRILLIC_UTF8	0x008000 /* UTF8: cyrillic script */
#define LANGAV_GREEK_UTF8	0x010000 /* UTF8: greek script */
#define LANGAV_HEBREW_UTF8	0x020000 /* UTF8: hebrew script */
#define LANGAV_ARABIC_UTF8	0x040000 /* UTF8: arabic script */

/** One entry of the table of known languages / language groups. */
typedef struct LangList LangList;
struct LangList
{
	char *directive; /* name as looked up by charsys_find_language() */
	char *code;      /* comma separated short code(s), split by charsys_doadd_language() */
	int setflags;    /* LANGAV_* bits OR'ed into langav by charsys_test_language() */
};

/* MUST be alphabetized (first column): charsys_find_language() does a
 * binary search on it. The final all-NULL entry is the terminator.
 */
static LangList langlist[] = {
	{ "arabic-utf8", "ara-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_ARABIC_UTF8 },
	{ "belarussian-utf8", "blr-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "belarussian-w1251", "blr", LANGAV_ASCII|LANGAV_W1251 },
	{ "catalan", "cat", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "catalan-utf8", "cat-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "chinese", "chi-j,chi-s,chi-t", LANGAV_GBK },
	{ "chinese-ja", "chi-j", LANGAV_GBK },
	{ "chinese-simp", "chi-s", LANGAV_GBK },
	{ "chinese-trad", "chi-t", LANGAV_GBK },
	{ "cyrillic-utf8", "blr-utf8,rus-utf8,ukr-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "czech", "cze-m", LANGAV_ASCII|LANGAV_W1250 },
	{ "czech-utf8", "cze-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "danish", "dan", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "danish-utf8", "dan-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "dutch", "dut", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "dutch-utf8", "dut-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "estonian-utf8","est-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "french", "fre", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "french-utf8", "fre-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "gbk", "chi-s,chi-t,chi-j", LANGAV_GBK },
	{ "german", "ger", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "german-utf8", "ger-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "greek", "gre", LANGAV_ASCII|LANGAV_ISO8859_7 },
	{ "greek-utf8", "gre-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_GREEK_UTF8 },
	{ "hebrew", "heb", LANGAV_ASCII|LANGAV_ISO8859_8I },
	{ "hebrew-utf8", "heb-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_HEBREW_UTF8 },
	{ "hungarian", "hun", LANGAV_ASCII|LANGAV_LATIN2W1250 },
	{ "hungarian-utf8","hun-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "icelandic", "ice", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "icelandic-utf8","ice-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "italian", "ita", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "italian-utf8", "ita-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "latin-utf8", "cat-utf8,cze-utf8,dan-utf8,dut-utf8,fre-utf8,ger-utf8,hun-utf8,ice-utf8,ita-utf8,pol-utf8,rum-utf8,slo-utf8,spa-utf8,swe-utf8,tur-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "latin1", "cat,dut,fre,ger,ita,spa,swe", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "latin2", "hun,pol,rum", LANGAV_ASCII|LANGAV_LATIN2 },
	{ "latvian-utf8", "lav-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "lithuanian-utf8","lit-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "polish", "pol", LANGAV_ASCII|LANGAV_LATIN2 },
	{ "polish-utf8", "pol-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "polish-w1250", "pol-m", LANGAV_ASCII|LANGAV_W1250 },
	{ "romanian", "rum", LANGAV_ASCII|LANGAV_LATIN2W1250 },
	{ "romanian-utf8","rum-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "russian-utf8", "rus-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "russian-w1251","rus", LANGAV_ASCII|LANGAV_W1251 },
	{ "slovak", "slo-m", LANGAV_ASCII|LANGAV_W1250 },
	{ "slovak-utf8", "slo-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "spanish", "spa", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "spanish-utf8", "spa-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "swedish", "swe", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "swedish-utf8", "swe-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "swiss-german", "swg", LANGAV_ASCII|LANGAV_LATIN1 },
	{ "swiss-german-utf8", "swg-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "turkish", "tur", LANGAV_ASCII|LANGAV_ISO8859_9 },
	{ "turkish-utf8", "tur-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_LATIN_UTF8 },
	{ "ukrainian-utf8", "ukr-utf8", LANGAV_ASCII|LANGAV_UTF8|LANGAV_CYRILLIC_UTF8 },
	{ "ukrainian-w1251", "ukr", LANGAV_ASCII|LANGAV_W1251 },
	{ "windows-1250", "cze-m,pol-m,rum,slo-m,hun", LANGAV_ASCII|LANGAV_W1250 },
	{ "windows-1251", "rus,ukr,blr", LANGAV_ASCII|LANGAV_W1251 },
	{ NULL, NULL, 0 }
};

/* For temporary use during config_run: the language codes collected by
 * charsys_doadd_language(), consumed and freed by charsys_finish().
 */
typedef struct ILangList ILangList;
struct ILangList
{
	ILangList *prev, *next;
	char *name; /* language code, owned by the list entry */
};
ILangList *ilanglist = NULL;

/* These characters are ALWAYS disallowed... from remote, in
 * multibyte, etc.. even though this might mean a certain
 * (legit) character cannot be used (eg: in chinese GBK).
 * - ! (nick!user separator)
 * - prefix chars: +, %, @, &, ~
 * - channel chars: #
 * - scary chars: $, :, ', ", ?, *, ',', '.'
 * NOTE: the caller should also check for ascii <= 32.
165 * [CHANGING THIS WILL CAUSE SECURITY/SYNCH PROBLEMS AND WILL 166 * VIOLATE YOUR ""RIGHT"" ON SUPPORT IMMEDIATELY] 167 */ 168 const char *illegalnickchars = "!+%@&~#$:'\"?*,."; 169 170 /* Forward declarations */ 171 int _do_nick_name(char *nick); 172 int _do_remote_nick_name(char *nick); 173 static int do_nick_name_multibyte(char *nick); 174 static int do_nick_name_standard(char *nick); 175 void charsys_reset(void); 176 void charsys_reset_pretest(void); 177 void charsys_finish(void); 178 void charsys_addmultibyterange(char s1, char e1, char s2, char e2); 179 void charsys_addallowed(char *s); 180 int charsys_test_language(char *name); 181 void charsys_add_language(char *name); 182 static void charsys_doadd_language(char *name); 183 int charsys_config_test(ConfigFile *cf, ConfigEntry *ce, int type, int *errs); 184 int charsys_config_run(ConfigFile *cf, ConfigEntry *ce, int type); 185 int charsys_config_posttest(int *errs); 186 char *_charsys_get_current_languages(void); 187 188 MOD_TEST() 189 { 190 MARK_AS_OFFICIAL_MODULE(modinfo); 191 EfunctionAdd(modinfo->handle, EFUNC_DO_NICK_NAME, _do_nick_name); 192 EfunctionAdd(modinfo->handle, EFUNC_DO_REMOTE_NICK_NAME, _do_remote_nick_name); 193 EfunctionAddString(modinfo->handle, EFUNC_CHARSYS_GET_CURRENT_LANGUAGES, _charsys_get_current_languages); 194 charsys_reset(); 195 charsys_reset_pretest(); 196 HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, charsys_config_test); 197 HookAdd(modinfo->handle, HOOKTYPE_CONFIGPOSTTEST, 0, charsys_config_posttest); 198 return MOD_SUCCESS; 199 } 200 201 MOD_INIT() 202 { 203 MARK_AS_OFFICIAL_MODULE(modinfo); 204 HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, charsys_config_run); 205 return MOD_SUCCESS; 206 } 207 208 /* Is first run when server is 100% ready */ 209 MOD_LOAD() 210 { 211 charsys_finish(); 212 return MOD_SUCCESS; 213 } 214 215 /* Called when module is unloaded */ 216 MOD_UNLOAD() 217 { 218 return MOD_SUCCESS; 219 } 220 221 int charsys_config_test(ConfigFile *cf, 
ConfigEntry *ce, int type, int *errs) 222 { 223 int errors = 0; 224 ConfigEntry *cep; 225 226 if (type != CONFIG_SET) 227 return 0; 228 229 /* We are only interrested in set::allowed-nickchars... */ 230 if (!ce || !ce->name || strcmp(ce->name, "allowed-nickchars")) 231 return 0; 232 233 if (ce->value) 234 { 235 config_error("%s:%i: set::allowed-nickchars: please use 'allowed-nickchars { name; };' " 236 "and not 'allowed-nickchars name;'", 237 ce->file->filename, ce->line_number); 238 /* Give up immediately. Don't bother the user with any other errors. */ 239 errors++; 240 *errs = errors; 241 return -1; 242 } 243 244 for (cep = ce->items; cep; cep=cep->next) 245 { 246 if (!charsys_test_language(cep->name)) 247 { 248 config_error("%s:%i: set::allowed-nickchars: Unknown (sub)language '%s'", 249 ce->file->filename, ce->line_number, cep->name); 250 errors++; 251 } 252 } 253 254 *errs = errors; 255 return errors ? -1 : 1; 256 } 257 258 int charsys_config_run(ConfigFile *cf, ConfigEntry *ce, int type) 259 { 260 ConfigEntry *cep; 261 262 if (type != CONFIG_SET) 263 return 0; 264 265 /* We are only interrested in set::allowed-nickchars... */ 266 if (!ce || !ce->name || strcmp(ce->name, "allowed-nickchars")) 267 return 0; 268 269 for (cep = ce->items; cep; cep = cep->next) 270 charsys_add_language(cep->name); 271 272 return 1; 273 } 274 275 /** Check if the specified charsets during the TESTING phase can be 276 * premitted without getting into problems. 
277 * RETURNS: -1 in case of failure, 1 if ok 278 */ 279 int charsys_config_posttest(int *errs) 280 { 281 int errors = 0; 282 int x=0; 283 284 if ((langav & LANGAV_ASCII) && (langav & LANGAV_GBK)) 285 { 286 config_error("ERROR: set::allowed-nickchars specifies incorrect combination " 287 "of languages: high-ascii languages (such as german, french, etc) " 288 "cannot be mixed with chinese/.."); 289 return -1; 290 } 291 if (langav & LANGAV_LATIN_UTF8) 292 x++; 293 if (langav & LANGAV_GREEK_UTF8) 294 x++; 295 if (langav & LANGAV_CYRILLIC_UTF8) 296 x++; 297 if (langav & LANGAV_HEBREW_UTF8) 298 x++; 299 if (langav & LANGAV_LATIN1) 300 x++; 301 if (langav & LANGAV_LATIN2) 302 x++; 303 if (langav & LANGAV_ISO8859_6) 304 x++; 305 if (langav & LANGAV_ISO8859_7) 306 x++; 307 if (langav & LANGAV_ISO8859_9) 308 x++; 309 if (langav & LANGAV_W1250) 310 x++; 311 if (langav & LANGAV_W1251) 312 x++; 313 if ((langav & LANGAV_LATIN2W1250) && !(langav & LANGAV_LATIN2) && !(langav & LANGAV_W1250)) 314 x++; 315 if (x > 1) 316 { 317 #if 0 318 // I don't think this should be hard error, right? Some combinations may be problematic, but not all. 
319 if (langav & LANGAV_LATIN_UTF8) 320 { 321 config_error("ERROR: set::allowed-nickchars: you cannot combine 'latin-utf8' with any other character set"); 322 errors++; 323 } 324 if (langav & LANGAV_GREEK_UTF8) 325 { 326 config_error("ERROR: set::allowed-nickchars: you cannot combine 'greek-utf8' with any other character set"); 327 errors++; 328 } 329 if (langav & LANGAV_CYRILLIC_UTF8) 330 { 331 config_error("ERROR: set::allowed-nickchars: you cannot combine 'cyrillic-utf8' with any other character set"); 332 errors++; 333 } 334 if (langav & LANGAV_HEBREW_UTF8) 335 { 336 config_error("ERROR: set::allowed-nickchars: you cannot combine 'hebrew-utf8' with any other character set"); 337 errors++; 338 } 339 if (langav & LANGAV_ARABIC_UTF8) 340 { 341 config_error("ERROR: set::allowed-nickchars: you cannot combine 'arabic-utf8' with any other character set"); 342 errors++; 343 } 344 #endif 345 config_status("WARNING: set::allowed-nickchars: Mixing of charsets (eg: latin1+latin2) may cause display problems"); 346 } 347 348 *errs = errors; 349 return errors ? 
-1 : 1; 350 } 351 352 /** Called on boot and just before config run */ 353 void charsys_reset(void) 354 { 355 int i; 356 MBList *m, *m_next; 357 358 /* First, reset everything */ 359 for (i=0; i < 256; i++) 360 char_atribs[i] &= ~ALLOWN; 361 for (m=mblist; m; m=m_next) 362 { 363 m_next = m->next; 364 safe_free(m); 365 } 366 mblist=mblist_tail=NULL; 367 /* Then add the default which will always be allowed */ 368 charsys_addallowed("0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyzy{|}"); 369 langav = 0; 370 langsinuse[0] = '\0'; 371 #ifdef DEBUGMODE 372 if (ilanglist) 373 abort(); 374 #endif 375 } 376 377 void charsys_reset_pretest(void) 378 { 379 langav = 0; 380 non_utf8_nick_chars_in_use = 0; 381 } 382 383 static inline void ilang_swap(ILangList *one, ILangList *two) 384 { 385 char *tmp = one->name; 386 one->name = two->name; 387 two->name = tmp; 388 } 389 390 static void ilang_sort(void) 391 { 392 ILangList *outer, *inner; 393 394 /* Selection sort -- perhaps optimize to qsort/whatever if 395 * possible? ;) 396 */ 397 for (outer=ilanglist; outer; outer=outer->next) 398 { 399 for (inner=outer->next; inner; inner=inner->next) 400 { 401 if (strcmp(outer->name, inner->name) > 0) 402 ilang_swap(outer, inner); 403 } 404 } 405 } 406 407 void charsys_finish(void) 408 { 409 ILangList *e, *e_next; 410 411 /* Sort alphabetically */ 412 ilang_sort(); 413 414 /* [note: this can be optimized] */ 415 langsinuse[0] = '\0'; 416 for (e=ilanglist; e; e=e->next) 417 { 418 strlcat(langsinuse, e->name, sizeof(langsinuse)); 419 if (e->next) 420 strlcat(langsinuse, ",", sizeof(langsinuse)); 421 } 422 423 /* Free everything */ 424 for (e=ilanglist; e; e=e_next) 425 { 426 e_next=e->next; 427 safe_free(e->name); 428 safe_free(e); 429 } 430 ilanglist = NULL; 431 #ifdef DEBUGMODE 432 if (strlen(langsinuse) > 490) 433 abort(); 434 #endif 435 charsys_check_for_changes(); 436 } 437 438 /** Add a character range to the multibyte list. 
439 * Eg: charsys_addmultibyterange(0xaa, 0xbb, 0x00, 0xff) for 0xaa00-0xbbff. 440 * @param s1 Start of highest byte 441 * @param e1 End of highest byte 442 * @param s2 Start of lowest byte 443 * @param e2 End of lowest byte 444 */ 445 void charsys_addmultibyterange(char s1, char e1, char s2, char e2) 446 { 447 MBList *m = safe_alloc(sizeof(MBList)); 448 449 m->s1 = s1; 450 m->e1 = e1; 451 m->s2 = s2; 452 m->e2 = e2; 453 454 if (mblist_tail) 455 mblist_tail->next = m; 456 else 457 mblist = m; 458 mblist_tail = m; 459 } 460 461 /** Adds all characters in the specified string to the allowed list. */ 462 void charsys_addallowed(char *s) 463 { 464 for (; *s; s++) 465 { 466 if ((*s <= 32) || strchr(illegalnickchars, *s)) 467 { 468 config_error("INTERNAL ERROR: charsys_addallowed() called for illegal characters: %s", s); 469 #ifdef DEBUGMODE 470 abort(); 471 #endif 472 } 473 char_atribs[(unsigned char)*s] |= ALLOWN; 474 } 475 } 476 477 void charsys_addallowed_range(unsigned char from, unsigned char to) 478 { 479 unsigned char i; 480 481 for (i = from; i != to; i++) 482 char_atribs[i] |= ALLOWN; 483 } 484 485 int _do_nick_name(char *nick) 486 { 487 if (mblist) 488 return do_nick_name_multibyte(nick); 489 else 490 return do_nick_name_standard(nick); 491 } 492 493 static int do_nick_name_standard(char *nick) 494 { 495 int len; 496 char *ch; 497 498 if ((*nick == '-') || isdigit(*nick)) 499 return 0; 500 501 for (ch=nick,len=0; *ch && len <= NICKLEN; ch++, len++) 502 if (!isvalid(*ch)) 503 return 0; /* reject the full nick */ 504 *ch = '\0'; 505 return len; 506 } 507 508 static int isvalidmbyte(unsigned char c1, unsigned char c2) 509 { 510 MBList *m; 511 512 for (m=mblist; m; m=m->next) 513 { 514 if ((c1 >= m->s1) && (c1 <= m->e1) && 515 (c2 >= m->s2) && (c2 <= m->e2)) 516 return 1; 517 } 518 return 0; 519 } 520 521 /* hmmm.. there must be some problems with multibyte & 522 * other high ascii characters I think (such as german etc). 523 * Not sure if this can be solved? 
I don't think so... -- Syzop. 524 */ 525 static int do_nick_name_multibyte(char *nick) 526 { 527 int len; 528 char *ch; 529 int firstmbchar = 0; 530 531 if ((*nick == '-') || isdigit(*nick)) 532 return 0; 533 534 for (ch=nick,len=0; *ch && len <= NICKLEN; ch++, len++) 535 { 536 /* Some characters are ALWAYS illegal, so they have to be disallowed here */ 537 if ((*ch <= 32) || strchr(illegalnickchars, *ch)) 538 return 0; 539 if (firstmbchar) 540 { 541 if (!isvalidmbyte(ch[-1], *ch)) 542 return 0; 543 firstmbchar = 0; 544 } else if ((*ch) & 0x80) 545 firstmbchar = 1; 546 else if (!isvalid(*ch)) 547 return 0; 548 } 549 if (firstmbchar) 550 { 551 ch--; 552 len--; 553 } 554 *ch = '\0'; 555 return len; 556 } 557 558 /** Does some very basic checking on remote nickname. 559 * It's only purpose is not to cause the whole network 560 * to fall down in pieces, that's all. Display problems 561 * are not really handled here. They are assumed to have been 562 * checked by PROTOCTL NICKCHARS= -- Syzop. 563 */ 564 int _do_remote_nick_name(char *nick) 565 { 566 char *c; 567 568 /* Don't allow nicks to start with a digit, ever. */ 569 if ((*nick == '-') || isdigit(*nick)) 570 return 0; 571 572 /* Now the other, more relaxed checks.. 
*/ 573 for (c=nick; *c; c++) 574 if ((*c <= 32) || strchr(illegalnickchars, *c)) 575 return 0; 576 577 return (c - nick); 578 } 579 580 static LangList *charsys_find_language(char *name) 581 { 582 int start = 0; 583 int stop = ARRAY_SIZEOF(langlist)-1; 584 int mid; 585 586 while (start <= stop) 587 { 588 mid = (start+stop)/2; 589 if (!langlist[mid].directive || smycmp(name, langlist[mid].directive) < 0) 590 stop = mid-1; 591 else if (strcmp(name, langlist[mid].directive) == 0) 592 return &langlist[mid]; 593 else 594 start = mid+1; 595 } 596 return NULL; 597 } 598 599 static LangList *charsys_find_language_code(char *code) 600 { 601 int i; 602 for (i = 0; langlist[i].code; i++) 603 if (!strcasecmp(langlist[i].code, code)) 604 return &langlist[i]; 605 return NULL; 606 } 607 608 /** Check if language is available. */ 609 int charsys_test_language(char *name) 610 { 611 LangList *l = charsys_find_language(name); 612 613 if (l) 614 { 615 langav |= l->setflags; 616 if (!(l->setflags & LANGAV_UTF8)) 617 non_utf8_nick_chars_in_use = 1; 618 return 1; 619 } 620 if (!strcmp(name, "euro-west")) 621 { 622 config_error("set::allowed-nickchars: ERROR: 'euro-west' got renamed to 'latin1'"); 623 return 0; 624 } 625 return 0; 626 } 627 628 static void charsys_doadd_language(char *name) 629 { 630 LangList *l; 631 ILangList *li; 632 int found; 633 char tmp[512], *lang, *p; 634 635 l = charsys_find_language(name); 636 if (!l) 637 { 638 #ifdef DEBUGMODE 639 abort(); 640 #endif 641 return; 642 } 643 644 strlcpy(tmp, l->code, sizeof(tmp)); 645 for (lang = strtoken(&p, tmp, ","); lang; lang = strtoken(&p, NULL, ",")) 646 { 647 /* Check if present... */ 648 found=0; 649 for (li=ilanglist; li; li=li->next) 650 if (!strcmp(li->name, lang)) 651 { 652 found = 1; 653 break; 654 } 655 if (!found) 656 { 657 /* Add... 
*/ 658 li = safe_alloc(sizeof(ILangList)); 659 safe_strdup(li->name, lang); 660 AddListItem(li, ilanglist); 661 } 662 } 663 } 664 665 void charsys_add_language(char *name) 666 { 667 char latin1=0, latin2=0, w1250=0, w1251=0, chinese=0; 668 char latin_utf8=0, cyrillic_utf8=0; 669 670 /** Note: there could well be some characters missing in the lists below. 671 * While I've seen other altnernatives that just allow pretty much 672 * every accent that exists even for dutch (where we rarely use 673 * accents except for like 3 types), I rather prefer to use a bit more 674 * reasonable aproach ;). That said, anyone is welcome to make 675 * suggestions about characters that should be added (or removed) 676 * of course. -- Syzop 677 */ 678 679 /* Add our language to our list */ 680 charsys_doadd_language(name); 681 682 /* GROUPS */ 683 if (!strcmp(name, "latin-utf8")) 684 latin_utf8 = 1; 685 else if (!strcmp(name, "cyrillic-utf8")) 686 cyrillic_utf8 = 1; 687 else if (!strcmp(name, "latin1")) 688 latin1 = 1; 689 else if (!strcmp(name, "latin2")) 690 latin2 = 1; 691 else if (!strcmp(name, "windows-1250")) 692 w1250 = 1; 693 else if (!strcmp(name, "windows-1251")) 694 w1251 = 1; 695 else if (!strcmp(name, "chinese") || !strcmp(name, "gbk")) 696 chinese = 1; 697 698 /* INDIVIDUAL CHARSETS */ 699 700 /* [LATIN1] and [LATIN-UTF8] */ 701 if (latin1 || !strcmp(name, "german")) 702 { 703 /* a", A", o", O", u", U" and es-zett */ 704 charsys_addallowed(""); 705 } 706 if (latin_utf8 || !strcmp(name, "german-utf8")) 707 { 708 charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84); 709 charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96); 710 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 711 charsys_addmultibyterange(0xc3, 0xc3, 0x9f, 0x9f); 712 charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4); 713 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 714 charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc); 715 } 716 if (latin1 || !strcmp(name, "swiss-german")) 717 { 718 /* a", A", o", O", u", 
U" */ 719 charsys_addallowed(""); 720 } 721 if (latin_utf8 || !strcmp(name, "swiss-german-utf8")) 722 { 723 charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84); 724 charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96); 725 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 726 charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4); 727 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 728 charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc); 729 } 730 if (latin1 || !strcmp(name, "dutch")) 731 { 732 /* Ok, even though I'm Dutch myself, I've trouble getting 733 * a proper list of this ;). I think I got them all now, but 734 * I did not include "borrow-words" like words we use in Dutch 735 * that are literal French. So if you really want to use them all, 736 * I suggest you to use just latin1 :P. 737 */ 738 /* e', e", o", i", u", e`. */ 739 charsys_addallowed(""); 740 } 741 if (latin_utf8 || !strcmp(name, "dutch-utf8")) 742 { 743 charsys_addmultibyterange(0xc3, 0xc3, 0xa8, 0xa9); 744 charsys_addmultibyterange(0xc3, 0xc3, 0xab, 0xab); 745 charsys_addmultibyterange(0xc3, 0xc3, 0xaf, 0xaf); 746 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 747 charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc); 748 } 749 if (latin1 || !strcmp(name, "danish")) 750 { 751 /* supplied by klaus: 752 * <ae>, <AE>, ao, Ao, o/, O/ */ 753 charsys_addallowed(""); 754 } 755 if (latin_utf8 || !strcmp(name, "danish-utf8")) 756 { 757 charsys_addmultibyterange(0xc3, 0xc3, 0x85, 0x86); 758 charsys_addmultibyterange(0xc3, 0xc3, 0x98, 0x98); 759 charsys_addmultibyterange(0xc3, 0xc3, 0xa5, 0xa6); 760 charsys_addmultibyterange(0xc3, 0xc3, 0xb8, 0xb8); 761 } 762 if (latin1 || !strcmp(name, "french")) 763 { 764 /* A`, A^, a`, a^, weird-C, weird-c, E`, E', E^, E", e`, e', e^, e", 765 * I^, I", i^, i", O^, o^, U`, U^, U", u`, u", u`, y" [not in that order, sry] 766 * Hmm.. there might be more, but I'm not sure how common they are 767 * and I don't think they are always displayed correctly (?). 
768 */ 769 charsys_addallowed(""); 770 } 771 if (latin_utf8 || !strcmp(name, "french-utf8")) 772 { 773 charsys_addmultibyterange(0xc3, 0xc3, 0x80, 0x80); 774 charsys_addmultibyterange(0xc3, 0xc3, 0x82, 0x82); 775 charsys_addmultibyterange(0xc3, 0xc3, 0x87, 0x8b); 776 charsys_addmultibyterange(0xc3, 0xc3, 0x8e, 0x8f); 777 charsys_addmultibyterange(0xc3, 0xc3, 0x94, 0x94); 778 charsys_addmultibyterange(0xc3, 0xc3, 0x99, 0x99); 779 charsys_addmultibyterange(0xc3, 0xc3, 0x9b, 0x9c); 780 charsys_addmultibyterange(0xc3, 0xc3, 0xa0, 0xa0); 781 charsys_addmultibyterange(0xc3, 0xc3, 0xa2, 0xa2); 782 charsys_addmultibyterange(0xc3, 0xc3, 0xa7, 0xab); 783 charsys_addmultibyterange(0xc3, 0xc3, 0xae, 0xaf); 784 charsys_addmultibyterange(0xc3, 0xc3, 0xb4, 0xb4); 785 charsys_addmultibyterange(0xc3, 0xc3, 0xb9, 0xb9); 786 charsys_addmultibyterange(0xc3, 0xc3, 0xbb, 0xbc); 787 charsys_addmultibyterange(0xc3, 0xc3, 0xbf, 0xbf); 788 } 789 if (latin1 || !strcmp(name, "spanish")) 790 { 791 /* a', A', e', E', i', I', o', O', u', U', u", U", n~, N~ */ 792 charsys_addallowed(""); 793 } 794 if (latin_utf8 || !strcmp(name, "spanish-utf8")) 795 { 796 charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81); 797 charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89); 798 charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d); 799 charsys_addmultibyterange(0xc3, 0xc3, 0x91, 0x91); 800 charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93); 801 charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a); 802 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 803 charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1); 804 charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9); 805 charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad); 806 charsys_addmultibyterange(0xc3, 0xc3, 0xb1, 0xb1); 807 charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3); 808 charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba); 809 charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc); 810 } 811 if (latin1 || !strcmp(name, "italian")) 812 { 813 /* A`, E`, E', I`, I', O`, O', 
U`, U', a`, e`, e', i`, i', o`, o', u`, u' */ 814 charsys_addallowed(""); 815 } 816 if (latin_utf8 || !strcmp(name, "italian-utf8")) 817 { 818 charsys_addmultibyterange(0xc3, 0xc3, 0x80, 0x80); 819 charsys_addmultibyterange(0xc3, 0xc3, 0x88, 0x89); 820 charsys_addmultibyterange(0xc3, 0xc3, 0x8c, 0x8d); 821 charsys_addmultibyterange(0xc3, 0xc3, 0x92, 0x93); 822 charsys_addmultibyterange(0xc3, 0xc3, 0x99, 0x9a); 823 charsys_addmultibyterange(0xc3, 0xc3, 0xa0, 0xa0); 824 charsys_addmultibyterange(0xc3, 0xc3, 0xa8, 0xa9); 825 charsys_addmultibyterange(0xc3, 0xc3, 0xac, 0xad); 826 charsys_addmultibyterange(0xc3, 0xc3, 0xb2, 0xb3); 827 charsys_addmultibyterange(0xc3, 0xc3, 0xb9, 0xba); 828 } 829 if (latin1 || !strcmp(name, "catalan")) 830 { 831 /* supplied by Trocotronic */ 832 /* a`, A`, e`, weird-c, weird-C, E`, e', E', i', I', o`, O`, o', O', u', U', i", I", u", U", weird-dot */ 833 charsys_addallowed(""); 834 } 835 if (latin_utf8 || !strcmp(name, "catalan-utf8")) 836 { 837 charsys_addmultibyterange(0xc3, 0xc3, 0x80, 0x80); 838 charsys_addmultibyterange(0xc3, 0xc3, 0x87, 0x89); 839 charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d); 840 charsys_addmultibyterange(0xc3, 0xc3, 0x8f, 0x8f); 841 charsys_addmultibyterange(0xc3, 0xc3, 0x92, 0x93); 842 charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a); 843 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 844 charsys_addmultibyterange(0xc3, 0xc3, 0xa0, 0xa0); 845 charsys_addmultibyterange(0xc3, 0xc3, 0xa7, 0xa9); 846 charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad); 847 charsys_addmultibyterange(0xc3, 0xc3, 0xaf, 0xaf); 848 charsys_addmultibyterange(0xc3, 0xc3, 0xb2, 0xb3); 849 charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba); 850 } 851 if (latin1 || !strcmp(name, "swedish")) 852 { 853 /* supplied by Tank */ 854 /* ao, Ao, a", A", o", O" */ 855 charsys_addallowed(""); 856 } 857 if (latin_utf8 || !strcmp(name, "swedish-utf8")) 858 { 859 charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x85); 860 charsys_addmultibyterange(0xc3, 
0xc3, 0x96, 0x96); 861 charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa5); 862 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 863 } 864 if (latin1 || !strcmp(name, "icelandic")) 865 { 866 /* supplied by Saevar */ 867 charsys_addallowed(""); 868 } 869 if (latin_utf8 || !strcmp(name, "icelandic-utf8")) 870 { 871 charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81); 872 charsys_addmultibyterange(0xc3, 0xc3, 0x86, 0x86); 873 charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d); 874 charsys_addmultibyterange(0xc3, 0xc3, 0x90, 0x90); 875 charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93); 876 charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96); 877 charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a); 878 charsys_addmultibyterange(0xc3, 0xc3, 0x9d, 0x9e); 879 charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1); 880 charsys_addmultibyterange(0xc3, 0xc3, 0xa6, 0xa6); 881 charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad); 882 charsys_addmultibyterange(0xc3, 0xc3, 0xb0, 0xb0); 883 charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3); 884 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 885 charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba); 886 charsys_addmultibyterange(0xc3, 0xc3, 0xbd, 0xbe); 887 } 888 889 /* [LATIN2] and rest of [LATIN-UTF8] */ 890 /* actually hungarian is a special case, include it in both w1250 and latin2 ;p */ 891 if (latin2 || w1250 || !strcmp(name, "hungarian")) 892 { 893 /* supplied by AngryWolf */ 894 /* a', e', i', o', o", o~, u', u", u~, A', E', I', O', O", O~, U', U", U~ */ 895 charsys_addallowed(""); 896 } 897 if (latin_utf8 || !strcmp(name, "hungarian-utf8")) 898 { 899 charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81); 900 charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89); 901 charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d); 902 charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93); 903 charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96); 904 charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a); 905 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 906 
charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1); 907 charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9); 908 charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad); 909 charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3); 910 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 911 charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba); 912 charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc); 913 charsys_addmultibyterange(0xc5, 0xc5, 0x90, 0x91); 914 charsys_addmultibyterange(0xc5, 0xc5, 0xb0, 0xb1); 915 } 916 /* same is true for romanian: latin2 & w1250 compatible */ 917 if (latin2 || w1250 || !strcmp(name, "romanian")) 918 { 919 /* With some help from crazytoon */ 920 /* 'S,' 's,' 'A^' 'A<' 'I^' 'T,' 'a^' 'a<' 'i^' 't,' */ 921 charsys_addallowed(""); 922 } 923 if (latin_utf8 || !strcmp(name, "romanian-utf8")) 924 { 925 charsys_addmultibyterange(0xc3, 0xc3, 0x82, 0x82); 926 charsys_addmultibyterange(0xc3, 0xc3, 0x8e, 0x8e); 927 charsys_addmultibyterange(0xc3, 0xc3, 0xa2, 0xa2); 928 charsys_addmultibyterange(0xc3, 0xc3, 0xae, 0xae); 929 charsys_addmultibyterange(0xc4, 0xc4, 0x82, 0x83); 930 charsys_addmultibyterange(0xc5, 0xc5, 0x9e, 0x9f); 931 charsys_addmultibyterange(0xc5, 0xc5, 0xa2, 0xa3); 932 } 933 934 if (latin2 || !strcmp(name, "polish")) 935 { 936 /* supplied by k4be */ 937 charsys_addallowed("ʣӦ"); 938 } 939 if (latin_utf8 || !strcmp(name, "polish-utf8")) 940 { 941 charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93); 942 charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3); 943 charsys_addmultibyterange(0xc4, 0xc4, 0x84, 0x87); 944 charsys_addmultibyterange(0xc4, 0xc4, 0x98, 0x99); 945 charsys_addmultibyterange(0xc5, 0xc5, 0x81, 0x84); 946 charsys_addmultibyterange(0xc5, 0xc5, 0x9a, 0x9b); 947 charsys_addmultibyterange(0xc5, 0xc5, 0xb9, 0xbc); 948 } 949 /* [windows 1250] */ 950 if (w1250 || !strcmp(name, "polish-w1250")) 951 { 952 /* supplied by k4be */ 953 charsys_addallowed("ʣӌ"); 954 } 955 if (w1250 || !strcmp(name, "czech-w1250")) 956 { 957 /* Syzop [probably incomplete] 
*/ 958 charsys_addallowed(""); 959 } 960 if (latin_utf8 || !strcmp(name, "czech-utf8")) 961 { 962 charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81); 963 charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89); 964 charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d); 965 charsys_addmultibyterange(0xc3, 0xc3, 0x93, 0x93); 966 charsys_addmultibyterange(0xc3, 0xc3, 0x9a, 0x9a); 967 charsys_addmultibyterange(0xc3, 0xc3, 0x9d, 0x9d); 968 charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1); 969 charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9); 970 charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad); 971 charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb3); 972 charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba); 973 charsys_addmultibyterange(0xc3, 0xc3, 0xbd, 0xbd); 974 charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8f); 975 charsys_addmultibyterange(0xc4, 0xc4, 0x9a, 0x9b); 976 charsys_addmultibyterange(0xc5, 0xc5, 0x87, 0x88); 977 charsys_addmultibyterange(0xc5, 0xc5, 0x98, 0x99); 978 charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1); 979 charsys_addmultibyterange(0xc5, 0xc5, 0xa4, 0xa5); 980 charsys_addmultibyterange(0xc5, 0xc5, 0xae, 0xaf); 981 charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe); 982 } 983 if (w1250 || !strcmp(name, "slovak-w1250")) 984 { 985 /* Syzop [probably incomplete] */ 986 charsys_addallowed(""); 987 } 988 if (latin_utf8 || !strcmp(name, "slovak-utf8")) 989 { 990 charsys_addmultibyterange(0xc3, 0xc3, 0x81, 0x81); 991 charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84); 992 charsys_addmultibyterange(0xc3, 0xc3, 0x89, 0x89); 993 charsys_addmultibyterange(0xc3, 0xc3, 0x8d, 0x8d); 994 charsys_addmultibyterange(0xc3, 0xc3, 0xa1, 0xa1); 995 charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4); 996 charsys_addmultibyterange(0xc3, 0xc3, 0xa9, 0xa9); 997 charsys_addmultibyterange(0xc3, 0xc3, 0xad, 0xad); 998 charsys_addmultibyterange(0xc3, 0xc3, 0xb3, 0xb4); 999 charsys_addmultibyterange(0xc3, 0xc3, 0xba, 0xba); 1000 charsys_addmultibyterange(0xc3, 0xc3, 0xbd, 0xbd); 1001 
charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8f); 1002 charsys_addmultibyterange(0xc4, 0xc4, 0xb9, 0xba); 1003 charsys_addmultibyterange(0xc4, 0xc4, 0xbd, 0xbe); 1004 charsys_addmultibyterange(0xc5, 0xc5, 0x88, 0x88); 1005 charsys_addmultibyterange(0xc5, 0xc5, 0x94, 0x95); 1006 charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1); 1007 charsys_addmultibyterange(0xc5, 0xc5, 0xa4, 0xa5); 1008 charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe); 1009 } 1010 1011 /* [windows 1251] */ 1012 if (w1251 || !strcmp(name, "russian-w1251")) 1013 { 1014 /* supplied by Roman Parkin: 1015 * 128-159 and 223-254 1016 */ 1017 charsys_addallowed(""); 1018 } 1019 if (cyrillic_utf8 || !strcmp(name, "russian-utf8")) 1020 { 1021 charsys_addmultibyterange(0xd0, 0xd0, 0x81, 0x81); 1022 charsys_addmultibyterange(0xd0, 0xd0, 0x90, 0xbf); 1023 charsys_addmultibyterange(0xd1, 0xd1, 0x80, 0x8f); 1024 charsys_addmultibyterange(0xd1, 0xd1, 0x91, 0x91); 1025 } 1026 1027 if (w1251 || !strcmp(name, "belarussian-w1251")) 1028 { 1029 /* supplied by Bock (Samets Anton) & ss: 1030 * 128-159, 161, 162, 178, 179 and 223-254 1031 * Corrected 01.11.2006 to more "correct" behavior by Bock 1032 */ 1033 charsys_addallowed("ŨDzӡ"); 1034 } 1035 if (cyrillic_utf8 || !strcmp(name, "belarussian-utf8")) 1036 { 1037 charsys_addmultibyterange(0xd0, 0xd0, 0x81, 0x81); 1038 charsys_addmultibyterange(0xd0, 0xd0, 0x86, 0x86); 1039 charsys_addmultibyterange(0xd0, 0xd0, 0x8e, 0x8e); 1040 charsys_addmultibyterange(0xd0, 0xd0, 0x90, 0x97); 1041 charsys_addmultibyterange(0xd0, 0xd0, 0x99, 0xa8); 1042 charsys_addmultibyterange(0xd0, 0xd0, 0xab, 0xb7); 1043 charsys_addmultibyterange(0xd0, 0xd0, 0xb9, 0xbf); 1044 charsys_addmultibyterange(0xd1, 0xd1, 0x80, 0x88); 1045 charsys_addmultibyterange(0xd1, 0xd1, 0x8b, 0x8f); 1046 charsys_addmultibyterange(0xd1, 0xd1, 0x91, 0x91); 1047 charsys_addmultibyterange(0xd1, 0xd1, 0x96, 0x96); 1048 charsys_addmultibyterange(0xd1, 0xd1, 0x9e, 0x9e); 1049 } 1050 1051 if (w1251 || !strcmp(name, 
"ukrainian-w1251")) 1052 { 1053 /* supplied by Anton Samets & ss: 1054 * 128-159, 170, 175, 178, 179, 186, 191 and 223-254 1055 * Corrected 01.11.2006 to more "correct" behavior by core 1056 */ 1057 charsys_addallowed("åŪȲ賿"); 1058 } 1059 if (cyrillic_utf8 || !strcmp(name, "ukrainian-utf8")) 1060 { 1061 charsys_addmultibyterange(0xd0, 0xd0, 0x84, 0x84); 1062 charsys_addmultibyterange(0xd0, 0xd0, 0x86, 0x87); 1063 charsys_addmultibyterange(0xd0, 0xd0, 0x90, 0xa9); 1064 charsys_addmultibyterange(0xd0, 0xd0, 0xac, 0xac); 1065 charsys_addmultibyterange(0xd0, 0xd0, 0xae, 0xbf); 1066 charsys_addmultibyterange(0xd1, 0xd1, 0x80, 0x89); 1067 charsys_addmultibyterange(0xd1, 0xd1, 0x8c, 0x8c); 1068 charsys_addmultibyterange(0xd1, 0xd1, 0x8e, 0x8f); 1069 charsys_addmultibyterange(0xd1, 0xd1, 0x94, 0x94); 1070 charsys_addmultibyterange(0xd1, 0xd1, 0x96, 0x97); 1071 charsys_addmultibyterange(0xd2, 0xd2, 0x90, 0x91); 1072 } 1073 1074 /* [GREEK] */ 1075 if (!strcmp(name, "greek")) 1076 { 1077 /* supplied by GSF */ 1078 /* ranges from rfc1947 / iso 8859-7 */ 1079 charsys_addallowed(""); 1080 } 1081 if (!strcmp(name, "greek-utf8")) 1082 { 1083 charsys_addmultibyterange(0xce, 0xce, 0x86, 0x86); 1084 charsys_addmultibyterange(0xce, 0xce, 0x88, 0x8a); 1085 charsys_addmultibyterange(0xce, 0xce, 0x8c, 0x8c); 1086 charsys_addmultibyterange(0xce, 0xce, 0x8e, 0xa1); 1087 charsys_addmultibyterange(0xce, 0xce, 0xa3, 0xbf); 1088 charsys_addmultibyterange(0xcf, 0xcf, 0x80, 0x84); 1089 } 1090 1091 /* [TURKISH] */ 1092 if (!strcmp(name, "turkish")) 1093 { 1094 /* Supplied by Ayberk Yancatoral */ 1095 charsys_addallowed(""); 1096 } 1097 if (!strcmp(name, "turkish-utf8")) 1098 { 1099 charsys_addmultibyterange(0xc3, 0xc3, 0x87, 0x87); 1100 charsys_addmultibyterange(0xc3, 0xc3, 0x96, 0x96); 1101 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 1102 charsys_addmultibyterange(0xc3, 0xc3, 0xa7, 0xa7); 1103 charsys_addmultibyterange(0xc3, 0xc3, 0xb6, 0xb6); 1104 charsys_addmultibyterange(0xc3, 0xc3, 
0xbc, 0xbc); 1105 charsys_addmultibyterange(0xc4, 0xc4, 0x9e, 0x9f); 1106 charsys_addmultibyterange(0xc4, 0xc4, 0xb1, 0xb1); 1107 charsys_addmultibyterange(0xc5, 0xc5, 0x9e, 0x9f); 1108 } 1109 1110 /* [HEBREW] */ 1111 if (!strcmp(name, "hebrew")) 1112 { 1113 /* Supplied by PHANTOm. */ 1114 /* 0xE0 - 0xFE */ 1115 charsys_addallowed(""); 1116 } 1117 if (!strcmp(name, "hebrew-utf8")) 1118 { 1119 /* Supplied by Lion-O */ 1120 charsys_addmultibyterange(0xd7, 0xd7, 0x90, 0xaa); 1121 } 1122 1123 /* [CHINESE] */ 1124 if (chinese || !strcmp(name, "chinese-ja")) 1125 { 1126 charsys_addmultibyterange(0xa4, 0xa4, 0xa1, 0xf3); /* JIS_PIN */ 1127 charsys_addmultibyterange(0xa5, 0xa5, 0xa1, 0xf6); /* JIS_PIN */ 1128 } 1129 if (chinese || !strcmp(name, "chinese-simp")) 1130 { 1131 charsys_addmultibyterange(0xb0, 0xd6, 0xa1, 0xfe); /* GBK/2 BC with GB2312 */ 1132 charsys_addmultibyterange(0xd7, 0xd7, 0xa1, 0xf9); /* GBK/2 BC with GB2312 */ 1133 charsys_addmultibyterange(0xd8, 0xf7, 0xa1, 0xfe); /* GBK/2 BC with GB2312 */ 1134 } 1135 if (chinese || !strcmp(name, "chinese-trad")) 1136 { 1137 charsys_addmultibyterange(0x81, 0xa0, 0x40, 0x7e); /* GBK/3 - lower half */ 1138 charsys_addmultibyterange(0x81, 0xa0, 0x80, 0xfe); /* GBK/3 - upper half */ 1139 charsys_addmultibyterange(0xaa, 0xfe, 0x40, 0x7e); /* GBK/4 - lower half */ 1140 charsys_addmultibyterange(0xaa, 0xfe, 0x80, 0xa0); /* GBK/4 - upper half */ 1141 } 1142 1143 /* [LATVIAN] */ 1144 if (latin_utf8 || !strcmp(name, "latvian-utf8")) 1145 { 1146 /* A a, C c, E e, G g, I i, K k, , U u, */ 1147 charsys_addmultibyterange(0xc4, 0xc4, 0x80, 0x81); 1148 charsys_addmultibyterange(0xc4, 0xc4, 0x92, 0x93); 1149 charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8d); 1150 charsys_addmultibyterange(0xc4, 0xc4, 0x92, 0x93); 1151 charsys_addmultibyterange(0xc4, 0xc4, 0xa2, 0xa3); 1152 charsys_addmultibyterange(0xc4, 0xc4, 0xaa, 0xab); 1153 charsys_addmultibyterange(0xc4, 0xc4, 0xb6, 0xb7); 1154 charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1); 
1155 charsys_addmultibyterange(0xc5, 0xc5, 0xaa, 0xab); 1156 charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe); 1157 } 1158 1159 /* [ESTONIAN] */ 1160 if (latin_utf8 || !strcmp(name, "estonian-utf8")) 1161 { 1162 /* , , , , , , , */ 1163 charsys_addmultibyterange(0xc3, 0xc3, 0xb5, 0xb6); 1164 charsys_addmultibyterange(0xc3, 0xc3, 0xa4, 0xa4); 1165 charsys_addmultibyterange(0xc3, 0xc3, 0xbc, 0xbc); 1166 charsys_addmultibyterange(0xc3, 0xc3, 0x95, 0x96); 1167 charsys_addmultibyterange(0xc3, 0xc3, 0x84, 0x84); 1168 charsys_addmultibyterange(0xc3, 0xc3, 0x9c, 0x9c); 1169 } 1170 1171 /* [LITHUANIAN] */ 1172 if (latin_utf8 || !strcmp(name, "lithuanian-utf8")) 1173 { 1174 /* a, c, e, e, i, , u, u, , A, C, E, E, I, , U, U, */ 1175 charsys_addmultibyterange(0xc4, 0xc4, 0x84, 0x85); 1176 charsys_addmultibyterange(0xc4, 0xc4, 0x8c, 0x8d); 1177 charsys_addmultibyterange(0xc4, 0xc4, 0x96, 0x99); 1178 charsys_addmultibyterange(0xc4, 0xc4, 0xae, 0xaf); 1179 charsys_addmultibyterange(0xc4, 0xc4, 0xae, 0xaf); 1180 charsys_addmultibyterange(0xc5, 0xc5, 0xa0, 0xa1); 1181 charsys_addmultibyterange(0xc5, 0xc5, 0xb2, 0xb3); 1182 charsys_addmultibyterange(0xc5, 0xc5, 0xaa, 0xab); 1183 charsys_addmultibyterange(0xc5, 0xc5, 0xbd, 0xbe); 1184 } 1185 1186 /* [ARABIC] */ 1187 if (latin_utf8 || !strcmp(name, "arabic-utf8")) 1188 { 1189 /* Supplied by Sensiva */ 1190 /*charsys_addallowed("اأإآءبتثجحخدذرزسشصضطظعغفقكلمنهؤةويىئ");*/ 1191 /*- From U+0621 to U+063A (Regex: [\u0621-\u063A])*/ 1192 /* 0xd8a1 - 0xd8ba */ 1193 charsys_addmultibyterange(0xd8, 0xd8, 0xa1, 0xba); 1194 /*- From U+0641 to U+064A (Regex: [\u0641-\u064A])*/ 1195 /* 0xd981 - 0xd98a */ 1196 charsys_addmultibyterange(0xd9, 0xd9, 0x81, 0x8a); 1197 } 1198 }

/** Build a list of all the nick characters that are currently permitted.
 *
 * The result starts with every single byte value that has the ALLOWN
 * bit set in char_atribs[], followed by every two-byte sequence that
 * isvalidmbyte() accepts, written as raw (first byte, second byte) pairs.
 * The multibyte part is silently truncated once the buffer is nearly full.
 *
 * @returns Pointer to a static, NUL-terminated buffer. The buffer is
 *          reused (overwritten) by every call.
 */
char *charsys_displaychars(void)
{
	static char buf[512];
	/* Stop adding pairs while there is still room for one more
	 * two-byte pair plus the terminating NUL.
	 */
	const size_t mb_limit = sizeof(buf) - 3;
	size_t n = 0;
	int i, j;

	/* Single-byte characters.
	 * (no bounds checking: first 255 characters always fit a 512 byte buffer)
	 */
	for (i = 0; i <= 255; i++)
	{
		if (char_atribs[i] & ALLOWN)
			buf[n++] = i;
	}

	/* Multibyte characters.
	 * Walking the mblist ranges directly was tried before, but didn't
	 * work due to multiple overlapping ranges being permitted.
	 * So probe every possible byte pair instead (lazy).. this is only
	 * used in DEBUGMODE via a command line option anyway.
	 * Both loops end as soon as the buffer is full, so no further
	 * isvalidmbyte() calls are made once nothing more can be stored.
	 */
	for (i = 0; (i <= 255) && (n < mb_limit); i++)
	{
		for (j = 0; (j <= 255) && (n < mb_limit); j++)
		{
			if (isvalidmbyte(i, j))
			{
				buf[n++] = i;
				buf[n++] = j;
			}
		}
	}

	buf[n] = '\0'; /* there's always room for a NUL */

	return buf;
}

/** Map a LANGAV_* flag bitmask to the name of its script group.
 *
 * The UTF8 script bits are tested in a fixed order (Latin, Cyrillic,
 * Greek, Hebrew, Arabic) and the first one that is set decides the result.
 *
 * @param v  Bitmask of LANGAV_* flags.
 * @returns A constant string naming the script, or "Other" if none of
 *          the tested bits is set.
 */
char *charsys_group(int v)
{
	if (v & LANGAV_LATIN_UTF8)
		return "Latin script";
	if (v & LANGAV_CYRILLIC_UTF8)
		return "Cyrillic script";
	if (v & LANGAV_GREEK_UTF8)
		return "Greek script";
	if (v & LANGAV_HEBREW_UTF8)
		return "Hebrew script";
	if (v & LANGAV_ARABIC_UTF8)
		return "Arabic script";

	return "Other";
}

/** Print the permitted characters of every matching charset to stdout.
 *
 * For each langlist[] entry whose directive matches 'filter' (according
 * to match_simple()), the character system is reset, only that charset
 * is added, and one line of the form "directive;group;characters" is
 * printed.
 *
 * NOTE(review): since this calls charsys_reset() for every match, it
 * presumably discards whatever charset configuration was active before
 * the call - confirm before using this outside of a debugging run.
 *
 * @param filter  Pattern that is matched against each langlist directive.
 */
void charsys_dump_table(char *filter)
{
	int i = 0;

	for (i = 0; langlist[i].directive; i++)
	{
		char *charset = langlist[i].directive;

		if (!match_simple(filter, charset))
			continue; /* skip */

		charsys_reset();
		charsys_add_language(charset);
		charsys_finish();
		printf("%s;%s;%s\n", charset, charsys_group(langlist[i].setflags), charsys_displaychars());
	}
}

/** Get current languages (the 'langsinuse' variable).
 * @returns Pointer to the file-level 'langsinuse' buffer itself (not a copy).
 */
char *_charsys_get_current_languages(void)
{
	return langsinuse;
}