unrealircd- supernets unrealircd source & configuration |
git clone git://git.acid.vegas/unrealircd.git |
Log | Files | Refs | Archive | README | LICENSE |
match.c (23974B)
1 /* 2 * Unreal Internet Relay Chat Daemon, src/match.c 3 * Copyright (C) 1990 Jarkko Oikarinen 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 1, or (at your option) 8 * any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 */ 19 20 21 #include "unrealircd.h" 22 23 ID_Copyright("(C) 1990 Jarkko Oikarinen"); 24 25 /* 26 * Compare if a given string (name) matches the given 27 * mask (which can contain wild cards: '*' - match any 28 * number of chars, '?' - match any single character. 29 * 30 * return 0, if match 31 * 1, if no match 32 */ 33 34 u_char touppertab[], tolowertab[]; 35 #define tolowertab2 tolowertab 36 #define lc(x) tolowertab2[x] 37 38 /* Match routine for special cases where escaping is needed in a normal fashion. 39 * Checks a string ('name') against a globbing(+more) pattern ('mask'). 40 * Original by Douglas A Lewis (dalewis@acsu.buffalo.edu). 41 * Code based on hybrid7's version (match_esc()). 42 * Various modifications by Bram Matthys (Syzop). 43 * Returns 1 on match and 0 for no match. 44 * Instead of our previous code, this one is less optimized but actually _readable_ ;). 45 * Modifications I (Syzop) had to do vs the hybrid7 code: 46 * - Got rid of (u_char *) casts, since we already compile with 47 * chars defaulting to unsigned [or else major things break] ;). 48 * - Support for '_'. 49 * - Rip out support for '#'. 50 */ 51 int match_esc(const char *mask, const char *name) 52 { 53 const u_char *m = mask; 54 const u_char *n = name; 55 const u_char *ma = NULL; 56 const u_char *na = name; 57 58 while(1) 59 { 60 if (*m == '*') 61 { 62 while (*m == '*') /* collapse.. */ 63 m++; 64 ma = m; 65 na = n; 66 } 67 68 if (!*m) 69 { 70 if (!*n) 71 return 1; 72 if (!ma) 73 return 0; 74 for (m--; (m > (const u_char *)mask) && (*m == '?'); m--); 75 if (*m == '*') 76 return 1; 77 m = ma; 78 n = ++na; 79 } else 80 if (!*n) 81 { 82 while (*m == '*') /* collapse.. */ 83 m++; 84 return (*m == 0); 85 } 86 87 if (*m != '?') 88 { 89 if (*m == '\\') 90 if (!*++m) 91 return 0; /* unfinished escape sequence */ 92 if ((lc(*m) != lc(*n)) && !((*m == '_') && (*n == ' '))) 93 { 94 if (!ma) 95 return 0; 96 m = ma; 97 n = ++na; 98 } else 99 { 100 m++; 101 n++; 102 } 103 } else 104 { 105 m++; 106 n++; 107 } 108 } 109 return 0; 110 } 111 112 /** Same credit/copyright as match_esc() applies, except escaping removed.. ;p */ 113 int match_simple(const char *mask, const char *name) 114 { 115 const u_char *m = mask; 116 const u_char *n = name; 117 const u_char *ma = NULL; 118 const u_char *na = name; 119 120 while(1) 121 { 122 if (*m == '*') 123 { 124 while (*m == '*') /* collapse.. */ 125 m++; 126 ma = m; 127 na = n; 128 } 129 130 if (!*m) 131 { 132 if (!*n) 133 return 1; 134 if (!ma) 135 return 0; 136 for (m--; (m > (const u_char *)mask) && (*m == '?'); m--); 137 if (*m == '*') 138 return 1; 139 m = ma; 140 n = ++na; 141 } else 142 if (!*n) 143 { 144 while (*m == '*') /* collapse.. */ 145 m++; 146 return (*m == 0); 147 } 148 149 if ((lc(*m) != lc(*n)) && !((*m == '_') && (*n == ' ')) && (*m != '?')) 150 { 151 if (!ma) 152 return 0; 153 m = ma; 154 n = ++na; 155 } else 156 { 157 m++; 158 n++; 159 } 160 } 161 return 0; 162 } 163 164 /* 165 * collapse a pattern string into minimal components. 166 * This particular version is "in place", so that it changes the pattern 167 * which is to be reduced to a "minimal" size. 168 */ 169 char *collapse(char *pattern) 170 { 171 char *s; 172 char *s1; 173 char *t; 174 175 s = pattern; 176 177 if (BadPtr(pattern)) 178 return pattern; 179 /* 180 * Collapse all \** into \*, \*[?]+\** into \*[?]+ 181 */ 182 for (; *s; s++) 183 if (*s == '\\') 184 { 185 if (!*(s + 1)) 186 break; 187 else 188 s++; 189 } 190 else if (*s == '*') 191 { 192 if (*(t = s1 = s + 1) == '*') 193 while (*t == '*') 194 t++; 195 else if (*t == '?') 196 for (t++, s1++; *t == '*' || *t == '?'; t++) 197 if (*t == '?') 198 *s1++ = *t; 199 while ((*s1++ = *t++)) 200 ; 201 } 202 return pattern; 203 } 204 205 206 /* Case insensitive comparison of two NULL terminated strings, 207 * using the "IRC nick comparison" rules. Or, well, partially 208 * anyway. 209 * Should be used for NICK-related comparisons. And probably 210 * not even then, since this does not deal with multibyte. 211 * @returns 0, if s1 equal to s2 212 * <0, if s1 lexicographically less than s2 213 * >0, if s1 lexicographically greater than s2 214 */ 215 int smycmp(const char *s1, const char *s2) 216 { 217 u_char *str1; 218 u_char *str2; 219 int res; 220 221 str1 = (u_char *)s1; 222 str2 = (u_char *)s2; 223 224 while ((res = toupper(*str1) - toupper(*str2)) == 0) 225 { 226 if (*str1 == '\0') 227 return 0; 228 str1++; 229 str2++; 230 } 231 return (res); 232 } 233 234 u_char tolowertab[] = { 235 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 236 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 237 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 238 0x1e, 0x1f, 239 ' ', '!', '"', '#', '$', '%', '&', 0x27, '(', ')', 240 '*', '+', ',', '-', '.', '/', 241 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 242 ':', ';', '<', '=', '>', '?', 243 '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 244 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 245 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', '\\', ']', '^', 246 '_', 247 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 248 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 249 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', 250 0x7f, 251 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 252 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 253 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 254 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 255 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 256 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 257 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 258 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 259 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 260 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 261 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 262 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 263 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 264 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 265 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 266 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff 267 }; 268 269 u_char touppertab[] = { 270 0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 271 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 272 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 273 0x1e, 0x1f, 274 ' ', '!', '"', '#', '$', '%', '&', 0x27, '(', ')', 275 '*', '+', ',', '-', '.', '/', 276 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 277 ':', ';', '<', '=', '>', '?', 278 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 279 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 280 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', 281 0x5f, 282 '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 283 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 284 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 285 0x7f, 286 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 287 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 288 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 289 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 290 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 291 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 292 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 293 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 294 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 295 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 296 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 297 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 298 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 299 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 300 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 301 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff 302 }; 303 304 u_char char_atribs[] = { 305 /* 0-7 */ CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, 306 /* 8-12 */ CNTRL, CNTRL | SPACE, CNTRL | SPACE, CNTRL | SPACE, 307 CNTRL | SPACE, 308 /* 13-15 */ CNTRL | SPACE, CNTRL, CNTRL, 309 /* 16-23 */ CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, 310 /* 24-31 */ CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, CNTRL, 311 /* space */ PRINT | SPACE, 312 /* !"#$%&'( */ PRINT, PRINT, PRINT, PRINT, PRINT, PRINT, PRINT, PRINT, 313 /* )*+,-./ */ PRINT, PRINT, PRINT, PRINT, PRINT | ALLOW, PRINT | ALLOW, 314 PRINT, 315 /* 012 */ PRINT | DIGIT | ALLOW, PRINT | DIGIT | ALLOW, 316 PRINT | DIGIT | ALLOW, 317 /* 345 */ PRINT | DIGIT | ALLOW, PRINT | DIGIT | ALLOW, 318 PRINT | DIGIT | ALLOW, 319 /* 678 */ PRINT | DIGIT | ALLOW, PRINT | DIGIT | ALLOW, 320 PRINT | DIGIT | ALLOW, 321 /* 9:; */ PRINT | DIGIT | ALLOW, PRINT, PRINT, 322 /* <=>? */ PRINT, PRINT, PRINT, PRINT, 323 /* @ */ PRINT, 324 /* ABC */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 325 PRINT | ALPHA | ALLOW, 326 /* DEF */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 327 PRINT | ALPHA | ALLOW, 328 /* GHI */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 329 PRINT | ALPHA | ALLOW, 330 /* JKL */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 331 PRINT | ALPHA | ALLOW, 332 /* MNO */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 333 PRINT | ALPHA | ALLOW, 334 /* PQR */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 335 PRINT | ALPHA | ALLOW, 336 /* STU */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 337 PRINT | ALPHA | ALLOW, 338 /* VWX */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 339 PRINT | ALPHA | ALLOW, 340 /* YZ[ */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, PRINT, 341 /* \]^ */ PRINT, PRINT, PRINT, 342 /* _` */ PRINT | ALLOW, PRINT, 343 /* abc */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 344 PRINT | ALPHA | ALLOW, 345 /* def */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 346 PRINT | ALPHA | ALLOW, 347 /* ghi */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 348 PRINT | ALPHA | ALLOW, 349 /* jkl */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 350 PRINT | ALPHA | ALLOW, 351 /* mno */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 352 PRINT | ALPHA | ALLOW, 353 /* pqr */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 354 PRINT | ALPHA | ALLOW, 355 /* stu */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 356 PRINT | ALPHA | ALLOW, 357 /* vwx */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, 358 PRINT | ALPHA | ALLOW, 359 /* yz{ */ PRINT | ALPHA | ALLOW, PRINT | ALPHA | ALLOW, PRINT, 360 /* |}~ */ PRINT, PRINT, PRINT, 361 /* del */ 0, 362 /* 80-8f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 363 /* 90-9f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 364 /* a0-af */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 365 /* b0-bf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 366 /* c0-cf */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 367 /* d0-df */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 368 /* e0-ef */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 369 /* f0-ff */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 370 }; 371 372 /** Free up all resources of an Match entry (including the struct itself). 373 * NOTE: this function may (also) be called for Match structs that have only been 374 * setup half-way, so use special care when accessing members (NULL checks!) 375 */ 376 void unreal_delete_match(Match *m) 377 { 378 safe_free(m->str); 379 if (m->type == MATCH_PCRE_REGEX) 380 { 381 if (m->ext.pcre2_expr) 382 pcre2_code_free(m->ext.pcre2_expr); 383 } 384 safe_free(m); 385 } 386 387 Match *unreal_create_match(MatchType type, const char *str, char **error) 388 { 389 Match *m = safe_alloc(sizeof(Match)); 390 static char errorbuf[512]; 391 392 *errorbuf = '\0'; 393 394 safe_strdup(m->str, str); 395 m->type = type; 396 397 if (m->type == MATCH_SIMPLE) 398 { 399 /* Nothing to do */ 400 } 401 else if (m->type == MATCH_PCRE_REGEX) 402 { 403 int errorcode = 0; 404 PCRE2_SIZE erroroffset = 0; 405 int options = 0; 406 char buf2[512]; 407 408 if (iConf.spamfilter_utf8) 409 options = PCRE2_CASELESS|PCRE2_MATCH_INVALID_UTF; 410 else 411 options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 412 413 m->ext.pcre2_expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL); 414 if (m->ext.pcre2_expr == NULL) 415 { 416 *buf2 = '\0'; 417 pcre2_get_error_message(errorcode, buf2, sizeof(buf2)); 418 if (error) 419 { 420 if (erroroffset > 0) 421 snprintf(errorbuf, sizeof(errorbuf), "%s (at character #%d)", buf2, (int)erroroffset); 422 else 423 strlcpy(errorbuf, buf2, sizeof(errorbuf)); 424 *error = errorbuf; 425 } 426 unreal_delete_match(m); 427 return NULL; 428 } 429 pcre2_jit_compile(m->ext.pcre2_expr, PCRE2_JIT_COMPLETE); 430 return m; 431 } 432 else { 433 /* Unknown type, how did that happen ? */ 434 unreal_delete_match(m); 435 return NULL; 436 } 437 return m; 438 } 439 440 /** Try to match an Match entry ('m') against a string ('str'). 441 * @returns 1 if matched, 0 if not. 442 * @note These (more logical) return values are opposite to the match_simple() function. 443 */ 444 int unreal_match(Match *m, const char *str) 445 { 446 if (m->type == MATCH_SIMPLE) 447 { 448 if (match_simple(m->str, str)) 449 return 1; 450 return 0; 451 } 452 453 if (m->type == MATCH_PCRE_REGEX) 454 { 455 pcre2_match_data *md = pcre2_match_data_create(9, NULL); 456 int ret; 457 458 ret = pcre2_match(m->ext.pcre2_expr, str, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */ 459 pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */ 460 461 if (ret > 0) 462 return 1; /* MATCH */ 463 return 0; /* NO MATCH */ 464 } 465 466 return 0; 467 } 468 469 int unreal_match_method_strtoval(const char *str) 470 { 471 if (!strcmp(str, "regex") || !strcmp(str, "pcre")) 472 return MATCH_PCRE_REGEX; 473 if (!strcmp(str, "simple") || !strcmp(str, "glob")) 474 return MATCH_SIMPLE; 475 return 0; 476 } 477 478 char *unreal_match_method_valtostr(int val) 479 { 480 if (val == MATCH_PCRE_REGEX) 481 return "regex"; 482 if (val == MATCH_SIMPLE) 483 return "simple"; 484 485 return "unknown"; 486 } 487 488 /* It is unfortunately that we have 2 matching/replace systems. 489 * However, the above is for spamfilter matching and stuff 490 * and below is for matching on WORDS, which does specific things 491 * like replacement on word boundaries etc. 492 * Moved here from the censor channel and user mode module 493 * (previously was present in both modules, code duplication) 494 */ 495 int fast_badword_match(ConfigItem_badword *badword, const char *line) 496 { 497 const char *p; 498 int bwlen = strlen(badword->word); 499 500 if ((badword->type & BADW_TYPE_FAST_L) && (badword->type & BADW_TYPE_FAST_R)) 501 return (our_strcasestr(line, badword->word) ? 1 : 0); 502 503 p = line; 504 while((p = our_strcasestr(p, badword->word))) 505 { 506 if (!(badword->type & BADW_TYPE_FAST_L)) 507 { 508 if ((p != line) && !iswseperator(*(p - 1))) /* aaBLA but no *BLA */ 509 goto next; 510 } 511 if (!(badword->type & BADW_TYPE_FAST_R)) 512 { 513 if (!iswseperator(*(p + bwlen))) /* BLAaa but no BLA* */ 514 goto next; 515 } 516 /* Looks like it matched */ 517 return 1; 518 next: 519 p += bwlen; 520 } 521 return 0; 522 } 523 524 /* fast_badword_replace: 525 * A fast replace routine written by Syzop used for replacing badwords. 526 * This searches in line for the bad word and replaces it. 527 * buf is used for the result and max is sizeof(buf). 528 * Assumptions[!]: max > 0 AND max > strlen(line)+1 529 */ 530 int fast_badword_replace(ConfigItem_badword *badword, const char *line, char *buf, int max) 531 { 532 /* Some aliases ;P */ 533 char *replacew = badword->replace ? badword->replace : REPLACEWORD; 534 const char *pold = line; /* pointer to the old string */ 535 const char *poldx = line; 536 char *pnew = buf; /* pointer to the new string */ 537 int replacen = -1; /* Only calculated if needed. w00t! saves us a few nanosecs? lol */ 538 int searchn = -1; 539 const char *startw, *endw; /* start and end of the word */ 540 char *c_eol = buf + max - 1; /* Cached end of (new) line */ 541 int run = 1; 542 int cleaned = 0; 543 544 while(run) { 545 pold = our_strcasestr(pold, badword->word); 546 if (!pold) 547 break; 548 if (replacen == -1) 549 replacen = strlen(replacew); 550 if (searchn == -1) 551 searchn = strlen(badword->word); 552 /* Hunt for start of word */ 553 if (pold > line) { 554 for (startw = pold; (!iswseperator(*startw) && (startw != line)); startw--); 555 if (iswseperator(*startw)) 556 startw++; /* Don't point at the space/seperator but at the word! */ 557 } else { 558 startw = pold; 559 } 560 561 if (!(badword->type & BADW_TYPE_FAST_L) && (pold != startw)) { 562 /* not matched */ 563 pold++; 564 continue; 565 } 566 567 /* Hunt for end of word 568 * Fix for bug #4909: word will be at least 'searchn' long so we can skip 569 * 'searchn' bytes and avoid stopping half-way the badword. 570 */ 571 for (endw = pold+searchn; ((*endw != '\0') && (!iswseperator(*endw))); endw++); 572 573 if (!(badword->type & BADW_TYPE_FAST_R) && (pold+searchn != endw)) { 574 /* not matched */ 575 pold++; 576 continue; 577 } 578 579 cleaned = 1; /* still too soon? Syzop/20050227 */ 580 581 /* Do we have any not-copied-yet data? */ 582 if (poldx != startw) { 583 int tmp_n = startw - poldx; 584 if (pnew + tmp_n >= c_eol) { 585 /* Partial copy and return... */ 586 memcpy(pnew, poldx, c_eol - pnew); 587 *c_eol = '\0'; 588 return 1; 589 } 590 591 memcpy(pnew, poldx, tmp_n); 592 pnew += tmp_n; 593 } 594 /* Now update the word in buf (pnew is now something like startw-in-new-buffer */ 595 596 if (replacen) { 597 if ((pnew + replacen) >= c_eol) { 598 /* Partial copy and return... */ 599 memcpy(pnew, replacew, c_eol - pnew); 600 *c_eol = '\0'; 601 return 1; 602 } 603 memcpy(pnew, replacew, replacen); 604 pnew += replacen; 605 } 606 poldx = pold = endw; 607 } 608 /* Copy the last part */ 609 if (*poldx) { 610 strncpy(pnew, poldx, c_eol - pnew); 611 *(c_eol) = '\0'; 612 } else { 613 *pnew = '\0'; 614 } 615 return cleaned; 616 } 617 618 /* 619 * Returns a string, which has been filtered by the words loaded via 620 * the loadbadwords() function. It's primary use is to filter swearing 621 * in both private and public messages 622 */ 623 const char *stripbadwords(const char *str, ConfigItem_badword *start_bw, int *blocked) 624 { 625 static char cleanstr[4096]; 626 char buf[4096]; 627 char *ptr; 628 int matchlen, m, stringlen, cleaned; 629 ConfigItem_badword *this_word; 630 631 *blocked = 0; 632 633 if (!start_bw) 634 return str; 635 636 /* 637 * work on a copy 638 */ 639 stringlen = strlcpy(cleanstr, StripControlCodes(str), sizeof cleanstr); 640 matchlen = 0; 641 buf[0] = '\0'; 642 cleaned = 0; 643 644 for (this_word = start_bw; this_word; this_word = this_word->next) 645 { 646 if (this_word->type & BADW_TYPE_FAST) 647 { 648 if (this_word->action == BADWORD_BLOCK) 649 { 650 if (fast_badword_match(this_word, cleanstr)) 651 { 652 *blocked = 1; 653 return NULL; 654 } 655 } 656 else 657 { 658 int n; 659 /* fast_badword_replace() does size checking so we can use 512 here instead of 4096 */ 660 n = fast_badword_replace(this_word, cleanstr, buf, 512); 661 if (!cleaned && n) 662 cleaned = n; 663 strcpy(cleanstr, buf); 664 memset(buf, 0, sizeof(buf)); /* regexp likes this somehow */ 665 } 666 } else 667 if (this_word->type & BADW_TYPE_REGEX) 668 { 669 if (this_word->action == BADWORD_BLOCK) 670 { 671 pcre2_match_data *md = pcre2_match_data_create(9, NULL); 672 int ret; 673 674 ret = pcre2_match(this_word->pcre2_expr, cleanstr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */ 675 pcre2_match_data_free(md); /* yeah, we never use it. unfortunately argument must be non-NULL for pcre2_match() */ 676 if (ret > 0) 677 { 678 *blocked = 1; 679 return NULL; 680 } 681 } 682 else 683 { 684 pcre2_match_data *md; 685 int ret; 686 PCRE2_SIZE *dd; 687 int start, end; 688 689 ptr = cleanstr; /* set pointer to start of string */ 690 while(1) { 691 md = pcre2_match_data_create(9, NULL); 692 /* ^^ we need to free 'md' in ALL circumstances. 693 * remember this if you break or continue in this loop! 694 */ 695 ret = pcre2_match(this_word->pcre2_expr, ptr, PCRE2_ZERO_TERMINATED, 0, 0, md, NULL); /* run the regex */ 696 if (ret > 0) 697 { 698 dd = pcre2_get_ovector_pointer(md); 699 start = (int)dd[0]; 700 end = (int)dd[1]; 701 if ((start < 0) || (end < 0) || (start > strlen(ptr)) || (end > strlen(ptr)+1)) 702 { 703 unreal_log(ULOG_FATAL, "main", "BUG_STRIPBADWORDS_PCRE2_MATCH_OOB", NULL, 704 "[BUG] pcre2_match() returned an ovector with OOB start/end: $start/$end, len $length: '$buf'", 705 log_data_integer("start", start), 706 log_data_integer("end", end), 707 log_data_integer("length", strlen(ptr)), 708 log_data_string("buf", ptr)); 709 abort(); 710 } 711 m = end - start; 712 if (m == 0) 713 { 714 pcre2_match_data_free(md); 715 break; /* anti-loop */ 716 } 717 cleaned = 1; 718 matchlen += m; 719 strlncat(buf, ptr, sizeof buf, start); 720 if (this_word->replace) 721 strlcat(buf, this_word->replace, sizeof buf); 722 else 723 strlcat(buf, REPLACEWORD, sizeof buf); 724 ptr += end; /* Set pointer after the match pos */ 725 pcre2_match_data_free(md); 726 continue; /* next! */ 727 } 728 pcre2_match_data_free(md); 729 break; /* NOMATCH: we are done! */ 730 } 731 /* All the better to eat you with! */ 732 strlcat(buf, ptr, sizeof buf); 733 memcpy(cleanstr, buf, sizeof cleanstr); 734 memset(buf, 0, sizeof(buf)); 735 if (matchlen == stringlen) 736 break; 737 } 738 } 739 } 740 741 cleanstr[511] = '\0'; /* cutoff, just to be sure */ 742 743 return (cleaned) ? cleanstr : str; 744 } 745 746 /** Checks if the specified regex (or fast badwords) is valid. 747 * returns NULL in case of success [!], 748 * pointer to buffer with error message otherwise 749 * if check_broadness is 1, the function will attempt to determine 750 * if the given regex string is too broad (i.e. matches everything) 751 */ 752 const char *badword_config_check_regex(const char *str, int fastsupport, int check_broadness) 753 { 754 int regex=0; 755 const char *tmp; 756 static char errorbuf[512]; 757 758 if (fastsupport) 759 { 760 for (tmp = str; *tmp; tmp++) { 761 if (!isalnum(*tmp) && !(*tmp >= 128)) { 762 if ((str == tmp) && (*tmp == '*')) 763 continue; 764 if ((*(tmp + 1) == '\0') && (*tmp == '*')) 765 continue; 766 regex = 1; 767 break; 768 } 769 } 770 } 771 if (!fastsupport || regex) 772 { 773 int errorcode = 0; 774 PCRE2_SIZE erroroffset = 0; 775 pcre2_code *expr; 776 int options = 0; 777 char buf2[512]; 778 779 options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 780 781 expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL); 782 if (expr == NULL) 783 { 784 pcre2_get_error_message(errorcode, buf2, sizeof(buf2)); 785 if (erroroffset > 0) 786 snprintf(errorbuf, sizeof(errorbuf), "%s (at character #%d)", buf2, (int)erroroffset); 787 else 788 strlcpy(errorbuf, buf2, sizeof(errorbuf)); 789 return errorbuf; 790 } 791 pcre2_code_free(expr); 792 } 793 return NULL; 794 } 795 796 int badword_config_process(ConfigItem_badword *ca, const char *str) 797 { 798 const char *tmp; 799 short regex = 0; 800 int ast_l = 0, ast_r = 0; 801 802 /* The fast badwords routine can do: "blah" "*blah" "blah*" and "*blah*", 803 * in all other cases use regex. 804 */ 805 for (tmp = str; *tmp; tmp++) { 806 if (!isalnum(*tmp) && !(*tmp >= 128)) { 807 if ((str == tmp) && (*tmp == '*')) { 808 ast_l = 1; /* Asterisk at the left */ 809 continue; 810 } 811 if ((*(tmp + 1) == '\0') && (*tmp == '*')) { 812 ast_r = 1; /* Asterisk at the right */ 813 continue; 814 } 815 regex = 1; 816 break; 817 } 818 } 819 if (regex) 820 { 821 int errorcode = 0; 822 PCRE2_SIZE erroroffset = 0; 823 int options = 0; 824 825 ca->type = BADW_TYPE_REGEX; 826 safe_strdup(ca->word, str); 827 828 options = PCRE2_CASELESS|PCRE2_NEVER_UTF|PCRE2_NEVER_UCP; 829 830 ca->pcre2_expr = pcre2_compile(str, PCRE2_ZERO_TERMINATED, options, &errorcode, &erroroffset, NULL); 831 if (ca->pcre2_expr == NULL) 832 { 833 /* This cannot happen since badword_config_check_regex() 834 * should be called from config_test on each regex. 835 */ 836 config_error("badword_config_process(): failed to compile regex '%s', this is impossible!", str); 837 abort(); 838 } 839 pcre2_jit_compile(ca->pcre2_expr, PCRE2_JIT_COMPLETE); 840 } 841 else 842 { 843 char *tmpw; 844 ca->type = BADW_TYPE_FAST; 845 ca->word = tmpw = safe_alloc(strlen(str) - ast_l - ast_r + 1); 846 /* Copy except for asterisks */ 847 for (tmp = str; *tmp; tmp++) 848 if (*tmp != '*') 849 *tmpw++ = *tmp; 850 *tmpw = '\0'; 851 if (ast_l) 852 ca->type |= BADW_TYPE_FAST_L; 853 if (ast_r) 854 ca->type |= BADW_TYPE_FAST_R; 855 } 856 857 return 1; 858 } 859 860 /** Frees a ConfigItem_badword item. 861 * Note that it does NOT remove from the list, you need 862 * to do this BEFORE calling this function. 863 */ 864 void badword_config_free(ConfigItem_badword *e) 865 { 866 safe_free(e->word); 867 safe_free(e->replace); 868 if (e->pcre2_expr) 869 pcre2_code_free(e->pcre2_expr); 870 safe_free(e); 871 }