Blame view
lib/ts_bm.c
5.07 KB
2874c5fd2 treewide: Replace... |
1 |
// SPDX-License-Identifier: GPL-2.0-or-later |
8082e4ed0 [LIB]: Boyer-Moor... |
2 3 4 |
/* * lib/ts_bm.c Boyer-Moore text search implementation * |
8082e4ed0 [LIB]: Boyer-Moor... |
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
* Authors: Pablo Neira Ayuso <pablo@eurodev.net> * * ========================================================================== * * Implements Boyer-Moore string matching algorithm: * * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. * Communications of the Association for Computing Machinery, * 20(10), 1977, pp. 762-772. * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf * * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf * * Note: Since Boyer-Moore (BM) performs searches for matchings from right * to left, it's still possible that a matching could be spread over * multiple blocks, in that case this algorithm won't find any coincidence. * * If you're willing to ensure that such thing won't ever happen, use the * Knuth-Pratt-Morris (KMP) implementation instead. In conclusion, choose * the proper string search algorithm depending on your setting. * * Say you're using the textsearch infrastructure for filtering, NIDS or * any similar security focused purpose, then go KMP. Otherwise, if you * really care about performance, say you're classifying packets to apply * Quality of Service (QoS) policies, and you don't mind about possible * matchings spread over multiple fragments, then go BM. */ |
8082e4ed0 [LIB]: Boyer-Moor... |
33 34 35 36 |
#include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> |
3b76d0819 textsearch: ts_bm... |
37 |
#include <linux/ctype.h> |
8082e4ed0 [LIB]: Boyer-Moor... |
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
#include <linux/textsearch.h> /* Alphabet size, use ASCII */ #define ASIZE 256 #if 0 #define DEBUGP printk #else #define DEBUGP(args, format...) #endif struct ts_bm { u8 * pattern; unsigned int patlen; unsigned int bad_shift[ASIZE]; unsigned int good_shift[0]; }; static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) { struct ts_bm *bm = ts_config_priv(conf); unsigned int i, text_len, consumed = state->offset; const u8 *text; |
aebb6a849 textsearch: fix B... |
62 |
int shift = bm->patlen - 1, bs; |
3b76d0819 textsearch: ts_bm... |
63 |
const u8 icase = conf->flags & TS_IGNORECASE; |
8082e4ed0 [LIB]: Boyer-Moor... |
64 65 66 67 68 69 70 71 72 73 74 75 |
for (;;) { text_len = conf->get_next_block(consumed, &text, conf, state); if (unlikely(text_len == 0)) break; while (shift < text_len) { DEBUGP("Searching in position %d (%c) ", shift, text[shift]); for (i = 0; i < bm->patlen; i++) |
3b76d0819 textsearch: ts_bm... |
76 77 78 |
if ((icase ? toupper(text[shift-i]) : text[shift-i]) != bm->pattern[bm->patlen-1-i]) |
8082e4ed0 [LIB]: Boyer-Moor... |
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
goto next; /* London calling... */ DEBUGP("found! "); return consumed += (shift-(bm->patlen-1)); next: bs = bm->bad_shift[text[shift-i]]; /* Now jumping to... */ shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]); } consumed += text_len; } return UINT_MAX; } |
3f330317a [TEXTSEARCH]: Fix... |
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
static int subpattern(u8 *pattern, int i, int j, int g) { int x = i+g-1, y = j+g-1, ret = 0; while(pattern[x--] == pattern[y--]) { if (y < 0) { ret = 1; break; } if (--g == 0) { ret = pattern[i-1] != pattern[j-1]; break; } } return ret; } |
3b76d0819 textsearch: ts_bm... |
113 |
static void compute_prefix_tbl(struct ts_bm *bm, int flags) |
8082e4ed0 [LIB]: Boyer-Moor... |
114 |
{ |
3f330317a [TEXTSEARCH]: Fix... |
115 |
int i, j, g; |
8082e4ed0 [LIB]: Boyer-Moor... |
116 117 |
for (i = 0; i < ASIZE; i++) |
3ffaa8c7c [TEXTSEARCH]: Fix... |
118 |
bm->bad_shift[i] = bm->patlen; |
3b76d0819 textsearch: ts_bm... |
119 |
for (i = 0; i < bm->patlen - 1; i++) { |
3ffaa8c7c [TEXTSEARCH]: Fix... |
120 |
bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i; |
3b76d0819 textsearch: ts_bm... |
121 122 123 124 |
if (flags & TS_IGNORECASE) bm->bad_shift[tolower(bm->pattern[i])] = bm->patlen - 1 - i; } |
8082e4ed0 [LIB]: Boyer-Moor... |
125 126 127 |
/* Compute the good shift array, used to match reocurrences * of a subpattern */ |
8082e4ed0 [LIB]: Boyer-Moor... |
128 129 130 |
bm->good_shift[0] = 1; for (i = 1; i < bm->patlen; i++) bm->good_shift[i] = bm->patlen; |
3f330317a [TEXTSEARCH]: Fix... |
131 132 133 134 135 136 |
for (i = bm->patlen-1, g = 1; i > 0; g++, i--) { for (j = i-1; j >= 1-g ; j--) if (subpattern(bm->pattern, i, j, g)) { bm->good_shift[g] = bm->patlen-j-g; break; } |
8082e4ed0 [LIB]: Boyer-Moor... |
137 138 139 140 |
} } static struct ts_config *bm_init(const void *pattern, unsigned int len, |
3b76d0819 textsearch: ts_bm... |
141 |
gfp_t gfp_mask, int flags) |
8082e4ed0 [LIB]: Boyer-Moor... |
142 143 144 |
{ struct ts_config *conf; struct ts_bm *bm; |
3b76d0819 textsearch: ts_bm... |
145 |
int i; |
8082e4ed0 [LIB]: Boyer-Moor... |
146 147 148 149 150 151 |
unsigned int prefix_tbl_len = len * sizeof(unsigned int); size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) return conf; |
3b76d0819 textsearch: ts_bm... |
152 |
conf->flags = flags; |
8082e4ed0 [LIB]: Boyer-Moor... |
153 154 155 |
bm = ts_config_priv(conf); bm->patlen = len; bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len; |
3b76d0819 textsearch: ts_bm... |
156 157 158 159 160 161 |
if (flags & TS_IGNORECASE) for (i = 0; i < len; i++) bm->pattern[i] = toupper(((u8 *)pattern)[i]); else memcpy(bm->pattern, pattern, len); compute_prefix_tbl(bm, flags); |
8082e4ed0 [LIB]: Boyer-Moor... |
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
return conf; } static void *bm_get_pattern(struct ts_config *conf) { struct ts_bm *bm = ts_config_priv(conf); return bm->pattern; } static unsigned int bm_get_pattern_len(struct ts_config *conf) { struct ts_bm *bm = ts_config_priv(conf); return bm->patlen; } static struct ts_ops bm_ops = { .name = "bm", .find = bm_find, .init = bm_init, .get_pattern = bm_get_pattern, .get_pattern_len = bm_get_pattern_len, .owner = THIS_MODULE, .list = LIST_HEAD_INIT(bm_ops.list) }; static int __init init_bm(void) { return textsearch_register(&bm_ops); } static void __exit exit_bm(void) { textsearch_unregister(&bm_ops); } MODULE_LICENSE("GPL"); module_init(init_bm); module_exit(exit_bm); |