bmhasrch.c 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. /* +++Date last modified: 05-Jul-1997 */
  2. /*
  3. ** Boyer-Moore-Horspool pattern match
  4. ** Case-insensitive with accented character translation
  5. **
  6. ** public domain by Raymond Gardner 7/92
  7. **
  8. ** limitation: pattern length + subject length must be less than 32767
  9. **
  10. ** 10/21/93 rdg Fixed bug found by Jeff Dunlop
  11. */
  12. #include <limits.h> /* rdg 10/93 */
  13. #include <stddef.h>
  14. #include <string.h>
  15. typedef unsigned char uchar;
  16. #define LOWER_ACCENTED_CHARS
  17. unsigned char lowervec[UCHAR_MAX+1] = { /* rdg 10/93 */
  18. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  19. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  20. 32,'!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
  21. '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
  22. '@','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  23. 'p','q','r','s','t','u','v','w','x','y','z','[','\\',']','^','_',
  24. '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
  25. 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~',127,
  26. #ifdef LOWER_ACCENTED_CHARS
  27. 'c','u','e','a','a','a','a','c','e','e','e','i','i','i','a','a',
  28. 'e',145,146,'o','o','o','u','u','y','o','u',155,156,157,158,159,
  29. 'a','i','o','u','n','n',166,167,168,169,170,171,172,173,174,175,
  30. #else
  31. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  32. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  33. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  34. #endif
  35. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  36. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  37. 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  38. 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  39. 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
  40. };
  41. #define lowerc(c) lowervec[(uchar)(c)]
  42. #define LARGE 32767
  43. static int patlen;
  44. static int skip[UCHAR_MAX+1]; /* rdg 10/93 */
  45. static int skip2;
  46. static uchar *pat;
  47. void bmha_init(const char *pattern)
  48. {
  49. int i, j;
  50. pat = (uchar *)pattern;
  51. patlen = strlen(pattern);
  52. for (i = 0; i <= UCHAR_MAX; ++i) /* rdg 10/93 */
  53. {
  54. skip[i] = patlen;
  55. for (j = patlen - 1; j >= 0; --j)
  56. {
  57. if (lowerc(i) == lowerc(pat[j]))
  58. break;
  59. }
  60. if (j >= 0)
  61. skip[i] = patlen - j - 1;
  62. if (j == patlen - 1)
  63. skip[i] = LARGE;
  64. }
  65. skip2 = patlen;
  66. for (i = 0; i < patlen - 1; ++i)
  67. {
  68. if ( lowerc(pat[i]) == lowerc(pat[patlen - 1]) )
  69. skip2 = patlen - i - 1;
  70. }
  71. }
  72. char *bmha_search(const char *string, const int stringlen)
  73. {
  74. int i, j;
  75. char *s;
  76. i = patlen - 1 - stringlen;
  77. if (i >= 0)
  78. return NULL;
  79. string += stringlen;
  80. for ( ;; )
  81. {
  82. while ((i += skip[((uchar *)string)[i]]) < 0)
  83. ; /* mighty fast inner loop */
  84. if (i < (LARGE - stringlen))
  85. return NULL;
  86. i -= LARGE;
  87. j = patlen - 1;
  88. s = (char *)string + (i - j);
  89. while (--j >= 0 && lowerc(s[j]) == lowerc(pat[j]))
  90. ;
  91. if ( j < 0 ) /* rdg 10/93 */
  92. return s; /* rdg 10/93 */
  93. if ( (i += skip2) >= 0 ) /* rdg 10/93 */
  94. return NULL; /* rdg 10/93 */
  95. }
  96. }