bmhisrch.c 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. /* +++Date last modified: 05-Jul-1997 */
  2. /*
  3. ** Case-Insensitive Boyer-Moore-Horspool pattern match
  4. **
  5. ** Public Domain version by Thad Smith 7/21/1992,
  6. ** based on a 7/92 public domain BMH version by Raymond Gardner.
  7. **
  8. ** This program is written in ANSI C and inherits the compilers
  9. ** ability (or lack thereof) to support non-"C" locales by use of
  10. ** toupper() and tolower() to perform case conversions.
  11. ** Limitation: pattern length + string length must be less than 32767.
  12. **
  13. ** 10/21/93 rdg Fixed bugs found by Jeff Dunlop
  14. */
  15. #include <limits.h>
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #include <ctype.h>
  19. typedef unsigned char uchar;
  20. void bmhi_init(const char *);
  21. char *bmhi_search(const char *, const int);
  22. void bhmi_cleanup(void);
  23. #define LARGE 32767 /* flag for last character match */
  24. static int patlen; /* # chars in pattern */
  25. static int skip[UCHAR_MAX+1]; /* skip-ahead count for test chars */
  26. static int skip2; /* skip-ahead after non-match with
  27. ** matching final character */
  28. static uchar *pat = NULL; /* uppercase copy of pattern */
  29. /*
  30. ** bmhi_init() is called prior to bmhi_search() to calculate the
  31. ** skip array for the given pattern.
  32. ** Error: exit(1) is called if no memory is available.
  33. */
  34. void bmhi_init(const char *pattern)
  35. {
  36. int i, lastpatchar;
  37. patlen = strlen(pattern);
  38. /* Make uppercase copy of pattern */
  39. pat = realloc ((void*)pat, patlen);
  40. if (!pat)
  41. exit(1);
  42. else atexit(bhmi_cleanup);
  43. for (i=0; i < patlen; i++)
  44. pat[i] = toupper(pattern[i]);
  45. /* initialize skip array */
  46. for ( i = 0; i <= UCHAR_MAX; ++i ) /* rdg 10/93 */
  47. skip[i] = patlen;
  48. for ( i = 0; i < patlen - 1; ++i )
  49. {
  50. skip[ pat[i] ] = patlen - i - 1;
  51. skip[tolower(pat[i])] = patlen - i - 1;
  52. }
  53. lastpatchar = pat[patlen - 1];
  54. skip[ lastpatchar ] = LARGE;
  55. skip[tolower(lastpatchar)] = LARGE;
  56. skip2 = patlen; /* Horspool's fixed second shift */
  57. for (i = 0; i < patlen - 1; ++i)
  58. {
  59. if ( pat[i] == lastpatchar )
  60. skip2 = patlen - i - 1;
  61. }
  62. }
  63. char *bmhi_search(const char *string, const int stringlen)
  64. {
  65. int i, j;
  66. char *s;
  67. i = patlen - 1 - stringlen;
  68. if (i >= 0)
  69. return NULL;
  70. string += stringlen;
  71. for ( ;; )
  72. {
  73. while ( (i += skip[((uchar *)string)[i]]) < 0 )
  74. ; /* mighty fast inner loop */
  75. if (i < (LARGE - stringlen))
  76. return NULL;
  77. i -= LARGE;
  78. j = patlen - 1;
  79. s = (char *)string + (i - j);
  80. while ( --j >= 0 && toupper(s[j]) == pat[j] )
  81. ;
  82. if ( j < 0 ) /* rdg 10/93 */
  83. return s; /* rdg 10/93 */
  84. if ( (i += skip2) >= 0 ) /* rdg 10/93 */
  85. return NULL; /* rdg 10/93 */
  86. }
  87. }
  88. void bhmi_cleanup(void)
  89. {
  90. free(pat);
  91. }