]>
Commit | Line | Data |
---|---|---|
0d23b6e5 BB |
1 | // Copyright 2010 Wincent Colaiuta. All rights reserved. |
2 | // | |
3 | // Redistribution and use in source and binary forms, with or without | |
4 | // modification, are permitted provided that the following conditions are met: | |
5 | // | |
6 | // 1. Redistributions of source code must retain the above copyright notice, | |
7 | // this list of conditions and the following disclaimer. | |
8 | // 2. Redistributions in binary form must reproduce the above copyright notice, | |
9 | // this list of conditions and the following disclaimer in the documentation | |
10 | // and/or other materials provided with the distribution. | |
11 | // | |
12 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
13 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
14 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
15 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE | |
16 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
17 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
18 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
19 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
20 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
21 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
22 | // POSSIBILITY OF SUCH DAMAGE. | |
23 | ||
24 | #include "match.h" | |
25 | #include "ext.h" | |
26 | #include "ruby_compat.h" | |
27 | ||
28 | // use a struct to make passing params during recursion easier | |
29 | typedef struct | |
30 | { | |
31 | char *str_p; // pointer to string to be searched | |
32 | long str_len; // length of same | |
33 | char *abbrev_p; // pointer to search string (abbreviation) | |
34 | long abbrev_len; // length of same | |
35 | double max_score_per_char; | |
36 | int dot_file; // boolean: true if str is a dot-file | |
37 | int always_show_dot_files; // boolean | |
38 | int never_show_dot_files; // boolean | |
39 | } matchinfo_t; | |
40 | ||
41 | double recursive_match(matchinfo_t *m, // sharable meta-data | |
42 | long str_idx, // where in the path string to start | |
43 | long abbrev_idx, // where in the search string to start | |
44 | long last_idx, // location of last matched character | |
45 | double score) // cumulative score so far | |
46 | { | |
47 | double seen_score = 0; // remember best score seen via recursion | |
48 | int dot_file_match = 0; // true if abbrev matches a dot-file | |
49 | int dot_search = 0; // true if searching for a dot | |
50 | ||
51 | for (long i = abbrev_idx; i < m->abbrev_len; i++) | |
52 | { | |
53 | char c = m->abbrev_p[i]; | |
54 | if (c == '.') | |
55 | dot_search = 1; | |
56 | int found = 0; | |
57 | for (long j = str_idx; j < m->str_len; j++, str_idx++) | |
58 | { | |
59 | char d = m->str_p[j]; | |
60 | if (d == '.') | |
61 | { | |
62 | if (j == 0 || m->str_p[j - 1] == '/') | |
63 | { | |
64 | m->dot_file = 1; // this is a dot-file | |
65 | if (dot_search) // and we are searching for a dot | |
66 | dot_file_match = 1; // so this must be a match | |
67 | } | |
68 | } | |
69 | else if (d >= 'A' && d <= 'Z') | |
70 | d += 'a' - 'A'; // add 32 to downcase | |
71 | if (c == d) | |
72 | { | |
73 | found = 1; | |
74 | dot_search = 0; | |
75 | ||
76 | // calculate score | |
77 | double score_for_char = m->max_score_per_char; | |
78 | long distance = j - last_idx; | |
79 | if (distance > 1) | |
80 | { | |
81 | double factor = 1.0; | |
82 | char last = m->str_p[j - 1]; | |
83 | char curr = m->str_p[j]; // case matters, so get again | |
84 | if (last == '/') | |
85 | factor = 0.9; | |
86 | else if (last == '-' || | |
87 | last == '_' || | |
88 | last == ' ' || | |
89 | (last >= '0' && last <= '9')) | |
90 | factor = 0.8; | |
91 | else if (last >= 'a' && last <= 'z' && | |
92 | curr >= 'A' && curr <= 'Z') | |
93 | factor = 0.8; | |
94 | else if (last == '.') | |
95 | factor = 0.7; | |
96 | else | |
97 | // if no "special" chars behind char, factor diminishes | |
98 | // as distance from last matched char increases | |
99 | factor = (1.0 / distance) * 0.75; | |
100 | score_for_char *= factor; | |
101 | } | |
102 | ||
103 | if (++j < m->str_len) | |
104 | { | |
105 | // bump cursor one char to the right and | |
106 | // use recursion to try and find a better match | |
107 | double sub_score = recursive_match(m, j, i, last_idx, score); | |
108 | if (sub_score > seen_score) | |
109 | seen_score = sub_score; | |
110 | } | |
111 | ||
112 | score += score_for_char; | |
113 | last_idx = str_idx++; | |
114 | break; | |
115 | } | |
116 | } | |
117 | if (!found) | |
118 | return 0.0; | |
119 | } | |
120 | if (m->dot_file) | |
121 | { | |
122 | if (m->never_show_dot_files || | |
123 | (!dot_file_match && !m->always_show_dot_files)) | |
124 | return 0.0; | |
125 | } | |
126 | return (score > seen_score) ? score : seen_score; | |
127 | } | |
128 | ||
129 | // Match.new abbrev, string, options = {} | |
130 | VALUE CommandTMatch_initialize(int argc, VALUE *argv, VALUE self) | |
131 | { | |
132 | // process arguments: 2 mandatory, 1 optional | |
133 | VALUE str, abbrev, options; | |
134 | if (rb_scan_args(argc, argv, "21", &str, &abbrev, &options) == 2) | |
135 | options = Qnil; | |
136 | str = StringValue(str); | |
137 | abbrev = StringValue(abbrev); // already downcased by caller | |
138 | ||
139 | // check optional options hash for overrides | |
140 | VALUE always_show_dot_files = CommandT_option_from_hash("always_show_dot_files", options); | |
141 | VALUE never_show_dot_files = CommandT_option_from_hash("never_show_dot_files", options); | |
142 | ||
143 | matchinfo_t m; | |
144 | m.str_p = RSTRING_PTR(str); | |
145 | m.str_len = RSTRING_LEN(str); | |
146 | m.abbrev_p = RSTRING_PTR(abbrev); | |
147 | m.abbrev_len = RSTRING_LEN(abbrev); | |
148 | m.max_score_per_char = (1.0 / m.str_len + 1.0 / m.abbrev_len) / 2; | |
149 | m.dot_file = 0; | |
150 | m.always_show_dot_files = always_show_dot_files == Qtrue; | |
151 | m.never_show_dot_files = never_show_dot_files == Qtrue; | |
152 | ||
153 | // calculate score | |
154 | double score = 1.0; | |
155 | if (m.abbrev_len == 0) // special case for zero-length search string | |
156 | { | |
157 | // filter out dot files | |
158 | if (!m.always_show_dot_files) | |
159 | { | |
160 | for (long i = 0; i < m.str_len; i++) | |
161 | { | |
162 | char c = m.str_p[i]; | |
163 | if (c == '.' && (i == 0 || m.str_p[i - 1] == '/')) | |
164 | { | |
165 | score = 0.0; | |
166 | break; | |
167 | } | |
168 | } | |
169 | } | |
170 | } | |
171 | else // normal case | |
172 | score = recursive_match(&m, 0, 0, 0, 0.0); | |
173 | ||
174 | // clean-up and final book-keeping | |
175 | rb_iv_set(self, "@score", rb_float_new(score)); | |
176 | rb_iv_set(self, "@str", str); | |
177 | return Qnil; | |
178 | } | |
179 | ||
180 | VALUE CommandTMatch_matches(VALUE self) | |
181 | { | |
182 | double score = NUM2DBL(rb_iv_get(self, "@score")); | |
183 | return score > 0 ? Qtrue : Qfalse; | |
184 | } | |
185 | ||
186 | VALUE CommandTMatch_to_s(VALUE self) | |
187 | { | |
188 | return rb_iv_get(self, "@str"); | |
189 | } |