// Copyright 2010 Wincent Colaiuta. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. #include "match.h" #include "ext.h" #include "ruby_compat.h" // use a struct to make passing params during recursion easier typedef struct { char *str_p; // pointer to string to be searched long str_len; // length of same char *abbrev_p; // pointer to search string (abbreviation) long abbrev_len; // length of same double max_score_per_char; int dot_file; // boolean: true if str is a dot-file int always_show_dot_files; // boolean int never_show_dot_files; // boolean } matchinfo_t; double recursive_match(matchinfo_t *m, // sharable meta-data long str_idx, // where in the path string to start long abbrev_idx, // where in the search string to start long last_idx, // location of last matched character double score) // cumulative score so far { double seen_score = 0; // remember best score seen via recursion int dot_file_match = 0; // true if abbrev matches a dot-file int dot_search = 0; // true if searching for a dot for (long i = abbrev_idx; i < m->abbrev_len; i++) { char c = m->abbrev_p[i]; if (c == '.') dot_search = 1; int found = 0; for (long j = str_idx; j < m->str_len; j++, str_idx++) { char d = m->str_p[j]; if (d == '.') { if (j == 0 || m->str_p[j - 1] == '/') { m->dot_file = 1; // this is a dot-file if (dot_search) // and we are searching for a dot dot_file_match = 1; // so this must be a match } } else if (d >= 'A' && d <= 'Z') d += 'a' - 'A'; // add 32 to downcase if (c == d) { found = 1; dot_search = 0; // calculate score double score_for_char = m->max_score_per_char; long distance = j - last_idx; if (distance > 1) { double factor = 1.0; char last = m->str_p[j - 1]; char curr = m->str_p[j]; // case matters, so get again if (last == '/') factor = 0.9; else if (last == '-' || last == '_' || last == ' ' || (last >= '0' && last <= '9')) factor = 0.8; else if (last >= 'a' && last <= 'z' && curr >= 'A' && curr <= 'Z') factor = 0.8; else if (last == '.') factor = 0.7; else // if no "special" chars behind char, factor diminishes // as distance from last matched char increases factor = (1.0 / distance) * 0.75; score_for_char *= factor; } if (++j < m->str_len) { // bump cursor one char to the right and // use recursion to try and find a better match double sub_score = recursive_match(m, j, i, last_idx, score); if (sub_score > seen_score) seen_score = sub_score; } score += score_for_char; last_idx = str_idx++; break; } } if (!found) return 0.0; } if (m->dot_file) { if (m->never_show_dot_files || (!dot_file_match && !m->always_show_dot_files)) return 0.0; } return (score > seen_score) ? score : seen_score; } // Match.new abbrev, string, options = {} VALUE CommandTMatch_initialize(int argc, VALUE *argv, VALUE self) { // process arguments: 2 mandatory, 1 optional VALUE str, abbrev, options; if (rb_scan_args(argc, argv, "21", &str, &abbrev, &options) == 2) options = Qnil; str = StringValue(str); abbrev = StringValue(abbrev); // already downcased by caller // check optional options hash for overrides VALUE always_show_dot_files = CommandT_option_from_hash("always_show_dot_files", options); VALUE never_show_dot_files = CommandT_option_from_hash("never_show_dot_files", options); matchinfo_t m; m.str_p = RSTRING_PTR(str); m.str_len = RSTRING_LEN(str); m.abbrev_p = RSTRING_PTR(abbrev); m.abbrev_len = RSTRING_LEN(abbrev); m.max_score_per_char = (1.0 / m.str_len + 1.0 / m.abbrev_len) / 2; m.dot_file = 0; m.always_show_dot_files = always_show_dot_files == Qtrue; m.never_show_dot_files = never_show_dot_files == Qtrue; // calculate score double score = 1.0; if (m.abbrev_len == 0) // special case for zero-length search string { // filter out dot files if (!m.always_show_dot_files) { for (long i = 0; i < m.str_len; i++) { char c = m.str_p[i]; if (c == '.' && (i == 0 || m.str_p[i - 1] == '/')) { score = 0.0; break; } } } } else // normal case score = recursive_match(&m, 0, 0, 0, 0.0); // clean-up and final book-keeping rb_iv_set(self, "@score", rb_float_new(score)); rb_iv_set(self, "@str", str); return Qnil; } VALUE CommandTMatch_matches(VALUE self) { double score = NUM2DBL(rb_iv_get(self, "@score")); return score > 0 ? Qtrue : Qfalse; } VALUE CommandTMatch_to_s(VALUE self) { return rb_iv_get(self, "@str"); }