Compare commits

...

2 Commits

Author SHA1 Message Date
Junegunn Choi
c980f9d3ce Fix test-case and improve command-line interface 2014-04-21 23:29:03 +09:00
Junegunn Choi
501af62661 Parallelize search using multiple processes
This commit is a proof-of-concept implementation of parallelized search
to take advantage of multiple cores of the system. A multi-threaded
program written in Ruby cannot saturate more than a single CPU core
because of the GIL. So the only way we can use multiple cores is to spawn
multiple processes, split the work, and merge the partial results. The
result of each child process is passed to the parent via pipe in a
serialized form. This serialization and deserialization of the result is
the unavoidable overhead of using multiple processes instead of multiple
threads. Unfortunately, Ruby's default serialization method,
Marshal.dump, was found to be so slow that it often overshadows the gain,
especially when the selectivity of the query is not very good. So in
this implementation I chose to use MessagePack, which is at least five times
faster than Marshal. However, this will make installing fzf much
trickier for many people. So I'm not sure if I want to merge this code
into the master branch considering that fzf is already pretty fast in
most cases thanks to its caching scheme and the benefit of
parallelization is only noticeable when the list has more than 50k
items.
2014-04-21 02:44:26 +09:00
2 changed files with 249 additions and 90 deletions

224
fzf
View File

@ -7,7 +7,7 @@
# / __/ / /_/ __/
# /_/ /___/_/ Fuzzy finder for your shell
#
# Version: 0.8.3 (April 3, 2014)
# Version: 0.9.0-alpha (April 21, 2014)
#
# Author: Junegunn Choi
# URL: https://github.com/junegunn/fzf
@ -74,6 +74,35 @@ class FZF
end
end
# TODO
def cores?
if File.readable? '/proc/cpuinfo'
cores = Hash.new { |h, k| h[k] = Set.new }
proc_id = nil
set = Set.new
File.readlines('/proc/cpuinfo').each do |line|
case line
when /physical id/
if proc_id != line
cores[line] = set
set = Set.new
end
cores[proc_id = line] = set
when /core id/
set << line
end
end
cores.values.inject(0) { |sum, s| sum + (s.length || 1) }
else
cores = `sysctl -n hw.physicalcpu 2> /dev/null`.chomp
if $?.exitstatus == 0
cores.to_i
else
1
end
end
end
def initialize argv, source = $stdin
@rxflag = nil
@sort = ENV.fetch('FZF_DEFAULT_SORT', 1000).to_i
@ -88,6 +117,9 @@ class FZF
@filter = nil
@nth = nil
@delim = nil
@pids = []
@procs = nil
@par_min = 10000
argv =
if opts = ENV['FZF_DEFAULT_OPTS']
@ -148,11 +180,28 @@ class FZF
@sort = $1.to_i
when '-e', '--extended-exact' then @extended = :exact
when '+e', '--no-extended-exact' then @extended = nil
when '-p', '--parallel'
usage 1, 'number of processes required' unless pnum = argv.shift
@procs = pnum.to_i
when /^-p([1-9][0-9]*)$/, /^--parallel=([1-9][0-9]*)$/
@procs = $1.to_i
when '--parallel-min'
usage 1, 'number required' unless pmin = argv.shift
@par_min = pmin.to_i
when /^--parallel-min=([1-9][0-9]*)$/
@par_min = $1.to_i
else
usage 1, "illegal option: #{o}"
end
end
begin
require 'msgpack'
@procs ||= cores?
rescue LoadError
@procs = nil
end
@source = source.clone
@mtx = Mutex.new
@cv = ConditionVariable.new
@ -223,7 +272,7 @@ class FZF
end
def filter_list list
matches = matcher.match(list, @filter, '', '')
matches = matcher.match(list, @filter)
if @sort && matches.length <= @sort
matches = FZF.sort(matches)
end
@ -281,6 +330,10 @@ class FZF
-n, --nth=[-]N[,..] Comma-separated list of field indexes for limiting
search scope (positive or negative integers)
-d, --delimiter=STR Field delimiter regex for --nth (default: AWK-style)
-p, --parallel=N Number of processes for parallel search
(default: auto-detect)
--parallel-min=N Minimum number of items to start parallel search
(default: 10000)
Search result
-s, --sort=MAX Maximum number of matched items to sort (default: 1000)
@ -667,6 +720,100 @@ class FZF
end
end
def cached lists, q, prefix, suffix
cnt = 0
cached = lists.inject({}) do |sum, l|
cached = matcher.cached(l, q, prefix, suffix)
cnt += cached ? cached.length : l.length
sum[l] = cached
sum
end
[cnt, cached]
end
def search lists, q, cx
cache_count, cached = cached(lists, q, q[0, cx], q[cx..-1])
if !@procs || @procs <= 1 || lists.empty? || lists.length < @procs || cache_count < @par_min
search_sequential lists, q, cached
else
search_parallel lists, q, cached
end
end
def search_sequential lists, q, cached
progress = 0
started_at = Time.now
found = []
skip = false
cnt = 0
lists.each do |list|
cnt += list.length
skip = @mtx.synchronize { @events[:key] }
break if skip
if (progress = 100 * cnt / @count.get) < 100 && Time.now - started_at > 0.5
render { print_info " (#{progress}%)" }
end
matches = matcher.match(list, q, cached[list])
matcher.cache list, q, matches
found.concat matches
end
return :skip if skip
found
end
def search_parallel lists, q, cached
list_map = lists.inject({}) { |h, l| h[l.object_id] = l; h }
slice_size = lists.length / @procs
slices = lists.each_slice(slice_size)
render { print_info " (#{@procs}x)" }
triples = slices.map do |lists|
read, write = IO.pipe
[fork do
read.close
running = true
Signal.trap('USR1') do
running = false
end
matches = {}
lists.each do |list|
break unless running
matches[list.object_id] = matcher.match(list, q, cached[list])
end
write << MessagePack.pack(matches)
exit! running
end, read, write]
end
matches = []
@pids = triples.map { |t| t.first }
mutex = Mutex.new
skip = false
triples.map { |pid, read, write|
Thread.new do
write.close
result = read.read
_, status = Process.wait2(pid)
skip = status.exitstatus != 0
MessagePack.unpack(result).each do |list_object_id, data|
mutex.synchronize do
matches.concat data
end
matcher.cache list_map[list_object_id], q, data
end
end
}.each(&:join)
skip ? :skip : matches
end
def signal_children
while pid = @pids.pop
Process.kill 'USR1', pid rescue nil
end
end
def start_search &callback
Thread.new do
lists = []
@ -704,39 +851,26 @@ class FZF
new_search = events[:key] || events.delete(:new)
user_input = events[:key]
progress = 0
started_at = Time.now
if updated = new_search && !lists.empty?
q, cx = events.delete(:key) || [q, 0]
empty = matcher.empty?(q)
unless matches = fcache[q]
found = []
skip = false
cnt = 0
lists.each do |list|
cnt += list.length
skip = @mtx.synchronize { @events[:key] }
break if skip
if !empty && (progress = 100 * cnt / @count.get) < 100 && Time.now - started_at > 0.5
render { print_info " (#{progress}%)" }
end
found.concat(q.empty? ? list :
matcher.match(list, q, q[0, cx], q[cx..-1]))
end
next if skip
matches = @sort ? found : found.reverse
if !empty && @sort && matches.length <= @sort
# Simply concats the list
if matcher.empty?(q)
matches = lists.inject([]) { |cc, l| cc.concat l }
else
matches ||= search(lists, q, cx)
next if matches == :skip
matches = @sort ? matches : matches.reverse
if @sort && matches.length <= @sort
matches = FZF.sort(matches)
end
fcache[q] = matches
end
end
# Atomic update
@matches.set matches
end#new_search
@matches.set fcache[q] = matches
end
callback = nil if callback &&
(updated || events[:loaded]) &&
@ -1083,7 +1217,10 @@ class FZF
upd = actions.fetch(key, actions[:default]).call(key)
# Dispatch key event
emit(:key) { [@query.set(input.dup), cursor] } if upd
if upd
signal_children
emit(:key) { [@query.set(input.dup), cursor] }
end
end
end
ensure
@ -1176,9 +1313,7 @@ class FZF
end
end
def match list, q, prefix, suffix
regexp = fuzzy_regex q
def cached list, q, prefix, suffix
cache = @caches[list.object_id]
prefix_cache = nil
(prefix.length - 1).downto(1) do |len|
@ -1190,10 +1325,17 @@ class FZF
break if suffix_cache = cache[suffix[idx..-1]]
end unless suffix.empty?
partial_cache = [prefix_cache,
suffix_cache].compact.sort_by { |e| e.length }.first
cache[q] ||= (partial_cache ?
partial_cache.map { |e| e.first } : list).map { |line|
[prefix_cache, suffix_cache].compact.sort_by { |e| e.length }.first
end
def cache list, q, data
@caches[list.object_id][q] = data
end
def match list, q, partial_cache = nil
regexp = fuzzy_regex q
(partial_cache ? partial_cache.map { |e| e.first } : list).map { |line|
# Ignore errors: e.g. invalid byte sequence in UTF-8
md = do_match(line, regexp)
md && [line, [md.offset(0)]]
@ -1249,7 +1391,11 @@ class FZF
Regexp.new(sanitize(Regexp.escape(w)), rxflag_for(w))
end
def match list, q, prefix, suffix
def cache list, q, data
@caches[list.object_id][Set[parse q]] = data
end
def cached list, q, prefix, suffix
regexps = parse q
# Look for prefix cache
cache = @caches[list.object_id]
@ -1258,10 +1404,12 @@ class FZF
(prefix.length - 1).downto(1) do |len|
break if prefix_cache = cache[Set[@regexps[prefix[0, len]]]]
end
prefix_cache
end
cache[Set[regexps]] ||= (prefix_cache ?
prefix_cache.map { |e| e.first } :
list).map { |line|
def match list, q, partial_cache = nil
regexps = parse q
(partial_cache ? partial_cache.map { |e| e.first } : list).map { |line|
offsets = []
regexps.all? { |pair|
regexp, invert = pair

View File

@ -220,7 +220,10 @@ class TestFZF < MiniTest::Unit::TestCase
["juiceless", [[0, 1]]],
["juicily", [[0, 1]]],
["juiciness", [[0, 1]]],
["juicy", [[0, 1]]]], matcher.match(list, 'j', '', '').sort)
["juicy", [[0, 1]]]], matcher.match(list, 'j').sort)
assert matcher.caches.empty?
matcher.cache list, 'j', matcher.match(list, 'j')
assert !matcher.caches.empty?
assert_equal [list.object_id], matcher.caches.keys
assert_equal 1, matcher.caches[list.object_id].length
@ -228,11 +231,13 @@ class TestFZF < MiniTest::Unit::TestCase
assert_equal(
[["juicily", [[0, 5]]],
["juiciness", [[0, 5]]]], matcher.match(list, 'jii', '', '').sort)
["juiciness", [[0, 5]]]], matcher.match(list, 'jii').sort)
matcher.cache list, 'jii', matcher.match(list, 'jii')
assert_equal(
[["juicily", [[2, 5]]],
["juiciness", [[2, 5]]]], matcher.match(list, 'ii', '', '').sort)
["juiciness", [[2, 5]]]], matcher.match(list, 'ii').sort)
matcher.cache list, 'ii', matcher.match(list, 'jii')
assert_equal 3, matcher.caches[list.object_id].length
assert_equal 2, matcher.caches[list.object_id]['ii'].length
@ -256,40 +261,40 @@ class TestFZF < MiniTest::Unit::TestCase
def test_fuzzy_matcher_case_sensitive
# Smart-case match (Uppercase found)
assert_equal [['Fruit', [[0, 5]]]],
FZF::FuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], 'Fruit', '', '').sort
FZF::FuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], 'Fruit').sort
# Smart-case match (Uppercase not-found)
assert_equal [["Fruit", [[0, 5]]], ["Grapefruit", [[5, 10]]]],
FZF::FuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], 'fruit', '', '').sort
FZF::FuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], 'fruit').sort
# Case-sensitive match (-i)
assert_equal [['Fruit', [[0, 5]]]],
FZF::FuzzyMatcher.new(0).match(%w[Fruit Grapefruit], 'Fruit', '', '').sort
FZF::FuzzyMatcher.new(0).match(%w[Fruit Grapefruit], 'Fruit').sort
# Case-insensitive match (+i)
assert_equal [["Fruit", [[0, 5]]], ["Grapefruit", [[5, 10]]]],
FZF::FuzzyMatcher.new(Regexp::IGNORECASE).
match(%w[Fruit Grapefruit], 'Fruit', '', '').sort
match(%w[Fruit Grapefruit], 'Fruit').sort
end
def test_extended_fuzzy_matcher_case_sensitive
%w['Fruit Fruit$].each do |q|
# Smart-case match (Uppercase found)
assert_equal [['Fruit', [[0, 5]]]],
FZF::ExtendedFuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], q, '', '').sort
FZF::ExtendedFuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], q).sort
# Smart-case match (Uppercase not-found)
assert_equal [["Fruit", [[0, 5]]], ["Grapefruit", [[5, 10]]]],
FZF::ExtendedFuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], q.downcase, '', '').sort
FZF::ExtendedFuzzyMatcher.new(nil).match(%w[Fruit Grapefruit], q.downcase).sort
# Case-sensitive match (-i)
assert_equal [['Fruit', [[0, 5]]]],
FZF::ExtendedFuzzyMatcher.new(0).match(%w[Fruit Grapefruit], q, '', '').sort
FZF::ExtendedFuzzyMatcher.new(0).match(%w[Fruit Grapefruit], q).sort
# Case-insensitive match (+i)
assert_equal [["Fruit", [[0, 5]]], ["Grapefruit", [[5, 10]]]],
FZF::ExtendedFuzzyMatcher.new(Regexp::IGNORECASE).
match(%w[Fruit Grapefruit], q, '', '').sort
match(%w[Fruit Grapefruit], q).sort
end
end
@ -304,7 +309,8 @@ class TestFZF < MiniTest::Unit::TestCase
juicy
_juice]
match = proc { |q, prefix|
matcher.match(list, q, prefix, '').sort.map { |p| [p.first, p.last.sort] }
cached = matcher.cached(list, q, prefix, '')
matcher.match(list, q, cached).sort.map { |p| [p.first, p.last.sort] }
}
assert matcher.caches.empty?
@ -366,15 +372,20 @@ class TestFZF < MiniTest::Unit::TestCase
c.java$
d.java
]
match = lambda do |q, p|
cached = matcher.cached(list, q, p, '')
result = matcher.match(list, q, cached)
matcher.cache list, q, result
result
end
2.times do
assert_equal 5, matcher.match(list, 'java', 'java', '').length
assert_equal 3, matcher.match(list, 'java$', 'java$', '').length
assert_equal 1, matcher.match(list, 'java$$', 'java$$', '').length
assert_equal 0, matcher.match(list, '!java', '!java', '').length
assert_equal 4, matcher.match(list, '!^jav', '!^jav', '').length
assert_equal 4, matcher.match(list, '!^java', '!^java', '').length
assert_equal 2, matcher.match(list, '!^java !b !c', '!^java', '').length
assert_equal 5, match.call('java', 'java').length
assert_equal 3, match.call('java$', 'java$').length
assert_equal 1, match.call('java$$', 'java$$').length
assert_equal 0, match.call('!java', '!java').length
assert_equal 4, match.call('!^jav', '!^jav').length
assert_equal 4, match.call('!^java', '!^java').length
assert_equal 2, match.call('!^java !b !c', '!^java').length
end
end
@ -400,7 +411,7 @@ class TestFZF < MiniTest::Unit::TestCase
["0____1", [[0, 6]]],
["0_____1", [[0, 7]]],
["0______1", [[0, 8]]]],
FZF.sort(matcher.match(list, '01', '', '')))
FZF.sort(matcher.match(list, '01')))
assert_equal(
[["01", [[0, 1], [1, 2]]],
@ -411,7 +422,7 @@ class TestFZF < MiniTest::Unit::TestCase
["____0_1", [[4, 5], [6, 7]]],
["0______1", [[0, 1], [7, 8]]],
["___01___", [[3, 4], [4, 5]]]],
FZF.sort(xmatcher.match(list, '0 1', '', '')))
FZF.sort(xmatcher.match(list, '0 1')))
assert_equal(
[["_01_", [[1, 3], [0, 4]], [4, 4, "_01_"]],
@ -420,7 +431,7 @@ class TestFZF < MiniTest::Unit::TestCase
["0____1", [[0, 6], [1, 3]], [6, 6, "0____1"]],
["0_____1", [[0, 7], [1, 3]], [7, 7, "0_____1"]],
["0______1", [[0, 8], [1, 3]], [8, 8, "0______1"]]],
FZF.sort(xmatcher.match(list, '01 __', '', '')).map { |tuple|
FZF.sort(xmatcher.match(list, '01 __')).map { |tuple|
tuple << FZF.rank(tuple)
}
)
@ -433,16 +444,16 @@ class TestFZF < MiniTest::Unit::TestCase
extended-exact-mode-not-fuzzy
extended'-fuzzy-mode
]
assert_equal 2, fuzzy.match(list, 'extended', '', '').length
assert_equal 2, fuzzy.match(list, 'mode extended', '', '').length
assert_equal 2, fuzzy.match(list, 'xtndd', '', '').length
assert_equal 2, fuzzy.match(list, "'-fuzzy", '', '').length
assert_equal 2, fuzzy.match(list, 'extended').length
assert_equal 2, fuzzy.match(list, 'mode extended').length
assert_equal 2, fuzzy.match(list, 'xtndd').length
assert_equal 2, fuzzy.match(list, "'-fuzzy").length
assert_equal 2, exact.match(list, 'extended', '', '').length
assert_equal 2, exact.match(list, 'mode extended', '', '').length
assert_equal 0, exact.match(list, 'xtndd', '', '').length
assert_equal 1, exact.match(list, "'-fuzzy", '', '').length
assert_equal 2, exact.match(list, "-fuzzy", '', '').length
assert_equal 2, exact.match(list, 'extended').length
assert_equal 2, exact.match(list, 'mode extended').length
assert_equal 0, exact.match(list, 'xtndd').length
assert_equal 1, exact.match(list, "'-fuzzy").length
assert_equal 2, exact.match(list, "-fuzzy").length
end
if RUBY_PLATFORM =~ /darwin/
@ -472,16 +483,16 @@ class TestFZF < MiniTest::Unit::TestCase
def test_nfd_fuzzy_matcher
matcher = FZF::FuzzyMatcher.new 0
assert_equal [], matcher.match([NFD + NFD], '할', '', '')
match = matcher.match([NFD + NFD], '글글', '', '')
assert_equal [], matcher.match([NFD + NFD], '할')
match = matcher.match([NFD + NFD], '글글')
assert_equal [[NFD + NFD, [[3, 12]]]], match
assert_equal ['한글한글', [[1, 4]]], FZF::UConv.nfc(*match.first)
end
def test_nfd_extended_fuzzy_matcher
matcher = FZF::ExtendedFuzzyMatcher.new 0
assert_equal [], matcher.match([NFD], "'글글", '', '')
match = matcher.match([NFD], "'한글", '', '')
assert_equal [], matcher.match([NFD], "'글글")
match = matcher.match([NFD], "'한글")
assert_equal [[NFD, [[0, 6]]]], match
assert_equal ['한글', [[0, 2]]], FZF::UConv.nfc(*match.first)
end
@ -524,46 +535,46 @@ class TestFZF < MiniTest::Unit::TestCase
]
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE
assert_equal list, matcher.match(list, 'f', '', '').map(&:first)
assert_equal list, matcher.match(list, 'f').map(&:first)
assert_equal [
[list[0], [[2, 5]]],
[list[1], [[9, 17]]]], matcher.match(list, 'is', '', '')
[list[1], [[9, 17]]]], matcher.match(list, 'is')
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [2]
assert_equal [[list[1], [[8, 9]]]], matcher.match(list, 'f', '', '')
assert_equal [[list[0], [[8, 9]]]], matcher.match(list, 's', '', '')
assert_equal [[list[1], [[8, 9]]]], matcher.match(list, 'f')
assert_equal [[list[0], [[8, 9]]]], matcher.match(list, 's')
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [3]
assert_equal [[list[0], [[19, 20]]]], matcher.match(list, 'r', '', '')
assert_equal [[list[0], [[19, 20]]]], matcher.match(list, 'r')
# Comma-separated
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [3, 1]
assert_equal [[list[0], [[19, 20]]], [list[1], [[3, 4]]]], matcher.match(list, 'r', '', '')
assert_equal [[list[0], [[19, 20]]], [list[1], [[3, 4]]]], matcher.match(list, 'r')
# Ordered
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [1, 3]
assert_equal [[list[0], [[3, 4]]], [list[1], [[3, 4]]]], matcher.match(list, 'r', '', '')
assert_equal [[list[0], [[3, 4]]], [list[1], [[3, 4]]]], matcher.match(list, 'r')
regex = FZF.build_delim_regex "\t"
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [1], regex
assert_equal [[list[0], [[3, 10]]]], matcher.match(list, 're', '', '')
assert_equal [[list[0], [[3, 10]]]], matcher.match(list, 're')
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [2], regex
assert_equal [], matcher.match(list, 'r', '', '')
assert_equal [[list[1], [[9, 17]]]], matcher.match(list, 'is', '', '')
assert_equal [], matcher.match(list, 'r')
assert_equal [[list[1], [[9, 17]]]], matcher.match(list, 'is')
# Negative indexing
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [-1], regex
assert_equal [[list[0], [[3, 6]]]], matcher.match(list, 'rt', '', '')
assert_equal [[list[0], [[2, 5]]], [list[1], [[9, 17]]]], matcher.match(list, 'is', '', '')
assert_equal [[list[0], [[3, 6]]]], matcher.match(list, 'rt')
assert_equal [[list[0], [[2, 5]]], [list[1], [[9, 17]]]], matcher.match(list, 'is')
# Regex delimiter
regex = FZF.build_delim_regex "[ \t]+"
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [1], regex
assert_equal [list[1]], matcher.match(list, 'f', '', '').map(&:first)
assert_equal [list[1]], matcher.match(list, 'f').map(&:first)
matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE, [2], regex
assert_equal [[list[0], [[1, 2]]], [list[1], [[8, 9]]]], matcher.match(list, 'f', '', '')
assert_equal [[list[0], [[1, 2]]], [list[1], [[8, 9]]]], matcher.match(list, 'f')
end
def stream_for str
@ -659,7 +670,7 @@ class TestFZF < MiniTest::Unit::TestCase
assert_equal [
['1 2 3 4', [[0, 13], [16, 22]]],
['1 3 4 2', [[0, 24], [12, 17]]],
], FZF.sort(FZF::ExtendedFuzzyMatcher.new(nil).match(list, '12 34', '', ''))
], FZF.sort(FZF::ExtendedFuzzyMatcher.new(nil).match(list, '12 34'))
end
end