Remove UTF-8 NFD conversion

We have iconv.
2014-05-29 01:08:44 +09:00 · 2014-05-29 01:08:44 +09:00 · edcd7c6aa6
commit edcd7c6aa6
parent b0fdd6db99
3 changed files with 15 additions and 170 deletions
--- a/README.md
+++ b/README.md
@@ -549,6 +549,13 @@ fzf works on [Cygwin](http://www.cygwin.com/) and
 [MSYS2](http://sourceforge.net/projects/msys2/). You may need to use `--black`
 option on MSYS2 to avoid rendering issues.

+### Handling UTF-8 NFD paths on OSX
+
+Use iconv to convert NFD paths to NFC:
+
+```sh
+find . | iconv -f utf-8-mac -t utf8//ignore | fzf
+```

 License
 -------
--- a/129
+++ b/129
@@ -7,7 +7,7 @@
 #  / __/ / /_/ __/
 # /_/   /___/_/    Fuzzy finder for your shell
 #
-# Version: 0.8.5 (May 21, 2014)
+# Version: 0.8.5 (May 29, 2014)
 #
 # Author:  Junegunn Choi
 # URL:     https://github.com/junegunn/fzf
@@ -309,121 +309,6 @@ class FZF
    exit x
  end

-  case RUBY_PLATFORM
-  when /darwin/
-    module UConv
-      CHOSUNG   = 0x1100
-      JUNGSUNG  = 0x1161
-      JONGSUNG  = 0x11A7
-      CHOSUNGS  = 19
-      JUNGSUNGS = 21
-      JONGSUNGS = 28
-      JJCOUNT   = JUNGSUNGS * JONGSUNGS
-      NFC_BEGIN = 0xAC00
-      NFC_END   = NFC_BEGIN + CHOSUNGS * JUNGSUNGS * JONGSUNGS
-
-      def self.nfd str
-        str.split(//).map do |c|
-          cp = c.ord
-          if cp >= NFC_BEGIN && cp < NFC_END
-            chr = ''
-            idx  = cp - NFC_BEGIN
-            cho  = CHOSUNG  + idx / JJCOUNT
-            jung = JUNGSUNG + (idx % JJCOUNT) / JONGSUNGS
-            jong = JONGSUNG + idx % JONGSUNGS
-            chr << cho << jung
-            chr << jong if jong != JONGSUNG
-            chr
-          else
-            c
-          end
-        end
-      end
-
-      def self.to_nfc arr
-        [NFC_BEGIN + arr[0] * JJCOUNT +
-         (arr[1] || 0) * JONGSUNGS +
-         (arr[2] || 0)].pack('U*')
-      end
-
-      if String.method_defined?(:each_char)
-        def self.split str
-          str.each_char.to_a
-        end
-      else
-        def self.split str
-          str.split('')
-        end
-      end
-
-      def self.nfc str, offsets = []
-        ret  = ''
-        omap = []
-        pend = []
-        split(str).each_with_index do |c, idx|
-          cp =
-            begin
-              c.ord
-            rescue Exception
-              next
-            end
-          omap << ret.length
-          unless pend.empty?
-            if cp >= JUNGSUNG && cp < JUNGSUNG + JUNGSUNGS
-              pend << cp - JUNGSUNG
-              next
-            elsif cp >= JONGSUNG && cp < JONGSUNG + JONGSUNGS
-              pend << cp - JONGSUNG
-              next
-            else
-              omap[-1] = omap[-1] + 1
-              ret << to_nfc(pend)
-              pend.clear
-            end
-          end
-          if cp >= CHOSUNG && cp < CHOSUNG + CHOSUNGS
-            pend << cp - CHOSUNG
-          else
-            ret << c
-          end
-        end
-        ret << to_nfc(pend) unless pend.empty?
-        return [ret,
-                offsets.map { |pair|
-                  b, e = pair
-                  [omap[b] || 0, omap[e] || ((omap.last || 0) + 1)] }]
-      end
-    end
-
-    def convert_item item
-      UConv.nfc(*item)
-    end
-
-    class Matcher
-      def query_chars q
-        UConv.nfd(q)
-      end
-
-      def sanitize q
-        UConv.nfd(q).join
-      end
-    end
-  else
-    def convert_item item
-      item
-    end
-
-    class Matcher
-      def query_chars q
-        q.split(//)
-      end
-
-      def sanitize q
-        q
-      end
-    end
-  end
-
  def emit event
    @mtx.synchronize do
      @events[event] = yield
@@ -792,7 +677,7 @@ class FZF
        row           = cursor_y(idx + 2)
        chosen        = idx == vcursor
        selected      = @selects.include?([*item][0])
-        line, offsets = convert_item item
+        line, offsets = item
        tokens        = format line, maxc, offsets
        print_item row, tokens, chosen, selected
      end
@@ -1176,7 +1061,7 @@ class FZF
    def fuzzy_regex q
      @regexp[q] ||= begin
        q = q.downcase if @rxflag == Regexp::IGNORECASE
-        Regexp.new(query_chars(q).inject('') { |sum, e|
+        Regexp.new(q.split(//).inject('') { |sum, e|
          e = Regexp.escape e
          sum << (e.length > 1 ? "(?:#{e}).*?" :  # FIXME: not equivalent
                                 "#{e}[^#{e}]*?")
@@ -1234,7 +1119,7 @@ class FZF
            when ''
              nil
            when /^\^(.*)\$$/
-              Regexp.new('^' << sanitize(Regexp.escape($1)) << '$', rxflag_for(w))
+              Regexp.new('^' << Regexp.escape($1) << '$', rxflag_for(w))
            when /^'/
              if @mode == :fuzzy && w.length > 1
                exact_regex w[1..-1]
@@ -1243,10 +1128,10 @@ class FZF
              end
            when /^\^/
              w.length > 1 ?
-                Regexp.new('^' << sanitize(Regexp.escape(w[1..-1])), rxflag_for(w)) : nil
+                Regexp.new('^' << Regexp.escape(w[1..-1]), rxflag_for(w)) : nil
            when /\$$/
              w.length > 1 ?
-                Regexp.new(sanitize(Regexp.escape(w[0..-2])) << '$', rxflag_for(w)) : nil
+                Regexp.new(Regexp.escape(w[0..-2]) << '$', rxflag_for(w)) : nil
            else
              @mode == :fuzzy ? fuzzy_regex(w) : exact_regex(w)
            end, invert ]
@@ -1254,7 +1139,7 @@ class FZF
    end

    def exact_regex w
-      Regexp.new(sanitize(Regexp.escape(w)), rxflag_for(w))
+      Regexp.new(Regexp.escape(w), rxflag_for(w))
    end

    def match list, q, prefix, suffix
--- a/test/test_fzf.rb
+++ b/test/test_fzf.rb
@@ -450,58 +450,11 @@ class TestFZF < MiniTest::Unit::TestCase
    assert_equal 2, exact.match(list, "-fuzzy", '', '').length
  end

-  if RUBY_PLATFORM =~ /darwin/
-    NFD = '한글'
-    def test_nfc
-      assert_equal 6, NFD.length
-      assert_equal ["한글", [[0, 1], [1, 2]]],
-        FZF::UConv.nfc(NFD, [[0, 3], [3, 6]])
-
-      nfd2 = 'before' + NFD + 'after'
-      assert_equal 6 + 6 + 5, nfd2.length
-
-      nfc, offsets = FZF::UConv.nfc(nfd2, [[4, 14], [9, 13]])
-      o1, o2 = offsets
-      assert_equal 'before한글after', nfc
-      assert_equal 're한글af',        nfc[(o1.first...o1.last)]
-      assert_equal '글a',             nfc[(o2.first...o2.last)]
-    end
-
-    def test_nfd
-      nfc = '한글'
-      nfd = FZF::UConv.nfd(nfc)
-      assert_equal 2, nfd.length
-      assert_equal 6, nfd.join.length
-      assert_equal NFD, nfd.join
-    end
-
-    def test_nfd_fuzzy_matcher
-      matcher = FZF::FuzzyMatcher.new 0
-      assert_equal [], matcher.match([NFD + NFD], '할', '', '')
-      match   = matcher.match([NFD + NFD], '글글', '', '')
-      assert_equal [[NFD + NFD, [[3, 12]]]], match
-      assert_equal ['한글한글', [[1, 4]]], FZF::UConv.nfc(*match.first)
-    end
-
-    def test_nfd_extended_fuzzy_matcher
-      matcher = FZF::ExtendedFuzzyMatcher.new 0
-      assert_equal [], matcher.match([NFD], "'글글", '', '')
-      match   = matcher.match([NFD], "'한글", '', '')
-      assert_equal [[NFD, [[0, 6]]]], match
-      assert_equal ['한글', [[0, 2]]], FZF::UConv.nfc(*match.first)
-    end
-  end
-
-  def test_split
-    assert_equal ["a", "b", "c", "\xFF", "d", "e", "f"],
-      FZF::UConv.split("abc\xFFdef")
-  end
-
  # ^$ -> matches empty item
  def test_format_empty_item
    fzf = FZF.new []
    item = ['', [[0, 0]]]
-    line, offsets = fzf.convert_item item
+    line, offsets = item
    tokens        = fzf.format line, 80, offsets
    assert_equal [], tokens
  end