Use heuristic check to determine binary files based on null bytes

The git adapter currently has a bug that prevents regular text files from being annotated when the author name contains an umlaut. This commit changes the binary file detection to simply count null bytes. This may not detect all binary files correctly, but that isn't a requirement. The original check was arguably there only to avoid rendering large binary files in the browser. Relevant bug report: https://community.openproject.org/work_packages/20883
9 years ago · 2b0ac5f86e
parent e562fde587
commit 2b0ac5f86e
2 changed files with 8 additions and 12 deletions
--- a/lib/open_project/scm/adapters/git.rb
+++ b/lib/open_project/scm/adapters/git.rb
@ -296,23 +296,20 @@ module OpenProject

        def annotate(path, identifier = nil)
          identifier = 'HEAD' if identifier.blank?
-          args = %w|blame|
+          args = %w|blame --encoding=UTF-8|
          args << '-p' << identifier << '--' << scm_encode(@path_encoding, 'UTF-8', path)
          blame = Annotate.new
-          content = capture_git(args, binmode: true)
+          content = capture_git(args)

-          if content.respond_to?(:force_encoding) &&
-             (content.dup.force_encoding('UTF-8') != content.dup.force_encoding('BINARY'))
-
-            # Ruby 1.9.2
-            # TODO: need to handle edge cases of non-binary content that isn't UTF-8
-            return nil
-          end
+          # Deny to parse large binary files
+          # Quick test for null bytes, this may not match all files,
+          # but should be a reasonable workaround
+          return nil if content.dup.force_encoding('BINARY').count("\x00") > 0

          identifier = ''
          # git shows commit author on the first occurrence only
          authors_by_commit = {}
-          content.split("\n").each do |line|
+          content.scrub.split("\n").each do |line|
            if line =~ /^([0-9a-f]{39,40})\s.*/
              identifier = $1
            elsif line =~ /^author (.+)/
--- a/spec/legacy/unit/lib/redmine/scm/adapters/git_adapter_spec.rb
+++ b/spec/legacy/unit/lib/redmine/scm/adapters/git_adapter_spec.rb
@ -169,8 +169,7 @@ describe OpenProject::Scm::Adapters::Git, type: :model do
    assert_equal '2010-09-18 19:59:46'.to_time, last_rev.time
  end

-  # TODO: need to handle edge cases of non-binary content that isn't UTF-8
-  xit 'test latin 1 path' do
+  it 'test latin 1 path' do
    if WINDOWS_PASS
      #
    else