Use heuristic check to determine binary files based on null bytes

The git adapter currently has a bug that prevents it from annotating
regular text files when the author name contains an umlaut.

This commit changes the binary file detection to a simple check for null
bytes: content containing any null byte is treated as binary. This may not
detect all binary files correctly, but that isn't a requirement.

The original check was presumably only used to avoid rendering large binary
files in the browser.

Relevant bug report: https://community.openproject.org/work_packages/20883
pull/3296/head
Oliver Günther 9 years ago
parent e562fde587
commit 2b0ac5f86e
  1. 17
      lib/open_project/scm/adapters/git.rb
  2. 3
      spec/legacy/unit/lib/redmine/scm/adapters/git_adapter_spec.rb

@ -296,23 +296,20 @@ module OpenProject
def annotate(path, identifier = nil)
identifier = 'HEAD' if identifier.blank?
args = %w|blame|
args = %w|blame --encoding=UTF-8|
args << '-p' << identifier << '--' << scm_encode(@path_encoding, 'UTF-8', path)
blame = Annotate.new
content = capture_git(args, binmode: true)
content = capture_git(args)
if content.respond_to?(:force_encoding) &&
(content.dup.force_encoding('UTF-8') != content.dup.force_encoding('BINARY'))
# Ruby 1.9.2
# TODO: need to handle edge cases of non-binary content that isn't UTF-8
return nil
end
# Deny to parse large binary files
# Quick test for null bytes, this may not match all files,
# but should be a reasonable workaround
return nil if content.dup.force_encoding('BINARY').count("\x00") > 0
identifier = ''
# git shows commit author on the first occurrence only
authors_by_commit = {}
content.split("\n").each do |line|
content.scrub.split("\n").each do |line|
if line =~ /^([0-9a-f]{39,40})\s.*/
identifier = $1
elsif line =~ /^author (.+)/

@ -169,8 +169,7 @@ describe OpenProject::Scm::Adapters::Git, type: :model do
assert_equal '2010-09-18 19:59:46'.to_time, last_rev.time
end
# TODO: need to handle edge cases of non-binary content that isn't UTF-8
xit 'test latin 1 path' do
it 'test latin 1 path' do
if WINDOWS_PASS
#
else

Loading…
Cancel
Save