Gem name changed from 'text_extractor' to 'plaintext'

pull/6038/head
Wieland Lindenthal 7 years ago
parent 68d4f36a15
commit 90ba46d406
  1. 2
      .gitignore
  2. 3
      Gemfile
  3. 10
      Gemfile.lock
  4. 12
      app/controllers/admin_controller.rb
  5. 2
      app/workers/extract_fulltext_job.rb
  6. 4
      config/initializers/plaintext.rb
  7. 38
      config/plaintext.yml
  8. 4
      spec/features/work_packages/table/queries/filter_spec.rb
  9. 4
      spec/workers/extract_fulltext_job_spec.rb

2
.gitignore vendored

@ -90,5 +90,5 @@ npm-debug.log*
/frontend/dist/
/frontend/tests/*.gif
node_modules/
text_extractor.yml
plaintext.yml
structure.sql

@ -163,8 +163,7 @@ gem 'aws-sdk', '~> 2.10.1'
gem 'openproject-token', '~> 1.0.1'
gem 'text-extractor', '0.1.0'
gem 'plaintext', '0.1.0'
group :test do
gem 'rack-test', '~> 0.6.3'

@ -383,6 +383,10 @@ GEM
rake (>= 0.8.1)
pdf-core (0.7.0)
pg (0.21.0)
plaintext (0.1.0)
activesupport (> 2.2.1)
nokogiri (~> 1.8.1)
rubyzip (~> 1.2.1)
powerpack (0.1.1)
prawn (2.2.2)
pdf-core (~> 0.7.0)
@ -559,10 +563,6 @@ GEM
sys-filesystem (1.1.8)
ffi
test-prof (0.1.0)
text-extractor (0.1.0)
activesupport (> 2.2.1)
nokogiri (~> 1.8.1)
rubyzip (~> 1.2.1)
thin (1.7.2)
daemons (~> 1.0, >= 1.0.9)
eventmachine (~> 1.0, >= 1.0.4)
@ -674,6 +674,7 @@ DEPENDENCIES
parallel_tests (~> 2.14.0)
passenger
pg (~> 0.21.0)
plaintext (= 0.1.0)
prawn (~> 2.2)
prawn-table (~> 0.2.2)
pry-byebug (~> 3.4.2)
@ -720,7 +721,6 @@ DEPENDENCIES
syck (~> 1.3.0)
sys-filesystem (~> 1.1.4)
test-prof
text-extractor (= 0.1.0)
thin (~> 1.7.2)
timecop (~> 0.9.0)
transactional_lock!

@ -77,12 +77,12 @@ class AdminController < ApplicationController
@checklist = [
[:text_default_administrator_account_changed, User.default_admin_account_changed?],
[:text_file_repository_writable, repository_writable],
[:'extraction.available.pdftotext', TextExtractor::PdfHandler.available?],
[:'extraction.available.unrtf', TextExtractor::RtfHandler.available?],
[:'extraction.available.catdoc', TextExtractor::DocHandler.available?],
[:'extraction.available.xls2csv', TextExtractor::XlsHandler.available?],
[:'extraction.available.catppt', TextExtractor::PptHandler.available?],
[:'extraction.available.tesseract', TextExtractor::ImageHandler.available?]
[:'extraction.available.pdftotext', Plaintext::PdfHandler.available?],
[:'extraction.available.unrtf', Plaintext::RtfHandler.available?],
[:'extraction.available.catdoc', Plaintext::DocHandler.available?],
[:'extraction.available.xls2csv', Plaintext::XlsHandler.available?],
[:'extraction.available.catppt', Plaintext::PptHandler.available?],
[:'extraction.available.tesseract', Plaintext::ImageHandler.available?]
]
@storage_information = OpenProject::Storage.mount_information

@ -55,7 +55,7 @@ class ExtractFulltextJob < ApplicationJob
begin
if @attachment.readable?
resolver = TextExtractor::Resolver.new(@file, @attachment.content_type)
resolver = Plaintext::Resolver.new(@file, @attachment.content_type)
@text = resolver.text
end
rescue => e

@ -27,8 +27,8 @@
# See doc/COPYRIGHT.rdoc for more details.
#++
file_name = File.join([Rails.root.to_s, 'config', 'text_extractor.yml'])
file_name = File.join([Rails.root.to_s, 'config', 'plaintext.yml'])
if File.file?(file_name)
config_file = File.read(file_name)
TextExtractor::Configuration.load(config_file)
Plaintext::Configuration.load(config_file)
end

@ -0,0 +1,38 @@
# Text extraction helper programs.
#
# Commands should write the resulting plain text to STDOUT. Use __FILE__ as a
# placeholder for the file path. The values below are the defaults.
# apt install poppler-utils
pdftotext:
- /usr/local/bin/pdftotext
- -enc
- UTF-8
- __FILE__
- '-'
# apt install unrtf
unrtf:
- /usr/local/bin/unrtf
- --text
- __FILE__
tesseract:
- /usr/local/bin/tesseract
- __FILE__
- stdout
# apt install catdoc
catdoc:
- /usr/bin/textutil
- -convert
- txt
- -stdout
- __FILE__
# xls2csv:
# - /usr/bin/xls2csv
# - -dutf-8
# - __FILE__
# catppt:
# - /usr/bin/catppt
# - -dutf-8
# - __FILE__

@ -263,10 +263,10 @@ describe 'filter work packages', js: true do
allow(EnterpriseToken).to receive(:allows_to?).and_return(false)
allow(EnterpriseToken).to receive(:allows_to?).with(:attachment_filters).and_return(true)
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_return('I am the first text $1.99.')
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return('I am the first text $1.99.')
wp_with_attachment_a
ExtractFulltextJob.new(attachment_a.id).perform
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_return('I am the second text.')
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return('I am the second text.')
wp_with_attachment_b
ExtractFulltextJob.new(attachment_b.id).perform
wp_without_attachment

@ -38,7 +38,7 @@ describe ExtractFulltextJob, type: :job do
context "with successful text extraction" do
before do
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_return(text)
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_return(text)
end
context 'attachment is readable' do
@ -91,7 +91,7 @@ describe ExtractFulltextJob, type: :job do
let(:logger) { Rails.logger }
before do
allow_any_instance_of(TextExtractor::Resolver).to receive(:text).and_raise(exception_message)
allow_any_instance_of(Plaintext::Resolver).to receive(:text).and_raise(exception_message)
# This line is actually part of the test. The `expect` call needs to go this far up, as we want to verify that a message gets logged.
expect(logger).to receive(:error).with(exception_message)

Loading…
Cancel
Save