diff --git a/.env.template b/.env.template index 4a2573584b5b74c7e0e9f7a510e5c9e0d2be7e2d..1470bdb98fe5df76c25c39e3028d29236c44dd24 100644 --- a/.env.template +++ b/.env.template @@ -100,3 +100,14 @@ SMTP_PORT= # Host used for generating URLs APP_HOST=localhost:3000 + +# S3 settings +USE_S3=false +S3_ENDPOINT=https://s3.location.example.com +S3_ACCESS_KEY= +S3_SECRET_KEY= +S3_REGION= +S3_FILE_UPLOAD_BUCKET= + +# RDMS collections +CRC_1280_COLLECTION='CRC 1280' \ No newline at end of file diff --git a/.env.template.development b/.env.template.development index aaa65a92a4d1f0fa0ecad30f56ae833ad8493dd6..5ccb94b11eb3249afd5a9cc5de8ce715ec215026 100644 --- a/.env.template.development +++ b/.env.template.development @@ -103,3 +103,14 @@ SMTP_PORT= # Host used for generating URLs APP_HOST=localhost:3000 + +# S3 settings +USE_S3=false +S3_ENDPOINT= +S3_ACCESS_KEY= +S3_SECRET_KEY= +S3_REGION= +S3_FILE_UPLOAD_BUCKET= + +# RDMS collections +CRC_1280_COLLECTION='CRC 1280' \ No newline at end of file diff --git a/hyrax/Gemfile b/hyrax/Gemfile index 7011cccf31b65c5b2cba39e74ced8f70b84a2b01..4651d3418f17a1af74f4410a6bcd92ef99d2fb9b 100644 --- a/hyrax/Gemfile +++ b/hyrax/Gemfile @@ -16,7 +16,8 @@ gem 'uglifier', '>= 1.3.0' # See https://github.com/rails/execjs#readme for more supported runtimes # gem 'mini_racer', platforms: :ruby gem 'therubyracer' - +# Use carrierwave-aws for upload file on aws s3 +gem 'carrierwave-aws' # Use CoffeeScript for .coffee assets and views gem 'coffee-rails', '~> 4.2' # Turbolinks makes navigating your web application faster. 
Read more: https://github.com/turbolinks/turbolinks @@ -90,3 +91,4 @@ gem 'riiif', '~> 2.3' gem 'rinku' gem 'coveralls', require: false gem 'database_cleaner' +gem 'aws-sdk-s3', '~> 1' diff --git a/hyrax/Gemfile.lock b/hyrax/Gemfile.lock index b2e360d693016bf9c53c2d306f3ee1e9fbdaa30a..7b1c1c451b153bd91a86a127bc5d3a12b195b39a 100644 --- a/hyrax/Gemfile.lock +++ b/hyrax/Gemfile.lock @@ -177,6 +177,9 @@ GEM activesupport (>= 4.0.0) mime-types (>= 1.16) ssrf_filter (~> 1.0) + carrierwave-aws (1.4.0) + aws-sdk-s3 (~> 1.0) + carrierwave (>= 0.7, < 2.1) childprocess (4.1.0) chromedriver-helper (2.1.1) archive-zip (~> 0.10) @@ -983,12 +986,14 @@ PLATFORMS ruby DEPENDENCIES + aws-sdk-s3 (~> 1) bootsnap (>= 1.1.0) bootstrap-datepicker-rails bootstrap-sass (~> 3.0) bulkrax byebug capybara (>= 2.15) + carrierwave-aws chromedriver-helper coffee-rails (~> 4.2) coveralls diff --git a/hyrax/app/actors/hyrax/actors/file_actor.rb b/hyrax/app/actors/hyrax/actors/file_actor.rb new file mode 100644 index 0000000000000000000000000000000000000000..e8859e67c9f1d2903c90535643eaf35ec386bc9a --- /dev/null +++ b/hyrax/app/actors/hyrax/actors/file_actor.rb @@ -0,0 +1,200 @@ +# frozen_string_literal: true + +module Hyrax + module Actors + # Actions for a file identified by file_set and relation (maps to use predicate) + # @note Spawns asynchronous jobs + class FileActor + attr_reader :file_set, :relation, :user, :use_valkyrie + + # @param [FileSet] file_set the parent FileSet + # @param [Symbol, #to_sym] relation the type/use for the file + # @param [User] user the user to record as the Agent acting upon the file + def initialize(file_set, relation, user) + @file_set = file_set + @relation = normalize_relation(relation) + @user = user + end + + # Persists file as part of file_set and spawns async job to characterize and create derivatives. 
+ # @param [JobIoWrapper] io the file to save in the repository, with mime_type and original_name + # @return [Boolean] false when the file set cannot be saved; otherwise whatever set_characterization_data returns (presumably true from its final save!) - characterization data is copied over synchronously here, no CharacterizeJob is spawned + # @note Instead of calling this method, use IngestJob to avoid synchronous execution cost + # @see IngestJob + # @todo create a job to monitor the temp directory (or in a multi-worker system, directories!) to prune old files that have made it into the repo + def ingest_file(io) + io.uploaded_file.update(file_set_uri: file_set.uri.to_s) if io.uploaded_file.file_set_uri.blank? + + Hydra::Works::AddFileToFileSet.call(file_set, + alias_file(io, file_set), + relation, + versioning: false) + return false unless file_set.save + + repository_file = related_file + + create_version(repository_file, user) + set_characterization_data(file_set) + end + + # Reverts file and spawns async job to characterize and create derivatives. + # @param [String] revision_id + # @return [CharacterizeJob, FalseClass] spawned job on success, false on failure + def revert_to(revision_id) + repository_file = related_file + repository_file.restore_version(revision_id) + return false unless file_set.save + + create_version(repository_file, user) + CharacterizeJob.perform_later(file_set, repository_file.id) + end + + # @note FileSet comparison is limited to IDs, but this should be sufficient, given that + # most operations here are on the other side of async retrieval in Jobs (based solely on ID). + def ==(other) + return false unless other.is_a?(self.class) + + file_set.id == other.file_set.id && relation == other.relation && user == other.user + end + + private + + ## + # Wraps the versioning service with error handling. If the service's + # create handler isn't implemented, we want to accept that quietly here. 
+ def create_version(content, user) + Hyrax::VersioningService.create(content, user) + rescue NotImplementedError + :no_op + end + + ## using this method to set the characterization data instead of running the + # actual characterization service + def set_characterization_data(file_set) + s3_file_url = file_set.characterization_proxy.content + path = URI.parse(Addressable::URI.unencode(s3_file_url)).path + file_name = path.split('/')[-1] + uploaded_file_id = path.split('/')[-2] + uploaded_file = Hyrax::UploadedFile.find(uploaded_file_id) + + file_set.characterization_proxy.file_name = file_name + file_set.characterization_proxy.original_name = file_name + file_set.characterization_proxy.mime_type = uploaded_file.characterization_data['content_type'] + file_set.characterization_proxy.file_size = uploaded_file.characterization_data['file_size'] + file_set.characterization_proxy.original_checksum = uploaded_file.characterization_data['original_checksum'] + file_set.characterization_proxy.format_label = [] + + if file_set.image? + file_set.characterization_proxy.height = uploaded_file.characterization_data['height'] + file_set.characterization_proxy.width = uploaded_file.characterization_data['width'] + + file_set.characterization_proxy.alpha_channels = channels(s3_file_url) if file_set.image? + end + + move_s3_object_to_work_bucket(uploaded_file, file_set) + + file_set.characterization_proxy.save! + + file_set.label = file_set.characterization_proxy.original_name + + file_set.save! + end + + ## + # @return [Hydra::PCDM::File] the file referenced by relation + def related_file + file_set.public_send(normalize_relation(relation)) || raise("No #{relation} returned for FileSet #{file_set.id}") + end + + def normalize_relation(relation) + use_valkyrie ? normalize_relation_for_valkyrie(relation) : normalize_relation_for_active_fedora(relation) + end + + def normalize_relation_for_active_fedora(relation) + return relation.to_sym if relation.respond_to? 
:to_sym + + case relation + when Hyrax::FileMetadata::Use::ORIGINAL_FILE + :original_file + when Hyrax::FileMetadata::Use::EXTRACTED_TEXT + :extracted_file + when Hyrax::FileMetadata::Use::THUMBNAIL + :thumbnail_file + else + :original_file + end + end + + ## + # @return [RDF::URI] + def normalize_relation_for_valkyrie(relation) + return relation if relation.is_a?(RDF::URI) + + Hyrax::FileMetadata::Use.uri_for(use: relation.to_sym) + rescue ArgumentError + Hyrax::FileMetadata::Use::ORIGINAL_FILE + end + + def pathhint(io) + io.uploaded_file&.uploader&.path || io.path + end + + ## + # Writes the uploaded file's storage URL (not its bytes) into + # tmp/s3_alias_files/<file_set.id>/<original_name> under Rails.root and returns that file. + # @param [JobIoWrapper] io + # @param [FileSet] file_set + # @return [File] the alias file, still open in 'w+' mode + # NOTE(review): the returned handle is never closed here and nothing visible removes the alias files - confirm intended + def alias_file(io, file_set) + alias_folder_path = File.join(Rails.root, 'tmp/s3_alias_files', file_set.id) + + FileUtils.mkdir_p(alias_folder_path) + + alias_file = File.new("#{alias_folder_path}/#{io.original_name}", 'w+') + + File.open(alias_file, 'w+') { |file| file.write(io.uploaded_file.file.url) } + + alias_file + end + + ## + # Moves the uploaded object into the bucket named after the id of the file set's first parent work + # (creating the bucket when it does not exist), under the key /<repository file id>/<first file set title>. + # @param [Hyrax::UploadedFile] uploaded_file + # @param [FileSet] file_set + # NOTE(review): target_bucket and target_object are only needed by the commented-out call at the end + def move_s3_object_to_work_bucket(uploaded_file, file_set) + repository_file = file_set.characterization_proxy + s3 = S3StorageService.new + s3.init_client + + source_object_key = uploaded_file.file.path + target_bucket_name = file_set.parent_works.first.id + + target_bucket = Aws::S3::Bucket.new(target_bucket_name) + + s3.create_bucket(target_bucket_name) unless s3.bucket_exists?(target_bucket_name) + target_object_key = "/#{repository_file.id}/#{file_set.title.first}" + + s3.move_object(source_object_key, target_bucket_name, target_object_key, { uploaded_file_path: source_object_key }) + target_object = target_bucket.object(target_object_key) + + # change_repository_file_content(uploaded_file, repository_file, target_object) + end + + # Points the uploaded file at target_object and stores the resulting file URL as the repository file content. + # NOTE(review): the only call visible in this class is commented out, so this looks unused - confirm + def change_repository_file_content(uploaded_file, repository_file, target_object) + uploaded_file.file.file.file = target_object + + repository_file.content = uploaded_file.file_url + end + + def clear_metadata(file_set) + # The characterization of additional file versions adds new height/width/size/checksum values to un-orderable... 
+ # `ActiveTriples::Relation` fields on `original_file`. Values from those are then randomly pulled into Solr... + # fields which may have scalar or vector cardinality. So for height/width you get two scalar values pulled from... + # "randomized parallel arrays". Upshot is to reset all of these before (re)characterization to stop the mayhem. + file_set.characterization_proxy.height = [] + file_set.characterization_proxy.width = [] + file_set.characterization_proxy.original_checksum = [] + file_set.characterization_proxy.file_size = [] + file_set.characterization_proxy.format_label = [] + end + + def channels(filepath) + ch = MiniMagick::Tool::Identify.new do |cmd| + cmd.format '%[channels]' + cmd << filepath + end + [ch] + end + end + end +end diff --git a/hyrax/app/actors/hyrax/actors/file_set_actor.rb b/hyrax/app/actors/hyrax/actors/file_set_actor.rb new file mode 100644 index 0000000000000000000000000000000000000000..1b48527afc844043f470fe4d456a405360223b27 --- /dev/null +++ b/hyrax/app/actors/hyrax/actors/file_set_actor.rb @@ -0,0 +1,176 @@ +# frozen_string_literal: true +module Hyrax + module Actors + # Actions are decoupled from controller logic so that they may be called from a controller or a background job. + class FileSetActor # rubocop:disable Metrics/ClassLength + include Lockable + attr_reader :file_set, :user, :attributes + + def initialize(file_set, user) + @file_set = file_set + @user = user + end + + # @!group Asynchronous Operations + + # Spawns asynchronous IngestJob unless ingesting from URL + # Called from FileSetsController, AttachFilesToWorkJob, IngestLocalFileJob, ImportUrlJob + # @param [Hyrax::UploadedFile, File] file the file uploaded by the user + # @param [Symbol, #to_s] relation + # @return [IngestJob, FalseClass] false on failure, otherwise the queued job + def create_content(file, relation = :original_file, from_url: false) + # If the file set doesn't have a title or label assigned, set a default. 
+ file_set.label ||= label_for(file) + file_set.title = [file_set.label] if file_set.title.blank? + @file_set = perform_save(file_set) + # NOTE(review): perform_save hands back the object whether or not the save succeeded, so the guard below never returns false + return false unless file_set + if from_url + # If ingesting from URL, don't spawn an IngestJob; instead + # reach into the FileActor and run the ingest with the file instance in + # hand. Do this because we don't have the underlying UploadedFile instance + file_actor = build_file_actor(relation) + file_actor.ingest_file(wrapper!(file: file, relation: relation)) + parent = parent_for(file_set: file_set) + VisibilityCopyJob.perform_later(parent) + InheritPermissionsJob.perform_later(parent) + else + IngestJob.perform_later(wrapper!(file: file, relation: relation)) + end + end + + # Spawns asynchronous IngestJob with user notification afterward + # @param [Hyrax::UploadedFile, File, ActionDispatch::Http::UploadedFile] file the file uploaded by the user + # @param [Symbol, #to_s] relation + # @return [IngestJob] the queued job + def update_content(file, relation = :original_file) + IngestJob.perform_later(wrapper!(file: file, relation: relation), notification: true) + end + # @!endgroup + + # Adds the appropriate metadata, visibility and relationships to file_set + # @note In past versions of Hyrax this method did not perform a save because it is mainly used in conjunction with + # create_content, which also performs a save. However, due to the relationship between Hydra::PCDM objects, + # we have to save both the parent work and the file_set in order to record the "metadata" relationship between them. + # @param [Hash] file_set_params specifying the visibility, lease and/or embargo of the file set. + # Without visibility, embargo_release_date or lease_expiration_date, visibility will be copied from the parent. 
+ def create_metadata(file_set_params = {}) + file_set.depositor = depositor_id(user) + now = TimeService.time_in_utc + file_set.date_uploaded = now + file_set.date_modified = now + file_set.creator = [user.user_key] + if assign_visibility?(file_set_params) + env = Actors::Environment.new(file_set, ability, file_set_params) + CurationConcern.file_set_create_actor.create(env) + end + yield(file_set) if block_given? + end + + # Locks to ensure that only one process is operating on the list at a time. + def attach_to_work(work, file_set_params = {}) + acquire_lock_for(work.id) do + # Ensure we have an up-to-date copy of the members association, so that we append to the end of the list. + work.reload unless work.new_record? + file_set.visibility = work.visibility unless assign_visibility?(file_set_params) + work.ordered_members << file_set + work.representative = file_set if work.representative_id.blank? + work.thumbnail = file_set if work.thumbnail_id.blank? + # Save the work so the association between the work and the file_set is persisted (head_id) + # NOTE: the work may not be valid, in which case this save doesn't do anything. 
+ work.save + Hyrax.config.callback.run(:after_create_fileset, file_set, user, warn: false) + end + end + + # @param [String] revision_id the revision to revert to + # @param [Symbol, #to_sym] relation + # @return [Boolean] true on success, false otherwise + def revert_content(revision_id, relation = :original_file) + return false unless build_file_actor(relation).revert_to(revision_id) + Hyrax.config.callback.run(:after_revert_content, file_set, user, revision_id, warn: false) + true + end + + def update_metadata(attributes) + env = Actors::Environment.new(file_set, ability, attributes) + CurationConcern.file_set_update_actor.update(env) + end + + def destroy + unlink_from_work + file_set.destroy + Hyrax.config.callback.run(:after_destroy, file_set.id, user, warn: false) + end + + class_attribute :file_actor_class + self.file_actor_class = Hyrax::Actors::FileActor + + private + + def ability + @ability ||= ::Ability.new(user) + end + + # @param file_set [FileSet] + # @return [ActiveFedora::Base] + def parent_for(file_set:) + file_set.parent + end + + def build_file_actor(relation) + file_actor_class.new(file_set, relation, user) + end + + # uses create! because object must be persisted to serialize for jobs + def wrapper!(file:, relation:) + JobIoWrapper.create_with_varied_file_handling!(user: user, file: file, relation: relation, file_set: file_set) + end + + # For the label, use the original_filename or original_name if it's there. + # If the file was imported via URL, parse the original filename. + # If all else fails, use the basename of the file where it sits. + # @note This is only useful for labeling the file_set, because of the recourse to import_url + def label_for(file) + if file.is_a?(Hyrax::UploadedFile) # filename not present for uncached remote file! + file.uploader.filename.presence || File.basename(URI.parse(Addressable::URI.unencode(file.file_url)).path) + elsif file.respond_to?(:original_name) # e.g. 
Hydra::Derivatives::IoDecorator + file.original_name + elsif file_set.import_url.present? + # This path is taken when file is a Tempfile (e.g. from ImportUrlJob) + File.basename(Addressable::URI.unencode(file.file_url)) + elsif file.respond_to?(:original_filename) # e.g. Rack::Test::UploadedFile + file.original_filename + else + File.basename(file) + end + end + + def assign_visibility?(file_set_params = {}) + !((file_set_params || {}).keys.map(&:to_s) & %w[visibility embargo_release_date lease_expiration_date]).empty? + end + + # replaces file_set.apply_depositor_metadata(user)from hydra-access-controls so depositor doesn't automatically get edit access + def depositor_id(depositor) + depositor.respond_to?(:user_key) ? depositor.user_key : depositor + end + + # Must clear the fileset from the thumbnail_id, representative_id and rendering_ids fields on the work + # and force it to be re-solrized. + # Although ActiveFedora clears the children nodes it leaves those fields in Solr populated. + # rubocop:disable Metrics/CyclomaticComplexity + def unlink_from_work + work = parent_for(file_set: file_set) + return unless work && (work.thumbnail_id == file_set.id || work.representative_id == file_set.id || work.rendering_ids.include?(file_set.id)) + work.thumbnail = nil if work.thumbnail_id == file_set.id + work.representative = nil if work.representative_id == file_set.id + work.rendering_ids -= [file_set.id] + work.save! 
+ end + + def perform_save(object) + object.save + object + end + end + end +end \ No newline at end of file diff --git a/hyrax/app/helpers/hyrax_helper.rb b/hyrax/app/helpers/hyrax_helper.rb index c2e9a7d90e14db4560329430a53ff2fdf3fad830..4f47e0039de4a28110ff4f894d98a6e420f9705c 100644 --- a/hyrax/app/helpers/hyrax_helper.rb +++ b/hyrax/app/helpers/hyrax_helper.rb @@ -20,4 +20,29 @@ module HyraxHelper 'experiment' end end + + def s3_file_download_url_for_file_set(file_set_id) + file_set = FileSet.find(file_set_id) + + bucket = Aws::S3::Resource.new(region: ENV['S3_REGION']).bucket(file_set.parent_works.first.id) + + begin + latest_version_uri = file_set.latest_content_version.uri + file_key = "/#{file_set_id}/files/#{latest_version_uri.split("/")[-3]}/#{file_set.title.first}" + rescue + begin + latest_version_uri = file_set.files[0] + file_key = "/#{file_set_id}/files/#{latest_version_uri.split("/")[-1]}/#{file_set.title.first}" + rescue + return nil + end + end + url_options = { + expires_in: 60.minutes.seconds.to_i, + response_content_disposition: "attachment; filename=\"#{file_key}\"" + } + + object = bucket.object(file_key) + object.exists? ? object.presigned_url(:get, url_options).to_s : nil + end end diff --git a/hyrax/app/jobs/attach_files_to_work_job.rb b/hyrax/app/jobs/attach_files_to_work_job.rb new file mode 100644 index 0000000000000000000000000000000000000000..3ce704a949a07aade443a55d383b71860d379473 --- /dev/null +++ b/hyrax/app/jobs/attach_files_to_work_job.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true +# Converts UploadedFiles into FileSets and attaches them to works. 
+class AttachFilesToWorkJob < Hyrax::ApplicationJob + queue_as Hyrax.config.ingest_queue_name + + # @param [ActiveFedora::Base] work - the work object + # @param [Array<Hyrax::UploadedFile>] uploaded_files - an array of files to attach + def perform(work, uploaded_files, **work_attributes) + case work + when ActiveFedora::Base + perform_af(work, uploaded_files, work_attributes) + else + Hyrax::WorkUploadsHandler.new(work: work).add(files: uploaded_files).attach || + raise("Could not complete AttachFilesToWorkJob. Some of these are probably in an undesirable state: #{uploaded_files}") + end + end + + private + + def perform_af(work, uploaded_files, work_attributes) + validate_files!(uploaded_files) + depositor = proxy_or_depositor(work) + user = User.find_by_user_key(depositor) + + work, work_permissions = create_permissions work, depositor + uploaded_files.each do |uploaded_file| + next if uploaded_file.file_set_uri.present? + attach_work(user, work, work_attributes, work_permissions, uploaded_file) + end + end + + def attach_work(user, work, work_attributes, work_permissions, uploaded_file) + actor = Hyrax::Actors::FileSetActor.new(FileSet.create, user) + file_set_attributes = file_set_attrs(work_attributes, uploaded_file) + metadata = visibility_attributes(work_attributes, file_set_attributes) + uploaded_file.add_file_set!(actor.file_set) + actor.file_set.permissions_attributes = work_permissions + actor.create_metadata(metadata) + actor.attach_to_work(work, metadata) + actor.create_content(uploaded_file) + end + + def create_permissions(work, depositor) + work.edit_users += [depositor] + work.edit_users = work.edit_users.dup + work_permissions = work.permissions.map(&:to_hash) + [work, work_permissions] + end + + # The attributes used for visibility - sent as initial params to created FileSets. 
+ def visibility_attributes(attributes, file_set_attributes) + attributes.merge(file_set_attributes).slice(:visibility, :visibility_during_lease, + :visibility_after_lease, :lease_expiration_date, + :embargo_release_date, :visibility_during_embargo, + :visibility_after_embargo) + end + + def file_set_attrs(attributes, uploaded_file) + attrs = Array(attributes[:file_set]).find { |fs| fs[:uploaded_file_id].present? && (fs[:uploaded_file_id].to_i == uploaded_file&.id) } + Hash(attrs).symbolize_keys + end + + def validate_files!(uploaded_files) + uploaded_files.each do |uploaded_file| + next if uploaded_file.is_a? Hyrax::UploadedFile + raise ArgumentError, "Hyrax::UploadedFile required, but #{uploaded_file.class} received: #{uploaded_file.inspect}" + end + end + + ## + # A work with files attached by a proxy user will set the depositor as the intended user + # that the proxy was depositing on behalf of. See tickets #2764, #2902. + def proxy_or_depositor(work) + work.on_behalf_of.presence || work.depositor + end +end \ No newline at end of file diff --git a/hyrax/app/jobs/ingest_job.rb b/hyrax/app/jobs/ingest_job.rb new file mode 100644 index 0000000000000000000000000000000000000000..00c31a00bc0fa47ccf0158c7277dc3b6bef5a871 --- /dev/null +++ b/hyrax/app/jobs/ingest_job.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true +class IngestJob < Hyrax::ApplicationJob + queue_as Hyrax.config.ingest_queue_name + + after_perform do |job| + # We want the lastmost Hash, if any. + opts = job.arguments.reverse.detect { |x| x.is_a? 
Hash } || {} + wrapper = job.arguments.first + ContentNewVersionEventJob.perform_later(wrapper.file_set, wrapper.user) if opts[:notification] + end + + # @param [JobIoWrapper] wrapper + # @param [Boolean] notification send the user a notification, used in after_perform callback + # @see 'config/initializers/hyrax_callbacks.rb' + # rubocop:disable Lint/UnusedMethodArgument + def perform(wrapper, notification: false) + wrapper.ingest_file + end +end \ No newline at end of file diff --git a/hyrax/app/models/bulkrax/file_factory.rb b/hyrax/app/models/bulkrax/file_factory.rb new file mode 100644 index 0000000000000000000000000000000000000000..345bc88c5d66f96659cbd225340ab81527e6f2bb --- /dev/null +++ b/hyrax/app/models/bulkrax/file_factory.rb @@ -0,0 +1,152 @@ +# frozen_string_literal: true + +module Bulkrax + module FileFactory + extend ActiveSupport::Concern + + # Find existing files or upload new files. This assumes a Work will have unique file titles; + # and that those file titles will not have changed + # could filter by URIs instead (slower). + # When an uploaded_file already exists we do not want to pass its id in `file_attributes` + # otherwise it gets reuploaded by `work_actor`. + # support multiple files; ensure attributes[:file] is an Array + def upload_ids + return [] if klass == Collection + attributes[:file] = file_paths + import_files + end + + def file_attributes(update_files = false) + @update_files = update_files + hash = {} + return hash if klass == Collection + hash[:uploaded_files] = upload_ids if attributes[:file].present? + hash[:remote_files] = new_remote_files if new_remote_files.present? + hash + end + + # Its possible to get just an array of strings here, so we need to make sure they are all hashes + def parsed_remote_files + return @parsed_remote_files if @parsed_remote_files.present? 
+ @parsed_remote_files = attributes[:remote_files] || [] + @parsed_remote_files = @parsed_remote_files.map do |file_value| + if file_value.is_a?(Hash) + file_value + elsif file_value.is_a?(String) + name = Bulkrax::Importer.safe_uri_filename(file_value) + { url: file_value, file_name: name } + else + Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type") + nil + end + end + @parsed_remote_files.delete(nil) + @parsed_remote_files + end + + def new_remote_files + return if object.is_a? FileSet + + @new_remote_files ||= if object.present? && object.file_sets.present? + parsed_remote_files.select do |file| + # is the url valid? + is_valid = file[:url]&.match(URI::ABS_URI) + # does the file already exist + is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] } + is_valid && !is_existing + end + else + parsed_remote_files.select do |file| + file[:url]&.match(URI::ABS_URI) + end + end + end + + def file_paths + @file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) } + end + + # Retrieve the orginal filenames for the files to be imported + def work_files_filenames + object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present? + end + + # Retrieve the filenames for the files to be imported + def import_files_filenames + file_paths.map { |f| f.split('/').last } + end + + # Called if #replace_files is true + # Destroy all file_sets for this object + # Reload the object to ensure the remaining methods have the most up to date object + def destroy_existing_files + return unless object.present? && object.file_sets.present? 
+ object.file_sets.each do |fs| + Hyrax::Actors::FileSetActor.new(fs, @user).destroy + end + @object = object.reload + log_deleted_fs(object) + end + + def set_removed_filesets + local_file_sets.each do |fileset| + fileset.files.first.create_version + opts = {} + opts[:path] = fileset.files.first.id.split('/', 2).last + opts[:original_name] = 'removed.png' + opts[:mime_type] = 'image/png' + + fileset.add_file(File.open(Bulkrax.removed_image_path), opts) + fileset.save + ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id) + end + end + + def local_file_sets + @local_file_sets ||= ordered_file_sets + end + + def ordered_file_sets + # OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0 + object&.ordered_members.to_a.select(&:file_set?) + end + + def import_files + paths = file_paths.map { |path| import_file(path) }.compact + set_removed_filesets if local_file_sets.present? + paths + end + + def import_file(path) + u = Hyrax::UploadedFile.new + u.user_id = @user.id + + carrierwave_file = CarrierWave::SanitizedFile.new(path) + carrierwave_file.content_type = Marcel::Magic.by_path(path).to_s # get correct content type on the basis of file data + + u.file = carrierwave_file + update_filesets(u) + end + + def update_filesets(current_file) + if @update_files && local_file_sets.present? 
+ fileset = local_file_sets.shift + return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s + + fileset.files.first.create_version + opts = {} + opts[:path] = fileset.files.first.id.split('/', 2).last + opts[:original_name] = current_file.file.file.original_filename + opts[:mime_type] = current_file.file.content_type + + fileset.add_file(File.open(current_file.file.to_s), opts) + fileset.save + ::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id) + nil + else + current_file.save + current_file.id + end + end + end +end \ No newline at end of file diff --git a/hyrax/app/models/crc_dataset.rb b/hyrax/app/models/crc_dataset.rb index ed62412e8e3e7a863d9c49086a58796e91565677..07f07dd836dde5bdd4382ed6fa98d1b0b2fd7c97 100644 --- a/hyrax/app/models/crc_dataset.rb +++ b/hyrax/app/models/crc_dataset.rb @@ -10,6 +10,8 @@ class CrcDataset < ActiveFedora::Base # self.valid_child_concerns = [] validates :title, presence: { message: 'Your CRC dataset must have a title.' 
} + after_save :save_work_meta_json_file_to_s3 + # ------ properties from core metadata ------ # property date_modified - not displayed (filled in by the system) # property date_uploaded - not displayed (filled in by the system) @@ -124,4 +126,15 @@ class CrcDataset < ActiveFedora::Base accepts_nested_attributes_for :complex_funding_reference, reject_if: :fundref_blank, allow_destroy: true accepts_nested_attributes_for :complex_relation, reject_if: :relation_blank, allow_destroy: true accepts_nested_attributes_for :complex_subject, reject_if: :all_blank, allow_destroy: true + + private + + def save_work_meta_json_file_to_s3 + s3 = S3StorageService.new + s3.init_client + + s3.create_bucket(id) unless s3.bucket_exists?(id) + + s3.add_content(id, 'metadata.json', to_json) + end end diff --git a/hyrax/app/models/hyrax/uploaded_file.rb b/hyrax/app/models/hyrax/uploaded_file.rb new file mode 100644 index 0000000000000000000000000000000000000000..92300ff01ff7db5efde9637f953a1550e89d8147 --- /dev/null +++ b/hyrax/app/models/hyrax/uploaded_file.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true +module Hyrax + ## + # Store a file uploaded by a user. + # + # Eventually these files get attached to {FileSet}s and pushed into Fedora. + class UploadedFile < ActiveRecord::Base + self.table_name = 'uploaded_files' + mount_uploader :file, UploadedFileUploader + alias uploader file + has_many :job_io_wrappers, + inverse_of: 'uploaded_file', + class_name: 'JobIoWrapper', + dependent: :destroy + belongs_to :user, class_name: '::User' + + before_create :set_characterization_data + ## + # Associate a {FileSet} with this uploaded file. 
+ # + # @param [Hyrax::Resource, ActiveFedora::Base] file_set + # @return [void] + def add_file_set!(file_set) + uri = case file_set + when ActiveFedora::Base + file_set.uri + when Hyrax::Resource + file_set.id + end + update!(file_set_uri: uri) + end + + private + + def set_characterization_data + if file.file.content_type.include?('image') + image = MiniMagick::Image.open(file.path) + + self.characterization_data[:height] = image[:height] + self.characterization_data[:width] = image[:width] + end + + self.characterization_data[:content_type] = Marcel::Magic.by_path(file.path).to_s + self.characterization_data[:file_size] = file.size + self.characterization_data[:original_checksum] = Digest::SHA1.file(file.path).to_s + end + end +end \ No newline at end of file diff --git a/hyrax/app/services/prepare_csv_from_crc_folder.rb b/hyrax/app/services/prepare_csv_from_crc_folder.rb index 2c6d7e819b373b2c5af9c7240882b6de94def7b1..241d1bf48bde34ef7195a280d694b949315e44ae 100644 --- a/hyrax/app/services/prepare_csv_from_crc_folder.rb +++ b/hyrax/app/services/prepare_csv_from_crc_folder.rb @@ -6,6 +6,7 @@ class PrepareCsvFromCrcFolder def initialize(parser) @parser = parser @load_or_create_session_per_subject = {} + @system_created_sessions = [] end def prepare_csv! 
@@ -26,11 +27,10 @@ class PrepareCsvFromCrcFolder @csv_file_path ||= "#{csv_folder_path}/folder_data.csv" end - private - # Prepare the csv file to this path def csv_folder_path - @csv_folder_path ||= File.join(Rails.root, parser.base_path, "#{parser.importer.id}_#{parser.importer.created_at.strftime('%Y%m%d%H%M%S')}") + @csv_folder_path ||= File.join(Rails.root, parser.base_path, + "#{parser.importer.id}_#{parser.importer.created_at.strftime('%Y%m%d%H%M%S')}") end def csv_file_headers @@ -85,10 +85,11 @@ class PrepareCsvFromCrcFolder end def crc_1280_collection_source_indentifier - collection_type = Hyrax::CollectionType.find_or_create_by(title: 'CRC 1280') + crc_collection_title = ENV.fetch('CRC_1280_COLLECTION', "CRC 1280") + collection_type = Hyrax::CollectionType.find_or_create_by(title: crc_collection_title) collection_type_gid = "gid://hyrax3-app/Hyrax::CollectionType/#{collection_type.id}" - crc_1280_collections = collection_type.collections.select{ |t| t.title == ["CRC 1280"] } + crc_1280_collections = collection_type.collections.select{ |t| t.title == [crc_collection_title] } return "" unless crc_1280_collections.any? @@ -96,12 +97,13 @@ class PrepareCsvFromCrcFolder end def prepare_group_data(group_folder_path, csv) + crc_collection_title = ENV.fetch('CRC_1280_COLLECTION', "CRC 1280") experiment_folders_path = Dir.glob("#{group_folder_path}/*").select { |item| Pathname(item).directory? } title = group_folder_path.split('/').last source_identifier = SecureRandom.uuid - csv << ['Collection', 'CRC 1280', '', source_identifier, crc_1280_collection_source_indentifier, title] + csv << ['Collection', crc_collection_title, '', source_identifier, crc_1280_collection_source_indentifier, title] return unless experiment_folders_path.present? @@ -165,7 +167,7 @@ class PrepareCsvFromCrcFolder if json_file_path.nil? 
parent_path = File.dirname(session_folder_path) - json_file_path = find_meta_json_file_path_in_look_up(parent_path) + json_file_path = find_meta_json_file_path_in_look_up(parent_path, true) end return unless json_file_path.present? @@ -195,7 +197,8 @@ class PrepareCsvFromCrcFolder if json_file_path.nil? parent_path = File.dirname(modality_folder_path) - json_file_path = find_meta_json_file_path_in_look_up(parent_path) + json_file_path = find_meta_json_file_path_in_look_up(parent_path, + @system_created_sessions.include?(parent_source_identifier)) end return unless json_file_path.present? @@ -244,7 +247,7 @@ class PrepareCsvFromCrcFolder if json_file_path.nil? parent_path = File.dirname(folder_path) - json_file_path = find_meta_json_file_path_in_look_up(parent_path) + json_file_path = find_meta_json_file_path_in_look_up(parent_path, true) end return unless json_file_path.present? @@ -259,6 +262,7 @@ class PrepareCsvFromCrcFolder } @load_or_create_session_per_subject[key] = map_work_meta_attributes_with_json_file(json_file_path, meta_info) + @system_created_sessions << source_identifier csv << @load_or_create_session_per_subject[key] end @@ -267,6 +271,8 @@ class PrepareCsvFromCrcFolder end def find_meta_json_file_path_in_look_down(folder_path) + meta_json_file_path = nil + folder_items_path = Dir.glob("#{folder_path}/*") return nil unless folder_items_path.any? @@ -275,25 +281,29 @@ class PrepareCsvFromCrcFolder return file_path if folder_items_path.include?(file_path) - first_sub_item_path = folder_items_path.select { |item| Pathname(item).directory? }.first + folder_items_path.select { |item| Pathname(item).directory? 
}.each do |folder_item_path| + meta_json_file_path = find_meta_json_file_path_in_look_down(folder_item_path) - return nil unless first_sub_item_path + break if meta_json_file_path + end - find_meta_json_file_path_in_look_down(first_sub_item_path) + meta_json_file_path end - def find_meta_json_file_path_in_look_up(folder_path) - return nil if @parser.parser_fields['import_file_path'] == folder_path - + def find_meta_json_file_path_in_look_up(folder_path, is_lookup_subject_folder) folder_items_path = Dir.glob("#{folder_path}/*") - if folder_items_path.any? - file_path = "#{folder_path}/meta.json" - return file_path if folder_items_path.include?(file_path) + return nil unless folder_items_path.any? - parent_path = File.dirname(folder_path) - find_meta_json_file_path_in_look_up(parent_path) - end + file_path = "#{folder_path}/meta.json" + + return file_path if folder_items_path.include?(file_path) + + return nil if is_lookup_subject_folder + + parent_path = File.dirname(folder_path) + + find_meta_json_file_path_in_look_up(parent_path, true) end def map_work_meta_attributes_with_json_file(file_path, meta_info) @@ -350,8 +360,10 @@ class PrepareCsvFromCrcFolder end def create_file_sets_for_work(folder_path, csv, parent_source_identifier) - Dir.glob("#{folder_path}/*").reject { |item| Pathname(item).directory? || item.include?('MetaApp2_SFB1280.exe') || item.include?('meta.json') }.each_with_index do |file_path, index| - csv << ['FileSet', '', file_path, SecureRandom.uuid, parent_source_identifier, "File #{index + 1}"] + Dir.glob("#{folder_path}/*").reject do |item| + Pathname(item).directory? 
# frozen_string_literal: true

# Convenience wrapper around the S3 client for bucket and object management.
#
# Reference: https://docs.aws.amazon.com/sdk-for-ruby/v3/developer-guide/
#            https://docs.ceph.com/en/latest/radosgw/s3/ruby/
#            https://docs.aws.amazon.com/sdk-for-ruby/v3/developer-guide/s3-example-create-buckets.html
class S3StorageService
  # Client used for every request. It is built on first use, so a caller that
  # never invoked {#init_client} no longer ends up calling methods on nil.
  #
  # @return [Aws::S3::Client]
  def s3_client
    @s3_client ||= Aws::S3::Client.new
  end

  # Builds a fresh client. Kept so existing callers that invoke it explicitly
  # keep working.
  #
  # @return [Aws::S3::Client]
  def init_client
    @s3_client = Aws::S3::Client.new
  end

  # @return [Array<Hash>] one hash per bucket with :name, :creation_date and :size
  def list_buckets
    s3_client.list_buckets.buckets.map do |bucket|
      {
        name: bucket.name,
        creation_date: bucket.creation_date,
        # NOTE(review): presumably this resolves to Struct#size (the member
        # count) rather than a byte size; left as-is for existing callers.
        # Confirm against the SDK's bucket type.
        size: bucket.size
      }
    end
  end

  # @param bucket_name [String]
  # @return [Boolean] true when a bucket with exactly this name is listed
  def bucket_exists?(bucket_name)
    s3_client.list_buckets.buckets.any? { |bucket| bucket.name == bucket_name }
  end

  # Returns the first +max_keys+ objects of a bucket.
  #
  # @param bucket_name [String]
  # @param prefix [String, nil] only keys starting with this prefix
  # @param max_keys [Integer]
  # @return [Array<Hash>] hashes with :key, :size and :last_modified_date
  def list_objects(bucket_name, prefix = nil, max_keys = 1000)
    first_page = s3_client.list_objects_v2(
      bucket: bucket_name,
      max_keys: max_keys,
      prefix: prefix
    )
    first_page.contents.map { |object| object_summary(object) }
  end

  # Returns every object of a bucket, following the response pages.
  #
  # @param bucket_name [String]
  # @param prefix [String, nil] only keys starting with this prefix
  # @return [Array<Hash>] hashes with :key, :size and :last_modified_date
  def list_all_objects(bucket_name, prefix = nil)
    summaries = []
    page = s3_client.list_objects_v2(
      bucket: bucket_name,
      max_keys: 1000,
      prefix: prefix
    )
    loop do
      summaries.concat(page.contents.map { |object| object_summary(object) })
      break unless page.next_page?

      page = page.next_page
    end
    summaries
  end

  # Downloads an object into a local file.
  #
  # @param bucket_name [String]
  # @param object_key [String]
  # @param local_file_path [String] passed to the client as +response_target+
  # @return the client's get_object response
  def get_content(bucket_name, object_key, local_file_path)
    s3_client.get_object(
      bucket: bucket_name,
      key: object_key,
      response_target: local_file_path
    )
  end

  # @param bucket_name [String]
  # @return [Boolean] true when the response location is "/<bucket_name>"
  def create_bucket(bucket_name)
    response = s3_client.create_bucket(bucket: bucket_name)
    response.location == "/#{bucket_name}"
  end

  # Stores +object_content+ under +object_key+.
  #
  # @param bucket_name [String]
  # @param object_key [String]
  # @param object_content [String, IO] sent as the request body
  # @return [Boolean] true when the response carries an etag
  def add_content(bucket_name, object_key, object_content)
    response = s3_client.put_object(
      bucket: bucket_name,
      key: object_key,
      body: object_content
      # content_type: 'text/plain'
    )
    response.etag ? true : false
  end

  # Not implemented yet.
  #
  # @return [false]
  def add_multipart_content(_bucket_name, _object_key, _file_path)
    # TODO: implement multipart upload
    false
  end

  # @param bucket_name [String]
  # @param object_key [String]
  # @return [Boolean] true when exactly one object was reported as deleted
  def delete_object(bucket_name, object_key)
    response = s3_client.delete_objects(
      bucket: bucket_name,
      delete: { objects: [{ key: object_key }] }
    )
    response.deleted.count == 1
  end

  # Deletes several objects in one request.
  #
  # @param bucket_name [String]
  # @param object_keys [Array<String>]
  # @return [Boolean] true when as many objects were reported deleted as requested
  def delete_objects(bucket_name, object_keys)
    keys = Array(object_keys)
    # Nothing to delete, so no request is sent.
    return true if keys.empty?

    # Each entry is sent as a { key: ... } hash, the same shape
    # #delete_object uses; previously the bare key strings were sent.
    response = s3_client.delete_objects(
      bucket: bucket_name,
      delete: { objects: keys.map { |object_key| { key: object_key } } }
    )
    response.deleted.count == keys.count
  end

  # Removes every object from the bucket.
  #
  # @param bucket_name [String]
  def delete_all_objects(bucket_name)
    Aws::S3::Bucket.new(bucket_name, client: s3_client).clear!
  end

  # @param bucket_name [String]
  # @return the client's delete_bucket response
  def delete_bucket(bucket_name)
    s3_client.delete_bucket(bucket: bucket_name)
  end

  # Moves an object out of the upload bucket (ENV['S3_FILE_UPLOAD_BUCKET'])
  # into +target_bucket_name+ under +target_object_key+.
  #
  # Service errors are reported on stdout and swallowed; the method then
  # returns nil.
  #
  # @param source_object_key [String] key inside the upload bucket
  # @param target_bucket_name [String]
  # @param target_object_key [String]
  # @param metadata [Hash] passed through to +move_to+
  def move_object(source_object_key, target_bucket_name, target_object_key, metadata = {})
    source_object = Aws::S3::Resource
                    .new(region: ENV['S3_REGION'])
                    .bucket(ENV['S3_FILE_UPLOAD_BUCKET'])
                    .object(source_object_key)

    source_object.move_to(bucket: target_bucket_name, key: target_object_key, metadata: metadata)
  rescue Aws::Errors::ServiceError => e
    # Use the key argument here: the local object may still be unassigned
    # when the failure happens, which used to raise inside this handler.
    puts "Couldn't copy #{source_object_key} to #{target_object_key}. Here's why: #{e.message}"
  end

  private

  # @param object [#key, #size, #last_modified] one entry of a listing response
  # @return [Hash] with :key, :size and :last_modified_date
  def object_summary(object)
    {
      key: object.key,
      size: object.size,
      last_modified_date: object.last_modified
    }
  end
end
a/hyrax/app/views/hyrax/dashboard/collections/show.html.erb +++ b/hyrax/app/views/hyrax/dashboard/collections/show.html.erb @@ -1,5 +1,5 @@ <% collection_type = @presenter.collection_type.title %> -<% if collection_type == 'CRC 1280' %> +<% if collection_type == ENV.fetch('CRC_1280_COLLECTION', "CRC 1280") %> <%= render 'hyrax/dashboard/collections/show/crc_1280_template', presenter: @presenter %> <% else %> <%= render 'hyrax/dashboard/collections/show/default_template', presenter: @presenter %> diff --git a/hyrax/app/views/hyrax/file_sets/_actions.html.erb b/hyrax/app/views/hyrax/file_sets/_actions.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..ece63f0aa021e93d9cb70788990c1789d54a6de9 --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/_actions.html.erb @@ -0,0 +1,53 @@ +<% if (can?(:download, file_set.id) || can?(:destroy, file_set.id) || can?(:edit, file_set.id)) && !workflow_restriction?(@parent) %> + <% if can?(:download, file_set.id) && !(can?(:edit, file_set.id) || can?(:destroy, file_set.id)) && s3_file_download_url_for_file_set(file_set.id).present? 
%> + <%= link_to t('.download'), + s3_file_download_url_for_file_set(file_set.id), + class: 'btn btn-secondary btn-sm', + title: t('.download_title', file_set: file_set), + target: "_blank", + id: "file_download", + data: { label: file_set.id, work_id: @presenter.id, collection_ids: @presenter.member_of_collection_ids } %> + <% else %> + <div class="btn-group"> + <button class="btn btn-default dropdown-toggle" data-toggle="dropdown" type="button" id="dropdownMenu_<%= file_set.id %>" aria-haspopup="true" aria-expanded="false"> + <span class="sr-only"><%= t('.press_to') %> </span> + <%= t('.header') %> + </button> + + <ul role="menu" class="dropdown-menu dropdown-menu-right" aria-labelledby="dropdownMenu_<%= file_set.id %>"> + <% if can?(:edit, file_set.id) %> + <li class="dropdown-item" role="menuitem" tabindex="-1"> + <%= link_to t('.edit'), edit_polymorphic_path([main_app, file_set]), + { title: t('.edit_title', file_set: file_set) } %> + </li> + + <li class="dropdown-item" role="menuitem" tabindex="-1"> + <%= link_to t('.versions'), edit_polymorphic_path([main_app, file_set], anchor: 'versioning_display'), + { title: t('.versions_title') } %> + </li> + <% end %> + + <% if can?(:destroy, file_set.id) %> + <li class="dropdown-item" role="menuitem" tabindex="-1"> + <%= link_to t('.delete'), polymorphic_path([main_app, file_set]), + method: :delete, title: t('.delete_title', file_set: file_set), + data: { confirm: t('.delete_confirm', file_set: file_set, application_name: application_name) } %> + </li> + <% end %> + + <% if can?(:download, file_set.id) && s3_file_download_url_for_file_set(file_set.id).present? 
%> + <li class="dropdown-item" role="menuitem" tabindex="-1"> + <%= link_to t('.download'), + s3_file_download_url_for_file_set(file_set.id), + title: t('.download_title', file_set: file_set), + target: "_blank", + id: "file_download", + class: "download", + data: { label: file_set.id, work_id: @presenter.id, collection_ids: @presenter.member_of_collection_ids } %> + </li> + <% end %> + + </ul> + </div> + <% end %> +<% end %> \ No newline at end of file diff --git a/hyrax/app/views/hyrax/file_sets/media_display/_audio.html.erb b/hyrax/app/views/hyrax/file_sets/media_display/_audio.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..cbb90ccd590b1bb94af55ab22edb749e16caa24f --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/media_display/_audio.html.erb @@ -0,0 +1,25 @@ +<% s3_file_url = s3_file_download_url_for_file_set(file_set.id) || "#" %> + +<% if display_media_download_link?(file_set: file_set) && s3_file_download_url_for_file_set(file_set.id).present? %> + <div> + <h2 class="sr-only"><%= t('hyrax.file_set.show.downloadable_content.heading') %></h2> + <audio controls="controls" class="audiojs" style="width:100%" controlsList="nodownload" preload="auto"> + <source src="<%= s3_file_url %>" type="audio/ogg" /> + <source src="<%= s3_file_url %>" type="audio/mpeg" /> + <%= t('hyrax.file_set.show.downloadable_content.audio_tag_not_supported') %> + </audio> + <%= link_to t('hyrax.file_set.show.downloadable_content.audio_link'), + s3_file_download_url_for_file_set(file_set.id), + data: { label: file_set.id }, + target: :_blank, + id: "file_download" %> + </div> +<% else %> + <div> + <audio controls="controls" class="audiojs" style="width:100%" controlsList="nodownload" preload="auto"> + <source src="<%= s3_file_url %>" type="audio/ogg" /> + <source src="<%= s3_file_url %>" type="audio/mpeg" /> + <%= t('hyrax.file_set.show.downloadable_content.audio_tag_not_supported') %> + </audio> + </div> +<% end %> \ No newline at end of file diff --git 
a/hyrax/app/views/hyrax/file_sets/media_display/_default.html.erb b/hyrax/app/views/hyrax/file_sets/media_display/_default.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..73077358b4c4e24eb0a395a60536de996d61af6c --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/media_display/_default.html.erb @@ -0,0 +1,10 @@ +<div class="no-preview"> + <%= t('hyrax.works.show.no_preview') %> + <% if display_media_download_link?(file_set: file_set) && s3_file_download_url_for_file_set(file_set.id).present? %> + <p /><%= link_to t('hyrax.file_set.show.download'), + s3_file_download_url_for_file_set(file_set.id), + id: "file_download", + data: { label: file_set.id }, + target: "_new" %> + <% end %> +</div> \ No newline at end of file diff --git a/hyrax/app/views/hyrax/file_sets/media_display/_image.html.erb b/hyrax/app/views/hyrax/file_sets/media_display/_image.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..c20cd6777f92cc6c1929c436f316e20df6fe0623 --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/media_display/_image.html.erb @@ -0,0 +1,21 @@ +<% if display_media_download_link?(file_set: file_set) && s3_file_download_url_for_file_set(file_set.id).present? %> + <div> + <h2 class="sr-only"><%= t('hyrax.file_set.show.downloadable_content.heading') %></h2> + <%= image_tag s3_file_download_url_for_file_set(file_set.id), + class: "representative-media", + alt: "", + role: "presentation" %> + <%= link_to t('hyrax.file_set.show.downloadable_content.image_link'), + s3_file_download_url_for_file_set(file_set.id), + data: { label: file_set.id }, + target: :_blank, + id: "file_download" %> + </div> +<% elsif s3_file_download_url_for_file_set(file_set.id).present? 
%> + <div> + <%= image_tag s3_file_download_url_for_file_set(file_set.id), + class: "representative-media", + alt: "", + role: "presentation" %> + </div> +<% end %> \ No newline at end of file diff --git a/hyrax/app/views/hyrax/file_sets/media_display/_office_document.html.erb b/hyrax/app/views/hyrax/file_sets/media_display/_office_document.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..095afd8618c94bd116d5e5fcd59a21097a1fc285 --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/media_display/_office_document.html.erb @@ -0,0 +1,21 @@ +<% if display_media_download_link?(file_set: file_set) && s3_file_download_url_for_file_set(file_set.id).present? %> + <div> + <h2 class="sr-only"><%= t('hyrax.file_set.show.downloadable_content.heading') %></h2> + <%= image_tag thumbnail_url(file_set), + class: "representative-media", + alt: "", + role: "presentation" %> + <%= link_to t('hyrax.file_set.show.downloadable_content.office_link'), + s3_file_download_url_for_file_set(file_set.id), + target: :_blank, + id: "file_download", + data: { label: file_set.id } %> + </div> +<% else %> + <div> + <%= image_tag thumbnail_url(file_set), + class: "representative-media", + alt: "", + role: "presentation" %> + </div> +<% end %> \ No newline at end of file diff --git a/hyrax/app/views/hyrax/file_sets/media_display/_pdf.html.erb b/hyrax/app/views/hyrax/file_sets/media_display/_pdf.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..16737e0df4494474d39db27b98054758d2e10048 --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/media_display/_pdf.html.erb @@ -0,0 +1,21 @@ +<% if display_media_download_link?(file_set: file_set) && s3_file_download_url_for_file_set(file_set.id).present? 
%> + <div> + <h2 class="sr-only"><%= t('hyrax.file_set.show.downloadable_content.heading') %></h2> + <%= image_tag thumbnail_url(file_set), + class: "representative-media", + alt: "", + role: "presentation" %> + <%= link_to t('hyrax.file_set.show.downloadable_content.pdf_link'), + s3_file_download_url_for_file_set(file_set.id), + target: :_blank, + id: "file_download", + data: { label: file_set.id } %> + </div> +<% else %> + <div> + <%= image_tag thumbnail_url(file_set), + class: "representative-media", + alt: "", + role: "presentation" %> + </div> +<% end %> \ No newline at end of file diff --git a/hyrax/app/views/hyrax/file_sets/media_display/_video.html.erb b/hyrax/app/views/hyrax/file_sets/media_display/_video.html.erb new file mode 100644 index 0000000000000000000000000000000000000000..78550ced398ba5f34983baeae00cef0b60fe0b72 --- /dev/null +++ b/hyrax/app/views/hyrax/file_sets/media_display/_video.html.erb @@ -0,0 +1,25 @@ +<% s3_file_url = s3_file_download_url_for_file_set(file_set.id) || "#" %> + +<% if display_media_download_link?(file_set: file_set) && s3_file_download_url_for_file_set(file_set.id).present?%> + <div> + <h2 class="sr-only"><%= t('hyrax.file_set.show.downloadable_content.heading') %></h2> + <video controls="controls" class="video-js vjs-default-skin" style="width:100%" data-setup="{}" controlsList="nodownload" preload="auto"> + <source src="<%= s3_file_url %>" type="video/webm" /> + <source src="<%= s3_file_url %>" type="video/mp4" /> + <%= t('hyrax.file_set.show.downloadable_content.video_tag_not_supported') %> + </video> + <%= link_to t('hyrax.file_set.show.downloadable_content.video_link'), + s3_file_url, + data: { label: file_set.id }, + target: :_blank, + id: "file_download" %> + </div> +<% else %> + <div> + <video controls="controls" class="video-js vjs-default-skin" style="width:100%" data-setup="{}" controlsList="nodownload" preload="auto"> + <source src="<%= s3_file_url %>" type="video/webm" /> + <source src="<%= s3_file_url %>" 
type="video/mp4" /> + <%= t('hyrax.file_set.show.downloadable_content.video_tag_not_supported') %> + </video> + </div> +<% end %> \ No newline at end of file diff --git a/hyrax/config/initializers/aws.rb b/hyrax/config/initializers/aws.rb new file mode 100644 index 0000000000000000000000000000000000000000..1b060624c6cc7540565aedadd82b08493ba68b9e --- /dev/null +++ b/hyrax/config/initializers/aws.rb @@ -0,0 +1,9 @@ +if ENV['USE_S3'] || false + Aws.config.update( + endpoint: ENV['S3_ENDPOINT'], + access_key_id: ENV['S3_ACCESS_KEY'], + secret_access_key: ENV['S3_SECRET_KEY'], + force_path_style: true, + region: ENV['S3_REGION'] + ) +end diff --git a/hyrax/config/initializers/carrierwave.rb b/hyrax/config/initializers/carrierwave.rb new file mode 100644 index 0000000000000000000000000000000000000000..4d1950535eb11506eba9adb67ff1d2b79e3494fc --- /dev/null +++ b/hyrax/config/initializers/carrierwave.rb @@ -0,0 +1,26 @@ +CarrierWave.configure do |config| + config.storage = :aws + config.aws_bucket = ENV.fetch('S3_FILE_UPLOAD_BUCKET') # for AWS-side bucket access permissions config, see section below + config.aws_acl = 'private' + + # Optionally define an asset host for configurations that are fronted by a + # content host, such as CloudFront. + config.asset_host = ENV.fetch('S3_ENDPOINT') + + # The maximum period for authenticated_urls is only 7 days. + config.aws_authenticated_url_expiration = 60 * 60 * 24 * 7 + + # Set custom options such as cache control to leverage browser caching. + # You can use either a static Hash or a Proc. + config.aws_attributes = -> { { + expires: 1.week.from_now.httpdate, + cache_control: 'max-age=604800' + } } + + config.aws_credentials = { + access_key_id: ENV.fetch('S3_ACCESS_KEY'), + secret_access_key: ENV.fetch('S3_SECRET_KEY'), + region: ENV.fetch('S3_REGION'), # Required + stub_responses: Rails.env.test? 
# Optional, avoid hitting S3 actual during tests + } +end \ No newline at end of file diff --git a/hyrax/config/initializers/sidekiq.rb b/hyrax/config/initializers/sidekiq.rb index 351838d72a28f03922915e37236944b360b45fdf..438ce86f9100434951e22d84f88b77eadccfc4f3 100644 --- a/hyrax/config/initializers/sidekiq.rb +++ b/hyrax/config/initializers/sidekiq.rb @@ -14,4 +14,4 @@ end Sidekiq.configure_client do |s| # s.redis = redis_conn s.redis = redis_config -end +end \ No newline at end of file diff --git a/hyrax/config/sidekiq.yml b/hyrax/config/sidekiq.yml index e620e088563b18119d334713670d19adba1044bf..d784de383d2a16a34b562d93ed39ff9ba2ecbf8a 100644 --- a/hyrax/config/sidekiq.yml +++ b/hyrax/config/sidekiq.yml @@ -1,4 +1,12 @@ :queues: - default - import # added - - export # added \ No newline at end of file + - export # added + +:max_retries: 5 + +development: + :concurrency: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> + +production: + :concurrency: <%= ENV.fetch("RAILS_MAX_THREADS") { 5 } %> diff --git a/hyrax/db/migrate/20220916103404_add_characterization_data_to_uploaded_files.rb b/hyrax/db/migrate/20220916103404_add_characterization_data_to_uploaded_files.rb new file mode 100644 index 0000000000000000000000000000000000000000..1d4e58d884baa041dc814c7c6f55ffe0544a4223 --- /dev/null +++ b/hyrax/db/migrate/20220916103404_add_characterization_data_to_uploaded_files.rb @@ -0,0 +1,5 @@ +class AddCharacterizationDataToUploadedFiles < ActiveRecord::Migration[5.2] + def change + add_column :uploaded_files, :characterization_data, :jsonb, default: {} + end +end diff --git a/hyrax/db/schema.rb b/hyrax/db/schema.rb index 98308db2a0b6ae612d7b0d4d8a3a00cba2c07f8e..91b33505655e929f785210a115713c4a116d4a7d 100644 --- a/hyrax/db/schema.rb +++ b/hyrax/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema.define(version: 2022_07_01_102131) do +ActiveRecord::Schema.define(version: 2022_09_16_103404) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -610,6 +610,7 @@ ActiveRecord::Schema.define(version: 2022_07_01_102131) do t.string "file_set_uri" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.jsonb "characterization_data", default: {} t.index ["file_set_uri"], name: "index_uploaded_files_on_file_set_uri" t.index ["user_id"], name: "index_uploaded_files_on_user_id" end diff --git a/hyrax/lib/tasks/create_crc_1280_collection.rake b/hyrax/lib/tasks/create_crc_1280_collection.rake index 39597f84ab5c9f5c16b13605da54963634db3807..cb7db1990d6d890c59ce3e49d851f94cab31adc7 100644 --- a/hyrax/lib/tasks/create_crc_1280_collection.rake +++ b/hyrax/lib/tasks/create_crc_1280_collection.rake @@ -3,13 +3,14 @@ namespace :rdms do namespace :crc_1280_collection do desc "Create CRC 1280 Collection Types and CRC 1280 Collection" task create: :environment do - collection_type = Hyrax::CollectionType.find_or_create_by(title: 'CRC 1280') + crc_collection_title = ENV.fetch('CRC_1280_COLLECTION', "CRC 1280") + collection_type = Hyrax::CollectionType.find_or_create_by(title: crc_collection_title) collection_type_gid = "gid://hyrax3-app/Hyrax::CollectionType/#{collection_type.id}" - crc_1280_collections = collection_type.collections.select{ |t| t.title == ["CRC 1280"] } + crc_1280_collections = collection_type.collections.select{ |t| t.title == [crc_collection_title] } unless crc_1280_collections.any? 
namespace :rdms do
  namespace :s3_bucket do
    desc "Create S3 bucket for RDMS uploads"
    # Creates the upload bucket named by S3_FILE_UPLOAD_BUCKET unless it
    # already exists. Does nothing when USE_S3 is not switched on.
    task create: :environment do
      # ENV values are strings and every string is truthy in Ruby, so
      # USE_S3=false used to enable this task. Compare the text instead.
      s3_enabled = %w[1 true yes on].include?(ENV.fetch('USE_S3', 'false').strip.downcase)
      next unless s3_enabled

      bucket_name = ENV.fetch('S3_FILE_UPLOAD_BUCKET', '').strip
      raise ArgumentError, 'S3_FILE_UPLOAD_BUCKET must be set when USE_S3 is enabled' if bucket_name.empty?

      s3 = S3StorageService.new
      s3.init_client
      s3.create_bucket(bucket_name) unless s3.bucket_exists?(bucket_name)
    end
  end
end