Compare revisions: researchdata/rdms
Commits on Source (33)
Showing with 835 additions and 110 deletions
......@@ -100,3 +100,18 @@ SMTP_PORT=
# Host used for generating URLs
APP_HOST=localhost:3000
# S3 settings
USE_S3=false
S3_ENDPOINT=https://s3.location.example.com
S3_ACCESS_KEY=
S3_SECRET_KEY=
S3_REGION=
S3_FILE_UPLOAD_BUCKET=
# RDMS collections
CRC_1280_COLLECTION=CRC 1280
# File uploads
MAX_FILES=1000
MAX_FILE_SIZE=2.gigabytes
......@@ -103,3 +103,18 @@ SMTP_PORT=
# Host used for generating URLs
APP_HOST=localhost:3000
# S3 settings
USE_S3=false
S3_ENDPOINT=
S3_ACCESS_KEY=
S3_SECRET_KEY=
S3_REGION=
S3_FILE_UPLOAD_BUCKET=
# RDMS collections
CRC_1280_COLLECTION=CRC 1280
# File uploads
MAX_FILES=100
MAX_FILE_SIZE=500.megabytes
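
The USE_S3 and S3_* settings above configure the S3StorageService that later hunks call (init_client, bucket_exists?, create_bucket, move_object, add_content). The service itself is not part of this diff, so the following is only a minimal sketch of what it might look like, assuming the aws-sdk-s3 gem added below and that uploads initially land in S3_FILE_UPLOAD_BUCKET.

# app/services/s3_storage_service.rb -- hypothetical sketch, not from this changeset
class S3StorageService
  attr_reader :client

  # Build an S3 client from the environment settings introduced above.
  def init_client
    @client = Aws::S3::Client.new(
      endpoint: ENV['S3_ENDPOINT'],
      access_key_id: ENV['S3_ACCESS_KEY'],
      secret_access_key: ENV['S3_SECRET_KEY'],
      region: ENV['S3_REGION'],
      force_path_style: true # commonly required for non-AWS S3 endpoints
    )
  end

  def bucket_exists?(name)
    client.head_bucket(bucket: name)
    true
  rescue Aws::S3::Errors::NotFound
    false
  end

  def create_bucket(name)
    client.create_bucket(bucket: name)
  end

  # Write a string body as an object, as used by CrcDataset#save_work_meta_json_file_to_s3.
  def add_content(bucket, key, body)
    client.put_object(bucket: bucket, key: key, body: body)
  end

  # Copy-then-delete "move" from the upload bucket into a work bucket. The
  # trailing options hash is accepted to match the call sites but unused here.
  def move_object(source_key, target_bucket, target_key, _options = {})
    client.copy_object(bucket: target_bucket,
                       key: target_key,
                       copy_source: "#{ENV['S3_FILE_UPLOAD_BUCKET']}/#{source_key}")
    client.delete_object(bucket: ENV['S3_FILE_UPLOAD_BUCKET'], key: source_key)
  end
end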
......@@ -40,7 +40,7 @@ services:
networks:
internal:
volumes:
- solr:/opt/solr/server/solr/mycores
- solr:/var/solr
- ./hyrax/solr/conf:/opt/solr/solr_conf
command:
- sh
......
......@@ -16,7 +16,8 @@ gem 'uglifier', '>= 1.3.0'
# See https://github.com/rails/execjs#readme for more supported runtimes
# gem 'mini_racer', platforms: :ruby
gem 'therubyracer'
# Use carrierwave-aws to upload files to AWS S3
gem 'carrierwave-aws'
# Use CoffeeScript for .coffee assets and views
gem 'coffee-rails', '~> 4.2'
# Turbolinks makes navigating your web application faster. Read more: https://github.com/turbolinks/turbolinks
......@@ -90,3 +91,4 @@ gem 'riiif', '~> 2.3'
gem 'rinku'
gem 'coveralls', require: false
gem 'database_cleaner'
gem 'aws-sdk-s3', '~> 1'
......@@ -177,6 +177,9 @@ GEM
activesupport (>= 4.0.0)
mime-types (>= 1.16)
ssrf_filter (~> 1.0)
carrierwave-aws (1.4.0)
aws-sdk-s3 (~> 1.0)
carrierwave (>= 0.7, < 2.1)
childprocess (4.1.0)
chromedriver-helper (2.1.1)
archive-zip (~> 0.10)
......@@ -983,12 +986,14 @@ PLATFORMS
ruby
DEPENDENCIES
aws-sdk-s3 (~> 1)
bootsnap (>= 1.1.0)
bootstrap-datepicker-rails
bootstrap-sass (~> 3.0)
bulkrax
byebug
capybara (>= 2.15)
carrierwave-aws
chromedriver-helper
coffee-rails (~> 4.2)
coveralls
......
# frozen_string_literal: true
module Hyrax
module Actors
# Actions for a file identified by file_set and relation (maps to use predicate)
# @note Spawns asynchronous jobs
class FileActor
attr_reader :file_set, :relation, :user, :use_valkyrie
# @param [FileSet] file_set the parent FileSet
# @param [Symbol, #to_sym] relation the type/use for the file
# @param [User] user the user to record as the Agent acting upon the file
def initialize(file_set, relation, user)
@file_set = file_set
@user = user
# Only the ActiveFedora code path is supported here, so use_valkyrie,
# which is read by normalize_relation, is pinned to false.
@use_valkyrie = false
@relation = normalize_relation(relation)
end
# Persists file as part of file_set and spawns async job to characterize and create derivatives.
# @param [JobIoWrapper] io the file to save in the repository, with mime_type and original_name
# @return [CharacterizeJob, FalseClass] spawned job on success, false on failure
# @note Instead of calling this method, use IngestJob to avoid synchronous execution cost
# @see IngestJob
# @todo create a job to monitor the temp directory (or in a multi-worker system, directories!) to prune old files that have made it into the repo
def ingest_file(io)
io.uploaded_file.update(file_set_uri: file_set.uri.to_s) if io.uploaded_file.file_set_uri.blank?
Hydra::Works::AddFileToFileSet.call(file_set,
alias_file(io, file_set),
relation,
versioning: false)
return false unless file_set.save
repository_file = related_file
create_version(repository_file, user)
set_characterization_data(file_set)
end
# Reverts file and spawns async job to characterize and create derivatives.
# @param [String] revision_id
# @return [CharacterizeJob, FalseClass] spawned job on success, false on failure
def revert_to(revision_id)
repository_file = related_file
repository_file.restore_version(revision_id)
return false unless file_set.save
create_version(repository_file, user)
CharacterizeJob.perform_later(file_set, repository_file.id)
end
# @note FileSet comparison is limited to IDs, but this should be sufficient, given that
# most operations here are on the other side of async retrieval in Jobs (based solely on ID).
def ==(other)
return false unless other.is_a?(self.class)
file_set.id == other.file_set.id && relation == other.relation && user == other.user
end
private
##
# Wraps the versioning service with error handling. If the service's
# create handler isn't implemented, we want to accept that quietly here.
def create_version(content, user)
Hyrax::VersioningService.create(content, user)
rescue NotImplementedError
:no_op
end
##
# Sets the characterization data from values captured at upload time
# instead of running the actual characterization service.
def set_characterization_data(file_set)
s3_file_url = file_set.characterization_proxy.content
path = URI.parse(Addressable::URI.unencode(s3_file_url)).path
file_name = path.split('/')[-1]
uploaded_file_id = path.split('/')[-2]
uploaded_file = Hyrax::UploadedFile.find(uploaded_file_id)
file_set.characterization_proxy.file_name = file_name
file_set.characterization_proxy.original_name = file_name
file_set.characterization_proxy.mime_type = uploaded_file.characterization_data['content_type']
file_set.characterization_proxy.file_size = uploaded_file.characterization_data['file_size']
file_set.characterization_proxy.original_checksum = uploaded_file.characterization_data['original_checksum']
file_set.characterization_proxy.format_label = []
if file_set.image?
file_set.characterization_proxy.height = uploaded_file.characterization_data['height']
file_set.characterization_proxy.width = uploaded_file.characterization_data['width']
file_set.characterization_proxy.alpha_channels = channels(s3_file_url)
end
move_s3_object_to_work_bucket(uploaded_file, file_set)
file_set.characterization_proxy.save!
file_set.label = file_set.characterization_proxy.original_name
file_set.save!
end
##
# @return [Hydra::PCDM::File] the file referenced by relation
def related_file
file_set.public_send(normalize_relation(relation)) || raise("No #{relation} returned for FileSet #{file_set.id}")
end
def normalize_relation(relation)
use_valkyrie ? normalize_relation_for_valkyrie(relation) : normalize_relation_for_active_fedora(relation)
end
def normalize_relation_for_active_fedora(relation)
return relation.to_sym if relation.respond_to? :to_sym
case relation
when Hyrax::FileMetadata::Use::ORIGINAL_FILE
:original_file
when Hyrax::FileMetadata::Use::EXTRACTED_TEXT
:extracted_file
when Hyrax::FileMetadata::Use::THUMBNAIL
:thumbnail_file
else
:original_file
end
end
##
# @return [RDF::URI]
def normalize_relation_for_valkyrie(relation)
return relation if relation.is_a?(RDF::URI)
Hyrax::FileMetadata::Use.uri_for(use: relation.to_sym)
rescue ArgumentError
Hyrax::FileMetadata::Use::ORIGINAL_FILE
end
def pathhint(io)
io.uploaded_file&.uploader&.path || io.path
end
# Creates a small local "alias" file whose content is the S3 URL of the
# upload; this pointer is what gets ingested instead of the actual bytes.
def alias_file(io, file_set)
alias_folder_path = File.join(Rails.root, 'tmp/s3_alias_files', file_set.id)
FileUtils.mkdir_p(alias_folder_path)
alias_file = File.new("#{alias_folder_path}/#{io.original_name}", 'w+')
File.open(alias_file, 'w+') { |file| file.write(io.uploaded_file.file.url) }
alias_file
end
def move_s3_object_to_work_bucket(uploaded_file, file_set)
repository_file = file_set.characterization_proxy
s3 = S3StorageService.new
s3.init_client
source_object_key = uploaded_file.file.path
# Each work gets its own bucket, named after the parent work's id.
target_bucket_name = file_set.parent_works.first.id
target_bucket = Aws::S3::Bucket.new(target_bucket_name)
s3.create_bucket(target_bucket_name) unless s3.bucket_exists?(target_bucket_name)
target_object_key = "/#{repository_file.id}/#{file_set.title.first}"
s3.move_object(source_object_key, target_bucket_name, target_object_key, { uploaded_file_path: source_object_key })
# target_object is only needed by the commented-out call below.
target_object = target_bucket.object(target_object_key)
# change_repository_file_content(uploaded_file, repository_file, target_object)
end
def change_repository_file_content(uploaded_file, repository_file, target_object)
uploaded_file.file.file.file = target_object
repository_file.content = uploaded_file.file_url
end
def clear_metadata(file_set)
# The characterization of additional file versions adds new height/width/size/checksum values to un-orderable...
# `ActiveTriples::Relation` fields on `original_file`. Values from those are then randomly pulled into Solr...
# fields which may have scalar or vector cardinality. So for height/width you get two scalar values pulled from...
# "randomized parallel arrays". Upshot is to reset all of these before (re)characterization to stop the mayhem.
file_set.characterization_proxy.height = []
file_set.characterization_proxy.width = []
file_set.characterization_proxy.original_checksum = []
file_set.characterization_proxy.file_size = []
file_set.characterization_proxy.format_label = []
end
def channels(filepath)
ch = MiniMagick::Tool::Identify.new do |cmd|
cmd.format '%[channels]'
cmd << filepath
end
[ch]
end
end
end
end
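
Note the combined effect of alias_file and set_characterization_data above: Fedora never receives the payload, only a tiny file whose content is the upload's S3 URL, which characterization then parses back apart. Illustratively (URL shape hypothetical):

# After ingest, the repository file holds a pointer, not the bytes:
file_set.original_file.content
# => "https://s3.location.example.com/uploads/123/scan.nii.gz"
# set_characterization_data then recovers, from the URL path:
#   uploaded_file_id = "123"          # second-to-last path segment
#   file_name        = "scan.nii.gz"  # last path segment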
# frozen_string_literal: true
module Hyrax
module Actors
# Actions are decoupled from controller logic so that they may be called from a controller or a background job.
class FileSetActor # rubocop:disable Metrics/ClassLength
include Lockable
attr_reader :file_set, :user, :attributes
def initialize(file_set, user)
@file_set = file_set
@user = user
end
# @!group Asynchronous Operations
# Spawns asynchronous IngestJob unless ingesting from URL
# Called from FileSetsController, AttachFilesToWorkJob, IngestLocalFileJob, ImportUrlJob
# @param [Hyrax::UploadedFile, File] file the file uploaded by the user
# @param [Symbol, #to_s] relation
# @return [IngestJob, FalseClass] false on failure, otherwise the queued job
def create_content(file, relation = :original_file, from_url: false)
# If the file set doesn't have a title or label assigned, set a default.
file_set.label ||= label_for(file)
file_set.title = [file_set.label] if file_set.title.blank?
@file_set = perform_save(file_set)
return false unless file_set
if from_url
# If ingesting from URL, don't spawn an IngestJob; instead
# reach into the FileActor and run the ingest with the file instance in
# hand. Do this because we don't have the underlying UploadedFile instance
file_actor = build_file_actor(relation)
file_actor.ingest_file(wrapper!(file: file, relation: relation))
parent = parent_for(file_set: file_set)
VisibilityCopyJob.perform_later(parent)
InheritPermissionsJob.perform_later(parent)
else
IngestJob.perform_later(wrapper!(file: file, relation: relation))
end
end
# Spawns asynchronous IngestJob with user notification afterward
# @param [Hyrax::UploadedFile, File, ActionDigest::HTTP::UploadedFile] file the file uploaded by the user
# @param [Symbol, #to_s] relation
# @return [IngestJob] the queued job
def update_content(file, relation = :original_file)
IngestJob.perform_later(wrapper!(file: file, relation: relation), notification: true)
end
# @!endgroup
# Adds the appropriate metadata, visibility and relationships to file_set
# @note In past versions of Hyrax this method did not perform a save because it is mainly used in conjunction with
# create_content, which also performs a save. However, due to the relationship between Hydra::PCDM objects,
# we have to save both the parent work and the file_set in order to record the "metadata" relationship between them.
# @param [Hash] file_set_params specifying the visibility, lease and/or embargo of the file set.
# Without visibility, embargo_release_date or lease_expiration_date, visibility will be copied from the parent.
def create_metadata(file_set_params = {})
file_set.depositor = depositor_id(user)
now = TimeService.time_in_utc
file_set.date_uploaded = now
file_set.date_modified = now
file_set.creator = [user.user_key]
if assign_visibility?(file_set_params)
env = Actors::Environment.new(file_set, ability, file_set_params)
CurationConcern.file_set_create_actor.create(env)
end
yield(file_set) if block_given?
end
# Locks to ensure that only one process is operating on the list at a time.
def attach_to_work(work, file_set_params = {})
acquire_lock_for(work.id) do
# Ensure we have an up-to-date copy of the members association, so that we append to the end of the list.
work.reload unless work.new_record?
file_set.visibility = work.visibility unless assign_visibility?(file_set_params)
work.ordered_members << file_set
work.representative = file_set if work.representative_id.blank?
work.thumbnail = file_set if work.thumbnail_id.blank?
# Save the work so the association between the work and the file_set is persisted (head_id)
# NOTE: the work may not be valid, in which case this save doesn't do anything.
work.save
Hyrax.config.callback.run(:after_create_fileset, file_set, user, warn: false)
end
end
# @param [String] revision_id the revision to revert to
# @param [Symbol, #to_sym] relation
# @return [Boolean] true on success, false otherwise
def revert_content(revision_id, relation = :original_file)
return false unless build_file_actor(relation).revert_to(revision_id)
Hyrax.config.callback.run(:after_revert_content, file_set, user, revision_id, warn: false)
true
end
def update_metadata(attributes)
env = Actors::Environment.new(file_set, ability, attributes)
CurationConcern.file_set_update_actor.update(env)
end
def destroy
unlink_from_work
file_set.destroy
Hyrax.config.callback.run(:after_destroy, file_set.id, user, warn: false)
end
class_attribute :file_actor_class
self.file_actor_class = Hyrax::Actors::FileActor
private
def ability
@ability ||= ::Ability.new(user)
end
# @param file_set [FileSet]
# @return [ActiveFedora::Base]
def parent_for(file_set:)
file_set.parent
end
def build_file_actor(relation)
file_actor_class.new(file_set, relation, user)
end
# uses create! because object must be persisted to serialize for jobs
def wrapper!(file:, relation:)
JobIoWrapper.create_with_varied_file_handling!(user: user, file: file, relation: relation, file_set: file_set)
end
# For the label, use the original_filename or original_name if it's there.
# If the file was imported via URL, parse the original filename.
# If all else fails, use the basename of the file where it sits.
# @note This is only useful for labeling the file_set, because of the recourse to import_url
def label_for(file)
if file.is_a?(Hyrax::UploadedFile) # filename not present for uncached remote file!
file.uploader.filename.presence || File.basename(URI.parse(Addressable::URI.unencode(file.file_url)).path)
elsif file.respond_to?(:original_name) # e.g. Hydra::Derivatives::IoDecorator
file.original_name
elsif file_set.import_url.present?
# This path is taken when file is a Tempfile (e.g. from ImportUrlJob)
File.basename(Addressable::URI.unencode(file.file_url))
elsif file.respond_to?(:original_filename) # e.g. Rack::Test::UploadedFile
file.original_filename
else
File.basename(file)
end
end
def assign_visibility?(file_set_params = {})
!((file_set_params || {}).keys.map(&:to_s) & %w[visibility embargo_release_date lease_expiration_date]).empty?
end
# replaces file_set.apply_depositor_metadata(user) from hydra-access-controls so the depositor doesn't automatically get edit access
def depositor_id(depositor)
depositor.respond_to?(:user_key) ? depositor.user_key : depositor
end
# Must clear the fileset from the thumbnail_id, representative_id and rendering_ids fields on the work
# and force it to be re-solrized.
# Although ActiveFedora clears the children nodes it leaves those fields in Solr populated.
# rubocop:disable Metrics/CyclomaticComplexity
def unlink_from_work
work = parent_for(file_set: file_set)
return unless work && (work.thumbnail_id == file_set.id || work.representative_id == file_set.id || work.rendering_ids.include?(file_set.id))
work.thumbnail = nil if work.thumbnail_id == file_set.id
work.representative = nil if work.representative_id == file_set.id
work.rendering_ids -= [file_set.id]
work.save!
end
def perform_save(object)
object.save
object
end
end
end
end
\ No newline at end of file
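
For orientation, a hypothetical driver (not part of this changeset; current_user, work and uploaded_file are assumed to be in scope) exercising the two ingest paths exposed above:

file_set = FileSet.create
actor = Hyrax::Actors::FileSetActor.new(file_set, current_user)
actor.create_metadata(visibility: 'open')
actor.attach_to_work(work)
# Browser upload: spawns an asynchronous IngestJob
actor.create_content(uploaded_file)
# URL import: ingests synchronously through FileActor instead of queueing a job
# actor.create_content(tempfile, from_url: true)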
......@@ -170,4 +170,12 @@ form .field-wrapper label[required="required"]::after {
.sub-collections-wrapper,
.works-wrapper {
margin-bottom: 10px;
}
.float-right {
float: right;
}
.work_description {
text-align: justify;
}
\ No newline at end of file
......@@ -19,41 +19,40 @@ module Hyrax
# Adding all fields in order of display in form
:parent_work_id,
:crc_work_type,
:doi,
:title,
:alternative_title,
:complex_person,
:abstract,
:coverage,
:description,
:complex_identifier,
:complex_date,
:modality,
:complex_subject,
:approval_number,
:keyword,
:crc_resource_type,
:experiment_title,
:experiment_description,
:modality,
:complex_subject,
:approval_number,
:complex_identifier,
:complex_date,
:subject,
:publisher,
:language,
:complex_funding_reference,
:extra_information,
:description,
:keyword,
:resource_type,
:doi,
:complex_relation,
:software_version,
:extra_information
]
self.required_fields -= [
# Fields we are not interested in
:creator, :rights_statement,
:creator, :rights_statement, :abstract, :keyword, :modality,
# Fields we are interested in, but removed here so they can be re-ordered
:title
]
self.required_fields += [
# Adding all required fields in order of display in form
:title, :complex_person, :abstract, :keyword, :crc_resource_type, :modality, :coverage, :license
:title, :complex_person, :crc_resource_type, :coverage, :license
]
protected
......
......@@ -20,4 +20,29 @@ module HyraxHelper
'experiment'
end
end
def s3_file_download_url_for_file_set(file_set_id)
file_set = FileSet.find(file_set_id)
bucket = Aws::S3::Resource.new(region: ENV['S3_REGION']).bucket(file_set.parent_works.first.id)
begin
latest_version_uri = file_set.latest_content_version.uri
file_key = "/#{file_set_id}/files/#{latest_version_uri.split("/")[-3]}/#{file_set.title.first}"
rescue StandardError
begin
# Fall back to the first file's URI when there is no content version yet.
latest_version_uri = file_set.files[0].uri.to_s
file_key = "/#{file_set_id}/files/#{latest_version_uri.split("/")[-1]}/#{file_set.title.first}"
rescue StandardError
return nil
end
end
url_options = {
expires_in: 60.minutes.to_i,
response_content_disposition: "attachment; filename=\"#{file_key}\""
}
object = bucket.object(file_key)
object.exists? ? object.presigned_url(:get, url_options).to_s : nil
end
end
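
A hypothetical view usage of this helper (partial name and CSS classes are assumptions; the helper returns nil when the object cannot be resolved):

<%# e.g. in a file_set partial %>
<% if (url = s3_file_download_url_for_file_set(file_set.id)) %>
  <%= link_to 'Download', url, class: 'btn btn-default float-right' %>
<% end %>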
......@@ -67,19 +67,11 @@ module ComplexField
# solr fields that will be treated as facets
fields = []
# change all dates to years
fields << Solrizer.solr_name('complex_year_accepted', :facetable)
fields << Solrizer.solr_name('complex_year_available', :facetable)
fields << Solrizer.solr_name('complex_year_copyrighted', :facetable)
fields << Solrizer.solr_name('complex_year_collected', :facetable)
fields << Solrizer.solr_name('complex_year_created', :facetable)
fields << Solrizer.solr_name('complex_year_issued', :facetable)
fields << Solrizer.solr_name('complex_year_published', :facetable)
fields << Solrizer.solr_name('complex_year_submitted', :facetable)
fields << Solrizer.solr_name('complex_year_updated', :facetable)
fields << Solrizer.solr_name('complex_year_valid', :facetable)
fields << Solrizer.solr_name('complex_year_processed', :facetable)
fields << Solrizer.solr_name('complex_year_purchased', :facetable)
fields << Solrizer.solr_name('complex_year_other', :facetable)
date_options = DateService.new.select_all_options
date_options.each do |d|
fields << Solrizer.solr_name("complex_date_#{d[0].downcase.tr(' ', '_')}", :dateable)
# fields << Solrizer.solr_name("complex_year_#{d[0].downcase.tr(' ', '_')}", :facetable)
end
fields
end
......@@ -94,19 +86,10 @@ module ComplexField
def self.date_show_fields
# solr fields that will be used to display results on the record page
fields = []
fields << Solrizer.solr_name('complex_date_accepted', :displayable)
fields << Solrizer.solr_name('complex_date_available', :displayable)
fields << Solrizer.solr_name('complex_date_copyrighted', :displayable)
fields << Solrizer.solr_name('complex_date_collected', :displayable)
fields << Solrizer.solr_name('complex_date_created', :displayable)
fields << Solrizer.solr_name('complex_date_issued', :displayable)
fields << Solrizer.solr_name('complex_date_published', :displayable)
fields << Solrizer.solr_name('complex_date_submitted', :displayable)
fields << Solrizer.solr_name('complex_date_updated', :displayable)
fields << Solrizer.solr_name('complex_date_valid', :displayable)
fields << Solrizer.solr_name('complex_date_processed', :displayable)
fields << Solrizer.solr_name('complex_date_purchased', :displayable)
fields << Solrizer.solr_name('complex_date_other', :displayable)
date_options = DateService.new.select_all_options
date_options.each do |d|
fields << Solrizer.solr_name("complex_date_#{d[0].downcase.tr(' ', '_')}", :displayable)
end
fields
end
end
......
......@@ -41,12 +41,6 @@ module ComplexField
solr_doc[fld_name] = [] unless solr_doc.include?(fld_name)
solr_doc[fld_name] << person_name
solr_doc[fld_name].flatten!
# identifier
fld_name = Solrizer.solr_name('complex_person_identifier', :symbol)
vals = c.orcid.reject(&:blank?)
solr_doc[fld_name] = [] unless solr_doc.include?(fld_name)
solr_doc[fld_name] << vals
solr_doc[fld_name] = solr_doc[fld_name].flatten.uniq
# affiliation
vals = c.affiliation.reject(&:blank?)
fld_name = Solrizer.solr_name('complex_person_affiliation', :stored_searchable)
......@@ -63,14 +57,10 @@ module ComplexField
def self.person_facet_fields
# solr fields that will be treated as facets
fields = []
fields << Solrizer.solr_name('complex_person_other', :facetable)
fields << Solrizer.solr_name('complex_person_author', :facetable)
fields << Solrizer.solr_name('complex_person_editor', :facetable)
fields << Solrizer.solr_name('complex_person_translator', :facetable)
fields << Solrizer.solr_name('complex_person_data_depositor', :facetable)
fields << Solrizer.solr_name('complex_person_data_curator', :facetable)
fields << Solrizer.solr_name('complex_person_operator', :facetable)
fields << Solrizer.solr_name('complex_person_contact_person', :facetable)
roles = RoleService.new.select_all_options
roles.each do |r|
fields << Solrizer.solr_name("complex_person_#{r[1].downcase.tr(' ', '_')}", :facetable)
end
fields << Solrizer.solr_name('complex_person_affiliation', :facetable)
fields
end
......
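
Both date refactors above, and the RoleService-based one, replace hardcoded Solr field lists with authority-driven ones. Neither service appears in this diff. A minimal sketch, assuming they follow Hyrax::QaSelectService conventions in which select_all_options returns [label, id] pairs (the date code reads d[0], the label; the role code reads r[1], the id):

# Hypothetical sketches, assuming QA authority files under config/authorities/.
class DateService < Hyrax::QaSelectService
  def initialize
    super('dates')
  end

  # Resolve a human-readable label to its term, as the Bulkrax mapping
  # code below expects; indifferent access lets callers use typ['id'].
  def find_by_label(label)
    authority.all.find { |term| term[:label] == label }&.with_indifferent_access
  end
end

class RoleService < Hyrax::QaSelectService
  def initialize
    super('roles')
  end
end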
......@@ -20,6 +20,7 @@ class CrcDatasetIndexer < RdmsIndexer
def self.facet_fields
# solr fields that will be treated as facets
super.tap do |fields|
fields << Solrizer.solr_name('crc_work_type', :facetable)
fields << Solrizer.solr_name('modality', :facetable)
fields << Solrizer.solr_name('crc_resource_type', :facetable)
fields << Solrizer.solr_name('coverage', :facetable)
......@@ -34,6 +35,7 @@ class CrcDatasetIndexer < RdmsIndexer
def self.search_fields
# solr fields that will be used for a search
super.tap do |fields|
fields << Solrizer.solr_name('crc_work_type', :stored_searchable)
fields << Solrizer.solr_name('modality', :stored_searchable)
fields << Solrizer.solr_name('crc_resource_type', :stored_searchable)
fields << Solrizer.solr_name('coverage', :stored_searchable)
......@@ -48,6 +50,7 @@ class CrcDatasetIndexer < RdmsIndexer
def self.show_fields
# solr fields that will be used to display results on the record page
super.tap do |fields|
fields << Solrizer.solr_name('crc_work_type', :stored_searchable)
fields << Solrizer.solr_name('modality', :stored_searchable)
fields << Solrizer.solr_name('crc_resource_type', :stored_searchable)
fields << Solrizer.solr_name('coverage', :stored_searchable)
......
......@@ -17,54 +17,54 @@ protected
parent_attribute = name_for(attribute_name, index, '', parent)[0..-5]
# --- last_name
field = :last_name
# --- name
field = :name
field_name = name_for(attribute_name, index, field, parent)
field_id = id_for(attribute_name, index, field, parent)
field_value = value.send(field).first
out << "<div class='row'>"
out << " <div class='col-md-3'>"
out << template.label_tag(field_name, I18n.t('rdms.fields.last_name'), required: required)
out << template.label_tag(field_name, I18n.t('rdms.fields.full_name'), required: required)
out << ' </div>'
out << " <div class='col-md-9'>"
out << @builder.text_field(field_name,
options.merge(value: field_value, name: field_name, id: field_id, required: required, placeholder: "Alphabets"))
options.merge(value: field_value, name: field_name, id: field_id, required: required, placeholder: "SURNAME, Given Names"))
out << ' </div>'
out << '</div>' # row
# --- first_name
field = :first_name
# --- last_name
field = :last_name
field_name = name_for(attribute_name, index, field, parent)
field_id = id_for(attribute_name, index, field, parent)
field_value = value.send(field).first
out << "<div class='row'>"
out << " <div class='col-md-3'>"
out << template.label_tag(field_name, I18n.t('rdms.fields.first_name'), required: required)
out << template.label_tag(field_name, I18n.t('rdms.fields.last_name'), required: false)
out << ' </div>'
out << " <div class='col-md-9'>"
out << @builder.text_field(field_name,
options.merge(value: field_value, name: field_name, id: field_id, required: required, placeholder: "Alphabets"))
options.merge(value: field_value, name: field_name, id: field_id, required: false, placeholder: "Alphabets"))
out << ' </div>'
out << '</div>' # row
# --- name
field = :name
# --- first_name
field = :first_name
field_name = name_for(attribute_name, index, field, parent)
field_id = id_for(attribute_name, index, field, parent)
field_value = value.send(field).first
out << "<div class='row'>"
out << " <div class='col-md-3'>"
out << template.label_tag(field_name, I18n.t('rdms.fields.full_name'), required: required)
out << template.label_tag(field_name, I18n.t('rdms.fields.first_name'), required: false)
out << ' </div>'
out << " <div class='col-md-9'>"
out << @builder.text_field(field_name,
options.merge(value: field_value, name: field_name, id: field_id, required: required, placeholder: "SURNAME, Given Names"))
options.merge(value: field_value, name: field_name, id: field_id, required: false, placeholder: "Alphabets"))
out << ' </div>'
out << '</div>' # row
......@@ -94,12 +94,12 @@ protected
out << "<div class='row'>"
out << " <div class='col-md-3'>"
out << template.label_tag(field_name, field.to_s.humanize, required: required)
out << template.label_tag(field_name, field.to_s.humanize, required: false)
out << ' </div>'
out << " <div class='col-md-9'>"
out << @builder.text_field(field_name,
options.merge(value: field_value, name: field_name, id: field_id, required: required, placeholder: "https://orcid.org/0000-0000-0000-0000"))
options.merge(value: field_value, name: field_name, id: field_id, required: false, placeholder: "https://orcid.org/0000-0000-0000-0000"))
out << ' </div>'
out << '</div>' # row
......@@ -111,12 +111,12 @@ protected
out << "<div class='row'>"
out << " <div class='col-md-3'>"
out << template.label_tag(field_name, field.to_s.humanize, required: required)
out << template.label_tag(field_name, field.to_s.humanize, required: false)
out << ' </div>'
out << " <div class='col-md-9'>"
out << @builder.text_field(field_name,
options.merge(value: field_value, name: field_name, id: field_id, required: required, placeholder: "affiliation"))
options.merge(value: field_value, name: field_name, id: field_id, required: false, placeholder: "affiliation"))
out << ' </div>'
out << '</div>' # row
......
# frozen_string_literal: true
# Converts UploadedFiles into FileSets and attaches them to works.
class AttachFilesToWorkJob < Hyrax::ApplicationJob
queue_as Hyrax.config.ingest_queue_name
# @param [ActiveFedora::Base] work - the work object
# @param [Array<Hyrax::UploadedFile>] uploaded_files - an array of files to attach
def perform(work, uploaded_files, **work_attributes)
case work
when ActiveFedora::Base
perform_af(work, uploaded_files, work_attributes)
else
Hyrax::WorkUploadsHandler.new(work: work).add(files: uploaded_files).attach ||
raise("Could not complete AttachFilesToWorkJob. Some of these are probably in an undesirable state: #{uploaded_files}")
end
end
private
def perform_af(work, uploaded_files, work_attributes)
validate_files!(uploaded_files)
depositor = proxy_or_depositor(work)
user = User.find_by_user_key(depositor)
work, work_permissions = create_permissions work, depositor
uploaded_files.each do |uploaded_file|
next if uploaded_file.file_set_uri.present?
attach_work(user, work, work_attributes, work_permissions, uploaded_file)
end
end
def attach_work(user, work, work_attributes, work_permissions, uploaded_file)
actor = Hyrax::Actors::FileSetActor.new(FileSet.create, user)
file_set_attributes = file_set_attrs(work_attributes, uploaded_file)
metadata = visibility_attributes(work_attributes, file_set_attributes)
uploaded_file.add_file_set!(actor.file_set)
actor.file_set.permissions_attributes = work_permissions
actor.create_metadata(metadata)
actor.attach_to_work(work, metadata)
actor.create_content(uploaded_file)
end
def create_permissions(work, depositor)
work.edit_users += [depositor]
work.edit_users = work.edit_users.dup
work_permissions = work.permissions.map(&:to_hash)
[work, work_permissions]
end
# The attributes used for visibility - sent as initial params to created FileSets.
def visibility_attributes(attributes, file_set_attributes)
attributes.merge(file_set_attributes).slice(:visibility, :visibility_during_lease,
:visibility_after_lease, :lease_expiration_date,
:embargo_release_date, :visibility_during_embargo,
:visibility_after_embargo)
end
def file_set_attrs(attributes, uploaded_file)
attrs = Array(attributes[:file_set]).find { |fs| fs[:uploaded_file_id].present? && (fs[:uploaded_file_id].to_i == uploaded_file&.id) }
Hash(attrs).symbolize_keys
end
def validate_files!(uploaded_files)
uploaded_files.each do |uploaded_file|
next if uploaded_file.is_a? Hyrax::UploadedFile
raise ArgumentError, "Hyrax::UploadedFile required, but #{uploaded_file.class} received: #{uploaded_file.inspect}"
end
end
##
# A work with files attached by a proxy user will set the depositor as the intended user
# that the proxy was depositing on behalf of. See tickets #2764, #2902.
def proxy_or_depositor(work)
work.on_behalf_of.presence || work.depositor
end
end
\ No newline at end of file
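
For reference, the shape of work_attributes that visibility_attributes and file_set_attrs assume, inferred from the slicing above rather than from any documentation:

# work_attributes (illustrative):
# {
#   visibility: 'open',
#   file_set: [
#     { uploaded_file_id: '42', visibility: 'restricted' } # per-file overrides
#   ]
# }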
# frozen_string_literal: true
class IngestJob < Hyrax::ApplicationJob
queue_as Hyrax.config.ingest_queue_name
after_perform do |job|
# We want the lastmost Hash, if any.
opts = job.arguments.reverse.detect { |x| x.is_a? Hash } || {}
wrapper = job.arguments.first
ContentNewVersionEventJob.perform_later(wrapper.file_set, wrapper.user) if opts[:notification]
end
# @param [JobIoWrapper] wrapper
# @param [Boolean] notification send the user a notification, used in after_perform callback
# @see 'config/initializers/hyrax_callbacks.rb'
# rubocop:disable Lint/UnusedMethodArgument
def perform(wrapper, notification: false)
wrapper.ingest_file
end
end
\ No newline at end of file
# frozen_string_literal: true
module Bulkrax
module FileFactory
extend ActiveSupport::Concern
# Find existing files or upload new files. This assumes a Work will have unique file titles,
# and that those file titles will not have changed; filtering by URIs instead would also work (slower).
# When an uploaded_file already exists we do not want to pass its id in `file_attributes`
# otherwise it gets reuploaded by `work_actor`.
# support multiple files; ensure attributes[:file] is an Array
def upload_ids
return [] if klass == Collection
attributes[:file] = file_paths
import_files
end
def file_attributes(update_files = false)
@update_files = update_files
hash = {}
return hash if klass == Collection
hash[:uploaded_files] = upload_ids if attributes[:file].present?
hash[:remote_files] = new_remote_files if new_remote_files.present?
hash
end
# It's possible to get just an array of strings here, so we need to make sure they are all hashes
def parsed_remote_files
return @parsed_remote_files if @parsed_remote_files.present?
@parsed_remote_files = attributes[:remote_files] || []
@parsed_remote_files = @parsed_remote_files.map do |file_value|
if file_value.is_a?(Hash)
file_value
elsif file_value.is_a?(String)
name = Bulkrax::Importer.safe_uri_filename(file_value)
{ url: file_value, file_name: name }
else
Rails.logger.error("skipped remote file #{file_value} because we do not recognize the type")
nil
end
end
@parsed_remote_files.delete(nil)
@parsed_remote_files
end
def new_remote_files
return if object.is_a? FileSet
@new_remote_files ||= if object.present? && object.file_sets.present?
parsed_remote_files.select do |file|
# is the url valid?
is_valid = file[:url]&.match(URI::ABS_URI)
# does the file already exist
is_existing = object.file_sets.detect { |f| f.import_url && f.import_url == file[:url] }
is_valid && !is_existing
end
else
parsed_remote_files.select do |file|
file[:url]&.match(URI::ABS_URI)
end
end
end
def file_paths
@file_paths ||= Array.wrap(attributes[:file])&.select { |file| File.exist?(file) }
end
# Retrieve the original filenames for the files to be imported
def work_files_filenames
object.file_sets.map { |fn| fn.original_file.file_name.to_a }.flatten if object.present? && object.file_sets.present?
end
# Retrieve the filenames for the files to be imported
def import_files_filenames
file_paths.map { |f| f.split('/').last }
end
# Called if #replace_files is true
# Destroy all file_sets for this object
# Reload the object to ensure the remaining methods have the most up to date object
def destroy_existing_files
return unless object.present? && object.file_sets.present?
object.file_sets.each do |fs|
Hyrax::Actors::FileSetActor.new(fs, @user).destroy
end
@object = object.reload
log_deleted_fs(object)
end
def set_removed_filesets
local_file_sets.each do |fileset|
fileset.files.first.create_version
opts = {}
opts[:path] = fileset.files.first.id.split('/', 2).last
opts[:original_name] = 'removed.png'
opts[:mime_type] = 'image/png'
fileset.add_file(File.open(Bulkrax.removed_image_path), opts)
fileset.save
::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
end
end
def local_file_sets
@local_file_sets ||= ordered_file_sets
end
def ordered_file_sets
# OVERRIDE Hydra-works 1.2.0 - this method was deprecated in v1.0
object&.ordered_members.to_a.select(&:file_set?)
end
def import_files
paths = file_paths.map { |path| import_file(path) }.compact
set_removed_filesets if local_file_sets.present?
paths
end
def import_file(path)
u = Hyrax::UploadedFile.new
u.user_id = @user.id
carrierwave_file = CarrierWave::SanitizedFile.new(path)
carrierwave_file.content_type = Marcel::Magic.by_path(path).to_s # determine the content type from the file contents
u.file = carrierwave_file
update_filesets(u)
end
def update_filesets(current_file)
if @update_files && local_file_sets.present?
fileset = local_file_sets.shift
return nil if fileset.files.first.checksum.value == Digest::SHA1.file(current_file.file.path).to_s
fileset.files.first.create_version
opts = {}
opts[:path] = fileset.files.first.id.split('/', 2).last
opts[:original_name] = current_file.file.file.original_filename
opts[:mime_type] = current_file.file.content_type
fileset.add_file(File.open(current_file.file.to_s), opts)
fileset.save
::CreateDerivativesJob.set(wait: 1.minute).perform_later(fileset, fileset.files.first.id)
nil
else
current_file.save
current_file.id
end
end
end
end
\ No newline at end of file
......@@ -19,7 +19,6 @@ module Bulkrax::HasLocalProcessing
parsed_metadata['complex_date'] = complex_date
parsed_metadata['complex_subject'] = [complex_subject]
parsed_metadata['complex_funding_reference'] = [complex_funding_reference]
parsed_metadata['complex_relation'] = [complex_relation]
end
def map_multiple_subject
......@@ -27,7 +26,7 @@ module Bulkrax::HasLocalProcessing
end
def map_multiple_modality
parsed_metadata['modality'] = record['modality'].present? ? record['modality'].split(';') : []
parsed_metadata['modality'] = record['modality'].present? ? JSON.parse(record['modality']) : []
end
def collection_type
......@@ -41,6 +40,10 @@ module Bulkrax::HasLocalProcessing
next unless record[role.to_s].present?
full_names = record[role.to_s].split(';')
orcids = record.fetch("#{role}_orcid", '').split(';')
affiliations = record.fetch("#{role}_affilation", '').split(';')
# ignoring creator identifier scheme and contributor identifier scheme
_schemes = record.fetch("#{role}_scheme", '').split(';')
full_names.each_with_index do |full_name, index|
complex_person_object = ComplexPerson.new(RDF::Node.new, ActiveTriples::Resource.new)
......@@ -48,14 +51,8 @@ module Bulkrax::HasLocalProcessing
complex_person_object['last_name'], complex_person_object['first_name'] = full_name.split(',').map(&:strip)
complex_person_object['name'] = full_name
complex_person_object['role'] = role
%w[orcid affilation].each do |key|
value = record["#{role}_#{key}"]
next unless value.present? && value.split(';') == full_names.count
complex_person[key] = value.split(';')[index]
end
complex_person_object['orcid'] = orcids[index] if orcids.count == full_names.count
complex_person_object['affiliation'] = affiliations[index] if affiliations.count == full_names.count
complex_persons << complex_person_object
end
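
The parsing above implies per-role CSV columns like the following (values illustrative; note the code reads the misspelled _affilation column exactly as written):

# data_depositor:            DOE, Jane;ROE, Richard
# data_depositor_orcid:      0000-0001-2345-6789;0000-0002-3456-7890
# data_depositor_affilation: MPI CBS;Leipzig University
# data_depositor_scheme:     ORCID;ORCID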
......@@ -67,18 +64,13 @@ module Bulkrax::HasLocalProcessing
def complex_identifier
complex_identifiers = []
unless record['identifier'].present? &&
record['scheme'].present? &&
record['identifier'].split(';') == record['scheme'].split(';')
return complex_identifiers
end
return complex_identifiers unless record['group_identifier'].present?
record['identifier'].split(';').each_with_index do |_id, index|
complex_identifier_object = ComplexPerson.new(RDF::Node.new, ActiveTriples::Resource.new)
record['group_identifier'].split(';').each do |id|
complex_identifier_object = ComplexIdentifier.new(RDF::Node.new, ActiveTriples::Resource.new)
%w[scheme identifier].each do |attribute|
complex_identifier_object[attribute] = record[attribute.to_s].split(';')[index]
end
complex_identifier_object['scheme'] = 'group id'
complex_identifier_object['identifier'] = id
complex_identifiers << complex_identifier_object
end
......@@ -89,18 +81,31 @@ module Bulkrax::HasLocalProcessing
def complex_date
complex_dates = []
unless record['date_description'].present? &&
record['date'].present? &&
record['date_description'].split(';') == record['date'].split(';')
return complex_dates
# record_date
record_dates = record.fetch('record_date', '').split(';')
record_dates.each do |record_date|
complex_date_object = ComplexDate.new(RDF::Node.new, ActiveTriples::Resource.new)
complex_date_object['description'] = 'Recorded'
complex_date_object['date'] = record_date
complex_dates << complex_date_object
end
record['date_description'].split(';').each_with_index do |_date_type, index|
# Datacite date and date type
date_types = record.fetch('date_description', '').split(';')
dates = record.fetch('date', '').split(';')
return complex_dates unless date_types.count == dates.count
date_types.each_with_index do |date_type, index|
typ = DateService.new.find_by_label(date_type)
next unless typ.present?
complex_date_object = ComplexDate.new(RDF::Node.new, ActiveTriples::Resource.new)
%w[date_description date].each do |attribute|
complex_date_object[attribute] = record[attribute.to_s].split(';')[index]
end
complex_date_object['description'] = typ['id']
complex_date_object['date'] = dates[index]
complex_dates << complex_date_object
end
......@@ -111,7 +116,7 @@ module Bulkrax::HasLocalProcessing
def complex_subject
complex_subject_object = ComplexSubject.new(RDF::Node.new, ActiveTriples::Resource.new)
%w[subject_type subject_species subject_type subject_sex subject_sex subject_age].each do |attribute|
%w[subject_identifier subject_species subject_type subject_sex subject_age].each do |attribute|
complex_subject_object[attribute] = record[attribute]
end
......@@ -128,13 +133,4 @@ module Bulkrax::HasLocalProcessing
complex_funding_reference_object
end
def complex_relation
complex_relation_object = ComplexRelation.new(RDF::Node.new, ActiveTriples::Resource.new)
%w[title url relationship].each do |attribute|
complex_relation_object[attribute] = record[attribute]
end
complex_relation_object
end
end
......@@ -10,6 +10,8 @@ class CrcDataset < ActiveFedora::Base
# self.valid_child_concerns = []
validates :title, presence: { message: 'Your CRC dataset must have a title.' }
after_save :save_work_meta_json_file_to_s3
# ------ properties from core metadata ------
# property date_modified - not displayed (filled in by the system)
# property date_uploaded - not displayed (filled in by the system)
......@@ -76,7 +78,7 @@ class CrcDataset < ActiveFedora::Base
end
property :crc_work_type, predicate: ::RDF::Vocab::Rdms.workType, multiple: false do |index|
index.as :stored_searchable
index.as :stored_searchable, :facetable
end
property :crc_resource_type, predicate: ::RDF::Vocab::Rdms.crcResourceType, multiple: false do |index|
......@@ -124,4 +126,15 @@ class CrcDataset < ActiveFedora::Base
accepts_nested_attributes_for :complex_funding_reference, reject_if: :fundref_blank, allow_destroy: true
accepts_nested_attributes_for :complex_relation, reject_if: :relation_blank, allow_destroy: true
accepts_nested_attributes_for :complex_subject, reject_if: :all_blank, allow_destroy: true
private
def save_work_meta_json_file_to_s3
s3 = S3StorageService.new
s3.init_client
s3.create_bucket(id) unless s3.bucket_exists?(id)
s3.add_content(id, 'metadata.json', to_json)
end
end
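
Putting the S3 pieces together, each work's bucket ends up with a layout like the following (illustrative; note the leading slash that move_s3_object_to_work_bucket builds into the object key):

# <work_id>/
#   metadata.json                       written on every save by save_work_meta_json_file_to_s3
#   /<repository_file_id>/<file title>  moved in by FileActor#move_s3_object_to_work_bucket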
# frozen_string_literal: true
module Hyrax
##
# Store a file uploaded by a user.
#
# Eventually these files get attached to {FileSet}s and pushed into Fedora.
class UploadedFile < ActiveRecord::Base
self.table_name = 'uploaded_files'
mount_uploader :file, UploadedFileUploader
alias uploader file
has_many :job_io_wrappers,
inverse_of: 'uploaded_file',
class_name: 'JobIoWrapper',
dependent: :destroy
belongs_to :user, class_name: '::User'
before_create :set_characterization_data
##
# Associate a {FileSet} with this uploaded file.
#
# @param [Hyrax::Resource, ActiveFedora::Base] file_set
# @return [void]
def add_file_set!(file_set)
uri = case file_set
when ActiveFedora::Base
file_set.uri
when Hyrax::Resource
file_set.id
end
update!(file_set_uri: uri)
end
private
def set_characterization_data
if file.file.content_type.to_s.include?('image')
image = MiniMagick::Image.open(file.path)
self.characterization_data[:height] = image[:height]
self.characterization_data[:width] = image[:width]
end
self.characterization_data[:content_type] = Marcel::Magic.by_path(file.path).to_s
self.characterization_data[:file_size] = file.size
self.characterization_data[:original_checksum] = Digest::SHA1.file(file.path).to_s
end
end
end
\ No newline at end of file
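
UploadedFile reads and writes a characterization_data attribute that no migration in this changeset defines. A plausible backing column, assuming PostgreSQL (the definition here is an assumption, not part of the diff):

class AddCharacterizationDataToUploadedFiles < ActiveRecord::Migration[5.2]
  def change
    # jsonb round-trips the symbol-keyed writes here into the string-keyed
    # reads performed by FileActor#set_characterization_data.
    add_column :uploaded_files, :characterization_data, :jsonb, default: {}
  end
end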