diff --git a/hyrax/app/models/complex_modality.rb b/hyrax/app/models/complex_modality.rb index 69ced79beab6a8f701e3104b747d95bd7681e77f..b42f8c10e31368718d2ad4ceebb81a6308e5ee00 100644 --- a/hyrax/app/models/complex_modality.rb +++ b/hyrax/app/models/complex_modality.rb @@ -18,7 +18,7 @@ class ComplexModality < ActiveRecord::Base validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_modality.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported } before_validation :set_s3_folder_name - after_save :save_work_meta_json_file_to_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :handle_meta_file_on_s3 + after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 after_commit :fix_duplication_of_title, on: :create delegate :crc_dataset, to: :complex_session @@ -43,7 +43,7 @@ class ComplexModality < ActiveRecord::Base self.s3_folder_name = sanitize_title(modality_title) end - def save_work_meta_json_file_to_s3 + def save_metadata_as_json_in_s3 s3 = S3StorageService.new s3.init_client bucket_name = s3.sanitise_name(crc_dataset.id) diff --git a/hyrax/app/models/complex_session.rb b/hyrax/app/models/complex_session.rb index 964529d05eef054260b614014a8f7d0ec82cfa3e..abb6167d381a5c85c6cb2be034b4d9ee4c05f441 100644 --- a/hyrax/app/models/complex_session.rb +++ b/hyrax/app/models/complex_session.rb @@ -20,7 +20,7 @@ class ComplexSession < ActiveRecord::Base validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_session.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported } before_validation :set_s3_folder_name - after_save :save_work_meta_json_file_to_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :handle_meta_file_on_s3 + after_save 
:save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 after_commit :fix_duplication_of_title, on: :create delegate :crc_dataset, to: :complex_subject @@ -44,7 +44,7 @@ class ComplexSession < ActiveRecord::Base self.s3_folder_name = sanitize_title(session_title) end - def save_work_meta_json_file_to_s3 + def save_metadata_as_json_in_s3 s3 = S3StorageService.new s3.init_client bucket_name = s3.sanitise_name(crc_dataset.id) diff --git a/hyrax/app/models/complex_subject.rb b/hyrax/app/models/complex_subject.rb index ba4b817759c826ffef07069a77913783ae44bff3..268db968726c7d4e88d74a72a541927c4891bef3 100644 --- a/hyrax/app/models/complex_subject.rb +++ b/hyrax/app/models/complex_subject.rb @@ -19,7 +19,7 @@ class ComplexSubject < ActiveRecord::Base validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_subject.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported } before_validation :set_s3_folder_name - after_save :save_work_meta_json_file_to_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :handle_meta_file_on_s3 + after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 after_commit :fix_duplication_of_title, on: :create scope :sort_by_title, ->(source_identifier) { @@ -45,7 +45,7 @@ class ComplexSubject < ActiveRecord::Base self.s3_folder_name = sanitize_title(subject_title) end - def save_work_meta_json_file_to_s3 + def save_metadata_as_json_in_s3 s3 = S3StorageService.new s3.init_client bucket_name = s3.sanitise_name(crc_dataset.id) diff --git a/hyrax/app/models/concerns/external_services.rb b/hyrax/app/models/concerns/external_services.rb index 739c806ebfae9212c732d108045422f5cf5c4f97..000728177ae60345cd293fa762fb2ae007cb8fcc 100644 --- 
a/hyrax/app/models/concerns/external_services.rb +++ b/hyrax/app/models/concerns/external_services.rb @@ -40,7 +40,7 @@ module ExternalServices [_list_of_objects.count, size] end - def save_work_meta_json_file_to_s3 + def save_metadata_as_json_in_s3 s3 = S3StorageService.new s3.init_client bucket_name = s3.sanitise_name(id) diff --git a/hyrax/app/models/concerns/s3_file_handleable.rb b/hyrax/app/models/concerns/s3_file_handleable.rb index a31f3b569502889a9a980786407d866b738ddb86..5fe30f2df8b12074a6cd97da3aae6b7d95047a40 100644 --- a/hyrax/app/models/concerns/s3_file_handleable.rb +++ b/hyrax/app/models/concerns/s3_file_handleable.rb @@ -1,4 +1,5 @@ # app/models/concerns/file_relocation_concern.rb +require 'json' module S3FileHandleable extend ActiveSupport::Concern @@ -10,7 +11,7 @@ module S3FileHandleable Hyrax::HandleS3FolderNameJob.perform_later(self.class.name, self.id, name_changes) end - def handle_meta_file_on_s3 + def save_meta_json_in_s3 s3 = S3StorageService.new s3.init_client @@ -32,94 +33,286 @@ module S3FileHandleable def prepare_json(crc_dataset, subject, session, modality) @meta = {} - data_from_crc_dataset(crc_dataset) data_from_complex_subject(subject) data_from_complex_session(session) data_from_complex_modality(modality) - - @meta.to_json + JSON.pretty_generate(@meta) end def data_from_complex_subject(subject) - @meta["Subject ID"] = subject.id - @meta["Subject Species"] = subject.subject_species - @meta["Subject type"] = subject.subject_type - @meta["Subject sex"] = subject.subject_sex - @meta["Subject age"] = subject.subject_age + @meta["Subject ID"] = "" + @meta["Subject ID"] = subject.subject_title.strip if subject.subject_title.present? + + @meta["Subject Species"] = "" + @meta["Subject Species"] = subject.subject_species.strip if subject.subject_species.present? + + @meta["Subject type"] = "" + @meta["Subject type"] = subject.subject_type.strip if subject.subject_type.present? 
+ + @meta["Subject sex"] = "" + @meta["Subject sex"] = subject.subject_sex.strip if subject.subject_sex.present? + + @meta["Subject age"] = "" + @meta["Subject age"] = subject.subject_age.to_s.strip if subject.subject_age.present? end def data_from_complex_session(session) - # TODO Date recorded curruntly fetch from ComplexDate we need to confirm is this correct? + # date from complex session will overwrite date from complex date + if session.present? and session.session_date_recorded.present? + @meta["Record date"] = session.session_date_recorded + @meta["DataCite-Date"] = session.session_date_recorded + @meta["DataCite-dateType"] = "Recorded" + @meta["DublinCore-Date"] = session.session_date_recorded + end end def data_from_complex_modality(modality) - @meta["Modality"] = modality.modality + @meta["Modality"] = [] + @meta["Modality"] << modality.modality if modality.present? and modality.modality.present? end def data_from_crc_dataset(crc_dataset) @meta["DublinCore-Title"] = crc_dataset.title[0] @meta["Experiment title"] = crc_dataset.title[0] @meta["DataCite-Title"] = crc_dataset.title[0] - @meta["DublinCore-Language"] = crc_dataset.language[0] - @meta["DataCite-Language"] = crc_dataset.language[0] - @meta["Resource Type"] = crc_dataset.resource_type[0] - @meta["Experiment Description"] = crc_dataset.experiment_description[0] - @meta["Group ID"] = "" # Complex Identifier removed - @meta["Animal|Ethics approval No."] = crc_dataset.approval_number - @meta["DublinCore-Subject"] = crc_dataset.subject.join("; ") - @meta["DataCite-Subject"] = crc_dataset.subject.join("; ") - @meta["DublinCore-Publisher"] = crc_dataset.publisher[0] - @meta["DataCite-Publisher"] = crc_dataset.publisher[0] - @meta["DublinCore-Coverage"] = crc_dataset.coverage - @meta["Software version"] = "" # TODO we don't have for now - @meta["Extra information"] = crc_dataset.extra_information[0] + @meta["DublinCore-Language"] = "" + @meta["DataCite-Language"] = "" + if crc_dataset.language.any? 
and crc_dataset.language.first.present? + @meta["DublinCore-Language"] = crc_dataset.language[0].strip + @meta["DataCite-Language"] = crc_dataset.language[0].strip + end + + if crc_dataset.crc_resource_type.present? + @meta["Resource Type"] = crc_dataset.crc_resource_type.strip + end + + @meta["DataCite-Description"] = "" + @meta["DublinCore-Description"] = "" + @meta["DataCite-descriptionType"] = "Abstract" + if crc_dataset.experiment_description.any? and crc_dataset.experiment_description.first.present? + @meta["DataCite-Description"] = crc_dataset.experiment_description[0].strip + @meta["DublinCore-Description"] = crc_dataset.experiment_description[0].strip + end + + @meta["Group ID"] = "" + if crc_dataset.parent_collections.any? and + crc_dataset.parent_collections.first.present? and + crc_dataset.parent_collections.first.title.present? + @meta["Group ID"] = crc_dataset.parent_collections.first.title[0].strip + end + @meta["SharedWith"] = "" + + @meta["Animal|Ethics approval No."] = "" + if crc_dataset.approval_number.present? + @meta["Animal|Ethics approval No."] = crc_dataset.approval_number.to_s.strip + end + + @meta["DublinCore-Subject"] = "" + @meta["DataCite-Subject"] = "" + if crc_dataset.keyword.any? + keywords = crc_dataset.keyword.reject(&:empty?).map { |s| s.strip }.reject(&:empty?) + @meta["DublinCore-Subject"] = keywords.join("; ") if keywords.any? + @meta["DataCite-Subject"] = keywords.join("; ") if keywords.any? + end + + @meta["DublinCore-Publisher"] = "" + @meta["DataCite-Publisher"] = "" + if crc_dataset.publisher.any? and crc_dataset.publisher.first.present? + @meta["DublinCore-Publisher"] = crc_dataset.publisher[0].strip + @meta["DataCite-Publisher"] = crc_dataset.publisher[0].strip + end + + @meta["DublinCore-Coverage"] = "" + if crc_dataset.coverage.present? + @meta["DublinCore-Coverage"] = crc_dataset.coverage.strip + end + + @meta["Software version"] = "1.0" + + @meta["Extra information"] = "" + if crc_dataset.extra_information.any? 
and crc_dataset.extra_information.first.present? + @meta["Extra information"] = crc_dataset.extra_information[0].strip + end + + @meta["DataCite-rights"] = "" + @meta["DataCite-rightsURI"] = "" + if crc_dataset.license.any? and crc_dataset.license.first.present? + license_uri = crc_dataset.license.first + @meta["DataCite-rights"] = LicenseService.new.label(license_uri) + @meta["DataCite-rightsURI"] = license_uri + end + + # Map ARK and DOI + @meta["DataCite-alternateIdentifier"] = "" + @meta["DataCite-alternateIdentifierType"] = "" + @meta["DataCite-Identifier"] = "" + @meta["DataCite-IdentifierType"] = "" + crc_dataset.complex_identifier.each do |idf| + if idf.scheme.any? and idf.scheme[0] == "ARK" and + idf.identifier.any? and idf.identifier[0].present? + @meta["DataCite-alternateIdentifier"] = idf.identifier[0].strip + @meta["DataCite-alternateIdentifierType"] = "ARK" + elsif idf.scheme.any? and idf.scheme[0] == "DOI" and + idf.identifier.any? and idf.identifier[0].present? + @meta["DataCite-Identifier"] = idf.identifier[0].strip + @meta["DataCite-IdentifierType"] = "DOI" + end + end data_from_complex_person(crc_dataset.complex_person) data_from_complex_date(crc_dataset.complex_date) data_from_complex_funding_reference(crc_dataset.complex_funding_reference) + data_from_complex_relation(crc_dataset.complex_relation) end def data_from_complex_person(complex_person) - return unless complex_person.any? + complex_creators, complex_contributors = group_people_by_role(complex_person) + # Creators + creators = get_people_info(complex_creators) + # names, firstnames, lastnames, affiliations, ids, roles + @meta["Creator"] = creators[:names] + @meta["DublinCore-Creator"] = creators[:names] + @meta["DataCite-creatorName"] = creators[:names] + @meta["DataCite-creatorNameType"] = "personal" if creators[:names].present? 
+ @meta["DataCite-creatorFamilyName"] = creators[:lastnames] + @meta["DataCite-creatorGivenName"] = creators[:firstnames] + @meta["DataCite-creatorAffiliation"] = creators[:affiliations] + @meta["DataCite-creatorNameIdentifier"] = "" + @meta["DataCite-creatorNameIdentifierScheme"] = "" + @meta["DataCite-creatorSchemeURI"] = "" + if creators[:ids].present? + @meta["DataCite-creatorNameIdentifier"] = creators[:ids] + @meta["DataCite-creatorNameIdentifierScheme"] = "Orcid" + @meta["DataCite-creatorSchemeURI"] = "https://orcid.org" + end - complex_person = complex_person.group_by{ |p| p.role[0] } + # Contributors + contributors = get_people_info(complex_contributors) + # names, firstnames, lastnames, affiliations, ids, roles + @meta["Contributor"] = contributors[:names] + @meta["DublinCore-Contributor"] = contributors[:names] + @meta["DataCite-contributorName"] = contributors[:names] + @meta["DataCite-contributorNameType"] = "personal" if contributors[:names].present? + @meta["DataCite-contributorFamilyName"] = contributors[:lastnames] + @meta["DataCite-contributorGivenName"] = contributors[:firstnames] + @meta["DataCite-contributorAffiliation"] = contributors[:affiliations] + @meta["DataCite-contributorType"] = contributors[:roles] + @meta["DataCite-contributorNameIdentifier"] = "" + @meta["DataCite-contributorNameIdentifierScheme"] = "" + @meta["DataCite-contributorSchemeURI"] = "" + if contributors[:ids].present? + @meta["DataCite-contributorNameIdentifier"] = contributors[:ids] + @meta["DataCite-contributorNameIdentifierScheme"] = "Orcid" + @meta["DataCite-contributorSchemeURI"] = "https://orcid.org" + end + end - unless complex_person["creator"].nil? 
- @meta["Creator"] = complex_person["creator"].map{ |creator| "#{creator.first_name[0]}, #{creator.last_name[0]}" }.compact.join("; ") - @meta["DataCite-creatorName"] = @meta["Creator"] - @meta["DublinCore-Creator"] = @meta["Creator"] - @meta["DataCite-creatorFamilyName"] = complex_person["creator"].map{ |creator| creator.first_name[0] }.compact.join("; ") - @meta["DataCite-creatorGivenName"] = complex_person["creator"].map{ |creator| creator.last_name[0] }.compact.join("; ") - @meta["DataCite-creatorAffiliation"] = complex_person["creator"].map{ |creator| creator.affiliation[0] }.compact.join("; ") - @meta["DataCite-creatorNameIdentifier"] = complex_person["creator"].map{ |creator| creator.orcid[0] }.compact.join("; ") + def group_people_by_role(complex_person) + creators = [] + contributors = [] + complex_person.each do |person| + role = person.role[0].strip if person.role.any? and person.role.first.present? + if role == "creator" + creators << person + else + contributors << person + end end + return creators, contributors + end - unless complex_person["contributor"].nil? 
- @meta["contributor"] = complex_person["contributor"].map{ |contributor| "#{contributor.first_name[0]}, #{contributor.last_name[0]}" }.compact.join("; ") - @meta["DataCite-contributorName"] = @meta["contributor"] - @meta["DublinCore-Contributor"] = @meta["contributor"] - @meta["DataCite-contributorFamilyName"] = complex_person["contributor"].map{ |contributor| contributor.first_name[0] }.compact.join("; ") - @meta["DataCite-contributorGivenName"] = complex_person["contributor"].map{ |contributor| contributor.last_name[0] }.compact.join("; ") - @meta["DataCite-contirbutorAffiliation"] = complex_person["contributor"].map{ |contributor| contributor.affiliation[0] }.compact.join("; ") - @meta["DataCite-contributorNameIdentifier"] = complex_person["contributor"].map{ |contributor| contributor.orcid[0] }.compact.join("; ") + def get_people_info(complex_people) + names = [] + firstnames = [] + lastnames = [] + affiliations = [] + ids = [] + roles = [] + complex_people.each do |person| + name = [] + if person.last_name.any? and person.last_name.first.present? + name << person.last_name[0].strip + lastnames << person.last_name[0].strip + end + if person.first_name.any? and person.first_name.first.present? + name << person.first_name[0].strip + firstnames << person.first_name[0].strip + end + names << name.join(', ') if name.any? + if person.affiliation.any? and person.affiliation.first.present? + affiliations << person.affiliation[0].strip + end + if person.orcid.any? and person.orcid.first.present? + ids << person.orcid[0].strip + end + if person.role.any? and person.role.first.present? + roles << person.role[0].strip + end end + people = { + names: names.any? ? names.join("; ") : "", + firstnames: firstnames.any? ? firstnames.join("; ") : "", + lastnames: lastnames.any? ? lastnames.join("; ") : "", + affiliations: affiliations.any? ? affiliations.join("; ") : "", + ids: ids.any? ? ids.join("; ") : "", + roles: roles.any? ? 
roles.join("; ") : "" + } + people end def data_from_complex_funding_reference(complex_funding_reference) - return unless complex_funding_reference.any? - - @meta["DataCite-FundingReference"] = complex_funding_reference.map{ |funding| funding.funder_name[0] }.compact.join("; ") - @meta["DataCite-funderIdentifier"] = complex_funding_reference.map{ |funding| funding.funder_identifier[0] }.compact.join("; ") + @meta["DataCite-FundingReference"] = "" + @meta["DataCite-funderIdentifier"] = "" + names = [] + ids = [] + complex_funding_reference.each do |funder| + if funder.funder_name.any? and funder.funder_name.first.present? + names << funder.funder_name[0].strip + end + if funder.funder_identifier.any? and funder.funder_identifier.first.present? + ids << funder.funder_identifier[0].strip + end + end + @meta["DataCite-FundingReference"] = names.join("; ") if names.any? + @meta["DataCite-funderIdentifier"] = ids.join("; ") if ids.any? end def data_from_complex_date(complex_dates) - return unless complex_dates.any? + @meta["Record date"] = "" + @meta["DataCite-Date"] = "" + @meta["DataCite-dateType"] = "" + @meta["DublinCore-Date"] = "" + complex_dates.each do |complex_date| + if complex_date.description.any? and complex_date.description[0] == "Recorded" and + complex_date.date.any? and complex_date.date.first.present? 
+ @meta["Record date"] = complex_date.date[0] + @meta["DataCite-Date"] = complex_date.date[0] + @meta["DataCite-dateType"] = "Recorded" + @meta["DublinCore-Date"] = complex_date.date[0] + end + end + end - @meta["Record date"] = complex_dates.map{ |complex_date| complex_date.date[0] if complex_date.description[0] == "Recorded" }.compact.join("; ") - @meta["DataCite-dateType"] = complex_dates.map{ |complex_date| complex_date.description[0] if complex_date.description[0] != "Recorded" }.compact.join("; ") - @meta["DataCite-Date"] = complex_dates.map{ |complex_date| complex_date.date[0] if complex_date.description[0] != "Recorded" }.compact.join("; ") - @meta["DublinCore-Date"] = @meta["DataCite-Date"] + def data_from_complex_relation(complex_relations) + @meta["DublinCore-Relation"] = "" + @meta["DataCite-RelatedIdentifier"] = "" + @meta["DataCite-relatedIdentifierType"] = "" + @meta["DataCite-relationType"] = "" + titles = [] + identifiers = [] + relations = [] + complex_relations.each do |complex_relation| + titles << complex_relation.title[0].strip if complex_relation.title.any? and complex_relation.title.first.present? + identifiers << complex_relation.url[0].strip if complex_relation.url.any? and complex_relation.url.first.present? + relations << complex_relation.relationship[0].strip if complex_relation.relationship.any? and complex_relation.relationship.first.present? + end + @meta["DublinCore-Relation"] = titles.join("; ") if titles.any? + if identifiers.any? + @meta["DataCite-RelatedIdentifier"] = identifiers.join("; ") + @meta["DataCite-relatedIdentifierType"] = "URL" + end + @meta["DataCite-relationType"] = relations.join("; ") if relations.any? 
end end diff --git a/hyrax/app/models/crc_dataset.rb b/hyrax/app/models/crc_dataset.rb index 982b8b5ea0e6053b49fd684e09633fdf895d35cf..8d3eb686348765494cb16ff37f82b492da4ba0bc 100755 --- a/hyrax/app/models/crc_dataset.rb +++ b/hyrax/app/models/crc_dataset.rb @@ -12,7 +12,7 @@ class CrcDataset < ActiveFedora::Base # self.valid_child_concerns = [] validates :title, presence: { message: 'Your CRC dataset must have a title.' 
} validate :validate_parent_collection - after_save :save_work_meta_json_file_to_s3, :handle_meta_file_on_s3 + after_save :save_metadata_as_json_in_s3, :save_meta_json_in_s3 after_create :set_default_source_and_tombstone_status, :set_default_values, :register_ark # ------ properties from core metadata ------ diff --git a/hyrax/app/models/dataset.rb b/hyrax/app/models/dataset.rb index aec1954cdcd0907ec008812081982c54f55b2c7d..f7db7d1b7ec7da745f03df4caa5883828ed906a4 100755 --- a/hyrax/app/models/dataset.rb +++ b/hyrax/app/models/dataset.rb @@ -10,7 +10,7 @@ class Dataset < ActiveFedora::Base self.valid_child_concerns = [Dataset] validates :title, presence: { message: 'Your dataset must have a title.' } - after_save :save_work_meta_json_file_to_s3 + after_save :save_metadata_as_json_in_s3 after_create :set_default_tombstone_status after_create :register_ark validate :validate_parent_collection diff --git a/hyrax/lib/tasks/fix_meta_json_in_s3.rake b/hyrax/lib/tasks/fix_meta_json_in_s3.rake new file mode 100644 index 0000000000000000000000000000000000000000..b00f0a77175d54751f5fef95c45d2b3da2f3a1ea --- /dev/null +++ b/hyrax/lib/tasks/fix_meta_json_in_s3.rake @@ -0,0 +1,15 @@ +# frozen_string_literal: true +namespace :rdms do + desc "Fix meta.json in S3 for all experiments. 
usage: rdms:fix_meta_json_in_s3" + task fix_meta_json_in_s3: :environment do + CrcDataset.all.each do |w| + # save meta.json for modalities + puts "Saving meta.json for CrcDataset #{w.id}" + begin + w.save_meta_json_in_s3 + rescue StandardError => e + puts "error saving metadata for CrcDataset #{w.id}: #{e.message}" + end + end + end +end diff --git a/hyrax/lib/tasks/fix_metadata_in_s3.rake b/hyrax/lib/tasks/fix_metadata_in_s3.rake index 2edac68203cc09d352c0ecc40343a2b9371fe75e..956286a9ae60d15b4b6b12d2ccbee8068803a1e0 100644 --- a/hyrax/lib/tasks/fix_metadata_in_s3.rake +++ b/hyrax/lib/tasks/fix_metadata_in_s3.rake @@ -10,13 +10,13 @@ namespace :rdms do case work_type when 'Dataset', 'CrcDataset' begin - w.save_work_meta_json_file_to_s3 + w.save_metadata_as_json_in_s3 rescue puts "error saving metadata for #{work_type} #{w.id}" end else begin - w.send(:save_work_meta_json_file_to_s3) + w.send(:save_metadata_as_json_in_s3) rescue puts "error saving metadata for #{work_type} #{w.id}" end @@ -25,7 +25,7 @@ namespace :rdms do # save meta.json for modalities puts "Saving meta.json for #{work_type} #{w.id}" begin - w.handle_meta_file_on_s3 + w.save_meta_json_in_s3 rescue puts "error saving metadata for #{work_type} #{w.id}" end