From 9d07352c9428dbbac6dde741617b58a30c2c99b3 Mon Sep 17 00:00:00 2001 From: Gyan Gupta <gyan@cottagelabs.com> Date: Thu, 5 Dec 2024 19:05:11 +0100 Subject: [PATCH] Add size and formats for download --- hyrax/app/actors/hyrax/actors/file_actor.rb | 5 +-- hyrax/app/assets/stylesheets/rdms.scss | 5 +++ .../controllers/download_all_controller.rb | 2 ++ hyrax/app/forms/hyrax/crc_dataset_form.rb | 2 -- hyrax/app/helpers/download_helper.rb | 19 ++++++++-- hyrax/app/helpers/hyrax_helper.rb | 2 +- hyrax/app/models/complex_modality.rb | 7 ++-- hyrax/app/models/complex_session.rb | 6 ++-- hyrax/app/models/complex_subject.rb | 7 ++-- .../app/models/concerns/external_services.rb | 15 ++++++++ .../app/models/concerns/s3_file_handleable.rb | 11 ++++++ hyrax/app/models/crc_dataset.rb | 3 ++ hyrax/app/models/dataset.rb | 9 +++++ hyrax/app/models/file_set.rb | 8 ++++- hyrax/app/services/s3_storage_service.rb | 15 ++++++-- .../views/hyrax/base/_download_all.html.erb | 3 ++ hyrax/config/locales/hyrax.en.yml | 3 ++ .../tasks/set_format_and_size_to_works.rake | 35 +++++++++++++++++++ 18 files changed, 132 insertions(+), 25 deletions(-) create mode 100644 hyrax/lib/tasks/set_format_and_size_to_works.rake diff --git a/hyrax/app/actors/hyrax/actors/file_actor.rb b/hyrax/app/actors/hyrax/actors/file_actor.rb index 889885e4..4df513a5 100644 --- a/hyrax/app/actors/hyrax/actors/file_actor.rb +++ b/hyrax/app/actors/hyrax/actors/file_actor.rb @@ -87,7 +87,7 @@ module Hyrax parent_object = file_set.parent_object siblings = parent_object.file_sets - duplicate_record = siblings.select {|fs| fs.title[0].downcase == file_name && fs.id != file_set.id } + duplicate_record = siblings.select {|fs| fs.title[0]&.downcase == file_name && fs.id != file_set.id } new_title = nil if duplicate_record.present? @@ -165,7 +165,8 @@ module Hyrax s3.init_client source_object_key = uploaded_file.file.path - target_bucket_name = s3.sanitise_name(file_set.parent_works.first.id) + parent_work = file_set.parent_works.first + target_bucket_name = s3.sanitise_name(parent_work.id) target_bucket = Aws::S3::Bucket.new(target_bucket_name) diff --git a/hyrax/app/assets/stylesheets/rdms.scss b/hyrax/app/assets/stylesheets/rdms.scss index e0112466..d2d648a7 100644 --- a/hyrax/app/assets/stylesheets/rdms.scss +++ b/hyrax/app/assets/stylesheets/rdms.scss @@ -155,12 +155,17 @@ form .field-wrapper label[required="required"]::after { display: flex; .download-all { + display: flex; padding: 16px; margin: 0px; #download-all { margin: 7px 7px 7px 0px; } + .download-info { + font-size: 25px; + margin: 9px 0px; + } } .citations .citation-modal-btn { diff --git a/hyrax/app/controllers/download_all_controller.rb b/hyrax/app/controllers/download_all_controller.rb index 411915f1..c6e4a5b1 100644 --- a/hyrax/app/controllers/download_all_controller.rb +++ b/hyrax/app/controllers/download_all_controller.rb @@ -24,6 +24,8 @@ class DownloadAllController < Hyrax::DownloadsController def build_download_file return redirect_to format: :zip if has_zip_file? return redirect_to format: :sh if has_shell_file? + # Created id.txt file that store timestamp when we start download + create_or_modify_ts_file #ToDo: Can we check for this faster than getting list from S3? list_of_objects, _total_size, format = objects_size_and_file_format if format == 'zip' diff --git a/hyrax/app/forms/hyrax/crc_dataset_form.rb b/hyrax/app/forms/hyrax/crc_dataset_form.rb index 49bdd30d..e790fe54 100644 --- a/hyrax/app/forms/hyrax/crc_dataset_form.rb +++ b/hyrax/app/forms/hyrax/crc_dataset_form.rb @@ -30,8 +30,6 @@ module Hyrax :keyword, :complex_relation, :license, - :format, - :files_size ] self.required_fields = [ diff --git a/hyrax/app/helpers/download_helper.rb b/hyrax/app/helpers/download_helper.rb index edce3c2c..ffd48ae3 100644 --- a/hyrax/app/helpers/download_helper.rb +++ b/hyrax/app/helpers/download_helper.rb @@ -24,7 +24,7 @@ module DownloadHelper def has_zip_file? if work.date_modified.present? - File.exists?(zip_file_path) and local_file_last_modified > work.date_modified + File.exists?(zip_file_path) and File.exists?(ts_file_path) and ts_file_last_modified > work.date_modified else File.exists?(zip_file_path) end @@ -42,7 +42,7 @@ module DownloadHelper def shell_file_expired?(file_path) if work.date_modified.present? File.ctime(file_path).to_datetime <= (DateTime.now - 1.day) or - local_file_last_modified <= work.date_modified + (File.exists?(ts_file_path) and ts_file_last_modified <= work.date_modified) else File.ctime(file_path).to_datetime <= (DateTime.now - 1.day) end @@ -60,6 +60,21 @@ module DownloadHelper File.join(ENV.fetch('DOWNLOAD_PATH', "/shared/downloads"), "#{work.id}.sh") end + def ts_file_path + File.join(ENV.fetch('DOWNLOAD_PATH', "/shared/downloads"), "#{work.id}.txt") + end + + def create_or_modify_ts_file + File.open(ts_file_path, "w") do |file| + file.puts(DateTime.now) + end + end + + def ts_file_last_modified + timestamp = File.read(ts_file_path).strip + DateTime.parse(timestamp) + end + def cleanup_path(file_path) FileUtils.rm_rf(file_path) end diff --git a/hyrax/app/helpers/hyrax_helper.rb b/hyrax/app/helpers/hyrax_helper.rb index 8b58d5a9..125a8854 100644 --- a/hyrax/app/helpers/hyrax_helper.rb +++ b/hyrax/app/helpers/hyrax_helper.rb @@ -123,7 +123,7 @@ module HyraxHelper filtered_data = parsed_data.slice('id', 'depositor', 'date_created', 'date_modified', 'title', 'description', 'resource_type', 'keyword', 'license', 'alternative_title', 'subject', 'language', 'based_near', 'software_version', - 'publisher', 'doi', + 'publisher', 'doi', 'files_size', 'format', 'experiment_description', 'crc_resource_type', 'coverage', 'approval_number', 'extra_information') filtered_data['Creators and Contributors'] = parsed_data['complex_person'].map { |person| person.slice('first_name', 'last_name', 'name', 'email', 'role', 'orcid', 'affiliation')} if parsed_data['complex_person'].present? diff --git a/hyrax/app/models/complex_modality.rb b/hyrax/app/models/complex_modality.rb index 4039099a..3ce006a7 100644 --- a/hyrax/app/models/complex_modality.rb +++ b/hyrax/app/models/complex_modality.rb @@ -18,9 +18,10 @@ class ComplexModality < ActiveRecord::Base validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_modality.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported } before_validation :set_s3_folder_name - after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 + after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified_and_files_data, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 after_commit :fix_duplication_of_title, on: :create after_commit :clear_s3_and_all_associated_objects, on: [:destroy] + delegate :crc_dataset, to: :complex_session delegate :complex_subject, to: :complex_session @@ -57,10 +58,6 @@ class ComplexModality < ActiveRecord::Base s3.add_content(bucket_name, "#{complex_subject.s3_folder_name}/#{complex_session.s3_folder_name}/#{s3_folder_name}/metadata.json", filter_metadata_json_file(self)) end - def update_crc_dataset_date_modified - crc_dataset.update(date_modified: DateTime.now) - end - def fix_duplication_of_title return true unless self.is_imported diff --git a/hyrax/app/models/complex_session.rb b/hyrax/app/models/complex_session.rb index 32bfed9c..20158575 100644 --- a/hyrax/app/models/complex_session.rb +++ b/hyrax/app/models/complex_session.rb @@ -20,9 +20,10 @@ class ComplexSession < ActiveRecord::Base validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_session.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported } before_validation :set_s3_folder_name - after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 + after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified_and_files_data, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 after_commit :fix_duplication_of_title, on: :create after_commit :clear_s3_and_all_associated_objects, on: [:destroy] + delegate :crc_dataset, to: :complex_subject @@ -58,9 +59,6 @@ class ComplexSession < ActiveRecord::Base s3.add_content(bucket_name, "#{complex_subject.s3_folder_name}/#{s3_folder_name}/metadata.json", filter_metadata_json_file(self)) end - def update_crc_dataset_date_modified - crc_dataset.update(date_modified: DateTime.now) - end def fix_duplication_of_title return true unless self.is_imported diff --git a/hyrax/app/models/complex_subject.rb b/hyrax/app/models/complex_subject.rb index 01ffbc5e..6f78ca2f 100644 --- a/hyrax/app/models/complex_subject.rb +++ b/hyrax/app/models/complex_subject.rb @@ -19,10 +19,11 @@ class ComplexSubject < ActiveRecord::Base validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_subject.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported } before_validation :set_s3_folder_name - after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 + after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified_and_files_data, :relocate_files_on_folder_name_change, :save_meta_json_in_s3 after_commit :fix_duplication_of_title, on: :create after_commit :clear_s3_and_all_associated_objects, on: [:destroy] + scope :sort_by_title, ->(source_identifier) { where(parent_source_identifier: source_identifier) .sort_by { |item| item.subject_title.downcase.gsub(/[^a-zA-Z0-9]/, '0').split(/(\d+)/).map { |s| s.match(/\d+/) ? s.to_i : s } } @@ -59,10 +60,6 @@ class ComplexSubject < ActiveRecord::Base s3.add_content(bucket_name, "#{s3_folder_name}/metadata.json", filter_metadata_json_file(self)) end - def update_crc_dataset_date_modified - crc_dataset.update(date_modified: DateTime.now) - end - def fix_duplication_of_title return true unless self.is_imported diff --git a/hyrax/app/models/concerns/external_services.rb b/hyrax/app/models/concerns/external_services.rb index 00072817..c2360137 100644 --- a/hyrax/app/models/concerns/external_services.rb +++ b/hyrax/app/models/concerns/external_services.rb @@ -50,5 +50,20 @@ module ExternalServices s3.add_content(bucket_name, "metadata.json", filter_metadata_json_file(self_object)) end + def set_format_and_size + if self.persisted? + s3 = S3StorageService.new + s3.init_client + bucket_name = s3.sanitise_name(self.id) + + if s3.bucket_exists?(bucket_name) + list_of_objects, total_size, list_of_format = s3.list_all_objects(bucket_name, nil, true) + return true if (self.format == list_of_format && self.files_size == [total_size]) + self.files_size = [total_size] + self.format = list_of_format + self.date_modified = DateTime.now + end + end + end end end \ No newline at end of file diff --git a/hyrax/app/models/concerns/s3_file_handleable.rb b/hyrax/app/models/concerns/s3_file_handleable.rb index b8774233..0cc54d75 100644 --- a/hyrax/app/models/concerns/s3_file_handleable.rb +++ b/hyrax/app/models/concerns/s3_file_handleable.rb @@ -51,6 +51,17 @@ module S3FileHandleable private + def update_crc_dataset_date_modified_and_files_data + return true unless crc_dataset.present? + + s3service = S3StorageService.new + s3service.init_client + bucket_name = s3service.sanitise_name(crc_dataset.id) + list_of_objects, total_size, list_of_format = s3service.list_all_objects(bucket_name, nil, true) + crc_dataset.update(date_modified: DateTime.now, files_size: [total_size], format:list_of_format) + end + + def prepare_json(crc_dataset, subject, session, modality) @meta = {} data_from_crc_dataset(crc_dataset) diff --git a/hyrax/app/models/crc_dataset.rb b/hyrax/app/models/crc_dataset.rb index 9926c110..5e9c5f4f 100755 --- a/hyrax/app/models/crc_dataset.rb +++ b/hyrax/app/models/crc_dataset.rb @@ -12,10 +12,13 @@ class CrcDataset < ActiveFedora::Base # self.valid_child_concerns = [] validates :title, presence: { message: 'Your CRC dataset must have a title.' } validate :validate_parent_collection + + before_save :set_format_and_size after_save :save_metadata_as_json_in_s3, :save_meta_json_in_s3 after_create :set_default_source_and_tombstone_status, :set_default_values, :register_ark after_destroy :clear_s3_and_all_associated_objects + CRC_FILTER_FACET_FIELDS = [ "year_recorded_sim", "complex_subject_species_sim", diff --git a/hyrax/app/models/dataset.rb b/hyrax/app/models/dataset.rb index db456a6c..793d4f40 100755 --- a/hyrax/app/models/dataset.rb +++ b/hyrax/app/models/dataset.rb @@ -10,6 +10,7 @@ class Dataset < ActiveFedora::Base self.valid_child_concerns = [Dataset] validates :title, presence: { message: 'Your dataset must have a title.' } + before_save :set_format_and_size after_save :save_metadata_as_json_in_s3 after_create :set_default_tombstone_status after_create :register_ark @@ -95,7 +96,15 @@ class Dataset < ActiveFedora::Base property :complex_identifier, predicate: ::RDF::Vocab::Rdms.identifier, class_name:"ComplexIdentifier" # size and format to be obtained from the files attached + property :format, predicate: ::RDF::Vocab::Rdms.format do |index| + index.as :symbol + end + property :files_size, predicate: ::RDF::Vocab::Rdms.size do |index| + index.type :integer + index.as :stored_searchable + end + # description and abstract is in the basic metadata. Ignoring other descriptions for now. # Geolocation - would this be used? Ignoring for now. diff --git a/hyrax/app/models/file_set.rb b/hyrax/app/models/file_set.rb index 1ca7d46c..52a4c7e4 100644 --- a/hyrax/app/models/file_set.rb +++ b/hyrax/app/models/file_set.rb @@ -3,11 +3,12 @@ require "./lib/vocabularies/rdms" # Generated by hyrax:models:install class FileSet < ActiveFedora::Base self.indexer = CrcFileSetIndexer + include S3FileHandleable after_save :create_file_set_with_fast_load_meta_data after_destroy :destroy_set_meta_data - property :for_complex_identifier, predicate: ::RDF::Vocab::Rdms.forComplexIdentifier, multiple: false do |index| + property :for_complex_identifier, predicate: ::RDF::Vocab::Rdms.forComplexIdentifier, multiple: false do |index| index.as :stored_searchable, :facetable end @@ -76,6 +77,11 @@ class FileSet < ActiveFedora::Base metadata.work_model_name = work_type metadata.file_set_title = self.title&.first || self.label metadata.save + + # Here we are saving parent + # We have callback before save that will set format and size here: ExternalServices#set_format_and_size + # also update date_modified so download will not miss any files + parent.save if parent.present? end def destroy_set_meta_data diff --git a/hyrax/app/services/s3_storage_service.rb b/hyrax/app/services/s3_storage_service.rb index 5b189505..30adbdfa 100644 --- a/hyrax/app/services/s3_storage_service.rb +++ b/hyrax/app/services/s3_storage_service.rb @@ -69,8 +69,9 @@ class S3StorageService return contents, total_size end - def list_all_objects(bucket_name, prefix = nil) + def list_all_objects(bucket_name, prefix = nil, format = false) list_of_objects = [] + list_of_format = [] total_size = 0 resp = @s3_client.list_objects_v2( bucket: bucket_name, @@ -85,13 +86,21 @@ class S3StorageService size: object.size, last_modified_date: object.last_modified } - total_size += object.size + + unless object[:key].include?("system_metadata.json") + total_size += object.size + list_of_format << File.extname(object.key) + end end break unless resp.next_page? resp = resp.next_page end - return list_of_objects, total_size + if format + return list_of_objects, total_size, list_of_format.uniq + else + return list_of_objects, total_size + end end def get_content(bucket_name, object_key, local_file_path) diff --git a/hyrax/app/views/hyrax/base/_download_all.html.erb b/hyrax/app/views/hyrax/base/_download_all.html.erb index 8c2ed2bf..eb4d312e 100644 --- a/hyrax/app/views/hyrax/base/_download_all.html.erb +++ b/hyrax/app/views/hyrax/base/_download_all.html.erb @@ -1,3 +1,6 @@ <div class="download-all"> + <% total_size = (@crc_dataset || @dataset).files_size.first || 0 %> + <% message = total_size < ENV.fetch('DOWNLOAD_FILES_SIZE_LIMIT', '100000000').to_i ? '.zip_file_message' : '.shell_file_message' %> <%= link_to t(:'hyrax.download_all'), main_app.download_all_path(presenter.id), disabled: download_all_button_available?(presenter.id) ? false : true, id: "download-all", class: "btn btn-default matomo_download", data:{ document_id: presenter.id } %> + <span class="fa fa-info-circle download-info" data-toggle="tooltip" title="<%= t("#{message}", size: total_size.to_s(:human_size))%>" aria-hidden="true"></span> </div> \ No newline at end of file diff --git a/hyrax/config/locales/hyrax.en.yml b/hyrax/config/locales/hyrax.en.yml index c0c2836b..f161600a 100644 --- a/hyrax/config/locales/hyrax.en.yml +++ b/hyrax/config/locales/hyrax.en.yml @@ -319,6 +319,9 @@ en: confirm_delete: "Confirm Delete" restore: "Restore" edit: "Edit work (dataset)" + download_all: + zip_file_message: "Download file size: %{size}" + shell_file_message: "You will be downloading a shell script, which you can use to download the files. The size of the complete download will be %{size}. We will send you a notification when the shell script is ready for download." batch: edit: apply_changes_to: "Changes will be applied to: (%{x_number_of} works (datasets))" diff --git a/hyrax/lib/tasks/set_format_and_size_to_works.rake b/hyrax/lib/tasks/set_format_and_size_to_works.rake new file mode 100644 index 00000000..d7eab24d --- /dev/null +++ b/hyrax/lib/tasks/set_format_and_size_to_works.rake @@ -0,0 +1,35 @@ +# frozen_string_literal: true +namespace :rdms do + desc "Set format and size to works. usage: rdms:set_format_and_size_to_works" + task set_format_and_size_to_works: :environment do + logger = Logger.new('set_format_and_size.log') + # Here we are jus saving all dataset and crc_dataset + # We have callback before save that will set format and size here: ExternalServices#set_format_and_size + dataset_errors = {} + dataset_count = 0 + Dataset.all.each do |dataset| + begin + dataset.save + dataset_count += 1 + rescue => err + dataset_errors[dataset.id] = err.message + logger.error("Error saving #{dataset.id}: #{err.message}") + end + end + logger.info("Number of datasets saved successfully: #{dataset_count}") + logger.error("Number of datasets with error saving: #{dataset_errors.size}") + crc_dataset_errors = {} + crc_dataset_count = 0 + CrcDataset.all.each do |crc_dataset| + begin + crc_dataset.save + crc_dataset_count += 1 + rescue => err + crc_dataset_errors[crc_dataset.id] = err.message + logger.error("Error saving #{crc_dataset.id}: #{err.message}") + end + end + logger.info("Number of CRC datasets saved successfully: #{crc_dataset_count}") + logger.error("Number of CRC datasets with error saving: #{crc_dataset_errors.size}") + end +end \ No newline at end of file -- GitLab