From 9d07352c9428dbbac6dde741617b58a30c2c99b3 Mon Sep 17 00:00:00 2001
From: Gyan Gupta <gyan@cottagelabs.com>
Date: Thu, 5 Dec 2024 19:05:11 +0100
Subject: [PATCH] Add size and formats for download

---
 hyrax/app/actors/hyrax/actors/file_actor.rb   |  5 +--
 hyrax/app/assets/stylesheets/rdms.scss        |  5 +++
 .../controllers/download_all_controller.rb    |  2 ++
 hyrax/app/forms/hyrax/crc_dataset_form.rb     |  2 --
 hyrax/app/helpers/download_helper.rb          | 19 ++++++++--
 hyrax/app/helpers/hyrax_helper.rb             |  2 +-
 hyrax/app/models/complex_modality.rb          |  7 ++--
 hyrax/app/models/complex_session.rb           |  6 ++--
 hyrax/app/models/complex_subject.rb           |  7 ++--
 .../app/models/concerns/external_services.rb  | 15 ++++++++
 .../app/models/concerns/s3_file_handleable.rb | 11 ++++++
 hyrax/app/models/crc_dataset.rb               |  3 ++
 hyrax/app/models/dataset.rb                   |  9 +++++
 hyrax/app/models/file_set.rb                  |  8 ++++-
 hyrax/app/services/s3_storage_service.rb      | 15 ++++++--
 .../views/hyrax/base/_download_all.html.erb   |  3 ++
 hyrax/config/locales/hyrax.en.yml             |  3 ++
 .../tasks/set_format_and_size_to_works.rake   | 35 +++++++++++++++++++
 18 files changed, 132 insertions(+), 25 deletions(-)
 create mode 100644 hyrax/lib/tasks/set_format_and_size_to_works.rake

diff --git a/hyrax/app/actors/hyrax/actors/file_actor.rb b/hyrax/app/actors/hyrax/actors/file_actor.rb
index 889885e4..4df513a5 100644
--- a/hyrax/app/actors/hyrax/actors/file_actor.rb
+++ b/hyrax/app/actors/hyrax/actors/file_actor.rb
@@ -87,7 +87,7 @@ module Hyrax
         parent_object = file_set.parent_object
         siblings = parent_object.file_sets
 
-        duplicate_record = siblings.select {|fs| fs.title[0].downcase == file_name && fs.id != file_set.id }
+        duplicate_record = siblings.select {|fs| fs.title[0]&.downcase == file_name && fs.id != file_set.id }
         new_title = nil
 
         if duplicate_record.present?
@@ -165,7 +165,8 @@ module Hyrax
         s3.init_client
 
         source_object_key = uploaded_file.file.path
-        target_bucket_name = s3.sanitise_name(file_set.parent_works.first.id)
+        parent_work = file_set.parent_works.first
+        target_bucket_name = s3.sanitise_name(parent_work.id)
 
         target_bucket = Aws::S3::Bucket.new(target_bucket_name)
 
diff --git a/hyrax/app/assets/stylesheets/rdms.scss b/hyrax/app/assets/stylesheets/rdms.scss
index e0112466..d2d648a7 100644
--- a/hyrax/app/assets/stylesheets/rdms.scss
+++ b/hyrax/app/assets/stylesheets/rdms.scss
@@ -155,12 +155,17 @@ form .field-wrapper label[required="required"]::after {
     display: flex;
 
     .download-all {
+      display: flex;
       padding: 16px;
       margin: 0px;
 
       #download-all {
         margin: 7px 7px 7px 0px;
       }
+      .download-info {
+        font-size: 25px;
+        margin: 9px 0px;
+      }
     }
 
     .citations .citation-modal-btn {
diff --git a/hyrax/app/controllers/download_all_controller.rb b/hyrax/app/controllers/download_all_controller.rb
index 411915f1..c6e4a5b1 100644
--- a/hyrax/app/controllers/download_all_controller.rb
+++ b/hyrax/app/controllers/download_all_controller.rb
@@ -24,6 +24,8 @@ class DownloadAllController < Hyrax::DownloadsController
   def build_download_file
     return redirect_to format: :zip if has_zip_file?
     return redirect_to format: :sh if has_shell_file?
+    # Create or overwrite the <work id>.txt file that stores the timestamp of when this download build started
+    create_or_modify_ts_file
     #ToDo: Can we check for this faster than getting list from S3?
     list_of_objects, _total_size, format = objects_size_and_file_format
     if format == 'zip'
diff --git a/hyrax/app/forms/hyrax/crc_dataset_form.rb b/hyrax/app/forms/hyrax/crc_dataset_form.rb
index 49bdd30d..e790fe54 100644
--- a/hyrax/app/forms/hyrax/crc_dataset_form.rb
+++ b/hyrax/app/forms/hyrax/crc_dataset_form.rb
@@ -30,8 +30,6 @@ module Hyrax
       :keyword,
       :complex_relation,
       :license,
-      :format,
-      :files_size
     ]
 
     self.required_fields = [
diff --git a/hyrax/app/helpers/download_helper.rb b/hyrax/app/helpers/download_helper.rb
index edce3c2c..ffd48ae3 100644
--- a/hyrax/app/helpers/download_helper.rb
+++ b/hyrax/app/helpers/download_helper.rb
@@ -24,7 +24,7 @@ module DownloadHelper
 
   def has_zip_file?
     if work.date_modified.present?
-      File.exists?(zip_file_path) and local_file_last_modified > work.date_modified
+      File.exists?(zip_file_path) and File.exists?(ts_file_path) and ts_file_last_modified > work.date_modified
     else
       File.exists?(zip_file_path)
     end
@@ -42,7 +42,7 @@ module DownloadHelper
   def shell_file_expired?(file_path)
     if work.date_modified.present?
       File.ctime(file_path).to_datetime <= (DateTime.now - 1.day) or
-        local_file_last_modified <= work.date_modified
+      (File.exists?(ts_file_path) and ts_file_last_modified <= work.date_modified)
     else
       File.ctime(file_path).to_datetime <= (DateTime.now - 1.day)
     end
@@ -60,6 +60,21 @@ module DownloadHelper
     File.join(ENV.fetch('DOWNLOAD_PATH', "/shared/downloads"), "#{work.id}.sh")
   end
 
+  def ts_file_path # "<work id>.txt" under DOWNLOAD_PATH (default /shared/downloads); holds the download timestamp
+    File.join(ENV.fetch('DOWNLOAD_PATH', "/shared/downloads"), "#{work.id}.txt")
+  end
+
+  # Overwrites the work's timestamp file with the current date-time (one line).
+  def create_or_modify_ts_file
+    File.write(ts_file_path, "#{DateTime.now}\n")
+    nil
+  end
+
+  # Reads the work's timestamp file and parses its content as a DateTime.
+  def ts_file_last_modified
+    DateTime.parse(File.read(ts_file_path).strip)
+  end
+
   def cleanup_path(file_path)
     FileUtils.rm_rf(file_path)
   end
diff --git a/hyrax/app/helpers/hyrax_helper.rb b/hyrax/app/helpers/hyrax_helper.rb
index 8b58d5a9..125a8854 100644
--- a/hyrax/app/helpers/hyrax_helper.rb
+++ b/hyrax/app/helpers/hyrax_helper.rb
@@ -123,7 +123,7 @@ module HyraxHelper
       filtered_data = parsed_data.slice('id', 'depositor', 'date_created', 'date_modified',
                                         'title', 'description', 'resource_type', 'keyword', 'license',
                                         'alternative_title', 'subject', 'language', 'based_near', 'software_version',
-                                        'publisher', 'doi',
+                                        'publisher', 'doi', 'files_size', 'format',
                                         'experiment_description', 'crc_resource_type', 'coverage', 'approval_number',
                                         'extra_information')
       filtered_data['Creators and Contributors'] = parsed_data['complex_person'].map { |person| person.slice('first_name', 'last_name', 'name', 'email', 'role', 'orcid', 'affiliation')} if parsed_data['complex_person'].present?
diff --git a/hyrax/app/models/complex_modality.rb b/hyrax/app/models/complex_modality.rb
index 4039099a..3ce006a7 100644
--- a/hyrax/app/models/complex_modality.rb
+++ b/hyrax/app/models/complex_modality.rb
@@ -18,9 +18,10 @@ class ComplexModality < ActiveRecord::Base
   validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier, message: ->(object, data) { I18n.t('rdms.errors.complex_modality.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported }
 
   before_validation :set_s3_folder_name
-  after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3
+  after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified_and_files_data, :relocate_files_on_folder_name_change, :save_meta_json_in_s3
   after_commit :fix_duplication_of_title, on: :create
   after_commit :clear_s3_and_all_associated_objects, on: [:destroy]
+  
 
   delegate :crc_dataset, to: :complex_session
   delegate :complex_subject, to: :complex_session
@@ -57,10 +58,6 @@ class ComplexModality < ActiveRecord::Base
     s3.add_content(bucket_name, "#{complex_subject.s3_folder_name}/#{complex_session.s3_folder_name}/#{s3_folder_name}/metadata.json", filter_metadata_json_file(self))
   end
 
-  def update_crc_dataset_date_modified
-    crc_dataset.update(date_modified: DateTime.now)
-  end
-
   def fix_duplication_of_title
     return true  unless self.is_imported
 
diff --git a/hyrax/app/models/complex_session.rb b/hyrax/app/models/complex_session.rb
index 32bfed9c..20158575 100644
--- a/hyrax/app/models/complex_session.rb
+++ b/hyrax/app/models/complex_session.rb
@@ -20,9 +20,10 @@ class ComplexSession < ActiveRecord::Base
   validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier,  message: ->(object, data) { I18n.t('rdms.errors.complex_session.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported }
 
   before_validation :set_s3_folder_name
-  after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3
+  after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified_and_files_data, :relocate_files_on_folder_name_change, :save_meta_json_in_s3
   after_commit :fix_duplication_of_title, on: :create
   after_commit :clear_s3_and_all_associated_objects, on: [:destroy]
+ 
 
   delegate :crc_dataset, to: :complex_subject
 
@@ -58,9 +59,6 @@ class ComplexSession < ActiveRecord::Base
     s3.add_content(bucket_name, "#{complex_subject.s3_folder_name}/#{s3_folder_name}/metadata.json", filter_metadata_json_file(self))
   end
 
-  def update_crc_dataset_date_modified
-    crc_dataset.update(date_modified: DateTime.now)
-  end
 
   def fix_duplication_of_title
     return true unless self.is_imported
diff --git a/hyrax/app/models/complex_subject.rb b/hyrax/app/models/complex_subject.rb
index 01ffbc5e..6f78ca2f 100644
--- a/hyrax/app/models/complex_subject.rb
+++ b/hyrax/app/models/complex_subject.rb
@@ -19,10 +19,11 @@ class ComplexSubject < ActiveRecord::Base
   validates :s3_folder_name, uniqueness: { scope: :parent_source_identifier,  message: ->(object, data) { I18n.t('rdms.errors.complex_subject.title.uniqueness', value: object.s3_folder_name) } }, unless: -> { self.is_imported }
 
   before_validation :set_s3_folder_name
-  after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified, :relocate_files_on_folder_name_change, :save_meta_json_in_s3
+  after_save :save_metadata_as_json_in_s3, :update_crc_dataset_date_modified_and_files_data, :relocate_files_on_folder_name_change, :save_meta_json_in_s3
   after_commit :fix_duplication_of_title, on: :create
   after_commit :clear_s3_and_all_associated_objects, on: [:destroy]
 
+
   scope :sort_by_title, ->(source_identifier) {
     where(parent_source_identifier: source_identifier)
     .sort_by { |item| item.subject_title.downcase.gsub(/[^a-zA-Z0-9]/, '0').split(/(\d+)/).map { |s| s.match(/\d+/) ? s.to_i : s } }
@@ -59,10 +60,6 @@ class ComplexSubject < ActiveRecord::Base
     s3.add_content(bucket_name, "#{s3_folder_name}/metadata.json", filter_metadata_json_file(self))
   end
 
-  def update_crc_dataset_date_modified
-    crc_dataset.update(date_modified: DateTime.now)
-  end
-
   def fix_duplication_of_title
     return true unless self.is_imported
 
diff --git a/hyrax/app/models/concerns/external_services.rb b/hyrax/app/models/concerns/external_services.rb
index 00072817..c2360137 100644
--- a/hyrax/app/models/concerns/external_services.rb
+++ b/hyrax/app/models/concerns/external_services.rb
@@ -50,5 +50,20 @@ module ExternalServices
       s3.add_content(bucket_name, "metadata.json", filter_metadata_json_file(self_object))
     end
 
+    # Refreshes files_size and format from this work's S3 bucket; date_modified is
+    # only bumped when either value differs from what is already stored.
+    def set_format_and_size
+      return unless self.persisted?
+      storage = S3StorageService.new
+      storage.init_client
+      bucket = storage.sanitise_name(self.id)
+      return unless storage.bucket_exists?(bucket)
+
+      _objects, combined_size, formats = storage.list_all_objects(bucket, nil, true)
+      return true if self.format == formats && self.files_size == [combined_size]
+      self.files_size = [combined_size]
+      self.format = formats
+      self.date_modified = DateTime.now
+    end
   end
 end
\ No newline at end of file
diff --git a/hyrax/app/models/concerns/s3_file_handleable.rb b/hyrax/app/models/concerns/s3_file_handleable.rb
index b8774233..0cc54d75 100644
--- a/hyrax/app/models/concerns/s3_file_handleable.rb
+++ b/hyrax/app/models/concerns/s3_file_handleable.rb
@@ -51,6 +51,17 @@ module S3FileHandleable
 
   private
 
+  # Recomputes the CRC dataset's files_size and format from its S3 bucket and
+  # stamps date_modified with the current time. No-op when there is no crc_dataset.
+  def update_crc_dataset_date_modified_and_files_data
+    return true unless crc_dataset.present?
+    storage = S3StorageService.new.tap(&:init_client)
+    bucket = storage.sanitise_name(crc_dataset.id)
+    _objects, combined_size, formats = storage.list_all_objects(bucket, nil, true)
+    crc_dataset.update(date_modified: DateTime.now, files_size: [combined_size], format: formats)
+  end
+
+
   def prepare_json(crc_dataset, subject, session, modality)
     @meta = {}
     data_from_crc_dataset(crc_dataset)
diff --git a/hyrax/app/models/crc_dataset.rb b/hyrax/app/models/crc_dataset.rb
index 9926c110..5e9c5f4f 100755
--- a/hyrax/app/models/crc_dataset.rb
+++ b/hyrax/app/models/crc_dataset.rb
@@ -12,10 +12,13 @@ class CrcDataset < ActiveFedora::Base
   # self.valid_child_concerns = []
   validates :title, presence: { message: 'Your CRC dataset must have a title.' }
   validate :validate_parent_collection
+
+  before_save :set_format_and_size
   after_save :save_metadata_as_json_in_s3, :save_meta_json_in_s3
   after_create :set_default_source_and_tombstone_status, :set_default_values, :register_ark
   after_destroy :clear_s3_and_all_associated_objects
 
+
   CRC_FILTER_FACET_FIELDS = [
     "year_recorded_sim",
     "complex_subject_species_sim",
diff --git a/hyrax/app/models/dataset.rb b/hyrax/app/models/dataset.rb
index db456a6c..793d4f40 100755
--- a/hyrax/app/models/dataset.rb
+++ b/hyrax/app/models/dataset.rb
@@ -10,6 +10,7 @@ class Dataset < ActiveFedora::Base
   self.valid_child_concerns = [Dataset]
   validates :title, presence: { message: 'Your dataset must have a title.' }
 
+  before_save :set_format_and_size
   after_save :save_metadata_as_json_in_s3
   after_create :set_default_tombstone_status
   after_create :register_ark
@@ -95,7 +96,15 @@ class Dataset < ActiveFedora::Base
   property :complex_identifier, predicate: ::RDF::Vocab::Rdms.identifier, class_name:"ComplexIdentifier"
 
   # size and format to be obtained from the files attached
+  property :format, predicate: ::RDF::Vocab::Rdms.format do |index|
+    index.as :symbol
+  end
 
+  property :files_size, predicate: ::RDF::Vocab::Rdms.size do |index|
+    index.type :integer
+    index.as :stored_searchable
+  end
+  
   # description and abstract is in the basic metadata. Ignoring other descriptions for now.
 
   # Geolocation - would this be used? Ignoring for now.
diff --git a/hyrax/app/models/file_set.rb b/hyrax/app/models/file_set.rb
index 1ca7d46c..52a4c7e4 100644
--- a/hyrax/app/models/file_set.rb
+++ b/hyrax/app/models/file_set.rb
@@ -3,11 +3,12 @@ require "./lib/vocabularies/rdms"
 # Generated by hyrax:models:install
 class FileSet < ActiveFedora::Base
   self.indexer = CrcFileSetIndexer
+  include S3FileHandleable
 
   after_save :create_file_set_with_fast_load_meta_data
   after_destroy :destroy_set_meta_data
 
-  property :for_complex_identifier, predicate: ::RDF::Vocab::Rdms.forComplexIdentifier, multiple: false  do |index|
+    property :for_complex_identifier, predicate: ::RDF::Vocab::Rdms.forComplexIdentifier, multiple: false  do |index|
     index.as :stored_searchable, :facetable
   end
 
@@ -76,6 +77,11 @@ class FileSet < ActiveFedora::Base
     metadata.work_model_name = work_type
     metadata.file_set_title = self.title&.first || self.label
     metadata.save
+
+    # Re-save the parent work: its before_save callback
+    # (ExternalServices#set_format_and_size) refreshes format and files_size and,
+    # when they changed, bumps date_modified so that the download will not miss any files.
+    parent.save if parent.present?
   end
 
   def destroy_set_meta_data
diff --git a/hyrax/app/services/s3_storage_service.rb b/hyrax/app/services/s3_storage_service.rb
index 5b189505..30adbdfa 100644
--- a/hyrax/app/services/s3_storage_service.rb
+++ b/hyrax/app/services/s3_storage_service.rb
@@ -69,8 +69,9 @@ class S3StorageService
     return contents, total_size
   end
 
-  def list_all_objects(bucket_name, prefix = nil)
+  def list_all_objects(bucket_name, prefix = nil, format = false)
     list_of_objects = []
+    list_of_format = []
     total_size = 0
     resp = @s3_client.list_objects_v2(
       bucket: bucket_name,
@@ -85,13 +86,21 @@ class S3StorageService
           size: object.size,
           last_modified_date: object.last_modified
         }
-        total_size += object.size
+
+        unless object[:key].include?("system_metadata.json")
+          total_size += object.size
+          list_of_format << File.extname(object.key)
+        end
       end
       break unless resp.next_page?
 
       resp = resp.next_page
     end
-    return list_of_objects, total_size
+    if format
+      return list_of_objects, total_size, list_of_format.uniq
+    else
+      return list_of_objects, total_size
+    end
   end
 
   def get_content(bucket_name, object_key, local_file_path)
diff --git a/hyrax/app/views/hyrax/base/_download_all.html.erb b/hyrax/app/views/hyrax/base/_download_all.html.erb
index 8c2ed2bf..eb4d312e 100644
--- a/hyrax/app/views/hyrax/base/_download_all.html.erb
+++ b/hyrax/app/views/hyrax/base/_download_all.html.erb
@@ -1,3 +1,6 @@
 <div class="download-all">
+  <%# Stored total size of the work's files picks the tooltip text: zip message below the limit, shell-script message otherwise. %><% total_size = (@crc_dataset || @dataset).files_size.first || 0 %>
+  <% message = total_size < ENV.fetch('DOWNLOAD_FILES_SIZE_LIMIT', '100000000').to_i ? '.zip_file_message' : '.shell_file_message' %>
   <%= link_to t(:'hyrax.download_all'), main_app.download_all_path(presenter.id), disabled: download_all_button_available?(presenter.id) ? false : true, id: "download-all", class: "btn btn-default matomo_download", data:{ document_id: presenter.id } %>
+  <span class="fa fa-info-circle download-info" data-toggle="tooltip" title="<%= t(message, size: number_to_human_size(total_size)) %>" aria-hidden="true"></span>
 </div>
\ No newline at end of file
diff --git a/hyrax/config/locales/hyrax.en.yml b/hyrax/config/locales/hyrax.en.yml
index c0c2836b..f161600a 100644
--- a/hyrax/config/locales/hyrax.en.yml
+++ b/hyrax/config/locales/hyrax.en.yml
@@ -319,6 +319,9 @@ en:
         confirm_delete: "Confirm Delete"
         restore: "Restore"
         edit: "Edit work (dataset)"
+      download_all:
+        zip_file_message: "Download file size: %{size}"
+        shell_file_message: "You will be downloading a shell script, which you can use to download the files. The size of the complete download will be %{size}. We will send you a notification when the shell script is ready for download."
     batch:
       edit:
         apply_changes_to: "Changes will be applied to: (%{x_number_of} works (datasets))"
diff --git a/hyrax/lib/tasks/set_format_and_size_to_works.rake b/hyrax/lib/tasks/set_format_and_size_to_works.rake
new file mode 100644
index 00000000..d7eab24d
--- /dev/null
+++ b/hyrax/lib/tasks/set_format_and_size_to_works.rake
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+namespace :rdms do
+  desc "Set format and size to works. usage: rdms:set_format_and_size_to_works"
+  task set_format_and_size_to_works: :environment do
+    logger = Logger.new('set_format_and_size.log')
+    # Re-save every Dataset and CrcDataset: the before_save callback
+    # ExternalServices#set_format_and_size then fills in format and files_size.
+    dataset_errors = {}
+    dataset_count = 0
+    Dataset.all.each do |dataset|
+      begin
+        dataset.save! # save! raises on validation failure; plain save returns false and was counted as a success
+        dataset_count += 1
+      rescue => err
+        dataset_errors[dataset.id] = err.message
+        logger.error("Error saving #{dataset.id}: #{err.message}")
+      end
+    end
+    logger.info("Number of datasets saved successfully: #{dataset_count}")
+    logger.error("Number of datasets with error saving: #{dataset_errors.size}")
+    crc_dataset_errors = {}
+    crc_dataset_count = 0
+    CrcDataset.all.each do |crc_dataset|
+      begin
+        crc_dataset.save! # same reasoning as above: surface invalid records to the rescue below
+        crc_dataset_count += 1
+      rescue => err
+        crc_dataset_errors[crc_dataset.id] = err.message
+        logger.error("Error saving #{crc_dataset.id}: #{err.message}")
+      end
+    end
+    logger.info("Number of CRC datasets saved successfully: #{crc_dataset_count}")
+    logger.error("Number of CRC datasets with error saving: #{crc_dataset_errors.size}")
+  end
+end
\ No newline at end of file
-- 
GitLab