From d0f4e7bd1be8830914aa3b78ab7d17dfb4e30e97 Mon Sep 17 00:00:00 2001
From: Anusha Ranganathan <anusha@cottagelabs.com>
Date: Thu, 16 Jan 2025 18:28:46 +0530
Subject: [PATCH 1/3] Replaced Dir.glob with Find to also get list of hidden
 files

---
 .../app/services/verify_crc_dataset_import.rb | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/hyrax/app/services/verify_crc_dataset_import.rb b/hyrax/app/services/verify_crc_dataset_import.rb
index 539141f6..b74972bf 100644
--- a/hyrax/app/services/verify_crc_dataset_import.rb
+++ b/hyrax/app/services/verify_crc_dataset_import.rb
@@ -66,11 +66,11 @@ class VerifyCRCDatasetImport
       msg = "Error: Import entry #{entry_id} not found"
       return false, msg, paths_compared, nil
     end
-    paths_compared[source_identifier: entry.identifier]
+    paths_compared[:source_identifier] = entry.identifier
 
     # Check import id
     if import_id != entry.importerexporter_id
-      msg = "Error: Importer id #{import_id} does not match id associated with entry #{entry_id}"
+      msg = "Error: Importer id #{import_id} does not match id associated with entry #{entry.importerexporter_id}"
       return false, msg, paths_compared, nil
     end
 
@@ -124,11 +124,16 @@ class VerifyCRCDatasetImport
 
     # Get files to compare
 
-    # -- Get list of files from import directory
-    input_list = Dir.glob(File.join(import_path, '**', '*')).
-      reject {|fn| File.directory?(fn) }.
-      reject{|fn| self.restricted_file_names?(fn.split('/')[-1])}.
-      map{ |fn| fn.sub(import_path, '')}.sort
+    # -- Get list of files from import directory including hidden files
+    input_list = []
+    Find.find(import_path) do |fp|
+      unless FileTest.directory?(fp)
+        unless self.restricted_file_names?(fp.split('/')[-1])
+          input_list << fp.sub(import_path, '')
+        end
+      end
+    end
+    input_list.sort! # sort in place: a bare #sort returns a new array that would be discarded
     # ---- downcase folder name
     sanitised_input_list = {}
     input_list.each do |fp|
-- 
GitLab


From 93a485859d360f9f470eb5370f5388e5e6e73794 Mon Sep 17 00:00:00 2001
From: Anusha Ranganathan <anusha@cottagelabs.com>
Date: Thu, 16 Jan 2025 18:30:06 +0530
Subject: [PATCH 2/3] Add desc to task and convert args to integer

---
 hyrax/lib/tasks/verify_crc_1280_import.rake | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hyrax/lib/tasks/verify_crc_1280_import.rake b/hyrax/lib/tasks/verify_crc_1280_import.rake
index 3297ccb5..fe04e530 100644
--- a/hyrax/lib/tasks/verify_crc_1280_import.rake
+++ b/hyrax/lib/tasks/verify_crc_1280_import.rake
@@ -2,17 +2,18 @@ require 'json'
 
 namespace :rdms do
   namespace :crc_1280_import do
-    desc 'Verify CRC1280 import or verify CRC1280 import of an experiment. This will compare the files in the file path with the objects in the S3 bucket"]'
-    task :"verify_import", [:import_id] => :environment do |task, args|
-      puts "Verifying import #{args.import_id}"
-      import_status, report_paths = VerifyCRCDatasetImport.verify_import(args.import_id)
+    desc 'Verify all experiments in CRC1280 import. This will compare the files in the file path with the objects in the S3 bucket'
+    task :verify_import, [:import_id] => :environment do |task, args|
+      puts "Verifying import #{args.import_id.to_i}"
+      import_status, report_paths = VerifyCRCDatasetImport.verify_import(args.import_id.to_i)
       puts "import status : #{import_status}"
       puts "Detailed reports are available at:"
       puts JSON.pretty_generate(report_paths)
     end
-    task :"verify_experiment", [:import_id, :entry_id] => :environment do |task, args|
-      puts "Verifying import #{args.import_id}"
-      experiment_status, report_path = VerifyCRCDatasetImport.verify_experiment_and_report(args.import_id, args.entry_id)
+    desc 'Verify experiment in CRC1280 import. This will compare the files in the file path with the objects in the S3 bucket'
+    task :verify_experiment, [:import_id, :entry_id] => :environment do |task, args|
+      puts "Verifying experiment #{args.entry_id.to_i} in import #{args.import_id.to_i}"
+      experiment_status, report_path = VerifyCRCDatasetImport.verify_experiment_and_report(args.import_id.to_i, args.entry_id.to_i)
       puts "Experiment import status : #{experiment_status}"
       puts "Detailed reports is available at: #{report_path}"
     end
-- 
GitLab


From 39f662e3b00abcd00e7a33d278aca7a50d0fc8fa Mon Sep 17 00:00:00 2001
From: Anusha Ranganathan <anusha@cottagelabs.com>
Date: Thu, 16 Jan 2025 21:19:16 +0530
Subject: [PATCH 3/3] require find

---
 hyrax/app/services/verify_crc_dataset_import.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hyrax/app/services/verify_crc_dataset_import.rb b/hyrax/app/services/verify_crc_dataset_import.rb
index b74972bf..aea5668a 100644
--- a/hyrax/app/services/verify_crc_dataset_import.rb
+++ b/hyrax/app/services/verify_crc_dataset_import.rb
@@ -1,5 +1,6 @@
 require 'json'
 require 'csv'
+require 'find'
 
 class VerifyCRCDatasetImport
   extend ComplexHelper
@@ -126,7 +127,7 @@ class VerifyCRCDatasetImport
 
     # -- Get list of files from import directory including hidden files
     input_list = []
-    Find.find(import_path) do |fp|
+    ::Find.find(import_path) do |fp|
       unless FileTest.directory?(fp)
         unless self.restricted_file_names?(fp.split('/')[-1])
           input_list << fp.sub(import_path, '')
-- 
GitLab