From d0f4e7bd1be8830914aa3b78ab7d17dfb4e30e97 Mon Sep 17 00:00:00 2001
From: Anusha Ranganathan <anusha@cottagelabs.com>
Date: Thu, 16 Jan 2025 18:28:46 +0530
Subject: [PATCH 1/3] Replaced Dir.glob with Find to also get list of hidden files

---
Review note: the added `input_list.sort` returned a new array that was
discarded, so the list stayed in Find's traversal order. The hunk below uses
`sort!` so the list is sorted in place, matching the ordering the removed
`Dir.glob(...).sort` chain produced.

 .../app/services/verify_crc_dataset_import.rb | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/hyrax/app/services/verify_crc_dataset_import.rb b/hyrax/app/services/verify_crc_dataset_import.rb
index 539141f6..b74972bf 100644
--- a/hyrax/app/services/verify_crc_dataset_import.rb
+++ b/hyrax/app/services/verify_crc_dataset_import.rb
@@ -66,11 +66,11 @@ class VerifyCRCDatasetImport
       msg = "Error: Import entry #{entry_id} not found"
       return false, msg, paths_compared, nil
     end
-    paths_compared[source_identifier: entry.identifier]
+    paths_compared[:source_identifier] = entry.identifier
 
     # Check import id
     if import_id != entry.importerexporter_id
-      msg = "Error: Importer id #{import_id} does not match id associated with entry #{entry_id}"
+      msg = "Error: Importer id #{import_id} does not match id associated with entry #{entry.importerexporter_id}"
       return false, msg, paths_compared, nil
     end
 
@@ -124,11 +124,16 @@ class VerifyCRCDatasetImport
 
     # Get files to compare
 
-    # -- Get list of files from import directory
-    input_list = Dir.glob(File.join(import_path, '**', '*')).
-      reject {|fn| File.directory?(fn) }.
-      reject{|fn| self.restricted_file_names?(fn.split('/')[-1])}.
-      map{ |fn| fn.sub(import_path, '')}.sort
+    # -- Get list of files from import directory including hidden files
+    input_list = []
+    Find.find(import_path) do |fp|
+      unless FileTest.directory?(fp)
+        unless self.restricted_file_names?(fp.split('/')[-1])
+          input_list << fp.sub(import_path, '')
+        end
+      end
+    end
+    input_list.sort!
     # ---- downcase folder name
     sanitised_input_list = {}
     input_list.each do |fp|
-- 
GitLab

From 93a485859d360f9f470eb5370f5388e5e6e73794 Mon Sep 17 00:00:00 2001
From: Anusha Ranganathan <anusha@cottagelabs.com>
Date: Thu, 16 Jan 2025 18:30:06 +0530
Subject: [PATCH 2/3] Add desc to task and convert args to integer

---
 hyrax/lib/tasks/verify_crc_1280_import.rake | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hyrax/lib/tasks/verify_crc_1280_import.rake b/hyrax/lib/tasks/verify_crc_1280_import.rake
index 3297ccb5..fe04e530 100644
--- a/hyrax/lib/tasks/verify_crc_1280_import.rake
+++ b/hyrax/lib/tasks/verify_crc_1280_import.rake
@@ -2,17 +2,18 @@ require 'json'
 
 namespace :rdms do
   namespace :crc_1280_import do
-    desc 'Verify CRC1280 import or verify CRC1280 import of an experiment. This will compare the files in the file path with the objects in the S3 bucket"]'
-    task :"verify_import", [:import_id] => :environment do |task, args|
-      puts "Verifying import #{args.import_id}"
-      import_status, report_paths = VerifyCRCDatasetImport.verify_import(args.import_id)
+    desc 'Verify all experiments in CRC1280 import. This will compare the files in the file path with the objects in the S3 bucket'
+    task :verify_import, [:import_id] => :environment do |task, args|
+      puts "Verifying import #{args.import_id.to_i}"
+      import_status, report_paths = VerifyCRCDatasetImport.verify_import(args.import_id.to_i)
       puts "import status : #{import_status}"
       puts "Detailed reports are available at:"
       puts JSON.pretty_generate(report_paths)
     end
-    task :"verify_experiment", [:import_id, :entry_id] => :environment do |task, args|
-      puts "Verifying import #{args.import_id}"
-      experiment_status, report_path = VerifyCRCDatasetImport.verify_experiment_and_report(args.import_id, args.entry_id)
+    desc 'Verify experiment in CRC1280 import. This will compare the files in the file path with the objects in the S3 bucket'
+    task :verify_experiment, [:import_id, :entry_id] => :environment do |task, args|
+      puts "Verifying experiment #{args.entry_id.to_i} in import #{args.import_id.to_i}"
+      experiment_status, report_path = VerifyCRCDatasetImport.verify_experiment_and_report(args.import_id.to_i, args.entry_id.to_i)
       puts "Experiment import status : #{experiment_status}"
       puts "Detailed reports is available at: #{report_path}"
     end
-- 
GitLab

From 39f662e3b00abcd00e7a33d278aca7a50d0fc8fa Mon Sep 17 00:00:00 2001
From: Anusha Ranganathan <anusha@cottagelabs.com>
Date: Thu, 16 Jan 2025 21:19:16 +0530
Subject: [PATCH 3/3] require find

---
 hyrax/app/services/verify_crc_dataset_import.rb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hyrax/app/services/verify_crc_dataset_import.rb b/hyrax/app/services/verify_crc_dataset_import.rb
index b74972bf..aea5668a 100644
--- a/hyrax/app/services/verify_crc_dataset_import.rb
+++ b/hyrax/app/services/verify_crc_dataset_import.rb
@@ -1,5 +1,6 @@
 require 'json'
 require 'csv'
+require 'find'
 
 class VerifyCRCDatasetImport
   extend ComplexHelper
@@ -126,7 +127,7 @@ class VerifyCRCDatasetImport
 
     # -- Get list of files from import directory including hidden files
     input_list = []
-    Find.find(import_path) do |fp|
+    ::Find.find(import_path) do |fp|
       unless FileTest.directory?(fp)
         unless self.restricted_file_names?(fp.split('/')[-1])
           input_list << fp.sub(import_path, '')
-- 
GitLab