Skip to contents

Writes a list of files to a given location. If to does not exist, it is created, and if filenames is not provided, default standardized names are chosen.

Usage

write_standardized_list(
  df_list,
  filenames,
  to,
  write = TRUE,
  return_path = ifelse(write, FALSE, TRUE),
  verbose = TRUE,
  logger = NA
)

Arguments

df_list

The list of standardized files to write. If the list is named, the subdirectory structure in the names will be used to replicate the subdirectory structure in the destination.

filenames

(Optional) vector of customized file names.

to

Destination folder to write to.

write

Should the result be written or only the path returned?

return_path

Should the path be returned?

verbose

Should messages be displayed when creating a folder/file?

logger

A log4r logger object if you want logging (can be created with create_logger); otherwise NA.

Value

Writes the files to the paths to/filename1, to/filename2, etc. If return_path == TRUE, also returns those paths (to/filename1, to/filename2, ...).

Examples

# Example with a subdirectory structure (inferred from the filenames)
df_list <- list(zooniverse, digikam, traptagger)
names(df_list) <- c("APN/APN.csv", "MOK/MOK.csv", "ATH/ATH.csv")
std_list <- standardize_snapshot_list(df_list, standard)
#> 3 files to standardize.
#> Standardizing file APN/APN.csv (1/3) ---
#> Initial file: 24 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> APN	APN_13U	1	1	APN_13U#1#50	kudu	2017-07-25	02:15:22
#> APN	APN_6U	1	1	APN_6U#1#40	steenbok	2017-07-17	08:56:18
#> APN	APN_6U	1	2	APN_6U#2#5	zebraburchells	2017-08-07	04:49:29
#> APN	APN_DW	1	2	APN_DW#2#94	impala	2017-07-06	18:16:19
#> APN	APN_DW	1	1	APN_DW#1#210	zebraburchells	2017-07-21	18:09:16
#> Standardizing file MOK/MOK.csv (2/3) ---
#> Initial file: 22 columns, 100 rows.
#> Standardizing columns
#> Match found in column names: renaming column metadata_Numberofindividuals into metadata_NumberOfIndividuals
#> Standardizing dates/times
#> Getting location code for Digikam data
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> MOK	MOK_A09	NA	1	MOK_A09#1#1	giraffe	2018-07-08	12:15:34
#> MOK	MOK_A09	NA	1	MOK_A09#1#2	springbok	2018-08-26	10:45:55
#> MOK	MOK_A09	NA	1	MOK_A09#1#3	unresolvable	2018-09-02	18:11:28
#> MOK	MOK_B07	NA	1	MOK_B07#1#1	zebraburchells	2018-06-28	07:49:42
#> MOK	MOK_B07	NA	1	MOK_B07#1#2	gemsbok	2018-08-19	09:11:55
#> Standardizing file ATH/ATH.csv (3/3) ---
#> Initial file: 9 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> ATH	ATH_B04	NA	1	ATH_B04#1#242	blank	2020-03-08	12:33:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#95	zebraburchells	2020-03-19	07:05:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#40	zebraburchells	2020-03-24	12:03:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#275	wildebeestblue	2020-03-26	13:49:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#315	blank	2020-04-15	14:28:00

# Don't write data
write_standardized_list(std_list, 
                        to = "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data", 
                        write = FALSE)
#> [1] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/APN/APN_S1_R1-2.csv"
#> [2] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/MOK/MOK_SNA_R1.csv" 
#> [3] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/ATH/ATH_SNA_R1.csv" 
# Don't write data and use custom name
write_standardized_list(std_list, 
                        to = "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data",
                        filenames = c("myname1.csv", "myname2.csv", "myname3.csv"),
                        write = FALSE)
#> [1] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/APN/myname1.csv"
#> [2] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/MOK/myname2.csv"
#> [3] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/ATH/myname3.csv"
# Write files to temporary location
write_standardized_list(std_list, to = tempdir())
#> Creating folder /tmp/Rtmp3MYUYp/APN
#> Writing file APN/APN.csv -> APN/APN_S1_R1-2.csv (1/3) ---
#> Creating folder /tmp/Rtmp3MYUYp/MOK
#> Writing file MOK/MOK.csv -> MOK/MOK_SNA_R1.csv (2/3) ---
#> Creating folder /tmp/Rtmp3MYUYp/ATH
#> Writing file ATH/ATH.csv -> ATH/ATH_SNA_R1.csv (3/3) ---
                        
# Without a subdirectory structure (and without list names)
df_list <- list(zooniverse, digikam, traptagger)
names(df_list)[2] <- "MOK"
std_list <- standardize_snapshot_list(df_list, standard)
#> 3 files to standardize.
#> Standardizing file NA (1/3) ---
#> Initial file: 24 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> APN	APN_13U	1	1	APN_13U#1#50	kudu	2017-07-25	02:15:22
#> APN	APN_6U	1	1	APN_6U#1#40	steenbok	2017-07-17	08:56:18
#> APN	APN_6U	1	2	APN_6U#2#5	zebraburchells	2017-08-07	04:49:29
#> APN	APN_DW	1	2	APN_DW#2#94	impala	2017-07-06	18:16:19
#> APN	APN_DW	1	1	APN_DW#1#210	zebraburchells	2017-07-21	18:09:16
#> Standardizing file MOK (2/3) ---
#> Initial file: 22 columns, 100 rows.
#> Standardizing columns
#> Match found in column names: renaming column metadata_Numberofindividuals into metadata_NumberOfIndividuals
#> Standardizing dates/times
#> Getting location code for Digikam data
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> MOK	MOK_A09	NA	1	MOK_A09#1#1	giraffe	2018-07-08	12:15:34
#> MOK	MOK_A09	NA	1	MOK_A09#1#2	springbok	2018-08-26	10:45:55
#> MOK	MOK_A09	NA	1	MOK_A09#1#3	unresolvable	2018-09-02	18:11:28
#> MOK	MOK_B07	NA	1	MOK_B07#1#1	zebraburchells	2018-06-28	07:49:42
#> MOK	MOK_B07	NA	1	MOK_B07#1#2	gemsbok	2018-08-19	09:11:55
#> Standardizing file NA (3/3) ---
#> Initial file: 9 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> ATH	ATH_B04	NA	1	ATH_B04#1#242	blank	2020-03-08	12:33:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#95	zebraburchells	2020-03-19	07:05:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#40	zebraburchells	2020-03-24	12:03:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#275	wildebeestblue	2020-03-26	13:49:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#315	blank	2020-04-15	14:28:00

# Write files to temporary location
write_standardized_list(std_list, to = tempdir())
#> Writing file 1 -> APN_S1_R1-2.csv (1/3) ---
#> Writing file MOK -> MOK_SNA_R1.csv (2/3) ---
#> Writing file 3 -> ATH_SNA_R1.csv (3/3) ---