Skip to contents

Writes a list of files to a given location. If to does not exist, it is created, and if filenames is not provided, default standardized names are chosen.

Usage

write_standardized_list(
  df_list,
  filenames,
  to,
  write = TRUE,
  return_path = ifelse(write, FALSE, TRUE),
  verbose = TRUE,
  logger = NA
)

Arguments

df_list

The list of standardized files to write. If the list is named, the subdirectory structure in the names will be used to replicate the subdirectory structure in the destination.

filenames

(Optional) vector of customized file names.

to

Destination folder to write to.

write

Should the result be written or only the path returned?

return_path

Should the path be returned?

verbose

Should messages be displayed when creating a folder/file?

logger

A log4r logger object if you want logging (can be created with create_logger); otherwise NA.

Value

Writes the files to the paths to/filename1, to/filename2, etc. If return_path == TRUE, also returns those paths (to/filename1, to/filename2, ...).

Examples

# Example with a subdirectory structure (inferred from the filenames)
df_list <- list(zooniverse, digikam, traptagger)
names(df_list) <- c("APN/APN.csv", "MOK/MOK.csv", "ATH/ATH.csv")
std_list <- standardize_snapshot_list(df_list, standard)
#> 3 files to standardize.
#> Standardizing file APN/APN.csv (1/3) ---
#> Initial file: 24 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> APN	APN_13U	1	1	APN_13U#1#50	kudu	2017-07-25	02:15:22
#> APN	APN_6U	1	1	APN_6U#1#40	steenbok	2017-07-17	08:56:18
#> APN	APN_6U	1	2	APN_6U#2#5	zebraburchells	2017-08-07	04:49:29
#> APN	APN_DW	1	2	APN_DW#2#94	impala	2017-07-06	18:16:19
#> APN	APN_DW	1	1	APN_DW#1#210	zebraburchells	2017-07-21	18:09:16
#> Standardizing file MOK/MOK.csv (2/3) ---
#> Initial file: 22 columns, 100 rows.
#> Standardizing columns
#> Match found in column names: renaming column metadata_Numberofindividuals into metadata_NumberOfIndividuals
#> Standardizing dates/times
#> Getting location code for Digikam data
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> MOK	MOK_A09	NA	1	MOK_A09#1#1	giraffe	2018-07-08	12:15:34
#> MOK	MOK_A09	NA	1	MOK_A09#1#2	springbok	2018-08-26	10:45:55
#> MOK	MOK_A09	NA	1	MOK_A09#1#3	unresolvable	2018-09-02	18:11:28
#> MOK	MOK_B07	NA	1	MOK_B07#1#1	zebraburchells	2018-06-28	07:49:42
#> MOK	MOK_B07	NA	1	MOK_B07#1#2	gemsbok	2018-08-19	09:11:55
#> Standardizing file ATH/ATH.csv (3/3) ---
#> Initial file: 9 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> ATH	ATH_B04	NA	1	ATH_B04#1#242	blank	2020-03-08	12:33:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#95	zebraburchells	2020-03-19	07:05:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#40	zebraburchells	2020-03-24	12:03:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#275	wildebeestblue	2020-03-26	13:49:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#315	blank	2020-04-15	14:28:00

# Don't write data
write_standardized_list(std_list, 
                        to = "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data", 
                        write = FALSE)
#> [1] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/APN/APN_S1_R1-2.csv"
#> [2] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/MOK/MOK_SNA_R1.csv" 
#> [3] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/ATH/ATH_SNA_R1.csv" 
# Don't write data and use custom name
write_standardized_list(std_list, 
                        to = "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data",
                        filenames = c("myname1.csv", "myname2.csv", "myname3.csv"),
                        write = FALSE)
#> [1] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/APN/myname1.csv"
#> [2] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/MOK/myname2.csv"
#> [3] "/home/lnicvert/Documents/PhD/Snapshot/data/2_standardized_data/ATH/myname3.csv"
# Write files to temporary location
write_standardized_list(std_list, to = tempdir())
#> Creating folder /tmp/Rtmp3MYUYp/APN
#> Writing file APN/APN.csv -> APN/APN_S1_R1-2.csv (1/3) ---
#> Creating folder /tmp/Rtmp3MYUYp/MOK
#> Writing file MOK/MOK.csv -> MOK/MOK_SNA_R1.csv (2/3) ---
#> Creating folder /tmp/Rtmp3MYUYp/ATH
#> Writing file ATH/ATH.csv -> ATH/ATH_SNA_R1.csv (3/3) ---
                        
# Without a subdirectory structure (and without list names)
df_list <- list(zooniverse, digikam, traptagger)
names(df_list)[2] <- "MOK"
std_list <- standardize_snapshot_list(df_list, standard)
#> 3 files to standardize.
#> Standardizing file NA (1/3) ---
#> Initial file: 24 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> APN	APN_13U	1	1	APN_13U#1#50	kudu	2017-07-25	02:15:22
#> APN	APN_6U	1	1	APN_6U#1#40	steenbok	2017-07-17	08:56:18
#> APN	APN_6U	1	2	APN_6U#2#5	zebraburchells	2017-08-07	04:49:29
#> APN	APN_DW	1	2	APN_DW#2#94	impala	2017-07-06	18:16:19
#> APN	APN_DW	1	1	APN_DW#1#210	zebraburchells	2017-07-21	18:09:16
#> Standardizing file MOK (2/3) ---
#> Initial file: 22 columns, 100 rows.
#> Standardizing columns
#> Match found in column names: renaming column metadata_Numberofindividuals into metadata_NumberOfIndividuals
#> Standardizing dates/times
#> Getting location code for Digikam data
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> MOK	MOK_A09	NA	1	MOK_A09#1#1	giraffe	2018-07-08	12:15:34
#> MOK	MOK_A09	NA	1	MOK_A09#1#2	springbok	2018-08-26	10:45:55
#> MOK	MOK_A09	NA	1	MOK_A09#1#3	unresolvable	2018-09-02	18:11:28
#> MOK	MOK_B07	NA	1	MOK_B07#1#1	zebraburchells	2018-06-28	07:49:42
#> MOK	MOK_B07	NA	1	MOK_B07#1#2	gemsbok	2018-08-19	09:11:55
#> Standardizing file NA (3/3) ---
#> Initial file: 9 columns, 100 rows.
#> Standardizing columns
#> Standardizing dates/times
#> Fill capture info
#> Cleaning location/camera, species and columns values
#> Final file: 27 columns, 100 rows. Here is a sneak peek:
#> locationID	cameraID	season	roll	eventID	snapshotName	eventDate	eventTime
#> ATH	ATH_B04	NA	1	ATH_B04#1#242	blank	2020-03-08	12:33:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#95	zebraburchells	2020-03-19	07:05:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#40	zebraburchells	2020-03-24	12:03:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#275	wildebeestblue	2020-03-26	13:49:00
#> ATH	ATH_B04	NA	1	ATH_B04#1#315	blank	2020-04-15	14:28:00

# Write files to temporary location
write_standardized_list(std_list, to = tempdir())
#> Writing file 1 -> APN_S1_R1-2.csv (1/3) ---
#> Writing file MOK -> MOK_SNA_R1.csv (2/3) ---
#> Writing file 3 -> ATH_SNA_R1.csv (3/3) ---