CRAN Package Check Results for Package qs

Last updated on 2026-01-08 12:49:57 CET.

Flavor Version Tinstall Tcheck Ttotal Status Flags
r-devel-linux-x86_64-debian-clang 0.27.3 34.68 267.55 302.23 ERROR
r-devel-linux-x86_64-debian-gcc 0.27.3 35.75 230.27 266.02 ERROR
r-devel-linux-x86_64-fedora-clang 0.27.3 58.00 423.64 481.64 ERROR
r-devel-linux-x86_64-fedora-gcc 0.27.3 98.00 412.89 510.89 ERROR
r-devel-windows-x86_64 0.27.3 101.00 300.00 401.00 ERROR
r-patched-linux-x86_64 0.27.3 47.31 267.13 314.44 ERROR
r-release-linux-x86_64 0.27.3 48.27 274.47 322.74 ERROR
r-release-macos-arm64 0.27.3 NOTE
r-release-macos-x86_64 0.27.3 29.00 494.00 523.00 NOTE
r-release-windows-x86_64 0.27.3 95.00 278.00 373.00 ERROR
r-oldrel-macos-arm64 0.27.3 NOTE
r-oldrel-macos-x86_64 0.27.3 28.00 362.00 390.00 NOTE
r-oldrel-windows-x86_64 0.27.3 95.00 345.00 440.00 ERROR

Check Details

Version: 0.27.3
Check: compiled code
Result: WARN
File ‘qs/libs/qs.so’: Found non-API calls to R: ‘ATTRIB’, ‘CLOENV’, ‘ENCLOS’, ‘FRAME’, ‘HASHTAB’, ‘IS_S4_OBJECT’, ‘LEVELS’, ‘OBJECT’, ‘PRENV’, ‘Rf_allocSExp’, ‘SETLEVELS’, ‘SET_ATTRIB’, ‘SET_CLOENV’, ‘SET_ENCLOS’, ‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_OBJECT’, ‘SET_PRENV’, ‘SET_S4_OBJECT’, ‘SET_TRUELENGTH’.
These entry points may be removed soon: ‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_ENCLOS’, ‘SET_S4_OBJECT’, ‘FRAME’, ‘HASHTAB’, ‘IS_S4_OBJECT’, ‘CLOENV’, ‘ENCLOS’, ‘OBJECT’, ‘SET_CLOENV’, ‘LEVELS’, ‘SETLEVELS’.
Compiled code should not call non-API entry points in R. See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual, and section ‘Moving into C API compliance’ for issues with the use of non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc

Version: 0.27.3
Check: tests
Result: ERROR Running ‘correctness_testing.R’ [180s/184s] Running ‘qattributes_testing.R’ [41s/49s] Running ‘qsavemload_testing.R’ [2s/3s] Running the tests in ‘tests/qattributes_testing.R’ failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = 
str; + break; + case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.02378 s strings: 1, 0.0005799 s strings: 2, 0.01765 s strings: 4, 0.003014 s strings: 8, 0.004374 s strings: 31, 0.01917 s strings: 33, 0.00545 s strings: 32, 0.001835 s strings: 255, 0.02646 s strings: 257, 0.005518 s strings: 256, 0.005094 s strings: 65535, 0.004665 s strings: 65537, 0.005849 s strings: 65536, 0.007319 s strings: 1e+06, 0.01641 s Character Vectors: 0, 0.003385 s Character Vectors: 1, 0.0005538 s Character Vectors: 2, 0.005699 s Character Vectors: 4, 0.003768 s Character Vectors: 8, 0.0003051 s Character Vectors: 31, 0.03536 s 
Character Vectors: 33, 0.0001672 s Character Vectors: 32, 0.001968 s Character Vectors: 255, 0.004136 s Character Vectors: 257, 0.02079 s Character Vectors: 256, 0.0003208 s Character Vectors: 65535, 0.001663 s Character Vectors: 65537, 0.00722 s Character Vectors: 65536, 0.004782 s Stringfish: 0, 0.001074 s Stringfish: 1, 0.001376 s Stringfish: 2, 0.00685 s Stringfish: 4, 0.002638 s Stringfish: 8, 0.0001243 s Stringfish: 31, 0.005465 s Stringfish: 33, 0.001744 s Stringfish: 32, 0.00121 s Stringfish: 255, 0.001361 s Stringfish: 257, 0.002554 s Stringfish: 256, 0.00272 s Stringfish: 65535, 0.002784 s Stringfish: 65537, 0.002721 s Stringfish: 65536, 0.009487 s Integers: 0, 0.01689 s Integers: 1, 0.03006 s Integers: 2, 0.01194 s Integers: 4, 0.05128 s Integers: 8, 0.04973 s Integers: 31, 0.05942 s Integers: 33, 0.004774 s Integers: 32, 0.002767 s Integers: 255, 0.002491 s Integers: 257, 0.005227 s Integers: 256, 0.01257 s Integers: 65535, 0.01029 s Integers: 65537, 0.005593 s Integers: 65536, 0.007757 s Integers: 1e+06, 0.06102 s Numeric: 0, 0.00711 s Numeric: 1, 0.00549 s Numeric: 2, 0.008363 s Numeric: 4, 0.0263 s Numeric: 8, 0.01033 s Numeric: 31, 0.005927 s Numeric: 33, 0.009673 s Numeric: 32, 0.005593 s Numeric: 255, 0.00476 s Numeric: 257, 0.01141 s Numeric: 256, 0.03422 s Numeric: 65535, 0.005604 s Numeric: 65537, 0.04119 s Numeric: 65536, 0.006106 s Numeric: 1e+06, 0.1282 s Logical: 0, 0.005804 s Logical: 1, 0.02304 s Logical: 2, 0.01054 s Logical: 4, 0.0181 s Logical: 8, 0.009169 s Logical: 31, 0.016 s Logical: 33, 0.009169 s Logical: 32, 0.01002 s Logical: 255, 0.005998 s Logical: 257, 0.01839 s Logical: 256, 0.004688 s Logical: 65535, 0.001108 s Logical: 65537, 0.009999 s Logical: 65536, 0.005573 s Logical: 1e+06, 0.04388 s List: 0, 0.004389 s List: 1, 0.003987 s List: 2, 0.004515 s List: 4, 0.005397 s List: 8, 0.02565 s List: 31, 0.009431 s List: 33, 0.0009557 s List: 32, 0.007282 s List: 255, 0.0006683 s List: 257, 0.00766 s List: 256, 0.009923 s List: 
65535, 0.03243 s List: 65537, 0.04485 s List: 65536, 0.02964 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-devel-linux-x86_64-debian-clang

Version: 0.27.3
Check: tests
Result: ERROR Running ‘correctness_testing.R’ [159s/162s] Running ‘qattributes_testing.R’ [36s/46s] Running ‘qsavemload_testing.R’ [1s/2s] Running the tests in ‘tests/qattributes_testing.R’ failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = 
str; + break; + case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.01385 s strings: 1, 0.00415 s strings: 2, 0.004866 s strings: 4, 0.003556 s strings: 8, 0.007739 s strings: 31, 0.003561 s strings: 33, 0.00798 s strings: 32, 0.005587 s strings: 255, 0.001462 s strings: 257, 0.004565 s strings: 256, 0.006017 s strings: 65535, 0.01524 s strings: 65537, 0.006865 s strings: 65536, 0.03885 s strings: 1e+06, 0.003119 s Character Vectors: 0, 0.002733 s Character Vectors: 1, 0.004914 s Character Vectors: 2, 0.03695 s Character Vectors: 4, 0.002725 s Character Vectors: 8, 0.001717 s Character Vectors: 31, 0.004875 s 
Character Vectors: 33, 0.0001277 s Character Vectors: 32, 0.005387 s Character Vectors: 255, 0.0001062 s Character Vectors: 257, 0.00532 s Character Vectors: 256, 0.005583 s Character Vectors: 65535, 0.05254 s Character Vectors: 65537, 0.006781 s Character Vectors: 65536, 0.009939 s Stringfish: 0, 0.004668 s Stringfish: 1, 0.004071 s Stringfish: 2, 0.002777 s Stringfish: 4, 0.003392 s Stringfish: 8, 0.0007089 s Stringfish: 31, 0.004069 s Stringfish: 33, 0.002767 s Stringfish: 32, 0.001427 s Stringfish: 255, 0.001433 s Stringfish: 257, 0.004301 s Stringfish: 256, 0.001146 s Stringfish: 65535, 0.002954 s Stringfish: 65537, 0.003759 s Stringfish: 65536, 0.005888 s Integers: 0, 0.001755 s Integers: 1, 0.006501 s Integers: 2, 0.009061 s Integers: 4, 0.004219 s Integers: 8, 0.006278 s Integers: 31, 0.00503 s Integers: 33, 0.002117 s Integers: 32, 0.006638 s Integers: 255, 0.003403 s Integers: 257, 0.0009185 s Integers: 256, 0.00284 s Integers: 65535, 0.01466 s Integers: 65537, 0.006188 s Integers: 65536, 0.005676 s Integers: 1e+06, 0.05608 s Numeric: 0, 0.001954 s Numeric: 1, 0.0009336 s Numeric: 2, 0.005611 s Numeric: 4, 0.005065 s Numeric: 8, 0.008042 s Numeric: 31, 0.002639 s Numeric: 33, 0.00474 s Numeric: 32, 0.0008355 s Numeric: 255, 0.006053 s Numeric: 257, 0.05376 s Numeric: 256, 0.008816 s Numeric: 65535, 0.01545 s Numeric: 65537, 0.006465 s Numeric: 65536, 0.01013 s Numeric: 1e+06, 0.03541 s Logical: 0, 0.06054 s Logical: 1, 0.009596 s Logical: 2, 0.00951 s Logical: 4, 0.01054 s Logical: 8, 0.001554 s Logical: 31, 0.004032 s Logical: 33, 0.007206 s Logical: 32, 0.002662 s Logical: 255, 0.0005469 s Logical: 257, 0.008487 s Logical: 256, 0.01311 s Logical: 65535, 0.003826 s Logical: 65537, 0.003041 s Logical: 65536, 0.01483 s Logical: 1e+06, 0.0392 s List: 0, 0.0151 s List: 1, 0.007898 s List: 2, 0.004359 s List: 4, 0.00533 s List: 8, 0.001892 s List: 31, 0.0004077 s List: 33, 0.01176 s List: 32, 0.005048 s List: 255, 0.002898 s List: 257, 0.0346 s List: 256, 
0.02893 s List: 65535, 0.0219 s List: 65537, 0.03607 s List: 65536, 0.02838 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-devel-linux-x86_64-debian-gcc

Version: 0.27.3
Check: tests
Result: ERROR Running ‘correctness_testing.R’ [5m/12m] Running ‘qattributes_testing.R’ [56s/147s] Running ‘qsavemload_testing.R’ Running the tests in ‘tests/qattributes_testing.R’ failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = str; + 
break; + case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.03374 s strings: 1, 0.01021 s strings: 2, 0.01755 s strings: 4, 0.002539 s strings: 8, 0.01437 s strings: 31, 0.004556 s strings: 33, 0.007298 s strings: 32, 0.01685 s strings: 255, 0.004105 s strings: 257, 0.001192 s strings: 256, 0.009898 s strings: 65535, 0.005106 s strings: 65537, 0.03797 s strings: 65536, 0.001991 s strings: 1e+06, 0.008437 s Character Vectors: 0, 0.001437 s Character Vectors: 1, 0.04018 s Character Vectors: 2, 0.008333 s Character Vectors: 4, 0.002292 s Character Vectors: 8, 0.0001578 s Character Vectors: 31, 0.001188 s 
Character Vectors: 33, 0.004526 s Character Vectors: 32, 0.005904 s Character Vectors: 255, 0.0002158 s Character Vectors: 257, 0.00374 s Character Vectors: 256, 0.0004427 s Character Vectors: 65535, 0.01718 s Character Vectors: 65537, 0.01492 s Character Vectors: 65536, 0.01253 s Stringfish: 0, 0.007571 s Stringfish: 1, 0.0007939 s Stringfish: 2, 0.002205 s Stringfish: 4, 0.004242 s Stringfish: 8, 0.003065 s Stringfish: 31, 0.01189 s Stringfish: 33, 0.002794 s Stringfish: 32, 0.002528 s Stringfish: 255, 0.003456 s Stringfish: 257, 0.002513 s Stringfish: 256, 0.008207 s Stringfish: 65535, 0.01687 s Stringfish: 65537, 0.04514 s Stringfish: 65536, 0.01555 s Integers: 0, 0.007944 s Integers: 1, 0.01388 s Integers: 2, 0.02272 s Integers: 4, 0.1904 s Integers: 8, 0.03307 s Integers: 31, 0.009177 s Integers: 33, 0.009349 s Integers: 32, 0.01074 s Integers: 255, 0.0119 s Integers: 257, 0.001193 s Integers: 256, 0.01593 s Integers: 65535, 0.0177 s Integers: 65537, 0.01481 s Integers: 65536, 0.03646 s Integers: 1e+06, 0.065 s Numeric: 0, 0.01576 s Numeric: 1, 0.01165 s Numeric: 2, 0.001491 s Numeric: 4, 0.01102 s Numeric: 8, 0.004946 s Numeric: 31, 0.007013 s Numeric: 33, 0.01249 s Numeric: 32, 0.01802 s Numeric: 255, 0.01854 s Numeric: 257, 0.006926 s Numeric: 256, 0.004951 s Numeric: 65535, 0.03113 s Numeric: 65537, 0.02837 s Numeric: 65536, 0.01944 s Numeric: 1e+06, 1.293 s Logical: 0, 0.03408 s Logical: 1, 0.01321 s Logical: 2, 0.02205 s Logical: 4, 0.01747 s Logical: 8, 0.03062 s Logical: 31, 0.005043 s Logical: 33, 0.02257 s Logical: 32, 0.003356 s Logical: 255, 0.04321 s Logical: 257, 0.00791 s Logical: 256, 0.008885 s Logical: 65535, 0.08702 s Logical: 65537, 0.05391 s Logical: 65536, 0.1798 s Logical: 1e+06, 0.08849 s List: 0, 0.009586 s List: 1, 0.02152 s List: 2, 0.03142 s List: 4, 0.008804 s List: 8, 0.01967 s List: 31, 0.01702 s List: 33, 0.009347 s List: 32, 0.01031 s List: 255, 0.02239 s List: 257, 0.00804 s List: 256, 0.009117 s List: 65535, 0.06329 s List: 
65537, 0.08402 s List: 65536, 0.07609 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-devel-linux-x86_64-fedora-clang

Version: 0.27.3
Check: tests
Result: ERROR Running ‘correctness_testing.R’ [266s/339s] Running ‘qattributes_testing.R’ [55s/81s] Running ‘qsavemload_testing.R’ Running the tests in ‘tests/qattributes_testing.R’ failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = str; + 
break; + case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.01625 s strings: 1, 0.008384 s strings: 2, 0.005765 s strings: 4, 0.004268 s strings: 8, 0.00215 s strings: 31, 0.01354 s strings: 33, 0.04644 s strings: 32, 0.01836 s strings: 255, 0.03722 s strings: 257, 0.006896 s strings: 256, 0.004255 s strings: 65535, 0.005347 s strings: 65537, 0.02391 s strings: 65536, 0.02305 s strings: 1e+06, 0.01001 s Character Vectors: 0, 0.0002426 s Character Vectors: 1, 0.0002229 s Character Vectors: 2, 0.001755 s Character Vectors: 4, 0.002418 s Character Vectors: 8, 0.0004577 s Character Vectors: 31, 0.003469 s 
Character Vectors: 33, 0.00275 s Character Vectors: 32, 0.003638 s Character Vectors: 255, 0.0002109 s Character Vectors: 257, 0.004082 s Character Vectors: 256, 0.002112 s Character Vectors: 65535, 0.01292 s Character Vectors: 65537, 0.01015 s Character Vectors: 65536, 0.01066 s Stringfish: 0, 0.003009 s Stringfish: 1, 0.0001962 s Stringfish: 2, 0.002359 s Stringfish: 4, 0.00149 s Stringfish: 8, 0.002509 s Stringfish: 31, 0.003465 s Stringfish: 33, 0.0002818 s Stringfish: 32, 0.001496 s Stringfish: 255, 0.005925 s Stringfish: 257, 0.0004855 s Stringfish: 256, 0.003994 s Stringfish: 65535, 0.009526 s Stringfish: 65537, 0.006693 s Stringfish: 65536, 0.008884 s Integers: 0, 0.03565 s Integers: 1, 0.01148 s Integers: 2, 0.006643 s Integers: 4, 0.007366 s Integers: 8, 0.007081 s Integers: 31, 0.003193 s Integers: 33, 0.006194 s Integers: 32, 0.00324 s Integers: 255, 0.006704 s Integers: 257, 0.002273 s Integers: 256, 0.007023 s Integers: 65535, 0.0329 s Integers: 65537, 0.0101 s Integers: 65536, 0.01208 s Integers: 1e+06, 0.0947 s Numeric: 0, 0.0122 s Numeric: 1, 0.005965 s Numeric: 2, 0.009375 s Numeric: 4, 0.007404 s Numeric: 8, 0.01099 s Numeric: 31, 0.008941 s Numeric: 33, 0.002302 s Numeric: 32, 0.006604 s Numeric: 255, 0.007849 s Numeric: 257, 0.029 s Numeric: 256, 0.004494 s Numeric: 65535, 0.03153 s Numeric: 65537, 0.05846 s Numeric: 65536, 0.03066 s Numeric: 1e+06, 0.3513 s Logical: 0, 0.009847 s Logical: 1, 0.006932 s Logical: 2, 0.0005867 s Logical: 4, 0.002073 s Logical: 8, 0.022 s Logical: 31, 0.007829 s Logical: 33, 0.001772 s Logical: 32, 0.005439 s Logical: 255, 0.006536 s Logical: 257, 0.006328 s Logical: 256, 0.007436 s Logical: 65535, 0.03718 s Logical: 65537, 0.03998 s Logical: 65536, 0.08235 s Logical: 1e+06, 0.1906 s List: 0, 0.002882 s List: 1, 0.000797 s List: 2, 0.003479 s List: 4, 0.001575 s List: 8, 0.005349 s List: 31, 0.004354 s List: 33, 0.005066 s List: 32, 0.003362 s List: 255, 0.005554 s List: 257, 0.03385 s List: 256, 0.004453 s List: 
65535, 0.05462 s List: 65537, 0.02323 s List: 65536, 0.06778 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-devel-linux-x86_64-fedora-gcc

Version: 0.27.3
Check: compiled code
Result: WARN
File 'qs/libs/x64/qs.dll':
Found non-API calls to R: 'ATTRIB', 'CLOENV', 'ENCLOS', 'FRAME', 'HASHTAB', 'IS_S4_OBJECT', 'LEVELS', 'OBJECT', 'PRENV', 'Rf_allocSExp', 'SETLEVELS', 'SET_ATTRIB', 'SET_CLOENV', 'SET_ENCLOS', 'SET_FRAME', 'SET_HASHTAB', 'SET_OBJECT', 'SET_PRENV', 'SET_S4_OBJECT', 'SET_TRUELENGTH'
These entry points may be removed soon: 'SET_FRAME', 'SET_HASHTAB', 'SET_ENCLOS', 'SET_S4_OBJECT', 'FRAME', 'HASHTAB', 'IS_S4_OBJECT', 'CLOENV', 'ENCLOS', 'OBJECT', 'SET_CLOENV', 'LEVELS', 'SETLEVELS'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual, and section 'Moving into C API compliance' for issues with the use of non-API entry points.
Flavor: r-devel-windows-x86_64

Version: 0.27.3
Check: tests
Result: ERROR Running 'correctness_testing.R' [170s] Running 'qattributes_testing.R' [34s] Running 'qsavemload_testing.R' [2s] Running the tests in 'tests/qattributes_testing.R' failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = str; + break; 
+ case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.008997 s strings: 1, 0.003789 s strings: 2, 0.003569 s strings: 4, 0.004111 s strings: 8, 0.001921 s strings: 31, 0.001714 s strings: 33, 0.007817 s strings: 32, 0.006311 s strings: 255, 0.003146 s strings: 257, 0.002332 s strings: 256, 0.002429 s strings: 65535, 0.003937 s strings: 65537, 0.002857 s strings: 65536, 0.00364 s strings: 1e+06, 0.003728 s Character Vectors: 0, 0.0004983 s Character Vectors: 1, 0.000917 s Character Vectors: 2, 0.0003113 s Character Vectors: 4, 0.0003307 s Character Vectors: 8, 0.0012 s Character Vectors: 31, 0.001789 
s Character Vectors: 33, 0.0001789 s Character Vectors: 32, 0.0004029 s Character Vectors: 255, 0.0002546 s Character Vectors: 257, 0.000315 s Character Vectors: 256, 0.001566 s Character Vectors: 65535, 0.00359 s Character Vectors: 65537, 0.002461 s Character Vectors: 65536, 0.004192 s Stringfish: 0, 0.002558 s Stringfish: 1, 0.0007869 s Stringfish: 2, 0.0004977 s Stringfish: 4, 0.001169 s Stringfish: 8, 0.0004266 s Stringfish: 31, 0.000306 s Stringfish: 33, 0.0002334 s Stringfish: 32, 0.000914 s Stringfish: 255, 0.001232 s Stringfish: 257, 0.0003196 s Stringfish: 256, 0.001071 s Stringfish: 65535, 0.003051 s Stringfish: 65537, 0.004424 s Stringfish: 65536, 0.003686 s Integers: 0, 0.005866 s Integers: 1, 0.005257 s Integers: 2, 0.01274 s Integers: 4, 0.002099 s Integers: 8, 0.001986 s Integers: 31, 0.001481 s Integers: 33, 0.00121 s Integers: 32, 0.001927 s Integers: 255, 0.002432 s Integers: 257, 0.002957 s Integers: 256, 0.001551 s Integers: 65535, 0.009364 s Integers: 65537, 0.008974 s Integers: 65536, 0.00961 s Integers: 1e+06, 0.01312 s Numeric: 0, 0.004293 s Numeric: 1, 0.003859 s Numeric: 2, 0.01039 s Numeric: 4, 0.003402 s Numeric: 8, 0.002165 s Numeric: 31, 0.002926 s Numeric: 33, 0.006702 s Numeric: 32, 0.002032 s Numeric: 255, 0.002284 s Numeric: 257, 0.002464 s Numeric: 256, 0.00466 s Numeric: 65535, 0.008719 s Numeric: 65537, 0.01645 s Numeric: 65536, 0.01112 s Numeric: 1e+06, 0.02205 s Logical: 0, 0.002248 s Logical: 1, 0.0023 s Logical: 2, 0.001358 s Logical: 4, 0.001029 s Logical: 8, 0.003431 s Logical: 31, 0.001796 s Logical: 33, 0.0007277 s Logical: 32, 0.002247 s Logical: 255, 0.002279 s Logical: 257, 0.001732 s Logical: 256, 0.001398 s Logical: 65535, 0.001826 s Logical: 65537, 0.002931 s Logical: 65536, 0.002498 s Logical: 1e+06, 0.07047 s List: 0, 0.004243 s List: 1, 0.002478 s List: 2, 0.0017 s List: 4, 0.002943 s List: 8, 0.002262 s List: 31, 0.0007854 s List: 33, 0.005188 s List: 32, 0.002345 s List: 255, 0.001812 s List: 257, 0.001297 s 
List: 256, 0.001919 s List: 65535, 0.02075 s List: 65537, 0.03808 s List: 65536, 0.02565 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-devel-windows-x86_64

Version: 0.27.3
Check: compiled code
Result: NOTE
File ‘qs/libs/qs.so’:
Found non-API calls to R: ‘CLOENV’, ‘ENCLOS’, ‘FRAME’, ‘HASHTAB’, ‘IS_S4_OBJECT’, ‘LEVELS’, ‘OBJECT’, ‘PRENV’, ‘Rf_allocSExp’, ‘SETLEVELS’, ‘SET_CLOENV’, ‘SET_ENCLOS’, ‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_PRENV’, ‘SET_S4_OBJECT’, ‘SET_TRUELENGTH’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual, and section ‘Moving into C API compliance’ for issues with the use of non-API entry points.
Flavors: r-patched-linux-x86_64, r-release-linux-x86_64, r-release-macos-arm64, r-release-macos-x86_64

Version: 0.27.3
Check: tests
Result: ERROR Running ‘correctness_testing.R’ [187s/204s] Running ‘qattributes_testing.R’ [39s/47s] Running ‘qsavemload_testing.R’ [2s/2s] Running the tests in ‘tests/qattributes_testing.R’ failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = 
str; + break; + case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.01305 s strings: 1, 0.03852 s strings: 2, 0.01388 s strings: 4, 0.005422 s strings: 8, 0.001124 s strings: 31, 0.004696 s strings: 33, 0.001637 s strings: 32, 0.01332 s strings: 255, 0.02774 s strings: 257, 0.005321 s strings: 256, 0.01265 s strings: 65535, 0.003081 s strings: 65537, 0.00541 s strings: 65536, 0.06099 s strings: 1e+06, 0.009417 s Character Vectors: 0, 0.001466 s Character Vectors: 1, 0.003962 s Character Vectors: 2, 0.001845 s Character Vectors: 4, 0.001413 s Character Vectors: 8, 0.0001554 s Character Vectors: 31, 0.001773 s 
Character Vectors: 33, 0.001976 s Character Vectors: 32, 0.001143 s Character Vectors: 255, 0.0003963 s Character Vectors: 257, 0.001022 s Character Vectors: 256, 0.002908 s Character Vectors: 65535, 0.006415 s Character Vectors: 65537, 0.007073 s Character Vectors: 65536, 0.003652 s Stringfish: 0, 0.005261 s Stringfish: 1, 0.00375 s Stringfish: 2, 0.0009209 s Stringfish: 4, 0.002637 s Stringfish: 8, 0.00233 s Stringfish: 31, 0.004954 s Stringfish: 33, 0.0003138 s Stringfish: 32, 0.001446 s Stringfish: 255, 0.008055 s Stringfish: 257, 0.002775 s Stringfish: 256, 0.005306 s Stringfish: 65535, 0.004382 s Stringfish: 65537, 0.006754 s Stringfish: 65536, 0.005119 s Integers: 0, 0.006482 s Integers: 1, 0.007229 s Integers: 2, 0.0007215 s Integers: 4, 0.01262 s Integers: 8, 0.005672 s Integers: 31, 0.003107 s Integers: 33, 0.01145 s Integers: 32, 0.008399 s Integers: 255, 0.005686 s Integers: 257, 0.0005411 s Integers: 256, 0.004487 s Integers: 65535, 0.02636 s Integers: 65537, 0.01611 s Integers: 65536, 0.01309 s Integers: 1e+06, 0.05896 s Numeric: 0, 0.01373 s Numeric: 1, 0.008508 s Numeric: 2, 0.001695 s Numeric: 4, 0.0007639 s Numeric: 8, 0.00123 s Numeric: 31, 0.005012 s Numeric: 33, 0.009466 s Numeric: 32, 0.005141 s Numeric: 255, 0.00957 s Numeric: 257, 0.0006337 s Numeric: 256, 0.007156 s Numeric: 65535, 0.002221 s Numeric: 65537, 0.00852 s Numeric: 65536, 0.008231 s Numeric: 1e+06, 0.0698 s Logical: 0, 0.005279 s Logical: 1, 0.003274 s Logical: 2, 0.0007532 s Logical: 4, 0.003337 s Logical: 8, 0.008381 s Logical: 31, 0.0009964 s Logical: 33, 0.001366 s Logical: 32, 0.00223 s Logical: 255, 0.00508 s Logical: 257, 0.03466 s Logical: 256, 0.02355 s Logical: 65535, 0.09149 s Logical: 65537, 0.01752 s Logical: 65536, 0.007563 s Logical: 1e+06, 0.1534 s List: 0, 0.009598 s List: 1, 0.01341 s List: 2, 0.002691 s List: 4, 0.001322 s List: 8, 0.002184 s List: 31, 0.005664 s List: 33, 0.002624 s List: 32, 0.01945 s List: 255, 0.0007585 s List: 257, 0.02725 s List: 256, 
0.003825 s List: 65535, 0.03091 s List: 65537, 0.04585 s List: 65536, 0.01979 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-patched-linux-x86_64

Version: 0.27.3
Check: tests
Result: ERROR Running ‘correctness_testing.R’ [192s/213s] Running ‘qattributes_testing.R’ [41s/47s] Running ‘qsavemload_testing.R’ [2s/3s] Running the tests in ‘tests/qattributes_testing.R’ failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = 
str; + break; + case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.03024 s strings: 1, 0.02569 s strings: 2, 0.003852 s strings: 4, 0.006124 s strings: 8, 0.01191 s strings: 31, 0.00565 s strings: 33, 0.01231 s strings: 32, 0.008515 s strings: 255, 0.02055 s strings: 257, 0.007413 s strings: 256, 0.006497 s strings: 65535, 0.001037 s strings: 65537, 0.004558 s strings: 65536, 0.004844 s strings: 1e+06, 0.01527 s Character Vectors: 0, 0.001137 s Character Vectors: 1, 0.001953 s Character Vectors: 2, 0.0005882 s Character Vectors: 4, 0.006126 s Character Vectors: 8, 0.004092 s Character Vectors: 31, 0.004596 s 
Character Vectors: 33, 0.005508 s Character Vectors: 32, 0.004881 s Character Vectors: 255, 0.004713 s Character Vectors: 257, 0.0001411 s Character Vectors: 256, 0.001521 s Character Vectors: 65535, 0.00766 s Character Vectors: 65537, 0.003603 s Character Vectors: 65536, 0.01526 s Stringfish: 0, 0.004656 s Stringfish: 1, 0.001448 s Stringfish: 2, 0.005292 s Stringfish: 4, 0.002041 s Stringfish: 8, 0.003663 s Stringfish: 31, 0.002011 s Stringfish: 33, 0.002274 s Stringfish: 32, 0.004938 s Stringfish: 255, 0.003249 s Stringfish: 257, 0.001679 s Stringfish: 256, 0.0001094 s Stringfish: 65535, 0.002944 s Stringfish: 65537, 0.004002 s Stringfish: 65536, 0.00591 s Integers: 0, 0.01831 s Integers: 1, 0.005373 s Integers: 2, 0.00603 s Integers: 4, 0.02979 s Integers: 8, 0.003574 s Integers: 31, 0.01541 s Integers: 33, 0.01003 s Integers: 32, 0.008678 s Integers: 255, 0.01622 s Integers: 257, 0.004014 s Integers: 256, 0.001849 s Integers: 65535, 0.01372 s Integers: 65537, 0.009075 s Integers: 65536, 0.02629 s Integers: 1e+06, 0.0355 s Numeric: 0, 0.004976 s Numeric: 1, 0.01204 s Numeric: 2, 0.003928 s Numeric: 4, 0.01054 s Numeric: 8, 0.03556 s Numeric: 31, 0.000809 s Numeric: 33, 0.007052 s Numeric: 32, 0.004026 s Numeric: 255, 0.002298 s Numeric: 257, 0.006143 s Numeric: 256, 0.004002 s Numeric: 65535, 0.04934 s Numeric: 65537, 0.03054 s Numeric: 65536, 0.01449 s Numeric: 1e+06, 0.05485 s Logical: 0, 0.004338 s Logical: 1, 0.004719 s Logical: 2, 0.007448 s Logical: 4, 0.002327 s Logical: 8, 0.004858 s Logical: 31, 0.0008652 s Logical: 33, 0.01246 s Logical: 32, 0.001068 s Logical: 255, 0.00514 s Logical: 257, 0.005454 s Logical: 256, 0.01557 s Logical: 65535, 0.0106 s Logical: 65537, 0.01305 s Logical: 65536, 0.01192 s Logical: 1e+06, 0.2385 s List: 0, 0.00554 s List: 1, 0.003136 s List: 2, 0.01895 s List: 4, 0.004718 s List: 8, 0.01827 s List: 31, 0.000813 s List: 33, 0.006669 s List: 32, 0.008275 s List: 255, 0.008603 s List: 257, 0.005483 s List: 256, 0.0005851 s 
List: 65535, 0.03412 s List: 65537, 0.02591 s List: 65536, 0.02381 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-release-linux-x86_64

Version: 0.27.3
Check: compiled code
Result: NOTE File 'qs/libs/x64/qs.dll': Found non-API calls to R: 'CLOENV', 'ENCLOS', 'FRAME', 'HASHTAB', 'IS_S4_OBJECT', 'LEVELS', 'OBJECT', 'PRENV', 'Rf_allocSExp', 'SETLEVELS', 'SET_CLOENV', 'SET_ENCLOS', 'SET_FRAME', 'SET_HASHTAB', 'SET_PRENV', 'SET_S4_OBJECT', 'SET_TRUELENGTH' Compiled code should not call non-API entry points in R. See 'Writing portable packages' in the 'Writing R Extensions' manual, and section 'Moving into C API compliance' for issues with the use of non-API entry points. Flavor: r-release-windows-x86_64

Version: 0.27.3
Check: tests
Result: ERROR Running 'correctness_testing.R' [147s] Running 'qattributes_testing.R' [35s] Running 'qsavemload_testing.R' [1s] Running the tests in 'tests/qattributes_testing.R' failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = str; + break; 
+ case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.008145 s strings: 1, 0.004759 s strings: 2, 0.004126 s strings: 4, 0.002624 s strings: 8, 0.004648 s strings: 31, 0.001341 s strings: 33, 0.002102 s strings: 32, 0.001331 s strings: 255, 0.001528 s strings: 257, 0.004855 s strings: 256, 0.001973 s strings: 65535, 0.002005 s strings: 65537, 0.002956 s strings: 65536, 0.005843 s strings: 1e+06, 0.007074 s Character Vectors: 0, 0.0001577 s Character Vectors: 1, 0.001433 s Character Vectors: 2, 0.0009696 s Character Vectors: 4, 0.001728 s Character Vectors: 8, 0.0001514 s Character Vectors: 31, 
0.001395 s Character Vectors: 33, 0.0006684 s Character Vectors: 32, 0.001729 s Character Vectors: 255, 0.0005263 s Character Vectors: 257, 0.001342 s Character Vectors: 256, 0.0001803 s Character Vectors: 65535, 0.003348 s Character Vectors: 65537, 0.002962 s Character Vectors: 65536, 0.003407 s Stringfish: 0, 0.0009772 s Stringfish: 1, 0.0005012 s Stringfish: 2, 0.0002343 s Stringfish: 4, 0.001995 s Stringfish: 8, 0.001407 s Stringfish: 31, 0.0003917 s Stringfish: 33, 0.0003936 s Stringfish: 32, 0.0009816 s Stringfish: 255, 0.001335 s Stringfish: 257, 0.001367 s Stringfish: 256, 0.0005395 s Stringfish: 65535, 0.002509 s Stringfish: 65537, 0.004305 s Stringfish: 65536, 0.004049 s Integers: 0, 0.002257 s Integers: 1, 0.002666 s Integers: 2, 0.001557 s Integers: 4, 0.003662 s Integers: 8, 0.004208 s Integers: 31, 0.001356 s Integers: 33, 0.004156 s Integers: 32, 0.001477 s Integers: 255, 0.00238 s Integers: 257, 0.004024 s Integers: 256, 0.002541 s Integers: 65535, 0.007276 s Integers: 65537, 0.002201 s Integers: 65536, 0.008258 s Integers: 1e+06, 0.015 s Numeric: 0, 0.003075 s Numeric: 1, 0.002935 s Numeric: 2, 0.001423 s Numeric: 4, 0.000955 s Numeric: 8, 0.001943 s Numeric: 31, 0.002446 s Numeric: 33, 0.002546 s Numeric: 32, 0.0007737 s Numeric: 255, 0.0007413 s Numeric: 257, 0.002134 s Numeric: 256, 0.001227 s Numeric: 65535, 0.009394 s Numeric: 65537, 0.003317 s Numeric: 65536, 0.01312 s Numeric: 1e+06, 0.06174 s Logical: 0, 0.004049 s Logical: 1, 0.004119 s Logical: 2, 0.003524 s Logical: 4, 0.009798 s Logical: 8, 0.001536 s Logical: 31, 0.003259 s Logical: 33, 0.002323 s Logical: 32, 0.00207 s Logical: 255, 0.004744 s Logical: 257, 0.002756 s Logical: 256, 0.002352 s Logical: 65535, 0.01794 s Logical: 65537, 0.005719 s Logical: 65536, 0.006068 s Logical: 1e+06, 0.08729 s List: 0, 0.004186 s List: 1, 0.002581 s List: 2, 0.002238 s List: 4, 0.003792 s List: 8, 0.002439 s List: 31, 0.004815 s List: 33, 0.007244 s List: 32, 0.001671 s List: 255, 0.00146 s List: 
257, 0.001188 s List: 256, 0.005881 s List: 65535, 0.02215 s List: 65537, 0.02327 s List: 65536, 0.02337 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-release-windows-x86_64

Version: 0.27.3
Check: installed package size
Result: NOTE installed size is 9.2Mb sub-directories of 1Mb or more: doc 1.1Mb libs 7.8Mb Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64

Version: 0.27.3
Check: tests
Result: ERROR Running 'correctness_testing.R' [194s] Running 'qattributes_testing.R' [44s] Running 'qsavemload_testing.R' [2s] Running the tests in 'tests/qattributes_testing.R' failed. Complete output: > total_time <- Sys.time() > > suppressMessages(library(Rcpp)) > suppressMessages(library(dplyr)) > suppressMessages(library(data.table)) > suppressMessages(library(qs)) > suppressMessages(library(stringfish)) > options(warn = 1) > > do_gc <- function() { + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + gc(full = TRUE) + } else { + gc() + } + } > > # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check > R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs > if (nzchar(R_TESTS)) { + R_TESTS_absolute <- normalizePath(R_TESTS) + Sys.setenv(R_TESTS = R_TESTS_absolute) + } > sourceCpp(code="#include <Rcpp.h> + using namespace Rcpp; + // [[Rcpp::plugins(cpp11)]] + // [[Rcpp::export(rng=false)]] + CharacterVector splitstr(std::string x, std::vector<double> cuts){ + CharacterVector ret(cuts.size() - 1); + for(uint64_t i=1; i<cuts.size(); i++) { + ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1])); + } + return ret; + } + // [[Rcpp::export(rng=false)]] + int setlev(SEXP x, int i) { + return SETLEVELS(x,i); + } + // [[Rcpp::export(rng=false)]] + void setobj(SEXP x, int i) { + return SET_OBJECT(x, i); + } + // [[Rcpp::export(rng=false)]] + List generateList(std::vector<int> list_elements){ + auto randchar = []() -> char + { + const char charset[] = + \"0123456789\" + \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\" + \"abcdefghijklmnopqrstuvwxyz\"; + const size_t max_index = (sizeof(charset) - 1); + return charset[ rand() % max_index ]; + }; + List ret(list_elements.size()); + std::string str(10,0); + for(size_t i=0; i<list_elements.size(); i++) { + switch(list_elements[i]) { + case 1: + ret[i] = R_NilValue; + break; + case 2: + std::generate_n( str.begin(), 10, randchar ); + ret[i] = str; + break; 
+ case 3: + ret[i] = rand(); + break; + case 4: + ret[i] = static_cast<double>(rand()); + break; + } + } + return ret; + }") > if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS) > > args <- commandArgs(T) > if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time + reps <- 2 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6) + test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list + max_size <- 1e6 + } else { + reps <- 3 + test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7) + test_points_slow <- test_points + max_size <- 1e7 + } > myfile <- tempfile() > > obj_size <- 0 > get_obj_size <- function() { + get("obj_size", envir = globalenv()) + } > set_obj_size <- function(x) { + assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv()) + return(get_obj_size()); + } > random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size + if (sample(3, 1) == 1) { + ret <- as.list(1:N) + } else if (sample(2, 1) == 1) { + ret <- as.pairlist(1:N) + } else { + ret <- as.pairlist(1:N) + setlev(ret, sample(2L^12L, 1L) - 1L) + setobj(ret, 1L) + } + + for (i in 1:N) { + if (get_obj_size() > get("max_size", envir = globalenv())) break; + otype <- sample(12, size = 1) + z <- NULL + is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1)) + if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);} + else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); } + else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); } + else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); } + else if (otype == 5) { z <- 
replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); } + else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); } + else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); } + else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} } + # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) } + else { z <- random_object_generator(N, with_envs) } + if (is_attribute) { + attr(ret[[i - 1]], runif(1) %>% as.character()) <- z + } else { + ret[[i]] <- z + } + } + return(ret) + } > > rand_strings <- function(n) { + s <- sample(0:100, size = n, replace = T) + x <- lapply(unique(s), function(si) { + stringfish::random_strings(sum(s == si), si, vector_mode = "normal") + }) %>% unlist %>% sample + x[sample(n, size = n/10)] <- NA + return(x) + } > > nested_tibble <- function() { + sub_tibble <- function(nr = 600, nc = 4) { + z <- lapply(1:nc, function(i) rand_strings(nr)) %>% + setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>% + bind_cols %>% + as_tibble + } + tibble( + col1 = rand_strings(100), + col2 = rand_strings(100), + col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)), + col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)) + ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5)))) + } > > printCarriage <- function(x) { + cat(x, "\r") + } > > attributes_serialize_identical <- function(attributes, full_object) { + identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL)) + } > > attributes_identical <- function(attributes, full_object) { + identical(attributes, attributes(full_object)) + } > > 
################################################################################################ > > qsave_rand <- function(x, file) { + alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1) + # alg <- "zstd_stream" + nt <- sample(5,1) + sc <- sample(0:15,1) + cl <- sample(10,1) + ch <- sample(c(T,F),1) + qsave(x, file = file, preset = "custom", algorithm = alg, + compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch) + } > > qattributes_rand <- function(file) { + # ar <- sample(c(T,F),1) + # don't use altrep to avoid serialization differences + # attributes_serialize_identical won't pass with ALTREP + ar <- FALSE + nt <- sample(5,1) + qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T) + } > > ################################################################################################ > > for (q in 1:reps) { + cat("Rep", q, "of", reps, "\n") + # String correctness + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rep(letters, length.out = tp) %>% paste(collapse = "") + x1 <- c(NA, "", x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Character vectors + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + # qs_use_alt_rep(F) + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # stringfish character 
vectors -- require R > 3.5.0 + if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) { + time <- vector("numeric", length = 3) + for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar + cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric + x1 <- splitstr(x1, cuts) + x1 <- c(NA, "", x1) + x1 <- stringfish::convert_to_sf(x1) + qsave_rand(x1, file = myfile) + time[i] <- Sys.time() + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + } + + # Integers + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- sample(1:tp, replace = T) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Doubles + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + x1 <- rnorm(tp) + x1 <- c(NA, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Logical + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + + x1 <- sample(c(T, F, NA), replace = T, size = tp) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + # List + time <- vector("numeric", length = 3) + 
for (tp in test_points_slow) { + for (i in 1:3) { + x1 <- generateList(sample(1:4, replace = T, size = tp)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4))) + } + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Data.frame test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- data.table(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_serialize_identical(z, x1)) + } + cat("Data.table test") + cat("\n") + + for (i in 1:3) { + x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 ) + x1 <- tibble(str = x1,num = runif(1:1e6)) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + cat("Tibble test") + cat("\n") + + # Encoding test + if (Sys.info()[['sysname']] != "Windows") { + for (i in 1:3) { + x1 <- "己所不欲,勿施于人" # utf 8 + x2 <- x1 + Encoding(x2) <- "latin1" + x3 <- x1 + Encoding(x3) <- "bytes" + x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";") + x1 <- c(x1, x2, x3, x4) + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage("Encoding test") + } else { + printCarriage("(Encoding test not run on windows)") + 
} + cat("\n") + + # complex vectors + time <- vector("numeric", length = 3) + for (tp in test_points) { + for (i in 1:3) { + re <- rnorm(tp) + im <- runif(tp) + x1 <- complex(real = re, imaginary = im) + x1 <- c(NA_complex_, x1) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # factors + for (tp in test_points) { + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4))) + } + cat("\n") + + # Random objects + time <- vector("numeric", length = 8) + for (i in 1:8) { + # qs_use_alt_rep(sample(c(T, F), size = 1)) + obj_size <- 0 + x1 <- random_object_generator(12) + printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric)) + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4))) + cat("\n") + + # nested attributes + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- as.list(1:26) + attr(x1[[26]], letters[26]) <- rnorm(100) + for (i in 25:1) { + attr(x1[[i]], letters[i]) <- x1[[i + 1]] + } + time[i] <- Sys.time() + for(j in 1:length(x1)) { + qsave_rand(x1[[j]], file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1[[j]])) + } + } + printCarriage(sprintf("Nested attributes: %s s", 
signif(mean(time), 4))) + cat("\n") + + # alt-rep -- should serialize the unpacked object + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- 1:max_size + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + time[i] <- Sys.time() - time[i] + do_gc() + stopifnot(attributes_identical(z, x1)) + } + printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4))) + cat("\n") + + + # Environment test + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- new.env() + x1[["a"]] <- 1:max_size + x1[["b"]] <- runif(max_size) + x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal") + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z[["a"]], x1[["a"]])) + stopifnot(attributes_identical(z[["b"]], x1[["b"]])) + stopifnot(attributes_identical(z[["c"]], x1[["c"]])) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4))) + cat("\n") + + time <- vector("numeric", length = 3) + for (i in 1:3) { + x1 <- nested_tibble() + time[i] <- Sys.time() + qsave_rand(x1, file = myfile) + z <- qattributes_rand(file = myfile) + stopifnot(attributes_identical(z, x1)) + time[i] <- Sys.time() - time[i] + do_gc() + } + printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4))) + cat("\n") + } Rep 1 of 2 strings: 0, 0.006169 s strings: 1, 0.007327 s strings: 2, 0.001866 s strings: 4, 0.002096 s strings: 8, 0.003265 s strings: 31, 0.002337 s strings: 33, 0.001981 s strings: 32, 0.001839 s strings: 255, 0.01221 s strings: 257, 0.003793 s strings: 256, 0.007139 s strings: 65535, 0.002969 s strings: 65537, 0.003412 s strings: 65536, 0.003754 s strings: 1e+06, 0.015 s Character Vectors: 0, 0.001374 s Character Vectors: 1, 0.0008787 s Character Vectors: 2, 0.001394 s Character Vectors: 4, 0.001415 s Character Vectors: 8, 0.001047 s Character Vectors: 31, 0.0004343 s 
Character Vectors: 33, 0.0003966 s Character Vectors: 32, 0.000508 s Character Vectors: 255, 0.00065 s Character Vectors: 257, 0.001168 s Character Vectors: 256, 0.002031 s Character Vectors: 65535, 0.003004 s Character Vectors: 65537, 0.004233 s Character Vectors: 65536, 0.005212 s Stringfish: 0, 0.00119 s Stringfish: 1, 0.003825 s Stringfish: 2, 0.0002847 s Stringfish: 4, 0.001333 s Stringfish: 8, 0.0006677 s Stringfish: 31, 0.000411 s Stringfish: 33, 0.001108 s Stringfish: 32, 0.000444 s Stringfish: 255, 0.002189 s Stringfish: 257, 0.0008314 s Stringfish: 256, 0.001644 s Stringfish: 65535, 0.002693 s Stringfish: 65537, 0.00486 s Stringfish: 65536, 0.006438 s Integers: 0, 0.00742 s Integers: 1, 0.008533 s Integers: 2, 0.009217 s Integers: 4, 0.005772 s Integers: 8, 0.007577 s Integers: 31, 0.002871 s Integers: 33, 0.0028 s Integers: 32, 0.003356 s Integers: 255, 0.007434 s Integers: 257, 0.002554 s Integers: 256, 0.01185 s Integers: 65535, 0.008971 s Integers: 65537, 0.006546 s Integers: 65536, 0.009211 s Integers: 1e+06, 0.09163 s Numeric: 0, 0.005385 s Numeric: 1, 0.007798 s Numeric: 2, 0.002623 s Numeric: 4, 0.003132 s Numeric: 8, 0.003254 s Numeric: 31, 0.001896 s Numeric: 33, 0.003466 s Numeric: 32, 0.003285 s Numeric: 255, 0.004089 s Numeric: 257, 0.007526 s Numeric: 256, 0.01273 s Numeric: 65535, 0.02625 s Numeric: 65537, 0.01153 s Numeric: 65536, 0.02822 s Numeric: 1e+06, 0.1103 s Logical: 0, 0.009957 s Logical: 1, 0.002651 s Logical: 2, 0.00454 s Logical: 4, 0.00352 s Logical: 8, 0.003782 s Logical: 31, 0.002078 s Logical: 33, 0.008487 s Logical: 32, 0.003318 s Logical: 255, 0.002876 s Logical: 257, 0.002998 s Logical: 256, 0.002466 s Logical: 65535, 0.01314 s Logical: 65537, 0.005936 s Logical: 65536, 0.004112 s Logical: 1e+06, 0.1285 s List: 0, 0.009209 s List: 1, 0.002523 s List: 2, 0.002975 s List: 4, 0.004267 s List: 8, 0.009939 s List: 31, 0.002467 s List: 33, 0.001734 s List: 32, 0.001238 s List: 255, 0.003524 s List: 257, 0.003227 s List: 256, 
0.002887 s List: 65535, 0.02588 s List: 65537, 0.02604 s List: 65536, 0.02572 s Data.frame test Error: attributes_serialize_identical(z, x1) is not TRUE Execution halted Flavor: r-oldrel-windows-x86_64