fdth builds frequency distribution tables (fdt) and their associated graphics from vectors, data frames, and matrices for both numerical and categorical variables.
Core functions:
| Function | Purpose |
|---|---|
| fdt() | Frequency table for numerical data |
| fdt_cat() | Frequency table for categorical data |
| make.fdt() | Reconstruct a table from frequencies alone |
| make.fdt_cat() | Reconstruct a categorical table from frequencies |
| mfv() | Most frequent value (mode) |
| sd() / var() | Standard deviation / variance for grouped data |
library(fdth)
#>
#> Anexando pacote: 'fdth'
#> Os seguintes objetos são mascarados por 'package:stats':
#>
#> sd, var

fdt()

set.seed(42)
x <- rnorm(200,
mean = 10,
sd = 2)
ft <- fdt(x)
ft
#> Class limits f rf rf(%) cf cf(%)
#> [3.9737,5.2608) 4 0.02 2.0 4 2.0
#> [5.2608,6.5479) 4 0.02 2.0 8 4.0
#> [6.5479,7.8351) 20 0.10 10.0 28 14.0
#> [7.8351,9.1222) 36 0.18 18.0 64 32.0
#> [9.1222,10.409) 57 0.28 28.5 121 60.5
#> [10.409,11.696) 44 0.22 22.0 165 82.5
#> [11.696,12.984) 25 0.12 12.5 190 95.0
#> [12.984,14.271) 8 0.04 4.0 198 99.0
#> [14.271,15.558) 2 0.01 1.0 200 100.0

The default table has six columns:
| Column | Description |
|---|---|
| Class limits | Interval notation |
| f | Absolute frequency |
| rf | Relative frequency |
| rf(%) | Relative frequency (%) |
| cf | Cumulative frequency |
| cf(%) | Cumulative frequency (%) |
# Sturges (default)
fdt(x, breaks = "Sturges")
# Scott
fdt(x, breaks = "Scott")
# Freedman-Diaconis
fdt(x, breaks = "FD")
# Fixed number of classes
fdt(x, k = 8)

# Fixed start, end and width
ft2 <- fdt(x,
start = 4,
end = 16,
h = 2)
ft2
#> Class limits f rf rf(%) cf cf(%)
#> [4,6) 5 0.03 2.5 5 2.5
#> [6,8) 27 0.14 13.5 32 16.0
#> [8,10) 71 0.36 35.5 103 51.5
#> [10,12) 66 0.33 33.0 169 84.5
#> [12,14) 28 0.14 14.0 197 98.5
#> [14,16) 3 0.01 1.5 200 100.0

Use format.classes = TRUE together with pattern to control the number of decimal places displayed in the class limits:
# Two decimal places
print(ft,
format.classes = TRUE,
pattern = "%.2f")
#> Class limits f rf rf(%) cf cf(%)
#> [3.97, 5.26) 4 0.02 2.0 4 2.0
#> [5.26, 6.55) 4 0.02 2.0 8 4.0
#> [6.55, 7.84) 20 0.10 10.0 28 14.0
#> [7.84, 9.12) 36 0.18 18.0 64 32.0
#> [9.12, 10.41) 57 0.28 28.5 121 60.5
#> [10.41, 11.70) 44 0.22 22.0 165 82.5
#> [11.70, 12.98) 25 0.12 12.5 190 95.0
#> [12.98, 14.27) 8 0.04 4.0 198 99.0
#> [14.27, 15.56) 2 0.01 1.0 200 100.0
# Summary with the same formatting
summary(ft,
format.classes = TRUE,
pattern = "%.2f")
#> Class limits f rf rf(%) cf cf(%)
#> [3.97, 5.26) 4 0.02 2.0 4 2.0
#> [5.26, 6.55) 4 0.02 2.0 8 4.0
#> [6.55, 7.84) 20 0.10 10.0 28 14.0
#> [7.84, 9.12) 36 0.18 18.0 64 32.0
#> [9.12, 10.41) 57 0.28 28.5 121 60.5
#> [10.41, 11.70) 44 0.22 22.0 165 82.5
#> [11.70, 12.98) 25 0.12 12.5 190 95.0
#> [12.98, 14.27) 8 0.04 4.0 198 99.0
#> [14.27, 15.56) 2 0.01 1.0 200 100.0

By default intervals are left-closed [a, b). Use right = TRUE for right-closed (a, b]:
fdt(x, right = TRUE)

x_na <- c(x,
NA,
NA)
# This errors by design:
tryCatch(fdt(x_na), error = function(e) message("Error: ", e$message))
#> Error: The data has <NA> values and na.rm=FALSE by default.
# Remove NAs explicitly:
fdt(x_na, na.rm = TRUE)

plot.fdt.default()

All plot types are selected with the type argument.
plot(ft,
type = "fh",
main = "Frequency histogram")
plot(ft,
type = "fp",
main = "Frequency polygon")

plot(ft,
type = "rfh",
main = "Relative frequency histogram")
plot(ft,
type = "rfph",
main = "Relative frequency (%) histogram")

plot(ft,
type = "d",
main = "Density histogram")

plot(ft,
type = "cfp",
main = "Cumulative frequency polygon")
plot(ft,
type = "cfpp",
main = "Cumulative frequency (%) polygon")

plot(ft,
type = "fh",
v = TRUE,
v.round = 0,
main = "Histogram with counts")

Once an fdt object exists, the usual statistics can be computed directly from the grouped (tabulated) data — no access to the original vector is needed.
ft3 <- fdt(x)
mean(ft3)
#> [1] 9.907335
median(ft3)
#> [1] 9.935109
mfv(ft3) # mode(s)
#> [1] 9.917177
var(ft3)
#> [1] 3.842699
sd(ft3)
#> [1] 1.96028
# First quartile (the default call returns only the 25% quantile)
quantile(ft3)
#> 25%
#> 8.621638
# Deciles
quantile(ft3,
i = 1:9,
probs = seq(0,
1,
0.1))
#> 10% 20% 30% 40% 50% 60% 70% 80%
#> 7.320210 8.264103 8.979173 9.483486 9.935109 10.386733 10.965119 11.550176
#> 90%
#> 12.468716

fdt.data.frame()

When the input is a data frame or matrix, fdt() builds one table per numeric column and returns an fdt.multiple object.
ft_iris <- fdt(iris[, 1:4])
ft_iris
#> Sepal.Length
#> Class limits f rf rf(%) cf cf(%)
#> [4.257,4.671) 9 0.06 6.00 9 6.00
#> [4.671,5.084) 23 0.15 15.33 32 21.33
#> [5.084,5.498) 20 0.13 13.33 52 34.67
#> [5.498,5.911) 31 0.21 20.67 83 55.33
#> [5.911,6.325) 25 0.17 16.67 108 72.00
#> [6.325,6.738) 22 0.15 14.67 130 86.67
#> [6.738,7.152) 9 0.06 6.00 139 92.67
#> [7.152,7.565) 5 0.03 3.33 144 96.00
#> [7.565,7.979) 6 0.04 4.00 150 100.00
#>
#> Sepal.Width
#> Class limits f rf rf(%) cf cf(%)
#> [1.98,2.254) 4 0.03 2.67 4 2.67
#> [2.254,2.528) 15 0.10 10.00 19 12.67
#> [2.528,2.801) 28 0.19 18.67 47 31.33
#> [2.801,3.075) 36 0.24 24.00 83 55.33
#> [3.075,3.349) 30 0.20 20.00 113 75.33
#> [3.349,3.623) 22 0.15 14.67 135 90.00
#> [3.623,3.896) 9 0.06 6.00 144 96.00
#> [3.896,4.17) 4 0.03 2.67 148 98.67
#> [4.17,4.444) 2 0.01 1.33 150 100.00
#>
#> Petal.Length
#> Class limits f rf rf(%) cf cf(%)
#> [0.99,1.654) 44 0.29 29.33 44 29.33
#> [1.654,2.319) 6 0.04 4.00 50 33.33
#> [2.319,2.983) 0 0.00 0.00 50 33.33
#> [2.983,3.647) 6 0.04 4.00 56 37.33
#> [3.647,4.312) 19 0.13 12.67 75 50.00
#> [4.312,4.976) 29 0.19 19.33 104 69.33
#> [4.976,5.64) 27 0.18 18.00 131 87.33
#> [5.64,6.305) 14 0.09 9.33 145 96.67
#> [6.305,6.969) 5 0.03 3.33 150 100.00
#>
#> Petal.Width
#> Class limits f rf rf(%) cf cf(%)
#> [0.099,0.3686) 41 0.27 27.33 41 27.33
#> [0.3686,0.6381) 9 0.06 6.00 50 33.33
#> [0.6381,0.9077) 0 0.00 0.00 50 33.33
#> [0.9077,1.177) 10 0.07 6.67 60 40.00
#> [1.177,1.447) 26 0.17 17.33 86 57.33
#> [1.447,1.716) 18 0.12 12.00 104 69.33
#> [1.716,1.986) 17 0.11 11.33 121 80.67
#> [1.986,2.255) 15 0.10 10.00 136 90.67
#> [2.255,2.525) 14 0.09 9.33 150 100.00

Use the by argument to stratify each numeric variable by a categorical column:
ft_by <- fdt(iris[, c(1, 2, 5)],
k = 5,
by = "Species")
ft_by
#> setosa.Sepal.Length
#> Class limits f rf rf(%) cf cf(%)
#> [4.257,4.577) 5 0.10 10 5 10
#> [4.577,4.897) 11 0.22 22 16 32
#> [4.897,5.218) 23 0.46 46 39 78
#> [5.218,5.538) 8 0.16 16 47 94
#> [5.538,5.858) 3 0.06 6 50 100
#>
#> setosa.Sepal.Width
#> Class limits f rf rf(%) cf cf(%)
#> [2.277,2.71) 1 0.02 2 1 2
#> [2.71,3.144) 11 0.22 22 12 24
#> [3.144,3.577) 22 0.44 44 34 68
#> [3.577,4.011) 13 0.26 26 47 94
#> [4.011,4.444) 3 0.06 6 50 100
#>
#> versicolor.Sepal.Length
#> Class limits f rf rf(%) cf cf(%)
#> [4.851,5.295) 5 0.10 10 5 10
#> [5.295,5.739) 16 0.32 32 21 42
#> [5.739,6.182) 13 0.26 26 34 68
#> [6.182,6.626) 10 0.20 20 44 88
#> [6.626,7.07) 6 0.12 12 50 100
#>
#> versicolor.Sepal.Width
#> Class limits f rf rf(%) cf cf(%)
#> [1.98,2.271) 3 0.06 6 3 6
#> [2.271,2.562) 10 0.20 20 13 26
#> [2.562,2.852) 14 0.28 28 27 54
#> [2.852,3.143) 18 0.36 36 45 90
#> [3.143,3.434) 5 0.10 10 50 100
#>
#> virginica.Sepal.Length
#> Class limits f rf rf(%) cf cf(%)
#> [4.851,5.477) 1 0.02 2 1 2
#> [5.477,6.102) 10 0.20 20 11 22
#> [6.102,6.728) 22 0.44 44 33 66
#> [6.728,7.353) 10 0.20 20 43 86
#> [7.353,7.979) 7 0.14 14 50 100
#>
#> virginica.Sepal.Width
#> Class limits f rf rf(%) cf cf(%)
#> [2.178,2.51) 5 0.10 10 5 10
#> [2.51,2.842) 14 0.28 28 19 38
#> [2.842,3.174) 18 0.36 36 37 74
#> [3.174,3.506) 10 0.20 20 47 94
#> [3.506,3.838) 3 0.06 6 50 100

plot(ft_iris, type = "fh")

mean(ft_iris)
#> $Sepal.Length
#> [1] 5.850567
#>
#> $Sepal.Width
#> [1] 3.042258
#>
#> $Petal.Length
#> [1] 3.735911
#>
#> $Petal.Width
#> [1] 1.225742

fdt_cat()

set.seed(7)
fruits <- sample(c("apple",
"banana",
"cherry",
"strawberry",
"melon"),
size = 150,
replace = TRUE)
ft_cat <- fdt_cat(fruits)
ft_cat
#> Category f rf rf(%) cf cf(%)
#> banana 40 0.27 26.67 40 26.67
#> strawberry 32 0.21 21.33 72 48.00
#> cherry 28 0.19 18.67 100 66.67
#> apple 25 0.17 16.67 125 83.33
#> melon 25 0.17 16.67 150 100.00

By default the table is sorted by descending frequency.

fdt_cat(fruits, sort = FALSE)

print(ft_cat, round = 3)
#> Category f rf rf(%) cf cf(%)
#> banana 40 0.267 26.667 40 26.667
#> strawberry 32 0.213 21.333 72 48.000
#> cherry 28 0.187 18.667 100 66.667
#> apple 25 0.167 16.667 125 83.333
#> melon 25 0.167 16.667 150 100.000

plot(ft_cat,
type = "fb",
main = "Frequency bar chart")

plot(ft_cat,
type = "fd",
main = "Frequency dotchart")

plot(ft_cat,
type = "pa",
main = "Pareto chart")

If the original data is no longer available but the frequency table is known, make.fdt() and make.fdt_cat() rebuild complete fdt objects.
# Numerical
ft_ref <- fdt(x)
ft_new <- make.fdt(f = ft_ref$table$f,
start = ft_ref$breaks["start"],
end = ft_ref$breaks["end"])
print(ft_new,
format.classes = TRUE,
pattern = "%.2f")
#> Class limits f rf rf(%) cf cf(%)
#> [3.97, 5.26) 4 0.02 2.0 4 2.0
#> [5.26, 6.55) 4 0.02 2.0 8 4.0
#> [6.55, 7.84) 20 0.10 10.0 28 14.0
#> [7.84, 9.12) 36 0.18 18.0 64 32.0
#> [9.12, 10.41) 57 0.28 28.5 121 60.5
#> [10.41, 11.70) 44 0.22 22.0 165 82.5
#> [11.70, 12.98) 25 0.12 12.5 190 95.0
#> [12.98, 14.27) 8 0.04 4.0 198 99.0
#> [14.27, 15.56) 2 0.01 1.0 200 100.0

# Categorical
ft_new_cat <- make.fdt_cat(f = ft_cat$f,
categories = ft_cat$Category)
ft_new_cat
#> Category f rf rf(%) cf cf(%)
#> banana 40 0.27 26.67 40 26.67
#> strawberry 32 0.21 21.33 72 48.00
#> cherry 28 0.19 18.67 100 66.67
#> apple 25 0.17 16.67 125 83.33
#> melon 25 0.17 16.67 150 100.00

For publication-ready LaTeX tables use xtable::xtable() on any fdt object. A dedicated vignette covers this workflow in detail:

vignette("latex_fdt", package = "fdth")

sessionInfo()
#> R Under development (unstable) (2026-04-23 r89955 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 10 x64 (build 19045)
#>
#> Matrix products: default
#> LAPACK version 3.12.1
#>
#> locale:
#> [1] LC_COLLATE=C LC_CTYPE=Portuguese_Brazil.utf8
#> [3] LC_MONETARY=Portuguese_Brazil.utf8 LC_NUMERIC=C
#> [5] LC_TIME=Portuguese_Brazil.utf8
#>
#> time zone: America/Bahia
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] fdth_1.5-0
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.39 R6_2.6.1 fastmap_1.2.0 xfun_0.57
#> [5] cachem_1.1.0 knitr_1.51 htmltools_0.5.9 rmarkdown_2.31
#> [9] lifecycle_1.0.5 cli_3.6.6 xtable_1.8-8 sass_0.4.10
#> [13] jquerylib_0.1.4 compiler_4.7.0 tools_4.7.0 evaluate_1.0.5
#> [17] bslib_0.10.0 yaml_2.3.12 otel_0.2.0 rlang_1.2.0
#> [21] jsonlite_2.0.0