Fields in ridigbio

There are many fields that can be obtained via the Search and Media API. This tutorial is meant to aid in identifying these fields.

General Overview

In this demo we will cover how to:

  1. Identify both raw and indexed record fields with idig_meta_fields()
  2. Download fields of interest with idig_search_records()
  3. Identify both raw and indexed media record fields with idig_meta_fields()
  4. Download fields of interest with idig_search_media()

Search API Fields

To identify the raw, or verbatim, fields available with the Search API, you can run the following:

# Fetch the raw (verbatim) record field metadata as a nested list.
record_fields <- idig_meta_fields(type = "records", subset = "raw")

# Collect one single-row data frame per field and bind them once at the end,
# instead of growing `rfall` with rbind() on every iteration.
raw_record_rows <- list()
for (i in seq_along(record_fields)) {
  entry <- record_fields[[i]]
  if (length(entry) == 2) {
    # A length-2 entry is read as a field description (`type`, `fieldName`).
    raw_record_rows[[length(raw_record_rows) + 1]] <- data.frame(
      name = names(record_fields)[i],
      type = entry[["type"]],
      fieldName = entry[["fieldName"]],
      stringsAsFactors = FALSE
    )
  } else {
    # Any other entry is treated as a group whose elements are field
    # descriptions, so each element contributes its own row.
    for (j in seq_along(entry)) {
      child <- entry[[j]]
      raw_record_rows[[length(raw_record_rows) + 1]] <- data.frame(
        name = names(entry)[j],
        type = child[["type"]],
        fieldName = child[["fieldName"]],
        stringsAsFactors = FALSE
      )
    }
  }
}

# Start from an empty, correctly named table so the result is well-formed
# even when no fields are returned.
rfall <- data.frame(
  name = character(0),
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)
if (length(raw_record_rows) > 0) {
  rfall <- do.call(rbind, raw_record_rows)
}

# Number of raw record fields found.
nrow(rfall)
## [1] 323

In addition to the fields provided by data providers, the Search API contains fields that have been indexed and potentially modified by iDigBio. To identify the indexed fields available with the Search API, you can run the following:

# Fetch the indexed record field metadata as a nested list.
record_fields_index <- idig_meta_fields(type = "records", subset = "indexed")

# Collect one single-row data frame per field and bind them once at the end,
# instead of growing `rfalli` with rbind() on every iteration.
indexed_record_rows <- list()
for (i in seq_along(record_fields_index)) {
  entry <- record_fields_index[[i]]
  if (length(entry) == 2) {
    # A length-2 entry is read as a field description (`type`, `fieldName`).
    indexed_record_rows[[length(indexed_record_rows) + 1]] <- data.frame(
      name = names(record_fields_index)[i],
      type = entry[["type"]],
      fieldName = entry[["fieldName"]],
      stringsAsFactors = FALSE
    )
  } else {
    # Any other entry is treated as a group whose elements are field
    # descriptions, so each element contributes its own row.
    for (j in seq_along(entry)) {
      child <- entry[[j]]
      indexed_record_rows[[length(indexed_record_rows) + 1]] <- data.frame(
        name = names(entry)[j],
        type = child[["type"]],
        fieldName = child[["fieldName"]],
        stringsAsFactors = FALSE
      )
    }
  }
}

# Start from an empty, correctly named table so the result is well-formed
# even when no fields are returned.
rfalli <- data.frame(
  name = character(0),
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)
if (length(indexed_record_rows) > 0) {
  rfalli <- do.call(rbind, indexed_record_rows)
}

# Number of indexed record fields found.
nrow(rfalli)
## [1] 81

Note that the raw and indexed field lists are different.

# Compare the sets of field names themselves; calling setequal() on the two
# data frames would compare whole columns as list elements instead.
setequal(rfall$fieldName, rfalli$fieldName)
## [1] FALSE

However, they contain similar information. For example, scientificName can be found in both the raw and indexed fields.

##                    name   type                fieldName
## 238 dwc:sampleSizeValue string data.dwc:sampleSizeValue
##              name   type      fieldName
## 69 scientificname string scientificname

Retain records with these fields

To obtain records with these fields using the idig_search_records() function, simply set the fields argument to these field names.

# Search records for one species, requesting every raw field listed in
# `rfall$fieldName`.
out <- idig_search_records(
  rq = list(scientificname = "Galax urceolata"),
  fields = rfall$fieldName
)

Media Record API Fields

To identify the raw, or verbatim, fields available with the Media Record API, you can run the following:

# Fetch the raw (verbatim) media field metadata as a nested list.
media_fields <- idig_meta_fields(type = "media", subset = "raw")

# Collect one single-row data frame per field and bind them once at the end,
# instead of growing `mfall` with rbind() on every iteration.
raw_media_rows <- list()
for (i in seq_along(media_fields)) {
  entry <- media_fields[[i]]
  if (length(entry) == 2) {
    # A length-2 entry is read as a field description (`type`, `fieldName`).
    raw_media_rows[[length(raw_media_rows) + 1]] <- data.frame(
      name = names(media_fields)[i],
      type = entry[["type"]],
      fieldName = entry[["fieldName"]],
      stringsAsFactors = FALSE
    )
  } else {
    # Any other entry is treated as a group whose elements are field
    # descriptions, so each element contributes its own row.
    for (j in seq_along(entry)) {
      child <- entry[[j]]
      raw_media_rows[[length(raw_media_rows) + 1]] <- data.frame(
        name = names(entry)[j],
        type = child[["type"]],
        fieldName = child[["fieldName"]],
        stringsAsFactors = FALSE
      )
    }
  }
}

# Start from an empty, correctly named table so the result is well-formed
# even when no fields are returned.
mfall <- data.frame(
  name = character(0),
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)
if (length(raw_media_rows) > 0) {
  mfall <- do.call(rbind, raw_media_rows)
}

# Number of raw media fields found.
nrow(mfall)
## [1] 150

Like the Search API, the Media Record API has fields that have been indexed and potentially modified by iDigBio. To identify the indexed fields available with the Media Record API, you can run the following:

# Fetch the indexed media field metadata as a nested list.
media_fields_index <- idig_meta_fields(type = "media", subset = "indexed")

# Collect one single-row data frame per field and bind them once at the end,
# instead of growing `mfalli` with rbind() on every iteration.
indexed_media_rows <- list()
for (i in seq_along(media_fields_index)) {
  entry <- media_fields_index[[i]]
  if (length(entry) == 2) {
    # A length-2 entry is read as a field description (`type`, `fieldName`).
    indexed_media_rows[[length(indexed_media_rows) + 1]] <- data.frame(
      name = names(media_fields_index)[i],
      type = entry[["type"]],
      fieldName = entry[["fieldName"]],
      stringsAsFactors = FALSE
    )
  } else {
    # Any other entry is treated as a group whose elements are field
    # descriptions, so each element contributes its own row.
    for (j in seq_along(entry)) {
      child <- entry[[j]]
      indexed_media_rows[[length(indexed_media_rows) + 1]] <- data.frame(
        name = names(entry)[j],
        type = child[["type"]],
        fieldName = child[["fieldName"]],
        stringsAsFactors = FALSE
      )
    }
  }
}

# Start from an empty, correctly named table so the result is well-formed
# even when no fields are returned.
mfalli <- data.frame(
  name = character(0),
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)
if (length(indexed_media_rows) > 0) {
  mfalli <- do.call(rbind, indexed_media_rows)
}

# Number of indexed media fields found.
nrow(mfalli)
## [1] 21

Note that all indexed fields associated with the Media Record API are returned by default by the idig_search_media() function.

##  [1] "accessuri"      "datemodified"   "dqs"            "etag"          
##  [5] "flags"          "format"         "hasSpecimen"    "licenselogourl"
##  [9] "mediatype"      "modified"       "recordids"      "records"       
## [13] "recordset"      "rights"         "tag"            "type"          
## [17] "uuid"           "version"        "webstatement"   "xpixels"       
## [21] "ypixels"

Retain records with these fields

To obtain records with these fields using the idig_search_media() function, simply set the fields argument to these field names.

# Search media for one species, requesting every indexed media field listed
# in `mfalli$fieldName`.
out <- idig_search_media(
  rq = list(scientificname = "Galax urceolata"),
  fields = mfalli$fieldName
)