## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options for this vignette.
#
# Chunks are evaluated only when the example data folder this script actually
# reads from ("valid_data") is present. Gating on unrelated data folders would
# switch evaluation on and then fail at the import step when "valid_data"
# itself is missing.
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = dir.exists("valid_data"),
  comment = "#>",
  warning = FALSE,
  fig.width = 6,
  fig.height = 6
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
#install.packages("CiteSource")
library(CiteSource)

## -----------------------------------------------------------------------------
# Collect the RIS exports from the example data folder. The pattern is anchored
# with `$` so only names that END in ".ris" match (an unanchored "\\.ris" would
# also pick up files such as "x.ris.bak").
citation_files <- list.files(
  path = "valid_data",
  pattern = "\\.ris$",
  full.names = TRUE
)
citation_files

# Source/label metadata is paired with the files by position, so the order of
# these vectors must follow the (alphabetically sorted) order printed above.
# The first file carries the "benchmark" label and no source (NA); the other
# three are search results from the named databases.
file_sources <- c(NA, "psycinfo", "pubmed", "wos")
file_labels <- c("benchmark", "search", "search", "search")

# Fail early with a readable message if the folder content does not line up
# with the hard-coded metadata.
stopifnot(
  "Number of .ris files in 'valid_data' must match the source/label metadata" =
    length(citation_files) == length(file_sources),
  "Source and label metadata must have the same length" =
    length(file_sources) == length(file_labels)
)

citations <- read_citations(
  citation_files,
  cite_sources = file_sources,
  cite_labels = file_labels,
  tag_naming = "best_guess"
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
# Deduplicate the imported records, then derive the two summaries the rest of
# the script works from:
#   - n_unique:          input to the contribution plot and per-source filters
#   - source_comparison: input to the overlap heatmaps and upset plot
unique_citations <- citations |> dedup_citations()
n_unique <- unique_citations |> count_unique()
source_comparison <- unique_citations |>
  compare_sources(comp_type = "sources")

## -----------------------------------------------------------------------------
# Overlap heatmap of the source comparison, with no plot_type given.
source_comparison |> plot_source_overlap_heatmap()

## -----------------------------------------------------------------------------
# Same heatmap, requesting the "percentages" plot type.
source_comparison |>
  plot_source_overlap_heatmap(plot_type = "percentages")

## -----------------------------------------------------------------------------
# Upset plot of the source comparison, with both `decreasing` flags set to TRUE.
source_comparison |>
  plot_source_overlap_upset(decreasing = c(TRUE, TRUE))

## -----------------------------------------------------------------------------
# Contribution plot built from the per-record uniqueness counts.
n_unique |> plot_contributions(center = TRUE)

## -----------------------------------------------------------------------------
# Return the deduplicated records that were flagged as unique to one source.
#
# source_name: value of `cite_source` to keep (e.g. "pubmed").
# counts:      uniqueness table; must provide `cite_source`, `unique` and
#              `duplicate_id` columns (defaults to `n_unique`).
# records:     deduplicated citations, joined on `duplicate_id`
#              (defaults to `unique_citations`).
#
# `.env$source_name` forces the lookup to use the function argument, so a
# column that happens to be called `source_name` can never shadow it.
records_unique_to <- function(source_name,
                              counts = n_unique,
                              records = unique_citations) {
  counts |>
    dplyr::filter(cite_source == .env$source_name, unique == TRUE) |>
    dplyr::inner_join(records, by = "duplicate_id")
}

unique_psycinfo <- records_unique_to("psycinfo")
unique_pubmed <- records_unique_to("pubmed")
unique_wos <- records_unique_to("wos")

# To export for manual review:
# export_csv(unique_pubmed, "pubmed_unique.csv")

## -----------------------------------------------------------------------------
# Record-level table, requested in "DT" form, limited to records whose
# cite_label contains "benchmark".
record_level_table(
  dplyr::filter(
    unique_citations,
    stringr::str_detect(cite_label, "benchmark")
  ),
  return = "DT"
)

## -----------------------------------------------------------------------------
# Summary table over all deduplicated records, using "benchmark" as the
# screening label.
unique_citations |>
  citation_summary_table(screening_label = "benchmark")

## -----------------------------------------------------------------------------
#export_csv(unique_citations, filename = "unique-by-source.csv", separate = "cite_source")
#export_ris(unique_citations, filename = "unique_citations.ris", source_field = "DB", label_field = "N1")
#export_bib(unique_citations, filename = "unique_citations.bib", include = c("sources", "labels", "strings"))
#reimport_csv("unique-by-source.csv")

