{
  "_id": "6a107a86acfb0bcc41cadba5",
  "Package": "arete",
  "Title": "Automated REtrieval from TExt",
  "Version": "0.1",
  "Date": "2025-09-29",
  "Author": "Vasco V. Branco [cre, aut]\n(<https://orcid.org/0000-0001-7797-3183>), Vaughn Shirey [ctb]\n(<https://orcid.org/0000-0002-3589-9699>), Thomas Merrien [ctb]\n(<https://orcid.org/0000-0002-0339-5656>), Pedro Cardoso [aut]\n(<https://orcid.org/0000-0001-8119-9960>)",
  "Authors@R": "c(\nperson(\"Vasco V.\", \"Branco\", role = c(\"cre\",\"aut\"),\nemail = \"vasco.branco@helsinki.fi\",\ncomment = c(ORCID = \"0000-0001-7797-3183\")),\nperson(\"Vaughn\", \"Shirey\", role = c(\"ctb\"),\nemail = \"vms55@georgetown.edu\",\ncomment = c(ORCID = \"0000-0002-3589-9699\")),\nperson(\"Thomas\", \"Merrien\", role = c(\"ctb\"),\nemail = \"thomas.merrien@helsinki.fi\",\ncomment = c(ORCID = \"0000-0002-0339-5656\")),\nperson(\"Pedro\", \"Cardoso\", role = c(\"aut\"),\nemail = \"pedro.cardoso@helsinki.fi\",\ncomment = c(ORCID = \"0000-0001-8119-9960\"))\n)",
  "Maintainer": "Vasco V. Branco <vasco.branco@helsinki.fi>",
  "Description": "A Python based pipeline for extraction of species\noccurrence data through the usage of large language models.\nIncludes validation tools designed to handle model\nhallucinations for a scientific, rigorous use of LLM. Currently\nsupports usage of GPT with more planned, including local and\nnon-proprietary models. For more details on the methodology\nused please consult the references listed under each function,\nsuch as Kent, A. et al. (1995) <doi:10.1002/asi.5090060209>,\nvan Rijsbergen, C.J. (1979, ISBN:978-0408709293, Levenshtein,\nV.I. (1966)\n<https://nymity.ch/sybilhunting/pdf/Levenshtein1966a.pdf> and\nKlaus Krippendorff (2011)\n<https://repository.upenn.edu/handle/20.500.14332/2089>.",
  "License": "GPL-3",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.2",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-06 08:05:57 UTC",
    "User": "root"
  },
  "Config/Needs/website": "rmarkdown",
  "VignetteBuilder": "knitr",
  "Config/pak/sysreqs": "libabsl-dev cmake libfontconfig1-dev\nlibfreetype6-dev libfribidi-dev libgdal-dev gdal-bin\nlibgeos-dev libglpk-dev libgmp3-dev make libharfbuzz-dev\ndefault-jdk libicu-dev libjpeg-dev libpng-dev libuv1-dev\nlibxml2-dev libssl-dev libpoppler-cpp-dev poppler-data\nlibproj-dev python3 libsqlite3-dev libudunits2-dev",
  "Repository": "https://vascobranco.r-universe.dev",
  "Date/Publication": "2025-11-06 17:51:06 UTC",
  "RemoteUrl": "https://github.com/vascobranco/arete",
  "RemoteRef": "HEAD",
  "RemoteSha": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
  "MD5sum": "5e57bba95bf0ad0e3444f230d23664e5",
  "_user": "vascobranco",
  "_type": "src",
  "_file": "arete_0.1.tar.gz",
  "_fileid": "d27b56b6f824b2ed3f1a511d47024f36e4f767061e1e71b30c63b1d522303786",
  "_filesize": 1083653,
  "_sha256": "d27b56b6f824b2ed3f1a511d47024f36e4f767061e1e71b30c63b1d522303786",
  "_created": "2026-05-06T08:05:57.000Z",
  "_published": "2026-05-22T15:47:18.606Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77413721460,
      "time": 422,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "6825527898"
    },
    {
      "job": 77413721348,
      "time": 394,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "6825521372"
    },
    {
      "job": 77413721738,
      "time": 458,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "6825530350"
    },
    {
      "job": 77413721695,
      "time": 677,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "6825581883"
    },
    {
      "job": 77413720889,
      "time": 321,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "6825418728"
    },
    {
      "job": 77413720800,
      "time": 199,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7164645821"
    },
    {
      "job": 77413721296,
      "time": 766,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "NOTE",
      "artifact": "6825615281"
    },
    {
      "job": 77413721710,
      "time": 800,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "NOTE",
      "artifact": "6825623120"
    },
    {
      "job": 77413721441,
      "time": 820,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "NOTE",
      "artifact": "6825628637"
    }
  ],
  "_buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/vascobranco/arete",
  "_commit": {
    "id": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
    "author": "Vasco Branco <bio.vbranco@gmail.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "Update README.md",
    "time": 1762451466
  },
  "_maintainer": {
    "name": "Vasco V. Branco",
    "email": "vasco.branco@helsinki.fi",
    "login": "vascobranco",
    "twitter": "@VV_Branco",
    "uuid": 56229977,
    "orcid": "0000-0001-7797-3183"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.3.0",
      "role": "Depends"
    },
    {
      "package": "terra",
      "role": "Imports"
    },
    {
      "package": "cld2",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "reticulate",
      "role": "Imports"
    },
    {
      "package": "pdftools",
      "role": "Imports"
    },
    {
      "package": "fedmatch",
      "role": "Imports"
    },
    {
      "package": "kableExtra",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "role": "Imports"
    },
    {
      "package": "gecko",
      "role": "Imports"
    },
    {
      "package": "methods",
      "role": "Imports"
    },
    {
      "package": "ggplot2",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Imports"
    },
    {
      "package": "googledrive",
      "role": "Imports"
    },
    {
      "package": "irr",
      "role": "Imports"
    },
    {
      "package": "rmarkdown",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    }
  ],
  "_owner": "vascobranco",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-45",
      "n": 5
    }
  ],
  "_tags": [],
  "_topics": [
    "ecology",
    "large-language-models",
    "wildlife-conservation"
  ],
  "_stars": 1,
  "_contributors": [
    {
      "user": "vascobranco",
      "count": 5,
      "uuid": 56229977
    }
  ],
  "_userbio": {
    "uuid": 56229977,
    "type": "user",
    "name": "Vasco Branco",
    "description": "Researcher and R programmer uncovering the secrets of the flesh."
  },
  "_downloads": {
    "count": 217,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/arete"
  },
  "_devurl": "https://github.com/vascobranco/arete",
  "_searchresults": 3,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/arete.html",
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/vascobranco/arete",
  "_realowner": "vascobranco",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1",
      "date": "2025-10-20"
    }
  ],
  "_exports": [
    "arete_data",
    "arete_setup",
    "aux_string_to_coords",
    "check_lang",
    "compare_IUCN",
    "create_training_data",
    "file_comparison",
    "gazetteer",
    "get_geodata",
    "install_OCR_packages",
    "install_python_packages",
    "labels",
    "labels_unique",
    "performance_report",
    "process_document",
    "process_species_names",
    "string_to_coords",
    "webanno_open",
    "webanno_summary"
  ],
  "_help": [
    {
      "page": "arete_data",
      "title": "Example data packaged with gecko",
      "topics": [
        "arete_data"
      ]
    },
    {
      "page": "ARETE_package",
      "title": "Summary of methods in the arete package",
      "topics": [
        "arete",
        "arete_package"
      ]
    },
    {
      "page": "arete_setup",
      "title": "Setup arete",
      "topics": [
        "arete_setup"
      ]
    },
    {
      "page": "aux_string_to_coords",
      "title": "Mechanical coordinate conversion",
      "topics": [
        "aux_string_to_coords"
      ]
    },
    {
      "page": "check_lang",
      "title": "Check if text is language-appropriate",
      "topics": [
        "check_lang"
      ]
    },
    {
      "page": "compare_IUCN",
      "title": "Check EOO differences between two sets of coordinates",
      "topics": [
        "compare_IUCN"
      ]
    },
    {
      "page": "create_training_data",
      "title": "Create training data for GPT",
      "topics": [
        "create_training_data"
      ]
    },
    {
      "page": "file_comparison",
      "title": "Compare the contents of two WebAnno tsv files.",
      "topics": [
        "file_comparison"
      ]
    },
    {
      "page": "gazetteer",
      "title": "Get geographic coordinates from localities",
      "topics": [
        "gazetteer"
      ]
    },
    {
      "page": "get_geodata",
      "title": "Call a Large Language Model (LLM) to extract species geographic data",
      "topics": [
        "get_geodata"
      ]
    },
    {
      "page": "install_OCR_packages",
      "title": "Update OCR dependencies",
      "topics": [
        "install_OCR_packages"
      ]
    },
    {
      "page": "install_python_packages",
      "title": "Update python dependencies",
      "topics": [
        "install_python_packages"
      ]
    },
    {
      "page": "labels",
      "title": "Labels for model training",
      "topics": [
        "labels"
      ]
    },
    {
      "page": "labels_unique",
      "title": "Get the unique labels of a WebAnno document",
      "topics": [
        "labels_unique"
      ]
    },
    {
      "page": "OCR_document",
      "title": "Scan PDF with optical character recognition (OCR)",
      "topics": [
        "OCR_document"
      ]
    },
    {
      "page": "performance_report",
      "title": "Evaluate the performance of a LLM",
      "topics": [
        "performance_report"
      ]
    },
    {
      "page": "process_document",
      "title": "Extract and process text from a document",
      "topics": [
        "process_document"
      ]
    },
    {
      "page": "process_species_names",
      "title": "Process and fix species names",
      "topics": [
        "process_species_names"
      ]
    },
    {
      "page": "string_to_coords",
      "title": "Convert strings to numerical coordinates",
      "topics": [
        "string_to_coords"
      ]
    },
    {
      "page": "webanno_open",
      "title": "Open a WebAnno TSV v3.3 file.",
      "topics": [
        "webanno_open"
      ]
    },
    {
      "page": "webanno_summary",
      "title": "Summarize the contents of a group of WebAnno tsv files",
      "topics": [
        "webanno_summary"
      ]
    },
    {
      "page": "WebAnnoTSV-class",
      "title": "WebAnno TSV v3.3 class creator.",
      "topics": [
        "WebAnnoTSV-class",
        "webanno_creator"
      ]
    }
  ],
  "_pkglogo": "https://github.com/vascobranco/arete/raw/HEAD/man/figures/logo.png",
  "_readme": "https://github.com/vascobranco/arete/raw/HEAD/README.md",
  "_rundeps": [
    "abind",
    "ape",
    "askpass",
    "base64enc",
    "BAT",
    "BH",
    "biomod2",
    "bit",
    "bit64",
    "boot",
    "bslib",
    "cachem",
    "caret",
    "class",
    "classInt",
    "cld2",
    "cli",
    "clock",
    "cluster",
    "clusterGeneration",
    "coda",
    "codetools",
    "combinat",
    "cpp11",
    "crayon",
    "curl",
    "data.table",
    "DBI",
    "DEoptim",
    "diagram",
    "digest",
    "dismo",
    "doParallel",
    "dplyr",
    "e1071",
    "evaluate",
    "expm",
    "farver",
    "fastcluster",
    "fastmap",
    "fastmatch",
    "fedmatch",
    "FNN",
    "fontawesome",
    "forcats",
    "foreach",
    "fs",
    "future",
    "future.apply",
    "gargle",
    "gbm",
    "gdistance",
    "gecko",
    "generics",
    "geometry",
    "geosphere",
    "ggplot2",
    "globals",
    "glue",
    "googledrive",
    "gower",
    "gtable",
    "hardhat",
    "here",
    "highr",
    "hitandrun",
    "hms",
    "htmltools",
    "httr",
    "hypervolume",
    "igraph",
    "ipred",
    "irr",
    "isoband",
    "iterators",
    "jquerylib",
    "jsonlite",
    "kableExtra",
    "kernlab",
    "KernSmooth",
    "knitr",
    "ks",
    "labeling",
    "lattice",
    "lava",
    "lifecycle",
    "linprog",
    "listenv",
    "lpSolve",
    "lubridate",
    "magic",
    "magrittr",
    "maps",
    "MASS",
    "Matrix",
    "mclust",
    "memoise",
    "mgcv",
    "mime",
    "mnormt",
    "ModelMetrics",
    "multicool",
    "mvtnorm",
    "nlme",
    "nls2",
    "nnet",
    "numDeriv",
    "openssl",
    "optimParallel",
    "palmerpenguins",
    "parallelly",
    "pbapply",
    "pdftools",
    "pdist",
    "permute",
    "phangorn",
    "phytools",
    "pillar",
    "pkgconfig",
    "PlotTools",
    "plyr",
    "png",
    "pracma",
    "predicts",
    "PresenceAbsence",
    "prettyunits",
    "pROC",
    "prodlim",
    "progress",
    "progressr",
    "proto",
    "proxy",
    "purrr",
    "qpdf",
    "quadprog",
    "R6",
    "rappdirs",
    "raster",
    "rbibutils",
    "rcdd",
    "RColorBrewer",
    "Rcpp",
    "RcppArmadillo",
    "RcppProgress",
    "RcppTOML",
    "Rdpack",
    "recipes",
    "red",
    "reshape",
    "reshape2",
    "reticulate",
    "rlang",
    "rmarkdown",
    "rpart",
    "rprojroot",
    "rstudioapi",
    "s2",
    "S7",
    "sass",
    "scales",
    "scatterplot3d",
    "sf",
    "shape",
    "SnowballC",
    "sp",
    "sparsevctrs",
    "SQUAREM",
    "stringdist",
    "stringi",
    "stringr",
    "survival",
    "svglite",
    "sys",
    "systemfonts",
    "terra",
    "textshaping",
    "tibble",
    "tidyr",
    "tidyselect",
    "timechange",
    "timeDate",
    "tinytex",
    "TreeTools",
    "tzdb",
    "units",
    "utf8",
    "uuid",
    "vctrs",
    "vegan",
    "viridisLite",
    "withr",
    "wk",
    "xfun",
    "xml2",
    "yaml"
  ],
  "_vignettes": [
    {
      "source": "request_example.Rmd",
      "filename": "request_example.html",
      "title": "Package workflow",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Data extraction",
        "Process coordinates",
        "Process species names",
        "Process outliers",
        "Create performance reports"
      ],
      "created": "2025-11-06 17:02:00",
      "modified": "2025-11-06 17:02:00",
      "commits": 1
    }
  ],
  "_score": 3.6989700043360187,
  "_indexed": true,
  "_nocasepkg": "arete",
  "_universes": [
    "vascobranco"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1",
      "date": "2026-05-06T08:09:57.000Z",
      "distro": "noble",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "e9a8b32a7921bf82c5cefa36afc836e14e620d6bdb5e0d159f4d18d6c15af895",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1",
      "date": "2026-05-06T08:09:49.000Z",
      "distro": "noble",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "a2feb3c1e0c8b64a9e21cda56454dfb782a33fb62baec402133e11feee048c4f",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.1",
      "date": "2026-05-06T08:11:42.000Z",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "c5acfd312d3e9aaaf97ed3153e8e1f5f1deca948f67bc126c67ac2e14fa3117d",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.1",
      "date": "2026-05-06T08:13:29.000Z",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "47095d9ed626c21ec58b631a815fce91a92a6835aabbeacc5f95997fd1c5eacf",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.1",
      "date": "2026-05-06T08:15:32.000Z",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "7ac0d48dcd714b4dc8de007eaed27c305a9e7efd9b9dc10ac0d57f228f3a1051",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.1",
      "date": "2026-05-06T08:16:00.000Z",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "b13a072deb56263fd29acfd63ca68278ff7c4660fda8af09a7dda752ad8759b7",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.1",
      "date": "2026-05-06T08:15:23.000Z",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "9eac2bc1a656b25a0596aca7f882a77c3bbbf7ae521c146eafc2705f4371237c",
      "status": "success",
      "check": "NOTE",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.1",
      "date": "2026-05-22T15:46:52.000Z",
      "commit": "cc738ca101f4ea25df71db19945a2b0c59437ce1",
      "fileid": "02b040dfed9b627a987ad02f0715714548e7fa679c4dbd1c965a02acfd76af43",
      "status": "success",
      "buildurl": "https://github.com/r-universe/vascobranco/actions/runs/25423469812"
    }
  ]
}