% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/occ_data.R
\name{occ_data}
\alias{occ_data}
\title{Search for GBIF occurrences - simplified for speed}
\usage{
occ_data(
  taxonKey = NULL,
  scientificName = NULL,
  country = NULL,
  publishingCountry = NULL,
  hasCoordinate = NULL,
  typeStatus = NULL,
  recordNumber = NULL,
  lastInterpreted = NULL,
  continent = NULL,
  geometry = NULL,
  geom_big = "asis",
  geom_size = 40,
  geom_n = 10,
  recordedBy = NULL,
  recordedByID = NULL,
  identifiedByID = NULL,
  basisOfRecord = NULL,
  datasetKey = NULL,
  eventDate = NULL,
  catalogNumber = NULL,
  year = NULL,
  month = NULL,
  decimalLatitude = NULL,
  decimalLongitude = NULL,
  elevation = NULL,
  depth = NULL,
  institutionCode = NULL,
  collectionCode = NULL,
  hasGeospatialIssue = NULL,
  issue = NULL,
  search = NULL,
  mediaType = NULL,
  subgenusKey = NULL,
  repatriated = NULL,
  phylumKey = NULL,
  kingdomKey = NULL,
  classKey = NULL,
  orderKey = NULL,
  familyKey = NULL,
  genusKey = NULL,
  speciesKey = NULL,
  establishmentMeans = NULL,
  degreeOfEstablishment = NULL,
  protocol = NULL,
  license = NULL,
  organismId = NULL,
  publishingOrg = NULL,
  stateProvince = NULL,
  waterBody = NULL,
  locality = NULL,
  limit = 500,
  start = 0,
  skip_validate = TRUE,
  occurrenceStatus = "PRESENT",
  gadmGid = NULL,
  coordinateUncertaintyInMeters = NULL,
  verbatimScientificName = NULL,
  eventId = NULL,
  identifiedBy = NULL,
  networkKey = NULL,
  verbatimTaxonId = NULL,
  occurrenceId = NULL,
  organismQuantity = NULL,
  organismQuantityType = NULL,
  relativeOrganismQuantity = NULL,
  iucnRedListCategory = NULL,
  lifeStage = NULL,
  isInCluster = NULL,
  curlopts = list()
)
}
\arguments{
\item{taxonKey}{(numeric) A taxon key from the GBIF backbone. All included
and synonym taxa are included in the search, so a search for aves with
taxonKey=212 will match all birds, no matter which species. You can pass
many keys to \code{occ_search(taxonKey=c(1,212))}.}

\item{scientificName}{A scientific name from the GBIF backbone. All included
and synonym taxa are included in the search.}

\item{country}{(character) The 2-letter country code (ISO-3166-1)
in which the occurrence was recorded. See \code{enumeration_country()}.}

\item{publishingCountry}{The 2-letter country code (as per ISO-3166-1) of
the country of the organization that published the occurrence record. See
\code{enumeration_country()}.}

\item{hasCoordinate}{(logical) Return only occurrence records with lat/long
data (\code{TRUE}) or all records (\code{FALSE}, default).}

\item{typeStatus}{Type status of the specimen. One of many
\href{https://www.gbif.org/occurrence/search?type_status=PARATYPE}{options}.}

\item{recordNumber}{Number recorded by collector of the data, different from
GBIF record number.}

\item{lastInterpreted}{Date the record was last modified in GBIF, in ISO
8601 format: yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd.  Supports range queries,
'smaller,larger' (e.g., '1990,1991', whereas '1991,1990' wouldn't work).}

\item{continent}{The source supplied continent.

\itemize{
\item "africa"
\item "antarctica"
\item "asia"
\item "europe"
\item "north_america"
\item "oceania"
\item "south_america"
}

Continent is not inferred but only populated if provided by the
dataset publisher. Applying this filter may exclude many relevant records.}

\item{geometry}{(character) Searches for occurrences inside a polygon in
Well Known Text (WKT) format. A WKT shape written as either

\itemize{
\item "POINT"
\item "LINESTRING"
\item "LINEARRING"
\item "POLYGON"
\item "MULTIPOLYGON"
}

For example, "POLYGON((37.08 46.86,38.06 46.86,38.06 47.28,37.08 47.28,
37.08 46.86))". See also the section \strong{WKT} below.}

\item{geom_big}{(character) One of "axe", "bbox", or "asis" (default).}

\item{geom_size}{(integer) An integer indicating size of the cell. Default:
40.}

\item{geom_n}{(integer) An integer indicating number of cells in each
dimension. Default: 10.}

\item{recordedBy}{(character) The person who recorded the occurrence.}

\item{recordedByID}{(character) Identifier (e.g. ORCID) for the person who
recorded the occurrence.}

\item{identifiedByID}{(character) Identifier (e.g. ORCID) for the person who
provided the taxonomic identification of the occurrence.}

\item{basisOfRecord}{(character) The specific nature of the data record. See
\href{https://gbif.github.io/parsers/apidocs/org/gbif/api/vocabulary/BasisOfRecord.html}{here}.

\itemize{
\item "FOSSIL_SPECIMEN"
\item "HUMAN_OBSERVATION"
\item "MATERIAL_CITATION"
\item "MATERIAL_SAMPLE"
\item "LIVING_SPECIMEN"
\item "MACHINE_OBSERVATION"
\item "OBSERVATION"
\item "PRESERVED_SPECIMEN"
\item "OCCURRENCE"
}}

\item{datasetKey}{(character) The occurrence dataset uuid key. That can be
found in the dataset page url. For example,
"7e380070-f762-11e1-a439-00145eb45e9a" is the key for
\href{https://www.gbif.org/dataset/7e380070-f762-11e1-a439-00145eb45e9a}{Natural History Museum (London) Collection Specimens}.}

\item{eventDate}{(character) Occurrence date in ISO 8601 format: yyyy,
yyyy-MM, yyyy-MM-dd, or MM-dd. Supports range queries, 'smaller,larger'
('1990,1991', whereas '1991,1990' wouldn't work).}

\item{catalogNumber}{(character) An identifier of any form assigned by the
source within a physical collection or digital dataset for the record which
may not be unique, but should be fairly unique in combination with the
institution and collection code.}

\item{year}{The 4 digit year. A year of 98 will be interpreted as AD 98.
Supports range queries, 'smaller,larger' (e.g., '1990,1991', whereas
'1991,1990' wouldn't work).}

\item{month}{The month of the year, starting with 1 for January. Supports
range queries, 'smaller,larger' (e.g., '1,2', whereas '2,1' wouldn't work).}

\item{decimalLatitude}{Latitude in decimals between -90 and 90 based on
WGS84. Supports range queries, 'smaller,larger' (e.g., '25,30', whereas
'30,25' wouldn't work).}

\item{decimalLongitude}{Longitude in decimals between -180 and 180 based on
WGS84. Supports range queries (e.g., '-0.4,-0.2', whereas '-0.2,-0.4'
wouldn't work).}

\item{elevation}{Elevation in meters above sea level. Supports range
queries, 'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work).}

\item{depth}{Depth in meters relative to elevation. For example 10 meters
below a lake surface with given elevation. Supports range queries,
'smaller,larger' (e.g., '5,30', whereas '30,5' wouldn't work).}

\item{institutionCode}{An identifier of any form assigned by the source to
identify the institution the record belongs to.}

\item{collectionCode}{(character) An identifier of any form assigned by the
source to identify the physical collection or digital dataset uniquely within
the context of an institution.}

\item{hasGeospatialIssue}{(logical) Includes/excludes occurrence records
which contain spatial issues (as determined in our record interpretation),
i.e. \code{hasGeospatialIssue=TRUE} returns only those records with spatial
issues while \code{hasGeospatialIssue=FALSE} includes only records without
spatial issues. The absence of this parameter returns any record with or
without spatial issues.}

\item{issue}{(character) One or more of many possible issues with each
occurrence record. Issues passed to this parameter filter results by
the issue. One of many \href{https://gbif.github.io/gbif-api/apidocs/org/gbif/api/vocabulary/OccurrenceIssue.html}{options}.
See \href{https://data-blog.gbif.org/post/issues-and-flags/}{here} for definitions.}

\item{search}{(character) Query terms. The value for this parameter can be a
simple word or a phrase. For example, \href{https://www.gbif.org/occurrence/search?q=puma}{search="puma"}}

\item{mediaType}{(character) Media type of "MovingImage", "Sound", or
"StillImage".}

\item{subgenusKey}{(numeric) Subgenus classification key.}

\item{repatriated}{(character) Searches for records whose publishing country
is different to the country where the record was recorded in.}

\item{phylumKey}{(numeric) Phylum classification key.}

\item{kingdomKey}{(numeric) Kingdom classification key.}

\item{classKey}{(numeric) Class classification key.}

\item{orderKey}{(numeric) Order classification key.}

\item{familyKey}{(numeric) Family classification key.}

\item{genusKey}{(numeric) Genus classification key.}

\item{speciesKey}{(numeric) Species classification key.}

\item{establishmentMeans}{(character) provides information about whether an
organism or organisms have been introduced to a given place and time through
the direct or indirect activity of modern humans.

\itemize{
\item "Introduced"
\item "Native"
\item "NativeReintroduced"
\item "Vagrant"
\item "Uncertain"
\item "IntroducedAssistedColonisation"
}}

\item{degreeOfEstablishment}{(character) Provides information about degree to
which an Organism survives, reproduces, and expands its range at the given
place and time. One of many \href{https://www.gbif.org/occurrence/search?advanced=1&degree_of_establishment=Managed}{options}.}

\item{protocol}{(character) Protocol or mechanism used to provide the
occurrence record. One of many \href{https://www.gbif.org/occurrence/search?protocol=DWC_ARCHIVE&advanced=1}{options}.}

\item{license}{(character) The type license applied to the dataset or record.

\itemize{
\item "CC0_1_0"
\item "CC_BY_4_0"
\item "CC_BY_NC_4_0"
}}

\item{organismId}{(numeric) An identifier for the Organism instance (as
opposed to a particular digital record of the Organism). May be a globally
unique identifier or an identifier specific to the data set.}

\item{publishingOrg}{(character) The publishing organization key (a UUID).}

\item{stateProvince}{(character) The name of the next smaller administrative
region than country (state, province, canton, department, region, etc.) in
which the Location occurs.}

\item{waterBody}{(character) The name of the water body in which the
locations occur.}

\item{locality}{(character) The specific description of the place.}

\item{limit}{Number of records to return. Default: 500. Note that the per
request maximum is 300, but since we set it at 500 for the function, we
do two requests to get you the 500 records (if there are that many).
Note that there is a hard maximum of 100,000, which is calculated as the
\code{limit+start}, so \code{start=99000} and \code{limit=2000} won't work.}

\item{start}{Record number to start at. Use in combination with limit to
page through results. Note that we do the paging internally for you, but
you can manually set the \code{start} parameter}

\item{skip_validate}{(logical) whether to skip \code{wellknown::validate_wkt}
call or not. passed down to \code{\link[=check_wkt]{check_wkt()}}. Default: \code{TRUE}}

\item{occurrenceStatus}{(character)  Default is "PRESENT". Specify whether
search should return "PRESENT" or "ABSENT" data.}

\item{gadmGid}{(character) The gadm id of the area occurrences are desired
from. https://gadm.org/.}

\item{coordinateUncertaintyInMeters}{A number or range between 0-1,000,000
which specifies the desired coordinate uncertainty. A
coordinateUncertaintyInMeters=1000 will be interpreted as all records with
exactly 1000m. Supports range queries, 'smaller,larger' (e.g., '1000,10000',
whereas '10000,1000' wouldn't work).}

\item{verbatimScientificName}{(character) Scientific name as provided by the
source.}

\item{eventId}{(character) identifier(s) for a sampling event.}

\item{identifiedBy}{(character) names of people, groups, or organizations
who assigned the Taxon to the subject.}

\item{networkKey}{(character) The occurrence network key (a uuid).}

\item{verbatimTaxonId}{(character) The taxon identifier provided to GBIF by
the data publisher.}

\item{occurrenceId}{(character) occurrence id from source.}

\item{organismQuantity}{A number or range which
specifies the desired organism quantity. An organismQuantity=5
will be interpreted as all records with exactly 5. Supports range queries,
'smaller,larger' (e.g., '5,20', whereas '20,5' wouldn't work).}

\item{organismQuantityType}{(character) The type of quantification system
used for the quantity of organisms. For example, "individuals" or "biomass".}

\item{relativeOrganismQuantity}{(numeric) A relativeOrganismQuantity=0.1 will
be interpreted as all records with exactly 0.1. The relative measurement of the
quantity of the organism (a number between 0-1). Supports range queries,
'smaller,larger' (e.g., '0.1,0.5', whereas '0.5,0.1' wouldn't work).}

\item{iucnRedListCategory}{(character) The IUCN threat status category.

\itemize{
\item "NE" (Not Evaluated)
\item "DD" (Data Deficient)
\item "LC" (Least Concern)
\item "NT" (Near Threatened)
\item "VU" (Vulnerable)
\item "EN" (Endangered)
\item "CR" (Critically Endangered)
\item "EX" (Extinct)
\item "EW" (Extinct in the Wild)
}}

\item{lifeStage}{(character) the life stage of the occurrence. One of many
\href{https://www.gbif.org/occurrence/search?advanced=1&life_stage=Tadpole}{options}.}

\item{isInCluster}{(logical) identify potentially related records on GBIF.}

\item{curlopts}{list of named curl options passed on to
\code{\link[crul]{HttpClient}}. see \code{curl::curl_options}
for curl options}
}
\value{
An object of class \code{gbif_data}, which is a S3 class list, with
slots for metadata (\code{meta}) and the occurrence data itself
(\code{data}), and with attributes listing the user supplied arguments
and whether it was a "single" or "many" search; that is, if you supply
two values of the \code{datasetKey} parameter, two searches are done, and
it's a "many". \code{meta} is a list of length four with offset, limit,
endOfRecords and count fields. \code{data} is a tibble (aka data.frame).
}
\description{
Search for GBIF occurrences - simplified for speed
}
\note{
Maximum number of records you can get with this function is 100,000.
See https://www.gbif.org/developer/occurrence
}
\section{Multiple values passed to a parameter}{

There are some parameters you can pass multiple values to in a vector,
each value of which produces a different request (multiple different
requests = c("a","b")). Some parameters allow multiple values to be passed
in the same request (multiple same request = "a;b") in a semicolon separated
string (e.g., 'a;b'); if given we'll do a single request with that parameter
repeated for each value given (e.g., \code{foo=a&foo=b} if the parameter
is \code{foo}).

See article \href{https://docs.ropensci.org/rgbif/articles/multiple_values.html}{Multiple Values}.
}

\section{Hierarchies}{

Hierarchies are returned with each occurrence object. There is no
option to return them from the API. However, within the \code{occ_search}
function you can select whether to return just hierarchies, just data, all
of data and hierarchies and metadata, or just metadata. If all hierarchies
are the same we just return one for you.
}

\section{curl debugging}{

You can pass parameters not defined in this function into the call to
the GBIF API to control things about the call itself using \code{curlopts}.
See an example below that passes in the \code{verbose} function to get
details on the http call.
}

\section{WKT}{

Examples of valid WKT objects:
\itemize{
\item 'POLYGON((-19.5 34.1, 27.8 34.1, 35.9 68.1, -25.3 68.1, -19.5 34.1))'
\item 'MULTIPOLYGON(((-123 38,-116 38,-116 43,-123 43,-123 38)),((-97 41,-93 41,-93 45,-97 45,-97 41)))'
\item 'POINT(-120 40)'
\item 'LINESTRING(3 4,10 50,20 25)'
}

Note that GBIF expects counter-clockwise winding order for WKT. You can
supply clockwise WKT, but GBIF treats it as an exclusion, so you get all
data not inside the WKT area. \code{\link[=occ_download]{occ_download()}} behaves differently
in that you should simply get no data back at all with clockwise WKT.
}

\section{Long WKT}{

Options for handling long WKT strings:
Note that long WKT strings are specially handled when using \code{\link{occ_search}} or
\code{\link{occ_data}}. Here are the three options for long WKT strings (> 1500 characters),
set one of these three via the parameter \code{geom_big}:
\itemize{
\item asis - the default setting. This means we don't do anything internally. That is,
we just pass on your WKT string just as we've done before in this package.
\item axe - this option uses the \pkg{sf} package to chop up your WKT string in
to many polygons, which then leads to a separate data request for each polygon piece,
then we combine all data back together to give to you. Note that if your WKT string
is not of type polygon, we drop back to \code{asis} as there's no way to chop up
linestrings, etc. This option will in most cases be slower than the other two options.
However, this polygon splitting approach won't have the problem of
the disconnect between how many records you want and what you actually get back as
with the bbox option.

This method uses \code{sf::st_make_grid} and \code{sf::st_intersection}, which has
two parameters \code{cellsize} and \code{n}. You can tweak those parameters here by
tweaking \code{geom_size} and \code{geom_n}. \code{geom_size} seems to be more useful in
toggling the number of WKT strings you get back.

See \code{\link{wkt_parse}} to manually make a WKT bounding box from a larger WKT
string, or break a larger WKT string into many smaller ones.

\item bbox - this option checks whether your WKT string is longer than 1500 characters,
and if it is we create a bounding box from the WKT, do the GBIF search with that
bounding box, then prune the resulting data to only those occurrences in your original
WKT string. There is a big caveat however. Because we create a bounding box from the WKT,
and the \code{limit} parameter determines some subset of records to get, then when we
prune the resulting data to the WKT, the number of records you get could be less than
what you set with your \code{limit} parameter. However, you could set the limit to be
high enough so that you get all records back found in that bounding box, then you'll
get all the records available within the WKT.
}
}

\section{Counts}{

There is a slight difference in the way records are counted here vs.
results from \code{\link{occ_count}}. For equivalent outcomes, in this
function use \code{hasCoordinate=TRUE}, and \code{hasGeospatialIssue=FALSE}
to have the same outcome using \code{\link{occ_count}} with
\code{isGeoreferenced=TRUE}
}

\section{occ_data vs. occ_search}{

This does nearly the same thing as \code{\link[=occ_search]{occ_search()}}, but
is simplified for speed, and is for the most common use case where
user just wants occurrence data, and not other information like taxon
hierarchies and media (e.g., images). A lot of time in \code{\link[=occ_search]{occ_search()}}
is used parsing data to be more useable downstream. We do less of that
in this function.

There are a number of data fields GBIF returns that we drop to speed up
processing time within R. These fields take extra time to process
because they are deeply nested and so take extra time to check if
they are empty or not, and if not, figure out how to parse them
into a data.frame. The fields are:
\itemize{
\item \code{gadm}
\item \code{media}
\item \code{facts}
\item \code{relations}
\item \code{extensions}
\item \code{identifiers}
\item \code{recordedByIDs}
\item \code{identifiedByIDs}
}

To get these fields use \code{\link[=occ_search]{occ_search()}} instead.
}

\examples{
\dontrun{
(key <- name_backbone(name='Encelia californica')$speciesKey)
occ_data(taxonKey = key, limit = 4)
(res <- occ_data(taxonKey = key, limit = 400))

# Return 20 results (the default limit is 500)
(key <- name_suggest(q='Helianthus annuus', rank='species')$data$key[1])
occ_data(taxonKey=key, limit=20)

# Instead of getting a taxon key first, you can search for a name directly
## However, note that using this approach (with \code{scientificName="..."})
## you are getting synonyms too. The results for using \code{scientificName}
## and \code{taxonKey} parameters are the same in this case, but I wouldn't
## be surprised if for some names they return different results
occ_data(scientificName = 'Ursus americanus', curlopts=list(verbose=TRUE))
key <- name_backbone(name = 'Ursus americanus', rank='species')$usageKey
occ_data(taxonKey = key)

# Search by dataset key
occ_data(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a', limit=10)

# Search by catalog number
occ_data(catalogNumber="49366", limit=10)
## separate requests: use a vector of strings
occ_data(catalogNumber=c("49366","Bird.27847588"), limit=10)
## one request, many instances of same parameter: use semi-colon sep. string
occ_data(catalogNumber="49366;Bird.27847588", limit=10)

# Use paging parameters (limit and start) to page. Note the different results
# for the two queries below.
occ_data(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a',start=10,limit=5)
occ_data(datasetKey='7b5d6a48-f762-11e1-a439-00145eb45e9a',start=20,limit=5)

# Many dataset keys
## separate requests: use a vector of strings
occ_data(datasetKey=c("50c9509d-22c7-4a22-a47d-8c48425ef4a7",
   "7b5d6a48-f762-11e1-a439-00145eb45e9a"), limit=20)
## one request, many instances of same parameter: use semi-colon sep. string
v="50c9509d-22c7-4a22-a47d-8c48425ef4a7;7b5d6a48-f762-11e1-a439-00145eb45e9a"
occ_data(datasetKey = v, limit=20)

# Search by recorder
occ_data(recordedBy="smith", limit=20)

# Many collector names
## separate requests: use a vector of strings
occ_data(recordedBy=c("smith","BJ Stacey"), limit=10)
## one request, many instances of same parameter: use semi-colon sep. string
occ_data(recordedBy="smith;BJ Stacey", limit=10)

# recordedByID
occ_data(recordedByID="https://orcid.org/0000-0003-1691-239X", limit=20)
## many at once
### separate searches
ids <- c("https://orcid.org/0000-0003-1691-239X",
  "https://orcid.org/0000-0001-7569-1828",
  "https://orcid.org/0000-0002-0596-5376")
res <- occ_data(recordedByID=ids, limit=20)
res[[1]]$data$recordedByIDs[[1]]
res[[2]]$data$recordedByIDs[[1]]
res[[3]]$data$recordedByIDs[[1]]
### all in one search
res <- occ_data(recordedByID=paste0(ids, collapse=";"), limit=20)
unique(vapply(res$data$recordedByIDs, "[[", "", "value"))

# identifiedByID
occ_data(identifiedByID="https://orcid.org/0000-0003-4710-2648", limit=20)

# Pass in curl options for extra fun
occ_data(taxonKey=2433407, limit=20, curlopts=list(verbose=TRUE))
occ_data(taxonKey=2433407, limit=20,
  curlopts = list(
    noprogress = FALSE,
    progressfunction = function(down, up) {
      cat(sprintf("up: \%d | down \%d\n", up, down))
      return(TRUE)
    }
  )
)
# occ_data(taxonKey=2433407, limit=20, curlopts=list(timeout_ms=1))

# Search for many species
splist <- c('Cyanocitta stelleri', 'Junco hyemalis', 'Aix sponsa')
keys <- sapply(splist, function(x) name_suggest(x)$data$key[1], USE.NAMES=FALSE)
## separate requests: use a vector of strings
occ_data(taxonKey = keys, limit=5)
## one request, many instances of same parameter: use semi-colon sep. string
occ_data(taxonKey = paste0(keys, collapse = ";"), limit=5)

# Search using a synonym name
#  Note that you'll see a message printing out that the accepted name will
# be used
occ_data(scientificName = 'Pulsatilla patens', limit=5)

# Search on latitude and longitude
occ_data(decimalLatitude=40, decimalLongitude=-120, limit = 10)

# Search on a bounding box
## in well known text format
### polygon
occ_data(geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))',
  limit=20)
### multipolygon
wkt <- 'MULTIPOLYGON(((-123 38,-116 38,-116 43,-123 43,-123 38)),
   ((-97 41,-93 41,-93 45,-97 45,-97 41)))'
occ_data(geometry = gsub("\n\\\\s+", "", wkt), limit = 20)
### polygon and taxonkey
key <- name_suggest(q='Aesculus hippocastanum')$data$key[1]
occ_data(taxonKey=key,
 geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))',
 limit=20)
## or using bounding box, converted to WKT internally
occ_data(geometry=c(-125.0,38.4,-121.8,40.9), limit=20)

## you can search on many geometry objects
### separate requests: use a vector of strings
wkts <-
c('POLYGON((-102.2 46,-102.2 43.7,-93.9 43.7,-93.9 46,-102.2 46))',
'POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))')
occ_data(geometry = wkts, limit=20)
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(geometry = paste0(wkts, collapse = ";"), limit=20)


# Search on a long WKT string - too long for a GBIF search API request
## By default, a very long WKT string will likely cause a request failure as
## GBIF only handles strings up to about 1500 characters long. You can leave as is, or
##  - Alternatively, you can choose to break up your polygon into many, and do a
##      data request on each piece, and the output is put back together (see below)
##  - Or, 2nd alternatively, you could use the GBIF download API
wkt <- "POLYGON((-9.178796777343678 53.22769021556159,
-12.167078027343678 51.56540789297837,
-12.958093652343678 49.78333685689162,-11.024499902343678 49.21251756301334,
-12.079187402343678 46.68179685941719,-15.067468652343678 45.83103608186854,
-15.770593652343678 43.58271629699817,-15.067468652343678 41.57676278827219,
-11.815515527343678 40.44938999172728,-12.958093652343678 37.72112962230871,
-11.639734277343678 36.52987439429357,-8.299890527343678 34.96062625095747,
-8.739343652343678 32.62357394385735,-5.223718652343678 30.90497915232165,
1.1044063476563224 31.80562077746643,1.1044063476563224 30.754036557416256,
6.905187597656322 32.02942785462211,5.147375097656322 32.99292810780193,
9.629796972656322 34.164474406524725,10.860265722656322 32.91918014319603,
14.551671972656322 33.72700959356651,13.409093847656322 34.888564192275204,
16.748937597656322 35.104560368110114,19.561437597656322 34.81643887792552,
18.594640722656322 36.38849705969625,22.989171972656322 37.162874858929854,
19.825109472656322 39.50651757842751,13.760656347656322 38.89353140585116,
14.112218847656322 42.36091601976124,10.596593847656322 41.11488736647705,
9.366125097656322 43.70991402658437,5.059484472656322 42.62015372417812,
2.3348750976563224 45.21526500321446,-0.7412967773436776 46.80225692528942,
6.114171972656322 47.102229890207894,8.047765722656322 45.52399303437107,
12.881750097656322 48.22681126957933,9.190343847656322 48.693079457106684,
8.750890722656322 50.68283120621287,5.059484472656322 50.40356146487845,
4.268468847656322 52.377558897655156,1.4559688476563224 53.28027243658647,
0.8407344726563224 51.62000971578333,0.5770625976563224 49.32721423860726,
-2.5869999023436776 49.49875947592088,-2.4991092773436776 51.18135535408638,
-2.0596561523436776 52.53822562473851,-4.696374902343678 51.67454591918756,
-5.311609277343678 50.009802108095776,-6.629968652343678 48.75106196817059,
-7.684656152343678 50.12263634382465,-6.190515527343678 51.83776110910459,
-5.047937402343678 54.267098895684235,-6.893640527343678 53.69860705549198,
-8.915124902343678 54.77719740243195,-12.079187402343678 54.52294465763567,
-13.573328027343678 53.437631551347174,
-11.288171777343678 53.48995552517918,
-9.178796777343678 53.22769021556159))"
wkt <- gsub("\n", " ", wkt)

#### Default option with large WKT string fails
# res <- occ_data(geometry = wkt)

#### if WKT too long, with 'geom_big=bbox': makes into bounding box
if (interactive()){
res <- occ_data(geometry = wkt, geom_big = "bbox")
}

#### Or, use 'geom_big=axe'
(res <- occ_data(geometry = wkt, geom_big = "axe"))
##### manipulate essentially number of polygons that result, so number of requests
###### default geom_size is 40
###### fewer calls
(res <- occ_data(geometry = wkt, geom_big = "axe", geom_size=50))
###### more calls
(res <- occ_data(geometry = wkt, geom_big = "axe", geom_size=30))

# Search on country
occ_data(country='US', limit=20)
isocodes[grep("France", isocodes$name),"code"]
occ_data(country='FR', limit=20)
occ_data(country='DE', limit=20)
### separate requests: use a vector of strings
occ_data(country=c('US','DE'), limit=20)
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(country = 'US;DE', limit=20)

# Get only occurrences with lat/long data
occ_data(taxonKey=key, hasCoordinate=TRUE, limit=20)

# Get only occurrences that were recorded as living specimens
occ_data(basisOfRecord="LIVING_SPECIMEN", hasCoordinate=TRUE, limit=20)
## multiple values in a vector = a separate request for each value
occ_data(taxonKey=key,
  basisOfRecord=c("OBSERVATION", "HUMAN_OBSERVATION"), limit=20)
## multiple values in a single string, ";" separated = one request including all values
occ_data(taxonKey=key,
  basisOfRecord="OBSERVATION;HUMAN_OBSERVATION", limit=20)

# Get occurrences for a particular eventDate
occ_data(taxonKey=key, eventDate="2013", limit=20)
occ_data(taxonKey=key, year="2013", limit=20)
occ_data(taxonKey=key, month="6", limit=20)

# Get occurrences based on depth
key <- name_backbone(name='Salmo salar', kingdom='animals')$speciesKey
occ_data(taxonKey=key, depth=1, limit=20)

# Get occurrences based on elevation
key <- name_backbone(name='Puma concolor', kingdom='animals')$speciesKey
occ_data(taxonKey=key, elevation=50, hasCoordinate=TRUE, limit=20)

# Get occurrences based on institutionCode
occ_data(institutionCode="TLMF", limit=20)
### separate requests: use a vector of strings
occ_data(institutionCode=c("TLMF","ArtDatabanken"), limit=20)
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(institutionCode = "TLMF;ArtDatabanken", limit=20)

# Get occurrences based on collectionCode
occ_data(collectionCode="Floristic Databases MV - Higher Plants", limit=20)
### separate requests: use a vector of strings
occ_data(collectionCode=c("Floristic Databases MV - Higher Plants",
  "Artport"), limit = 20)
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(collectionCode = "Floristic Databases MV - Higher Plants;Artport",
  limit = 20)

# Get only those occurrences with spatial issues
occ_data(taxonKey=key, hasGeospatialIssue=TRUE, limit=20)

# Search using a query string
occ_data(search="kingfisher", limit=20)

# search on repatriated - doesn't work right now
# occ_data(repatriated = "")

# search on phylumKey
occ_data(phylumKey = 7707728, limit = 5)

# search on kingdomKey
occ_data(kingdomKey = 1, limit = 5)

# search on classKey
occ_data(classKey = 216, limit = 5)

# search on orderKey
occ_data(orderKey = 7192402, limit = 5)

# search on familyKey
occ_data(familyKey = 3925, limit = 5)

# search on genusKey
occ_data(genusKey = 1935496, limit = 5)

# search on establishmentMeans
occ_data(establishmentMeans = "INVASIVE", limit = 5)
occ_data(establishmentMeans = "NATIVE", limit = 5)
occ_data(establishmentMeans = "UNCERTAIN", limit = 5)
### separate requests: use a vector of strings
occ_data(establishmentMeans = c("INVASIVE", "NATIVE"), limit = 5)
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(establishmentMeans = "INVASIVE;NATIVE", limit = 5)

# search on protocol
occ_data(protocol = "DIGIR", limit = 5)

# search on license
occ_data(license = "CC_BY_4_0", limit = 5)

# search on organismId
occ_data(organismId = "100", limit = 5)

# search on publishingOrg
occ_data(publishingOrg = "28eb1a3f-1c15-4a95-931a-4af90ecb574d", limit = 5)

# search on stateProvince
occ_data(stateProvince = "California", limit = 5)

# search on waterBody
occ_data(waterBody = "pacific ocean", limit = 5)

# search on locality
occ_data(locality = "Trondheim", limit = 5)
### separate requests: use a vector of strings
res <- occ_data(locality = c("Trondheim", "Hovekilen"), limit = 5)
res$Trondheim$data
res$Hovekilen$data
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(locality = "Trondheim;Hovekilen", limit = 5)


# Range queries
## See Detail for parameters that support range queries
occ_data(depth='50,100', limit = 20)
### this is not a range search, but does two searches for each depth
occ_data(depth=c(50,100), limit = 20)

## Range search with year
occ_data(year='1999,2000', limit=20)

## Range search with latitude
occ_data(decimalLatitude='29.59,29.6', limit = 20)

# Search by specimen type status
## Look for possible values of the typeStatus parameter looking at the typestatus dataset
occ_data(typeStatus = 'allotype', limit = 20)$data[,c('name','typeStatus')]

# Search by specimen record number
## This is the record number of the person/group that submitted the data, not GBIF's numbers
## You can see that many different groups have record number 1, so not super helpful
occ_data(recordNumber = 1, limit = 20)$data[,c('name','recordNumber','recordedBy')]

# Search by last time interpreted: Date the record was last modified in GBIF
## The lastInterpreted parameter accepts ISO 8601 format dates, including
## yyyy, yyyy-MM, yyyy-MM-dd, or MM-dd. Range queries are accepted for lastInterpreted
occ_data(lastInterpreted = '2016-04-02', limit = 20)

# Search for occurrences with images
occ_data(mediaType = 'StillImage', limit = 20)
occ_data(mediaType = 'MovingImage', limit = 20)
occ_data(mediaType = 'Sound', limit = 20)

# Search by continent
## One of africa, antarctica, asia, europe, north_america, oceania, or
## south_america
occ_data(continent = 'south_america', limit = 20)$meta
occ_data(continent = 'africa', limit = 20)$meta
occ_data(continent = 'oceania', limit = 20)$meta
occ_data(continent = 'antarctica', limit = 20)$meta
### separate requests: use a vector of strings
occ_data(continent = c('south_america', 'oceania'), limit = 20)
### one request, many instances of same parameter: use semi-colon sep. string
occ_data(continent = 'south_america;oceania', limit = 20)

# Query based on issues - see Details for options
## one issue
x <- occ_data(taxonKey=1, issue='DEPTH_UNLIKELY', limit = 20)
x$data[,c('name','key','decimalLatitude','decimalLongitude','depth')]
## two issues
occ_data(taxonKey=1, issue=c('DEPTH_UNLIKELY','COORDINATE_ROUNDED'), limit = 20)
# Show all records in the Arizona State Lichen Collection that can't be matched to the GBIF
# backbone properly:
occ_data(datasetKey='84c0e1a0-f762-11e1-a439-00145eb45e9a',
   issue=c('TAXON_MATCH_NONE','TAXON_MATCH_HIGHERRANK'), limit = 20)

# Parsing output by issue
(res <- occ_data(geometry='POLYGON((30.1 10.1,40 40,20 40,10 20,30.1 10.1))', limit = 50))
## what do issues mean, can print whole table, or search for matches
head(gbif_issues())
gbif_issues()[ gbif_issues()$code \%in\% c('cdround','cudc','gass84','txmathi'), ]
## or parse issues in various ways
### remove data rows with certain issue classes
library('magrittr')
res \%>\% occ_issues(gass84)
### split issues into separate columns
res \%>\% occ_issues(mutate = "split")
### expand issues to more descriptive names
res \%>\% occ_issues(mutate = "expand")
### split and expand
res \%>\% occ_issues(mutate = "split_expand")
### split, expand, and remove an issue class
res \%>\% occ_issues(-cudc, mutate = "split_expand")
}
}
\references{
https://www.gbif.org/developer/occurrence#search
}
\seealso{
\code{\link[=downloads]{downloads()}}, \code{\link[=occ_search]{occ_search()}}
}
