#' Collect occurrence records from a pre-existing DOI or URL
#'
#' @description `r lifecycle::badge("experimental")` 
#' 
#' Download occurrence records using an existing DOI or URL. Pre-existing DOIs 
#' and URLs come from downloads previously generated with `atlas_occurrences()` 
#' or generated online.
#'
#' @param url `string`: Retrieve occurrence
#' records previously downloaded from the ALA, using the URL provided via email.
#' @param doi `string`: Retrieve occurrence
#' records previously downloaded from the ALA, using the DOI minted for that
#' download. May be given as a bare DOI or as a `https://doi.org/` URL.
#'
#' @return An object of class `tbl_df` and `data.frame` (aka a tibble) of 
#' occurrences 
#' 
#' @examples 
#' \dontrun{
#' # Download previously retrieved records using an existing DOI or URL
#' collect_occurrences(doi = "your-doi")
#' 
#' # DOIs can be minted by adding `mint_doi = TRUE` to `atlas_occurrences()`
#' records <- 
#'   galah_call() |>
#'   galah_identify("perameles") |>
#'   galah_filter(year == 2001) |>
#'   atlas_occurrences(mint_doi = TRUE)
#' 
#' attributes(records)$doi # return minted doi
#' }
#' 
#' @importFrom readr read_csv
#' @importFrom stringr str_remove
#' @export collect_occurrences

collect_occurrences <- function(url, doi){

  # At least one source is required. `missing()` yields a single logical, so
  # the scalar, short-circuiting `&&` is the right operator inside `if`.
  if(missing(doi) && missing(url)){
    abort("A DOI or URL must be specified.")
  }

  # NOTE: when both `doi` and `url` are supplied, both branches below run and
  # the URL result overwrites the DOI result.
  if(!missing(doi)){
    if(is.null(doi)){
      abort("Please specify a valid DOI.")
    }else if(grepl("^http", doi) && !grepl("doi", doi)){
      # An http(s) address that does not contain "doi" is treated as a
      # download URL that was passed to the wrong argument.
      bullets <- c(
        "URL supplied as DOI.",
        i = "The `doi` argument does not accept DOIs formatted as URLs.",
        i = "If you are supplying an ALA download URL, pass it to `url` instead.")
      abort(bullets)
    }else{
      result <- collect_occurrences_doi(doi)
    }
  }
  
  if(!missing(url)){
    if(is.null(url)){
      abort("Please specify a valid `url`")
    }else{
      result <- collect_occurrences_url(url)
    }
  } 
    
  # A NULL result is reported as a system outage and replaced by an empty
  # tibble so that the return type stays consistent.
  if(is.null(result)){
    system_down_message("collect_occurrences")
    return(tibble())
  }else{
    # NOTE(review): wrapping in `tibble()` may not carry over the `doi` /
    # `call` attributes set by the helpers - confirm against tibble behaviour.
    return(tibble(result))
  }
  
}


# Download occurrence records identified by an ALA-minted DOI.
#
# `doi` is a single string: either a bare DOI or a DOI resolver URL.
# `error_call` is the call reported in the error raised for non-ALA DOIs.
#
# Returns whatever `url_download()` yields for the DOI download URL, tagged
# with `doi` and `call` attributes, or an empty tibble if the download
# returned NULL. Aborts when `doi` has no suffix after the literal "ala.".
collect_occurrences_doi <- function(doi, error_call = caller_env()) {
  
  # Reduce a DOI supplied as a resolver URL to the bare DOI. The pattern is
  # anchored and accepts http/https and the legacy dx.doi.org host, so the
  # stored `doi` attribute is the same whichever form was supplied.
  if (grepl("^http", doi)) {
    doi <- stringr::str_remove(doi, "^https?://(dx\\.)?doi\\.org/") # TODO: remove once better solution is found
  }
  
  # Keep everything after the first literal "ala.". `fixed()` stops the dot
  # acting as a regex wildcard, and `n = 2` prevents a later "ala." inside the
  # suffix from truncating it.
  doi_str <- stringr::str_split(doi, stringr::fixed("ala."), n = 2)[[1]][2]
  if (is.na(doi_str) || !nzchar(doi_str)) {
    bullets <- c(
      "DOI has not been generated by the ALA.",
      i = "DOIs created by the ALA have a prefix of 10.26197/ala."
    )
    abort(bullets, call = error_call)
  }
  
  # `isTRUE()` keeps this safe if the `galah_config` option is not set.
  verbose <- getOption("galah_config")$package$verbose
  if(isTRUE(verbose)) {
    cat("Downloading\n")
  }

  url_complete <- url_lookup("doi_download", doi_string = doi_str)
  result <- url_download(url_complete, 
                         ext = "zip")
  if(is.null(result)){
    inform("Download failed")
    return(tibble())
  }else{
    # record provenance on the returned object
    attr(result, "doi") <- doi
    attr(result, "call") <- "atlas_occurrences"
    return(result)
  }
}

# TODO: fix multiple file import
# Download occurrence records from an ALA download URL.
#
# `url` is passed to `url_download()`; its result is treated here as the path
# to a zip archive. Every archive entry whose name contains "data" and ends in
# ".csv" is read and row-bound, columns are renamed via `rename_columns()`,
# and assertion columns are converted via `fix_assertion_cols()`.
#
# Returns the combined data with `data_type` and `call` attributes (plus `doi`
# when the archive contains a `doi.txt`), or an empty tibble when the download
# fails or the archive holds no matching data file.
collect_occurrences_url <- function(url){
  
  # `isTRUE()` keeps this safe if the `galah_config` option is not set.
  verbose <- getOption("galah_config")$package$verbose
  if(isTRUE(verbose)) {
    cat("Downloading\n")
  }
  
  local_file <- url_download(url) 
    # cache_file = tempfile(pattern = "data"),
    # ext = ".zip")

  if(is.null(local_file)){
    inform("Download failed")
    return(tibble())
  }
  
  # list archive contents and pick out any entry named "data.csv" or similar
  local_file_uz <- unzip(local_file, list = TRUE)$Name
  data_files <- local_file_uz[
    grepl("data", local_file_uz) & grepl("\\.csv$", local_file_uz)]
  
  # Without a data file there is nothing to post-process; return early rather
  # than falling through to code that needs `result` to exist.
  if(length(data_files) < 1){
    inform("There was a problem reading the occurrence data and it looks like no data were returned.")
    return(tibble())
  }

  result <- do.call(rbind, 
                    lapply(data_files, function(a){
                      read_csv(unz(local_file, a), col_types = cols())}))
  
  # rename cols so they match requested cols
  names(result) <- rename_columns(names(result), type = "occurrence")
  
  # replace 'true' and 'false' with boolean
  valid_assertions <- show_all_assertions()$id
  assertions_check <- names(result) %in% valid_assertions
  if(any(assertions_check)){
    result <- fix_assertion_cols(result, names(result)[assertions_check])
  }
  
  # get DOI, if one exists; match the file name exactly and read the entry
  # that was actually found, wherever it sits inside the archive
  doi_files <- local_file_uz[basename(local_file_uz) == "doi.txt"]
  if(length(doi_files) > 0){
    doi_text <- as.character(
      read.table(unz(local_file, doi_files[1]))$V1)
    attr(result, "doi") <- doi_text
  }
  
  # return tibble with correct info
  attr(result, "data_type") <- "occurrences"
  attr(result, "call") <- "atlas_occurrences"
  
  return(result)
}
