Ressource allocation example

library(lpSolveAPI)
library(doParallel)
library(foreach)
library(compiler)
#library(datamart)

#' internal function used by ds_user_parallel
#' assign capacities to workload for one region and determines transferable workload
ds_user <- function(
  workload, capacity, min_workertype, min_worktype, region, month_map, NbMonth, categ_map, worktype_map, NbWT, Nbtot, NbCat, dimsToIndex,
  no_work, max_offshore
) {
  if(length(region)!=1) stop("region must be character of length 1.")

  ### set up lp
  # the first Nbtot are the decision variables we are interested in (effort per month/work_type/categ/workplace/work_source). These do not
  # go into the objective (zero coefficients). They are, however, the main result.
  # the next NbCat variables are the "unmet workload" that is the work for which there is no capacity. Ideally, these would all be zero.
  # the next NbCat variables are "max effort over the months for a given category". This is used to evenly distribute the effort.
  # the next NbWT variables are "min capacity required" or "max effort over the months for a given worker type.
  lprec <- make.lp(0, ncol=Nbtot+NbCat+NbCat+NbWT+NbCat)
  set.objfn(lprec, obj=c(rep(0, Nbtot), rep(1000, NbCat), rep(1, NbCat), c(100, 80, 60), rep(500, NbCat)))

  ### constraints 1: no work in certain months for some types of work
  for (categ in names(no_work))
    for (m in intersect(names(month_map), no_work[[categ]])) {
      idx <- dimsToIndex(month=m, categ=categ)
      add.constraint(lprec, xt=rep(1, length(idx)), type="=", rhs=0, indices=idx)
    }

  ### constraints 2: workload must be met
  for (categ in names(categ_map)) {
    add.constraint(
      lprec, xt=rep(1, NbMonth*NbWT+2), "=", rhs=workload[region, categ],
      indices=c(
        dimsToIndex(categ=categ),
        Nbtot+categ_map[categ], # unmet untransferable effort
        Nbtot+NbCat+NbCat+NbWT+categ_map[categ] # unmet transferable effort
      )
    )
  }

  ### constraints 3: effort <= capacity
  for (m in names(month_map)) {
    idx <- which(capacity[,"mon"]==m)
    add.constraint(
      lprec, xt=rep(1, NbCat), type="<=", rhs=capacity[idx, "dcutil"],
      indices=dimsToIndex(month=m, worker_type="data_collector")
    )
    add.constraint(
      lprec, xt=rep(1, NbCat), type="<=", rhs=capacity[idx, "regutil"],
      indices=dimsToIndex(month=m, worker_type="regular")
    )
    add.constraint(
      lprec, xt=rep(1, NbCat), type="<=", rhs=capacity[idx, "nonregutil"],
      indices=dimsToIndex(month=m, worker_type="nonregular")
    )
  }

  ### constraints 4: minimum worker type utilization
  ## for each month, region and category, we have two constraints:
  ## 1) effort_regular >= perc * (effort_dc + effort_nonreg + effort_regular)
  ## 2) effort_dc <= perc * (effort_dc + effort_nonreg + effort_regular)
  ## where perc are input constants given in the min_workertype data.frame
  ## By simple algebra, this normalizes to
  ## 1') -perc * effort_dc - perc * effort_nonreg + (1-perc) * effort_regular >= 0
  ## 2') (1-perc) * effort_dc - perc * effort_nonreg - perc * effort_regular <= 0
  ## the second constraint only applies if there is actually data_collector capacity in that region
  for (m in names(month_map))
    for (categ in names(categ_map)) {
      cap_idx <- which(capacity[,"mon"]==m)
      if(capacity[cap_idx, "regutil"]>0){
        idx1 <- dimsToIndex(month=m, categ=categ, worker_type=c("data_collector", "nonregular"))
        idx2 <- dimsToIndex(month=m, categ=categ, worker_type="regular")
        perc <- min_workertype[categ, "reg_min"]
        add.constraint(lprec, xt=c(rep(-perc, 2), (1-perc)), type=">=", rhs=0, indices=c(idx1, idx2))
      }
      if(capacity[cap_idx, "dcutil"]>0) {
        idx1 <- dimsToIndex(month=m, categ=categ, worker_type=c("regular", "nonregular"))
        idx2 <- dimsToIndex(month=m, categ=categ, worker_type="data_collector")
        perc <- min_workertype[categ, "dc_min"]
        add.constraint(lprec, xt=c(rep(-perc, 2), (1-perc)), type="<=", rhs=0, indices=c(idx1, idx2))
      }
    }

  ### constraints 5: maximum work that can be done offshore
  ## only a certain percentage can be done offshore
  for (m in names(month_map))
    for (categ in names(categ_map))
      add.constraint(lprec, xt=1/max_offshore[categ], "<=", rhs=workload[region, categ], indices=Nbtot+NbCat+NbCat+NbWT+categ_map[categ])

  ### constraints 6: minimum work for some worktypes
  for (categ in names(min_worktype)) {
    rhs <- workload[region, categ] * min_worktype[categ]
    for (m in names(month_map)) add.constraint(lprec, xt=rep(1, NbWT), ">=", rhs=rhs, indices=dimsToIndex(month=m, categ=categ))
  }

  ### helper variable: maximum effort over all months for each category
  for (m in names(month_map))
    for (categ in names(categ_map))
      add.constraint(
        lprec, xt=c(rep(1, NbWT),-1), "<=", rhs=0,
        indices=c(dimsToIndex(month=m, categ=categ), Nbtot+NbCat+categ_map[categ])
      )

  ### helper variable: maximum effort over all months for each worker type
  for (m in names(month_map))
    for (wt in names(worktype_map))
      add.constraint(
        lprec, xt=c(rep(1, NbCat), -1), "<=", rhs=0,
        indices=c(dimsToIndex(month=m, worker_type=wt), Nbtot+NbCat+NbCat+worktype_map[wt])
      )

  # browser()
  ### solve
  status <- solve(lprec)

  ### no solution? halt
  if(status!=0) stop(paste("no solution found, error code ", status, "; region ", region))

  # unmet, non-transferable.
  unmet <- matrix(get.variables(lprec)[seq(Nbtot+1, length.out=NbCat)], nrow=1, dimnames=list(region, names(categ_map)))

  # resulting effort
  effort <- get.variables(lprec)[1:Nbtot]
  ans <- expand.grid(
    Month=names(month_map), Category=names(categ_map),
    Region=region, Worker_Type=names(worktype_map),
    stringsAsFactors=FALSE
  )
  lookup_one <- function(x) effort[dimsToIndex(month=x[["Month"]], categ=x[["Category"]], worker_type=x[["Worker_Type"]])]
  ans[,"Effort"] <- apply(ans, 1, lookup_one)

  # transferable
  transferable <- get.variables(lprec)[seq(Nbtot+NbCat+NbCat+NbWT+1, length.out=NbCat)]
  idx <- which(transferable>.Machine$double.eps^0.5)
  if(length(idx)>0) {
    transferable_df <- data.frame(
      region=region,
      categ=names(categ_map)[idx],
      transferable=transferable[idx],
      stringsAsFactors=FALSE
    )
  } else transferable_df <- data.frame()

  # if( nrow(transferable_df)>0) browser()
  return(list(effort=ans, unmet=unmet, transferable=transferable_df))
}

#' internal function used by ds_user_parallel
#' distributes transferable workload to regions with free capacities
ds_user_transfer <- function(effort, capacity, transferable, min_workertype, month_map, NbMonth, categ_map, worktype_map, NbWT, NbCat, no_work) {
  # calculate the capacity remaining after onshore work
  eff <- aggregate(Effort ~ Month + Region + Worker_Type, data=effort, sum)
  eff <- reshape(eff, timevar="Worker_Type", idvar=c("Month","Region"), direction="wide")
  eff <- eff[, c("Month", "Region", "Effort.data_collector", "Effort.nonregular", "Effort.regular")]
  colnames(eff) <- c("mon", "region", "dceff", "nonregeff", "regeff")
  remaining_capa <- merge(eff, capacity)
  remaining_capa[, "regremain"] <- pmax(0, remaining_capa[, "regutil"] - remaining_capa[, "regeff"])
  remaining_capa[, "nonregremain"] <- pmax(0, remaining_capa[, "nonregutil"] - remaining_capa[, "nonregeff"])
  remaining_capa[, "dcremain"] <- pmax(0, remaining_capa[, "dcutil"] - remaining_capa[, "dceff"])
  remaining_capa <- remaining_capa[,c("mon", "region", "regremain", "nonregremain", "dcremain")]

  # from where to where can work be shifted?
  from_regions <- unique(transferable[,"region"])
  NbShifts <- nrow(transferable) # region / categ combinations
  idx <- !(remaining_capa[,"region"] %in% from_regions) &
    (remaining_capa[,"regremain"]+remaining_capa[,"nonregremain"]+remaining_capa[,"dcremain"])>0
  to_regions <- unique(remaining_capa[idx, "region"])
  NbRegTo <- length(to_regions)

  # is there at least one region where we can transfer work to?
  if(length(to_regions)==0) {
    unmet_df <- reshape(transferable, direction="wide", timevar="categ", idvar="region")
    rownames(unmet_df) <- unmet_df[,"region"]
    unmet_df[,"region"] <- NULL
    unmet_df <- as.matrix(unmet_df)
    unmet_df[is.na(unmet_df)] <- 0
    colnames(unmet_df) <- substring(colnames(unmet_df), 14)
    return(list(transfers=data.frame(), still_unmet=unmet_df))
  }

  # set up lp
  # first NbShifts*NbRegTo*NbWT*NbMonth entries: effort for the transfers
  # next NbRegTo*NbWT entries: capacity needed after the shifts (this is to minimize)
  # next NbShifts entries: unmet untransferable work (hopefully zero, this is penalized)
  lprec <- make.lp(0, ncol=NbShifts*NbRegTo*NbWT*NbMonth+NbRegTo*NbWT+NbShifts)
  set.objfn(lprec, obj=c(rep(0, NbShifts*NbRegTo*NbWT*NbMonth), rep(c(100, 80, 60), NbRegTo), rep(1000, NbShifts)))
  cat("#Vars: ", NbShifts*NbRegTo*NbWT*NbMonth+NbRegTo*NbWT+NbShifts, "\n")

  ### constraints 1: no work in certain months for some types of work
  for (categ in names(no_work))
    for (m in intersect(names(month_map), no_work[[categ]])) {
      idx_shifts <- which(transferable[,"categ"]==categ)
      for (i in idx_shifts)
        for (reg in seq.int(NbRegTo))
          for (wt in names(worktype_map)) {
            add.constraint(
              lprec,
              xt=1, type="=", rhs=0,
              indices=(i-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbMonth + (worktype_map[wt]-1)*NbMonth + month_map[m]
            )
          }
    }

  ### constraints 2: workload must be met
  for (i in seq.int(nrow(transferable))) {
    add.constraint(
      lprec, xt=rep(1, NbRegTo*NbWT*NbMonth+1), "=", rhs=transferable[i, "transferable"] ,
      indices=c(seq((i-1)*NbRegTo*NbWT*NbMonth+1, i*NbRegTo*NbWT*NbMonth), NbShifts*NbRegTo*NbWT*NbMonth+NbRegTo*NbWT+i)
    )
  }

  ### constraints 3: transfered effort <= remaining capacity
  for (m in names(month_map))
    for (reg in seq.int(NbRegTo)) {
      idx <- which(remaining_capa[,"mon"]==m & remaining_capa[,"region"]==to_regions[reg])
      add.constraint(
        lprec, xt=rep(1, NbShifts), type="<=", rhs=capacity[idx, "dcutil"],
        indices=(seq.int(NbShifts)-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT*NbMonth+(worktype_map["data_collector"]-1)*NbMonth+month_map[m]
      )
      add.constraint(
        lprec, xt=rep(1, NbShifts), type="<=", rhs=capacity[idx, "regutil"],
        indices=(seq.int(NbShifts)-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT*NbMonth+(worktype_map["regular"]-1)*NbMonth+month_map[m]
      )
      add.constraint(
        lprec, xt=rep(1, NbShifts), type="<=", rhs=capacity[idx, "nonregutil"],
        indices=(seq.int(NbShifts)-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT*NbMonth+(worktype_map["nonregular"]-1)*NbMonth+month_map[m]
      )
    }

  ### constraints 4: minimum worker type utilization
  ## for each month, region and category, we have two constraints:
  ## 1) effort_regular_from_offshore + effort_regular_onsite >= perc * (effort_total_from_offshore + effort_total_onsite)
  ## 2) effort_dc_from_offshore + effort_dc_onsite <= perc * (effort_total_offshore + effort_total_onsite)
  ## where perc are input constants depending on the category given in the min_workertype data.frame
  ## By simple algebra, this normalizes to
  ## 1') (1-perc) effort_regular_offshore >= perc effort_total_onsite - effort_regular_onsite
  ## 2') (1-perc) effort_dc_offshore <= perc effort_total_onsite - effort_dc_onsite
  ## the second constraint only applies if there is actually data_collector capacity in that region
  for (reg in seq.int(NbRegTo))
    for (m in names(month_map))
      for (categ in names(categ_map)) {
        # lookup onsite values
        idx <- which(effort[,"Month"]==m & effort[, "Category"]==categ & effort[, "Region"]==to_regions[reg])
        onsite <- setNames(effort[idx, "Effort"], effort[idx, "Worker_Type"])
        perc <- min_workertype[categ, "reg_min"]
        add.constraint(
          lprec, xt=rep((1-perc), NbShifts), type=">=",
          rhs=perc*sum(onsite)-onsite["regular"],
          indices=(seq.int(NbShifts)-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT*NbMonth+(worktype_map["regular"]-1)*NbMonth+month_map[m]
        )
        perc <- min_workertype[categ, "dc_min"]
        add.constraint(
          lprec, xt=rep((1-perc), NbShifts), type="<=",
          rhs=perc*sum(onsite)-onsite["data_collector"],
          indices=(seq.int(NbShifts)-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT*NbMonth+(worktype_map["data_collector"]-1)*NbMonth+month_map[m]
        )
      }

  ### helper variable: maximum effort over all months for each worker type
  for (reg in seq.int(NbRegTo))
    for (m in names(month_map))
      for (wt in names(worktype_map)) {
        # lookup onsite values
        idx <- which(effort[,"Month"]==m & effort[, "Region"]==to_regions[reg] & effort[, "Worker_Type"]==wt)
        onsite <- sum(effort[idx, "Effort"]) # sum over categories
        add.constraint(
          lprec, xt=c(rep(1, NbShifts), -1), "<=", rhs=-onsite,
          indices=c((seq.int(NbShifts)-1)*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT*NbMonth+(worktype_map[wt]-1)*NbMonth+month_map[m],
                    NbShifts*NbRegTo*NbWT*NbMonth+(reg-1)*NbWT+worktype_map[wt])
        )
      }

  ### solve
  status <- solve(lprec)

  ### no solution? halt
  if(status!=0) stop(paste("no solution found, error code ", status))

  ### gather results
  one_transfer <- function(i) {
    vars <- get.variables(lprec)[seq((i-1)*NbRegTo*NbWT*NbMonth+1, i*NbRegTo*NbWT*NbMonth)]
    idx1 <- which(vars > .Machine$double.eps^0.5)
    if(length(idx1)>0) {
      reg <- idx1 %/% (NbWT*NbMonth) + 1
      idx <- idx1 - (reg - 1) * (NbWT*NbMonth)
      wt <- idx %/% NbMonth + 1
      m <- idx - (wt - 1) * NbMonth + 1
      # browser()

      ret <- data.frame(
        region_from=transferable[i, "region"],
        categ=transferable[i,"categ"],
        region_to=to_regions[reg],
        worker_type=names(worktype_map)[wt],
        mon=names(month_map)[m],
        effort=vars[idx1],
        stringsAsFactors=FALSE
      )
    } else ret <- data.frame()
    return(ret)
  }

  transf <- lapply(seq.int(nrow(transferable)), FUN=one_transfer)
  transf <- do.call(rbind, transf)

  ### still unmet workload?
  unmet <- get.variables(lprec)[seq(NbShifts*NbRegTo*NbWT*NbMonth+NbRegTo*NbWT, length.out=NbShifts)]
  if(any(unmet > .Machine$double.eps^0.5)) {
    unmet_df <- transferable
    unmet_df[, "transferable"] <- NULL
    unmet_df[, "unmet"] <- unmet
    unmet_df <- reshape(transferable, direction="wide", timevar="categ", idvar="region")
    rownames(unmet_df) <- unmet_df[,"region"]
    unmet_df[,"region"] <- NULL
    unmet_df <- as.matrix(unmet_df)
    unmet_df[is.na(unmet_df)] <- 0
    colnames(unmet_df) <- substring(colnames(unmet_df), 14)
  } else unmet_df <- data.frame()

  return(list(transfers=transf, still_unmet=unmet_df))
}

#' Solves a resource allocation problem
#'
#' @param workload workload data.frame (rows=regions, columns=worktype)
#' @param capacity work capacity per month data.frame (columns: region, month, regutil, nonregutil, dcutil)
#' @param min_workertype minimum requirements data.frame (columns=region, month, worktype, regmin, dcmin)
#' @param min_worktype optional minimum requirement for work types.
#' @param start_month start month: from here to Dec the workload needs to be processed (three letter month abbreviation). Default "Jan"
#' @param no_work named list of worktype with the months (three letter month abbreviation) where no work on that worktype is possible
#' @param max_offshore named vector with the percentages of work that can be done offshore
#'
#' @return a list with three elements named "effort" with the distributed workload
#'        (data.frame, columns: region, month, worktype, worker type, effort),
#'        "unmet", a (hopefully all zero) matrix (columns: category, rows: regions) where capacity problems arise, and
#'        (only if transfers==TRUE) "transfers" a data.frame with distributed offshore work
#'        (data.frame, columns: region_from, categ, region_to, worker_type, mon, effort)
ds_user_parallel <- function(
  workload,
  capacity,
  min_workertype,
  min_worktype=c(Subdivisions=1/12, HomeValue=0.05),
  start_month="Jan",
  no_work=list(
    "Reval_Days"=c("Jan", "Feb", "Mar", "Apr", "May", "Jun"),
    "Asset"=c("Sep", "Oct", "Nov", "Dec", "Jan"),
    "Reval_Objections"=c("Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
  ),
  max_offshore=c(
    Building_Consents=0.5, Subdivisions=0.9, S12_Sales=0.5, RM_Objections=0.1, Reval_Days=0.3, Reval_Objections=0.1,
    Asset=0.1, HomeValue=0.2, Rural=0.2, Urgent_New_Imp=0.2
  ),
  transfers=TRUE
){
  ### helper functions
  month_map <- setNames(
    rev(seq.int(12)),
    c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
  )
  if(!start_month %in% names(month_map)) stop("invalid start_month, must be a three letter month abbreviation.")
  NbMonth <- month_map[start_month]
  month_map <- tail(month_map, NbMonth)
  categ_map <- unique(rownames(min_workertype))
  NbCat <- length(categ_map)
  categ_map <- setNames(seq.int(NbCat), categ_map)

  ### sanity check 1: consistent use of work category names
  if(length(intersect(colnames(workload), names(categ_map)))<max(c(length(colnames(workload)), length(categ_map))))
    stop("work categories in workload and min_req argument do not match.")
  if(length(setdiff(names(no_work), names(categ_map))) > 0)
    stop("work categories in no_work and min_req argument do not match.")

  worktype_map <- c(regular=1, nonregular=2, data_collector=3)
  NbWT <- length(worktype_map)
  Nbtot <- (NbMonth * NbCat * NbWT)

  ### sanity check 2: need more capacity than workload
  capacity <- capacity[capacity[,"mon"] %in% names(month_map),]
  #if(sum(workload)>sum(capacity[, c("regutil", "nonregutil", "dcutil")])) stop("workload exceeds capacity")

  ### map subscripts of the decision vars to index and vice versa
  dimsToIndex <- function(month=names(month_map), categ=names(categ_map), worker_type=names(worktype_map)) {
    args <- expand.grid(
      month=month_map[month]-1,
      categ=categ_map[categ]-1,
      worker_type=worktype_map[worker_type],
      stringsAsFactors=FALSE
    )
    w <- c(NbCat*NbWT, NbWT, 1)
    ret <- apply(args, 1, function(x) {ans <- x %*% w; dim(ans) <- NULL; return(ans)})
    return(ret)
  }
  dimsToIndex <- cmpfun(dimsToIndex)

  subset_wl <- function(reg) matrix(workload[reg,], nrow=1, dimnames=list(reg, colnames(workload)))
  splitted_cap <- split(capacity, capacity[,"region"])
  my_combine <- function(x, y)
    list(
      effort=rbind(x[["effort"]], y[["effort"]]),
      unmet=rbind(x[["unmet"]], y[["unmet"]]),
      transferable=rbind(x[["transferable"]], y[["transferable"]])
    )
  arg <- list(
    month_map=month_map, NbMonth=NbMonth, categ_map=categ_map, worktype_map=worktype_map, min_workertype=min_workertype, min_worktype=min_worktype,
    NbWT=NbWT, Nbtot=Nbtot, NbCat=NbCat, dimsToIndex=dimsToIndex, no_work=no_work, max_offshore=max_offshore
  )
  ret <- foreach(reg=names(splitted_cap), .combine=my_combine) %do% {
    arg[["workload"]] <- subset_wl(reg)
    arg[["capacity"]] <- splitted_cap[[reg]]
    arg[["region"]] <- reg
    do.call(ds_user, arg)
  }

  if(nrow(ret[["transferable"]])>0)
    if(transfers) {
      transf_ret <- ds_user_transfer(
        effort=ret[["effort"]], capacity=capacity, transferable=ret[["transferable"]], min_workertype=min_workertype,
        month_map=month_map, NbMonth=NbMonth, categ_map=categ_map, worktype_map, NbWT=NbWT, NbCat, no_work=no_work
      )
      still_unmet <- transf_ret[["still_unmet"]]
      if(nrow(still_unmet)>0)
        ret[["unmet"]][rownames(still_unmet), colnames(still_unmet)] <-
        ret[["unmet"]][rownames(still_unmet), colnames(still_unmet)] + still_unmet
      ret[["transferable"]] <- NULL
      ret[["transfers"]] <- transf_ret[["transfers"]]
    } else {
      # no transfers wished -- add to unmet
      unmet_df <- reshape(ret[["transferable"]], direction="wide", timevar="categ", idvar="region")
      rownames(unmet_df) <- unmet_df[,"region"]
      unmet_df[,"region"] <- NULL
      unmet_df <- as.matrix(unmet_df)
      unmet_df[is.na(unmet_df)] <- 0
      colnames(unmet_df) <- substring(colnames(unmet_df), 14)
      ret[["unmet"]][rownames(unmet_df), colnames(unmet_df)] <- ret[["unmet"]][rownames(unmet_df), colnames(unmet_df)] + unmet_df
      ret[["transferable"]] <- NULL
    }

  if(any(ret[["unmet"]]>.Machine$double.eps^0.5)) warning("there is unmet ", if(transfers) "untransferable", "workload")

  # browser()
  return(ret)
}