######################################################################### # # # The purpose of this R script is to determine optimal locations for # # new monitoring wells. It was written as part of a graduate research # # project in the Department of Geography at IUPUI. # # # ######################################################################### # LOAD LIBRARIES library(fields) library(rgdal) library(lpSolveAPI) library(sqldf) library(plyr) library(reshape2) # SET WORKING DIRECTORY setwd("C:/temp") # IMPORT DATA topN = read.csv("topN.csv") settingsTopN <- topN # REFORMAT DATA topN$LANDCOVER <- as.character(topN$LANDCOVER) topN$TOPO <- as.character(topN$TOPO) topN$AQ_SYSTEM <- as.character(topN$AQ_SYSTEM) topN$CLIMATE <- as.character(topN$CLIMATE) topN$LANDCOVER[is.na(topN$LANDCOVER)] <- "NA" topN$TOPO[is.na(topN$TOPO)] <- "NA" # SUBSET DATA topN <- subset(topN, LANDCOVER != "Open Water" & BASELINE==0, select=c("INDEX","SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO","SQMI","ANNUAL","RANK")) # IMPORT DATA divisionWells_shape <- readOGR(dsn="C:/temp/Shapefiles",layer="ClimateDivisions") divisionWells <- as.data.frame(divisionWells_shape@data) # SUBSET DATA divisionWells <- subset(divisionWells, BUFFER == 0, select = c("CLIMATE","BUFFER","SQMI")) divisionWells$HEATH <- round(divisionWells$SQMI / 500,digits=0) # CREATE VECTOR OF CLIMATE DIVISION NAMES divisions <- as.vector(unique(divisionWells[,c("CLIMATE")])) # SELECT THE TOP N AQUIFER SCENARIOS FROM EACH CLIMATE DIVISION loopTopN <- matrix(nrow=0,ncol=9) for (i in divisions){ step1 <- divisionWells[match(i, divisionWells[,1]),4] step2 <- with(topN, topN[CLIMATE %in% i,]) step3 <- head(step2,step1) loopTopN <- rbind(loopTopN,step3) } topNSource <- subset(loopTopN, select=c("INDEX","SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO","SQMI","ANNUAL","RANK")) # IMPORT DATA candidateSites_shape <- readOGR(dsn="C:/temp/Shapefiles",layer="CandidateSites") candidateSitesSource <- as.data.frame(candidateSites_shape@data) # REFORMAT/RECLASSIFY DATA candidateSitesSource$LANDCOVER <- as.character(candidateSitesSource$LANDCOVER) candidateSitesSource$TOPO <- as.character(candidateSitesSource$TOPO) candidateSitesSource$AQ_SYSTEM <- as.character(candidateSitesSource$AQ_SYSTEM) candidateSitesSource$CLIMATE <- as.character(candidateSitesSource$CLIMATE) candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Very Low"] <- "Low" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Low"] <- "Low" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Moderate"] <- "Moderate" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "High"] <- "High" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Very High"] <- "High" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "11"] <- "Open Water" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "21"] <- "Developed" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "22"] <- "Developed" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "23"] <- "Developed" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "24"] <- "Developed" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "31"] <- "Barren Land" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "41"] <- "Forest" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "42"] <- "Forest" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "43"] <- "Forest" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "52"] <- "Shrub/Scrub" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "71"] <- "Grassland/Herbaceous" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "81"] <- "Pasture/Hay" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "82"] <- "Cultivated Crops" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "90"] <- "Wetlands" candidateSitesSource$LANDCOVER[candidateSitesSource$LANDCOVER == "95"] <- "Wetlands" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "NORTHWEST"] <- "Northwest" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "NORTH CENTRAL"] <- "North Central" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "NORTHEAST"] <- "Northeast" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "WEST CENTRAL"] <- "West Central" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "CENTRAL"] <- "Central" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "EAST CENTRAL"] <- "East Central" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "SOUTHWEST"] <- "Southwest" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "SOUTH CENTRAL"] <- "South Central" candidateSitesSource$CLIMATE[candidateSitesSource$CLIMATE == "SOUTHEAST"] <- "Southeast" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Very Low"] <- "Low" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Low"] <- "Low" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Moderate"] <- "Moderate" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "High"] <- "High" candidateSitesSource$TOPO[candidateSitesSource$TOPO == "Very High"] <- "High" # IMPORT DATA demandPoints_shape <- readOGR(dsn="C:/temp/Shapefiles",layer="CivilTownships") demandPointsSource <- as.data.frame(demandPoints_shape@data) # CREATE EMPTY TABLES FOR RESULTS iterationTable <- matrix(nrow=0,ncol=4) comparisonTable <- matrix(nrow=0,ncol=3) selectedSitesAll <- matrix(nrow=0,ncol=6) # BEGIN FOR LOOP TO SOLVE MODELS BY CLIMATE DIVISION for(i in divisions){ query <- paste("select * from topNSource where topNSource.CLIMATE = ", "'", i, "'", sep="") query <- sqldf(query) topN <- subset(query, select = c("INDEX","SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO","SQMI","ANNUAL","RANK")) query <- paste("select * from demandPointsSource where demandPointsSource.CLIMATE = ", "'", i, "'", sep="") query <- sqldf(query) demandPoints <- subset(query, select = c("REFNO","UTMNORTH","UTMEAST")) query <- paste("select * from candidateSitesSource where candidateSitesSource.CLIMATE = ", "'", i, "'", sep="") query <- sqldf(query) # SUBSET POTENTIAL MONITORING SITES candidateSites <- subset(candidateSitesSource, HEATH == 1, select = c("REFNO","REFNORTH","REFEAST","SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO","NEAR_2MI","NEAR_4MI","NEAR_6MI","NEAR_8MI","NEAR_10MI","NEAR_12MI","HEATH")) colnames(topN)[which(names(topN) == "INDEX")] <- "UNIQUE_INDEX" indexMerge <- as.data.frame(sqldf("select distinct topN.UNIQUE_INDEX, topN.SETTING, topN.AQ_SYSTEM, topN.CLIMATE, topN.TOPO, topN.LANDCOVER from topN, candidateSites where topN.SETTING = candidateSites.SETTING AND topN.TOPO = candidateSites.TOPO AND topN.AQ_SYSTEM = candidateSites.AQ_SYSTEM AND topN.CLIMATE = candidateSites.CLIMATE AND topN.LANDCOVER = candidateSites.LANDCOVER")) candidateSites <- merge(candidateSites, indexMerge, by = c("SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO")) candidateSites <- candidateSites[,c("REFNO","UNIQUE_INDEX","REFNORTH","REFEAST","SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO")] candidateSites <- as.data.frame(sqldf("select distinct candidateSites.UNIQUE_INDEX, candidateSites.REFNO, candidateSites.REFNORTH, candidateSites.REFEAST from candidateSites")) colnames(candidateSites)[which(names(candidateSites) == "UNIQUE_INDEX")] <- "INDEX" colnames(topN)[which(names(topN) == "UNIQUE_INDEX")] <- "INDEX" SITE <- seq(1,nrow(candidateSites),by=1) candidateSites <- cbind(candidateSites, SITE) # RANDOMLY SELECT MONITORING SITES FOR EACH AQUIFER SCENARIO siteNumbersRandom <- candidateSites[unlist(tapply(1:nrow(candidateSites),candidateSites$INDEX,function(x) sample(x,1))),] siteNumbersRandom <- siteNumbersRandom[,c("SITE")] # CREATE VECTOR OF SITE IDs siteNumbers <- candidateSites[,c("SITE")] demandNumbers <- demandPoints[,c("REFNO")] # RANDOM UNIFORM DISTRIBUTION BETWEEN 0 AND 1 randomNumbers <- runif(length(siteNumbers),0,1) # CREATE DISTANCE MATRIX candidateSites.coordinates <- candidateSites[,c("REFNORTH","REFEAST")] demandPoints.coordinates <- demandPoints[,c("UTMNORTH","UTMEAST")] distanceMatrix <- rdist(demandPoints.coordinates, candidateSites.coordinates) distanceMatrixRandom <- matrix(data = 1, nrow = length(demandNumbers), ncol = length(siteNumbers)) colnames(distanceMatrix) <- siteNumbers rownames(distanceMatrix) <- demandNumbers colnames(distanceMatrixRandom) <- siteNumbers rownames(distanceMatrixRandom) <- demandNumbers distanceMatrix <- (distanceMatrix * 3.28084)/5280 distanceMatrix <- round(distanceMatrix, digits=4) # CREATE MATRIX FOR NAMES OF PAIRS pairsMatrix <- matrix(seq(1,length(distanceMatrix),by=1), nrow = nrow(distanceMatrix), ncol = ncol(distanceMatrix)) colnames(pairsMatrix) <- siteNumbers rownames(pairsMatrix) <- demandNumbers # CREATE VECTORS FOR VARIABLE NAMES AND DISTANCE COEFFICIENTS sitesVector <- paste("y",siteNumbers,sep="") pairsVector <- paste("x",c(pairsMatrix),sep="") distanceVector <- round(c(distanceMatrix),digits=4) distanceVectorRandom <- round(c(distanceMatrixRandom),digits=4) randomVector <- round(randomNumbers,digits=2) # Yj VARIABLES (CANDIDATE NODES, MONITORING SITES) variablesYj <- t(t(siteNumbers)) variablesYj <- sub("^", "y", variablesYj ) # Xij VARIABLES (PAIRS OF CANDIDATE NODES, MONITORING SITES AND TOWNSHIP CENTROIDS, DEMAND NODES) variablesXij <- t(t(pairsVector)) # MODEL FORMULATION : OBJECTIVE FUNCTION : MINIMIZE SUM OF DISTANCES objective <- paste(distanceVector, pairsVector, sep=" ") objective <- t(paste(objective, collapse = " + ")) objective <- paste("min:", objective) objective <- sub("$", ";", objective) objective <- t(objective) # MODEL FORMULATION : CONSTRAINT 1 : EACH DEMAND NODE MUST ASSIGN TO EXACTLY ONE FACILITY (MONITORING SITE) constraint1 <- matrix(apply(pairsMatrix, 1, function(x) paste(x, collapse = " + x"))) constraint1 <- matrix(paste(constraint1,"= 1;")) constraint1 <- sub("^", "x", constraint1) # MODEL FORMULATION : CONSTRAINT 2 : P FACILITIES (MONITORING SITES) WILL BE LOCATED constraint2 <- t(paste(variablesYj, collapse = " + ")) constraint2 <- sub("$", " = ", constraint2) constraint2 <- sub("$", length(unique(candidateSites[,c("INDEX")])), constraint2) constraint2 <- sub("$", ";", constraint2) # MODEL FORMULATION : CONSTRAINT 3 : DEMAND NODES CAN ONLY ASSIGN TO FACILITY NODES THAT HAVE BEEN SELECTED FOR FINAL SOLUTION siteNamesMatrix <- t(matrix(rep(colnames(pairsMatrix),times=nrow(pairsMatrix)),nrow= ncol(pairsMatrix), ncol= nrow(pairsMatrix))) constraint3 <- paste("x", pairsMatrix, " <= ", "y", siteNamesMatrix, ";", sep="") constraint3 <- matrix(constraint3) # MODEL FORMULATION : CONSTRAINT 4 : MINIMUM NUMBER OF FACILITIES (MONITORING SITES) TO BE ALLOCATED TO EACH REGION (AQUIFER SCENARIO) constraint4 <- matrix(nrow=0, ncol=1) uniqueSettings <- as.vector(unique(candidateSites[,("INDEX")])) for (i in uniqueSettings){ step1a <- with(candidateSites, candidateSites[INDEX %in% i,]) step1b <- as.vector(step1a[,c("SITE")]) step2 <- as.character(step1b) step3 <- t(paste(step2, collapse = " + y")) step4 <- sub("^", "y", step3) step5 <- t(t(step4)) step6 <- sub("$", " <= 1;", step5) constraint4 <- rbind(constraint4, step6) } # RANDOM MODEL : CONSTRAINT 4 randomConstraint4 <- paste("y",siteNumbersRandom," <= 1;",sep="") randomConstraint4 <- t(t(randomConstraint4)) # MODEL FORMULATION : CONSTRAINT 5 : MAXIMUM NUMBER OF FACILITIES (MONITORING SITES) TO BE ALLOCATED TO EACH REGION (AQUIFER SCENARIO) constraint5 <- matrix(nrow=0, ncol=1) uniqueSettings <- as.vector(unique(candidateSites[,("INDEX")])) for (i in uniqueSettings){ step1a <- with(candidateSites, candidateSites[INDEX %in% i,]) step1b <- as.vector(step1a[,c("SITE")]) step2 <- as.character(step1b) step3 <- t(paste(step2, collapse = " + y")) step4 <- sub("^", "y", step3) step5 <- t(t(step4)) step6 <- sub("$", " >= 1;", step5) constraint5 <- rbind(constraint5, step6) } # RANDOM MODEL : CONSTRAINT 5 randomConstraint5 <- paste("y",siteNumbersRandom," >= 1;",sep="") randomConstraint5 <- t(t(randomConstraint5)) # MODEL FORMULATION : CONSTRAINT 6 : Xij VARIABLES CAN HAVE A VALUE OF 0 OR 1 constraint6Xij <- paste("0 <=", pairsVector, "<= 1;") constraint6Xij <- t(t(constraint6Xij)) # MODEL FORMULATION : CONSTRAINT 6 : Yj VARIABLES CAN HAVE A VALUE OF 0 OR 1 constraint6Yj <- paste("0 <=", variablesYj, "<= 1;") constraint6Yj <- t(t(constraint6Yj)) # MODEL FORMULATION : INTEGER CONSTRAINT FOR Yj integerYj <- t(paste(variablesYj, collapse = ", ")) integerYj <- t(paste("int", integerYj)) integerYj <- sub("$", ";", integerYj) # MODEL FORMULATION : INTEGER CONSTRAINT FOR Xij integerXij <- t(paste(variablesXij, collapse = ", ")) integerXij <- t(paste("int", integerXij)) integerXij <- sub("$", ";", integerXij) # SOLVE P-MEDIAN MODEL AND GET RESULTS model <- rbind(objective, constraint1, constraint2, constraint3, constraint4, constraint5, constraint6Xij, constraint6Yj, integerYj, integerXij) write.table(model, "model.lp", sep="\t", row.names = F, col.names = F, quote = FALSE) lprec <- read.lp("C:/temp/model.lp", type = "lp", verbose = "neutral") solve(lprec) pMedianObjective <- get.objective(lprec) pMedianObjective <- round(pMedianObjective,digits=1) variables <- get.variables(lprec) # SOLVE RANDOM MODEL AND GET RESULTS modelRandom <- rbind(objective, constraint1, constraint2, constraint3, randomConstraint4, randomConstraint5, constraint6Xij, constraint6Yj, integerYj, integerXij) write.table(modelRandom, "modelRandom.lp", sep="\t", row.names = F, col.names = F, quote = FALSE) lprecRandom <- read.lp("C:/temp/modelRandom.lp", type = "lp", verbose = "neutral") solve(lprecRandom) randomObjective <- get.objective(lprecRandom) randomObjective <- round(randomObjective,digits=1) variablesRandom <- get.variables(lprecRandom) # GET THE NUMBER OF ITERATIONS AND NODES ASSOCIATED WITH MODEL iterations <- get.total.iter(lprec) countCandidateNodes <- ncol(pairsMatrix) countDemandNodes <- nrow(pairsMatrix) lengthMatrix <- length(pairsMatrix) climDivision <- as.character(i) # ORDER OF DECISION VARIABLES IN LP MODEL : X,Y # VECTOR LENGTHS OF DECISION VARIABLES lengthX = ncol(pairsMatrix) * nrow(pairsMatrix) lengthY = ncol(pairsMatrix) # COMBINE VECTOR OF DECISIONS (0,1) WITH VECTOR OF VARIABLE NAMES decisions <- cbind(as.vector(c(pairsVector, sitesVector)), variables) decisions <- as.data.frame(decisions) colnames(decisions) <- c("VARIABLE", "DECISION") # SELECTED SITES (AGGREGATION UNITS) selectedSites <- subset(decisions[as.integer(lengthX + 1) : as.integer(lengthX + lengthY),], DECISION == 1, select = c("VARIABLE", "DECISION")) colnames(selectedSites) <- c("SITE", "DECISION") selectedSites[,c("SITE")] <- sub("y", "", selectedSites[,c("SITE")]) selectedSites[,c("SITE")] <- as.integer(selectedSites[,c("SITE")]) selectedSites <- merge(candidateSites, selectedSites, by="SITE") selectedSitesAll <- rbind(selectedSitesAll, selectedSites) # GET THE NUMBER OF ITERATIONS AND NODES ASSOCIATED WITH MODEL iterations <- get.total.iter(lprec) countCandidateNodes <- ncol(pairsMatrix) countDemandNodes <- nrow(pairsMatrix) lengthMatrix <- length(pairsMatrix) iterationTable <- rbind(iterationTable,c(iterations,countCandidateNodes,countDemandNodes,lengthMatrix)) # COMPARISON OF OBJECTIVE VALUE FROM RANDOM VS. P-MEDIAN SOLUTION pctImprovement <- abs(round(((pMedianObjective - randomObjective)/randomObjective)*100,digits=0)) comparisonTable <- rbind(comparisonTable,c(pMedianObjective,randomObjective,pctImprovement)) # EXIT LOOP } # CREATE VECTOR OF CLIMATE DIVISION NAMES divisions <- as.vector(unique(divisionWells[,c("CLIMATE")])) # RESULTS : COMPARISON OF SOLUTIONS FOR P-MEDIAN VS. RANDOM SELECTION comparisonTable <- as.data.frame(cbind(divisions,comparisonTable)) colnames(comparisonTable) <- c("Climate Division","Sum of distances (p-Median solution)","Sum of distances (random selections)","Improvement (%)") write.table(comparisonTable, file = paste("C:/temp/results/comparisonTable", ".xls" , sep = ""), row.names=FALSE, sep="\t") # RESULTS : MODEL SIZE BY CLIMATE DIVISION iterationTable <- as.data.frame(cbind(divisions,iterationTable)) colnames(iterationTable) <- c("Climate Division","Iterations","Candidate Nodes","Demand Nodes","Matrix Length") write.table(iterationTable, file = paste("C:/temp/results/iterationTable", ".xls" , sep = ""), row.names=FALSE, sep="\t") # RESULTS : TABLE OF SELECTED SITES (AGGREREGATION UNITS) selectedSitesAll <- merge(selectedSitesAll, topNSource,by= c("INDEX")) write.table(selectedSitesAll, file = paste("C:/temp/results/siteDecisions", ".xls" , sep = ""), row.names=FALSE, sep="\t") # RESULTS : TABLE OF POTENTIAL MONITORING SITES listSites <- sqldf("SELECT t1.* FROM candidateSitesSource t1 WHERE EXISTS (SELECT DISTINCT t2.REFNO, t2.SETTING, t2.CLIMATE, t2.AQ_SYSTEM, t2.LANDCOVER, t2.TOPO FROM selectedSitesAll t2 WHERE t1.REFNO = t2.REFNO AND t1.SETTING = t2.SETTING AND t1.CLIMATE = t2.CLIMATE AND t1.AQ_SYSTEM = t2.AQ_SYSTEM AND t1.LANDCOVER = t2.LANDCOVER AND t1.TOPO = t2.TOPO)") listSites <- merge(listSites, topNSource, by = c("SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO")) listSites <- subset(listSites, HEATH == 1, select=c("INDEX","REFNO","TYPE","SETTING","CLIMATE","AQ_SYSTEM","LANDCOVER","TOPO","SITE","SITECODE","COUNTY","TOWNSHIP","SQMI","ANNUAL","RANK","NEAR_2MI","NEAR_4MI","NEAR_6MI","NEAR_8MI","NEAR_10MI","NEAR_12MI","HEATH")) write.table(listSites, file = paste("C:/temp/results/siteCodes", ".xls" , sep = ""), row.names=FALSE, sep="\t") # RESULTS : SUMMARY OF POTENTIAL MONITORING SITES COUNTED BY AQUIFER SCENARIO BY CLASS summarySites <- count(listSites, c("CLIMATE","INDEX","RANK","TYPE")) summarySites <- dcast(summarySites, CLIMATE + INDEX + RANK ~ TYPE, value.var="freq", fill=0) summarySites <- summarySites[order(summarySites$CLIMATE,summarySites$RANK),] write.table(summarySites, file = paste("C:/temp/results/summarySites", ".xls" , sep = ""), row.names=FALSE, sep="\t") # RESULTS : TABLE OF AQUIFER SCENARIOS SELECTED TO RECEIVE NEW MONITORING WELLS settingsTopN$LANDCOVER <- as.character(settingsTopN$LANDCOVER) settingsTopN$TOPO <- as.character(settingsTopN$TOPO) settingsTopN$AQ_SYSTEM <- as.character(settingsTopN$AQ_SYSTEM) settingsTopN$CLIMATE <- as.character(settingsTopN$CLIMATE) settingsTopN$LANDCOVER[is.na(settingsTopN$LANDCOVER)] <- "NA" settingsTopN$TOPO[is.na(settingsTopN$TOPO)] <- "NA" indices <- unique(selectedSitesAll$INDEX) settingsTopN <- cbind(settingsTopN, SELECTED = as.vector(settingsTopN$INDEX %in% indices)) colnames(settingsTopN) <- c("Index","Setting","Climate Division","Aquifer System","Majority Land Cover","Topography","# Baseline Wells","# Affected Wells","Areal Extent","Annual Withdrawals","Rank","Average Pumping Depth","Selected") write.table(settingsTopN, file = paste("C:/temp/results/settingsTable", ".xls" , sep = ""), row.names=FALSE, sep="\t")