# Script for Generating Absolute Wealth Estimate (AWE) from DHS Wealth Index
#
# Introduction: This script is intended as a guide for those who would like to
# generate the absolute wealth estimates (AWE) from DHS data, using the method
# described in:
#   Hruschka, D.J., Gerkey, D. & Hadley, C. 2015. "Estimating the Absolute
#   Wealth of Households." Bulletin of the World Health Organization.
#   Article ID: BLT.147082
#
# Data Requirements:
# 1. DHS data from one or more surveys. Each survey represents a combination
#    of a country and year. Specifically, at least three variables are needed
#    from the DHS data:
#      A. Country ID. Hereafter = "country"
#      B. Year ID. Hereafter = "year"
#      C. DHS Wealth Factor Score. Hereafter = "wealth.factor"
# 2. Country GDP per capita based on purchasing power parity (PPP) in constant
#    2011 international dollars. Hereafter = "GDPpc"
#    Source: World Bank: http://data.worldbank.org/indicator/NY.GDP.PCAP.PP.KD
# 3. Country Gini Coefficient for household wealth. Hereafter = "gini"
#    Source: Davies, J.B., Sandstrom, S., Shorrocks, A.B., Wolff, E.N. 2009.
#    "The Level and Distribution of Global Household Wealth."
#    National Bureau of Economic Research Working Paper Series:
#    Working Paper 15508. http://www.nber.org/papers/w15508 (see Appendix V)
# 4. Oil rents (% of GDP). Hereafter = "OilRentCode"
#    Source: World Bank: http://data.worldbank.org/indicator/NY.GDP.PETR.RT.ZS
#
# Load Data: The script below assumes you have loaded a data table in R,
# hereafter "my.dhs.data", with rows of individuals and columns for the
# following variables:
#   1. DHS Country ID = "country"
#   2. DHS Year ID = "year"
#   3. DHS Wealth Factor = "wealth.factor"
#   4. Country GDP per capita PPP 2011 International $ = "GDPpc"
#   5. Country Gini for HH Wealth = "gini"
#   6. Percentage of GDP derived from oil rents = "OilRentCode"
#
## NOTE: The Hruschka et al. 2015 paper uses a specific formulation for
## deriving country-level wealth per capita based on GDP and oil rents.
## A researcher may choose a different method of calculating the mean wealth
## per capita used in the formulas below (lnPPPwpc and PPPwpc).

# Load R Packages:
library(data.table)

# Generate a unique DHS survey ID (= country + survey year).
#   country.year.id2 = the pasted text key (country followed by year)
#   country.year.id  = a numeric code for that key (its position among the
#                      sorted factor levels of country.year.id2)
my.dhs.data$country.year.id2 <- paste0(my.dhs.data$country, my.dhs.data$year)
my.dhs.data$country.year.id <-
  as.numeric(as.factor(my.dhs.data$country.year.id2))

# Count the rows (individuals) in each survey.
# holder has one row per survey: the survey ID and its number of individuals.
# (The former unused object `names`, which masked base::names, was dropped.)
holder <- aggregate(
  my.dhs.data$country.year.id,
  by = list(my.dhs.data$country.year.id),
  FUN = length
)
names(holder) <- c("country.year.id", "numIND")

# Attach the per-survey count to every individual as the column numIND.
# merge() returns the rows sorted by country.year.id and moves that key to
# the first column, so row and column order change here.
my.dhs.data <- merge(
  x = my.dhs.data,
  y = holder,
  by = "country.year.id",
  all.x = TRUE
)

# Creates proportional rank of individuals within surveys.
# wrank1 = rank of each individual's wealth factor within their survey
# (1 = poorest, N = wealthiest). Ties receive the average rank and missing
# wealth factors keep an NA rank (the defaults of rank()).
my.dhs.data$wrank1 <- ave(
  my.dhs.data$wealth.factor,
  my.dhs.data$country.year.id,
  FUN = rank
)

# Number of individuals per survey that actually received a rank. Rows with a
# missing wealth factor are not ranked, so counting them in the denominator
# would deflate every proportional rank in that survey. When no wealth factor
# is missing this equals the per-survey row count.
n.ranked <- ave(
  as.numeric(!is.na(my.dhs.data$wealth.factor)),
  my.dhs.data$country.year.id,
  FUN = sum
)

# wrank3 = proportional rank, shrunk by (N - 1) / N so that it stays strictly
# below 1 (a proportion of exactly 1 would send the Pareto ICDF to infinity).
my.dhs.data$wrank3 <-
  (my.dhs.data$wrank1 / n.ranked) * (n.ranked - 1) / n.ranked

# Generating Country-level Wealth Per Capita.
# Uses the regression equation from Davies et al 2009 & Davies et al 2011.

# GDPpc may have been read in as a factor; go through character so that the
# labels (not the internal factor codes) are converted to numbers.
my.dhs.data$GDPpc <- as.numeric(as.character(my.dhs.data$GDPpc))

# lnPPPwpc = natural log of country wealth per capita, PPP in 2011 constant
# international $, derived from GDP per capita and the oil-rent percentage.
# NOTE(review): the oil-rent factor (1 - OilRentCode / 100) multiplies
# log(GDPpc) rather than GDPpc itself -- confirm against Hruschka et al. 2015.
my.dhs.data$lnPPPwpc <-
  1.129 * log(my.dhs.data$GDPpc) * (1 - my.dhs.data$OilRentCode / 100) - 0.293

# PPPwpc = country wealth per capita, PPP in 2011 constant international $.
my.dhs.data$PPPwpc <- exp(my.dhs.data$lnPPPwpc)

### Generating Absolute Wealth Estimates ###
# Note: As explained in Hruschka et al. 2015, we generate AWE using one of
# three distributions:
#   1. Pareto Distribution
#   2. Log-normal Distribution
#   3. Combined Distribution (i.e. weighted average of AWE from the Pareto and
#      Log-normal Distributions)
# 1. Absolute Wealth Estimate - Pareto

# Shape parameter (alpha) of the Pareto distribution implied by the Gini
# coefficient: gini = 1 / (2 * alpha - 1)  =>  alpha = (1 + gini) / (2 * gini).
my.dhs.data$shape <- (1 + my.dhs.data$gini) / (2 * my.dhs.data$gini)

# Threshold (scale / minimum) of the Pareto distribution whose mean is PPPwpc:
# mean = shape * threshold / (shape - 1)  =>  threshold = (1 - 1/shape) * mean.
# The parentheses matter: without them the expression evaluates as
# 1 - (PPPwpc / shape), which is not the Pareto threshold.
# Presumably gini and PPPwpc are constant within a survey; if so, the threshold
# cancels in the rescaling below and this correction changes only the stored
# threshold and icdf columns, not wealthpc.
my.dhs.data$threshold <- (1 - 1 / my.dhs.data$shape) * my.dhs.data$PPPwpc

# Inverse CDF (quantile function) of the Pareto distribution evaluated at each
# individual's proportional rank.
my.dhs.data$icdf <-
  my.dhs.data$threshold / ((1 - my.dhs.data$wrank3)^(1 / my.dhs.data$shape))

# Mean ICDF per survey. na.rm = TRUE keeps one unranked individual (NA icdf)
# from turning the whole survey's mean, and hence every estimate in it, into NA.
mean.icdf.country <- tapply(
  my.dhs.data$icdf,
  my.dhs.data$country.year.id2,
  mean,
  na.rm = TRUE
)
# One-column data frame; the column is named after the object
# ("mean.icdf.country") and the row names are the survey IDs.
mean.icdf.country <- as.data.frame(mean.icdf.country)

# Merge the survey means back onto the individuals.
# New column in my.dhs.data: "mean.icdf.country" = mean ICDF at survey level.
my.dhs.data <- merge(
  x = my.dhs.data,
  y = mean.icdf.country,
  by.x = "country.year.id2",
  by.y = "row.names",
  all.x = TRUE
)

# Absolute Wealth Estimate (in PPP 2011 International Constant Dollars).
# Rescaling by the survey mean makes the estimates average to PPPwpc.
# The new variable "wealthpc" represents the Pareto Absolute Wealth Estimate.
my.dhs.data$wealthpc <-
  my.dhs.data$icdf * my.dhs.data$PPPwpc / my.dhs.data$mean.icdf.country

# 2. Absolute Wealth Estimate - Log-normal

# Shape parameter (sigma = sdlog) implied by the Gini coefficient:
# gini = 2 * pnorm(sigma / sqrt(2)) - 1.
my.dhs.data$sigma <- sqrt(2) * qnorm((my.dhs.data$gini + 1) / 2)

# Location parameter (mu = meanlog) chosen so that the distribution's mean,
# exp(mu + sigma^2 / 2), equals PPPwpc.
my.dhs.data$mu <- log(my.dhs.data$PPPwpc) - (my.dhs.data$sigma^2) / 2

# Inverse CDF (quantile function) of the log-normal distribution evaluated at
# each individual's proportional rank.
my.dhs.data$icdfln <- qlnorm(
  my.dhs.data$wrank3,
  meanlog = my.dhs.data$mu,
  sdlog = my.dhs.data$sigma
)

# Mean ICDF per survey (NA rows ignored, as for the Pareto estimate).
mean.icdf.country.ln <- tapply(
  my.dhs.data$icdfln,
  my.dhs.data$country.year.id2,
  mean,
  na.rm = TRUE
)
mean.icdf.country.ln <- as.data.frame(mean.icdf.country.ln)

# Merge the survey means back onto the individuals.
# New column in my.dhs.data: "mean.icdf.country.ln" = mean ICDF at survey level.
my.dhs.data <- merge(
  x = my.dhs.data,
  y = mean.icdf.country.ln,
  by.x = "country.year.id2",
  by.y = "row.names",
  all.x = TRUE
)

# Estimated wealth per capita, PPP in 2011 International Constant $.
# The new variable "wealthpcln" represents the Log-normal Absolute Wealth
# Estimate.
my.dhs.data$wealthpcln <-
  my.dhs.data$icdfln * my.dhs.data$PPPwpc / my.dhs.data$mean.icdf.country.ln
# 3. Combined Pareto and Log-normal Absolute Wealth Estimates
#    Wealth(G) = Pareto AWE ^ G  x  Log-normal AWE ^ (1 - G),  0 <= G <= 1

# Gamma (G) weight given to the Pareto estimate. Kept in one place so that it
# cannot get out of sync between the two exponents below. The output column is
# still called "wealthpcg32"; rename it if you change the weight.
gamma.weight <- 0.32

# The new variable "wealthpcg32" represents the Combined AWE with a gamma
# weight = 0.32.
my.dhs.data$wealthpcg32 <-
  (my.dhs.data$wealthpc^gamma.weight) *
  (my.dhs.data$wealthpcln^(1 - gamma.weight))

## Summary of Results ##
# The code above generates three Absolute Wealth Estimates (AWE):
#   Pareto AWE                       = my.dhs.data$wealthpc
#   Log-normal AWE                   = my.dhs.data$wealthpcln
#   Combined AWE (gamma weight=0.32) = my.dhs.data$wealthpcg32
# Of these three estimates, Hruschka et al 2015 suggest the Combined AWE
# provides the closest match with World Bank poverty headcounts.