# “Indigenous institutions in Sub-Saharan Africa” – R script mapping data points to panel dataset

#setwd("")

# read data points
# stringsAsFactors = FALSE keeps the text columns as character vectors on
# every R version; as factors they would break the name-based column lookups
# and the c() call that builds var_names further down
prov_list <- read.csv("introduced_provisions_varnames.csv",
                      stringsAsFactors = FALSE)
rep_list <- read.csv("repeals_replacements_varnames.csv",
                     stringsAsFactors = FALSE)

# vectors to define countries, year, and variables
country_names <- sort(unique(prov_list$country))
# extra countries for the panel (presumably ones without any introduced
# provision in prov_list -- TODO confirm); union() appends only those not
# already present, so no country can end up with duplicated panel rows.
# Appended names land after the sorted ones, so the vector as a whole is not
# guaranteed to be in alphabetical order.
country_names <- union(
  country_names,
  c("Central African Republic (the)", "Gabon", "Tanzania"))
# every year between the earliest and latest introduction year
year_range_long <- seq(min(prov_list$year), max(prov_list$year))
# column layout of the panel: two key columns followed by one column per
# distinct provision variable
var_names <- c("country", "year", sort(unique(prov_list$varNameDetailed)))

# introduced provisions #######################################################

# create data frame filled with 0's: one row per country-year combination and
# one column per entry of var_names
prov_df_long <- data.frame(
  matrix(0,
         nrow = length(country_names) * length(year_range_long),
         ncol = length(var_names)))
colnames(prov_df_long) <- var_names

# fill in country and year columns: each country is repeated once per year
# (so its rows are contiguous and in ascending year order), and the full year
# range is repeated once per country
prov_df_long$country <- rep(country_names, each = length(year_range_long))
prov_df_long$year <- rep(year_range_long, times = length(country_names))

# fill in other variables with data points: a 1 marks the country-year in
# which the provision named by varNameDetailed was introduced.
# seq_len(nrow()) visits every record, does nothing when there are none, and
# does not depend on an `id` column being present.
for (i in seq_len(nrow(prov_list))) {
  prov_df_long[
    which(prov_df_long$country == prov_list$country[i] &
            prov_df_long$year == prov_list$year[i]),
    # as.character() guarantees selection by column name even if the column
    # was read as a factor
    as.character(prov_list$varNameDetailed[i])] <- 1
}
###############################################################################

# repeals and replacements ####################################################

# create data frame filled with 0's: same layout as the introduced-provisions
# panel (one row per country-year, one column per entry of var_names)
rep_df_long <- data.frame(
  matrix(0,
         nrow = length(country_names) * length(year_range_long),
         ncol = length(var_names)))
colnames(rep_df_long) <- var_names

# fill in country and year columns: each country is repeated once per year,
# and the full year range is repeated once per country
rep_df_long$country <- rep(country_names, each = length(year_range_long))
rep_df_long$year <- rep(year_range_long, times = length(country_names))

# fill in other variables with data points: a -1 marks the country-year of
# the repealing law for the provision named by varNameDetailed.
# seq_len(nrow()) visits every record, does nothing when there are none, and
# does not depend on an `id` column being present.
# NOTE(review): columns and years come from var_names / year_range_long,
# which are derived from the introduced provisions only -- a repeal with a
# variable name or year outside those would not map onto an existing cell;
# confirm the input data rules this out.
for (i in seq_len(nrow(rep_list))) {
  rep_df_long[
    which(rep_df_long$country == rep_list$country[i] &
            rep_df_long$year == rep_list$repealingLawYear[i]),
    # as.character() guarantees selection by column name even if the column
    # was read as a factor
    as.character(rep_list$varNameDetailed[i])] <- (-1)
}
###############################################################################

# applying repeals to introduced provisions ###################################

# cumulative sums of legal provisions
# ave() runs cumsum() within each country and puts every result back on the
# row it was computed from. (Collecting per-country results with tapply() and
# unlist() returns the groups in sorted factor-level order, which misaligns
# the values whenever the rows are not in alphabetical country order.)
# The first two columns (country, year) are keys and are left untouched.
prov_df_long_cumsum <- prov_df_long
prov_df_long_cumsum[-c(1, 2)] <- lapply(
  prov_df_long[-c(1, 2)],
  function(x) ave(x, prov_df_long$country, FUN = cumsum))

# cumulative sums of repeals (entries are -1, so the running totals are <= 0)
rep_df_long_cumsum <- rep_df_long
rep_df_long_cumsum[-c(1, 2)] <- lapply(
  rep_df_long[-c(1, 2)],
  function(x) ave(x, rep_df_long$country, FUN = cumsum))

# discount repeals from cumulative sums: repeals are stored as negative
# counts, so adding the two running totals gives introductions minus repeals
valid_df_long_count <- prov_df_long_cumsum
valid_df_long_count[-c(1, 2)] <-
  prov_df_long_cumsum[-c(1, 2)] + rep_df_long_cumsum[-c(1, 2)]

# reduce back to binary coding: 1 if at least one provision remains in force,
# 0 otherwise
valid_df_long <- valid_df_long_count
valid_df_long[-c(1, 2)] <- lapply(
  valid_df_long_count[-c(1, 2)],
  function(x) ifelse(x > 0, 1, 0))
###############################################################################

# short dataframes ############################################################

# cutoff years per country
# Written as country = year pairs, then laid out as a two-column character
# matrix: column 1 holds the country name, column 2 the cutoff year. The
# years are stored as text because they share one vector with the names.
start_years <- local({
  cutoff_by_country <- c(
    "Angola" = 1975,
    "Benin" = 1960,
    "Botswana" = 1961,
    "Burundi" = 1962,
    "Cameroon" = 1960,
    "Central African Republic (the)" = 1960,
    "Chad" = 1960,
    "Congo (the Democratic Republic of the)" = 1960,
    "Congo (the)" = 1960,
    "Côte d'Ivoire" = 1960,
    "Djibouti" = 1977,
    "Eritrea" = 1991,
    "Eswatini" = 1968,
    "Ethiopia" = 1955,
    "Gabon" = 1960,
    "Gambia (the)" = 1965,
    "Ghana" = 1957,
    "Kenya" = 1963,
    "Lesotho" = 1966,
    "Liberia" = 1955,
    "Madagascar" = 1960,
    "Malawi" = 1962,
    "Mali" = 1960,
    "Mozambique" = 1975,
    "Namibia" = 1990,
    "Niger (the)" = 1960,
    "Nigeria" = 1960,
    "Rwanda" = 1960,
    "Senegal" = 1960,
    "Sierra Leone" = 1961,
    "South Africa" = 1955,
    "South Sudan" = 2009,
    "Sudan (the)" = 1956,
    "Tanzania" = 1961,
    "Togo" = 1960,
    "Uganda" = 1962,
    "Zambia" = 1964,
    "Zimbabwe" = 1980
  )
  # column-wise fill: all names first, then all years, gives one row per
  # country in the order listed above
  matrix(c(names(cutoff_by_country), unname(cutoff_by_country)), ncol = 2)
})

# Split a long panel into one data frame per country listed in `cutoffs`,
# keeping only the rows from that country's cutoff year onwards.
#   df_long: data frame with `country` and `year` columns
#   cutoffs: two-column table, column 1 = country name, column 2 = first year
#            to keep
# Returns a list with one data frame per row of `cutoffs`, in that order.
rows_from_start_year <- function(df_long, cutoffs) {
  lapply(seq_len(nrow(cutoffs)), function(k) {
    # the cutoff may be stored as text; convert it so the years are compared
    # as numbers rather than as strings
    keep <- df_long$country == cutoffs[k, 1] &
      df_long$year >= as.numeric(cutoffs[k, 2])
    # which() drops NA comparisons, matching what subset() would keep
    df_long[which(keep), , drop = FALSE]
  })
}

# introduced provisions
prov_df_short_li <- rows_from_start_year(prov_df_long, start_years)
prov_df_short <- do.call("rbind", prov_df_short_li)

# stock of introduced provisions
prov_stock_df_short_li <- rows_from_start_year(prov_df_long_cumsum,
                                               start_years)
prov_stock_df_short <- do.call("rbind", prov_stock_df_short_li)

# valid provisions
valid_df_short_li <- rows_from_start_year(valid_df_long, start_years)
valid_df_short <- do.call("rbind", valid_df_short_li)

# stock of valid provisions
valid_stock_df_short_li <- rows_from_start_year(valid_df_long_count,
                                                start_years)
valid_stock_df_short <- do.call("rbind", valid_stock_df_short_li)
###############################################################################

# export results ##############################################################
# Each short data frame is written to "<prefix><timestamp>.csv". The
# timestamp is taken once so that all four files of a run carry the same
# stamp even if the clock rolls over to the next minute while writing.
# local() keeps the helper variables out of the workspace saved below.
local({
  export_stamp <- strftime(Sys.time(), "%Y%m%d%H%M")
  exports <- list(
    "df_introduced_provisions_named_" = prov_df_short,
    "df_introduced_provisions_stock_named_" = prov_stock_df_short,
    "df_valid_provisions_named_" = valid_df_short,
    "df_valid_provisions_stock_named_" = valid_stock_df_short
  )
  for (prefix in names(exports)) {
    write.csv(exports[[prefix]],
              file = paste0(prefix, export_stamp, ".csv"),
              row.names = FALSE, fileEncoding = "UTF-8")
  }
})
# snapshot of the whole workspace (inputs, intermediate and final objects)
save.image(file = "prov_df_named_env.RData")