raw.data <- read.csv("subnationalViews_dataset.csv")
#select countries with higher representativeness
#select variables that will be used in imputations and analyses
raw.data.select <- subset(raw.data, country=="Belgium" |
country=="France" |
country=="Germany" |
country=="Ireland" |
country=="Italy" |
country=="Netherlands" |
country=="Norway" |
country=="Spain" |
country=="Sweden" |
country=="Switzerland" |
country=="United Kingdom",
select=c(surveyId, lang, country,
countryPop, sneType, mlgS1, mlgS2,
mlgS3, mlgS4, mlgS5, mlgC1, mlgC2,
euCompSocpol, euCompEc, euCompInAf,
euCompForAf, euCompOv, snaCompSocpol,
snaCompEc, snaCompInAf, snaCompForAf,
snaCompOv, snaCompC1, snaCompC2,
effectiveIntOrg, effectiveDirRel,
effectiveNatDel, euFundImp.bin,
represent, represent.bin,
norepMotiv, age, gender, eduLev,
polLeftRight, polIdLoc, polIdNat,
polIdEu, polIdCul, polIdIdeo))
rm(raw.data)
#calculate survey weights based on population
pop.list <- aggregate(countryPop ~ country, data=raw.data.select, mean)
freq.list <- data.frame(table(raw.data.select$country))
weight.calc <- data.frame(pop.list, freq.list)
weight.calc$percPop <- weight.calc$countryPop/sum(weight.calc$countryPop)
weight.calc$percFreq <- weight.calc$Freq/sum(weight.calc$Freq)
weight.calc$weight <- weight.calc$percPop/weight.calc$percFreq
#add the weights to the main data
raw.data.select$weight <- rep(NA, length(raw.data.select$surveyId))
for (i in 1:11) {
raw.data.select$weight[raw.data.select$country==weight.calc$country[i]] <-
weight.calc$weight[i]
}
rm(pop.list, freq.list, weight.calc)
#calculate factor analysis loadings for indices
mlgComps <- subset(raw.data.select,
select = c(mlgC1, mlgC2, mlgS1, mlgS2, mlgS3, mlgS4, mlgS5))
euComps <- subset(raw.data.select,
select = c(euCompEc, euCompForAf, euCompInAf, euCompOv, euCompSocpol))
snaComps <- subset(raw.data.select,
select = c(snaCompEc, snaCompForAf, snaCompInAf, snaCompOv, snaCompSocpol))
mlgFA <- factanal(na.omit(mlgComps), factor=1, scores="regression")
euFA <- factanal(na.omit(euComps), factor=1, scores="regression")
snaFA <- factanal(na.omit(snaComps), factor=1, scores="regression")
rm(mlgComps, euComps, snaComps)
#turn variables into factors, ordinal where necessary
raw.data.select$surveyId <- factor(raw.data.select$surveyId)
raw.data.select$lang <- factor(raw.data.select$lang)
raw.data.select$country <- factor(raw.data.select$country)
raw.data.select$sneType <- factor(raw.data.select$sneType)
raw.data.select$mlgS1 <- ordered(raw.data.select$mlgS1)
raw.data.select$mlgS2 <- ordered(raw.data.select$mlgS2)
raw.data.select$mlgS3 <- ordered(raw.data.select$mlgS3)
raw.data.select$mlgS4 <- ordered(raw.data.select$mlgS4)
raw.data.select$mlgS5 <- ordered(raw.data.select$mlgS5)
raw.data.select$mlgC1 <- ordered(raw.data.select$mlgC1)
raw.data.select$mlgC2 <- ordered(raw.data.select$mlgC2)
raw.data.select$euCompSocpol <- ordered(raw.data.select$euCompSocpol)
raw.data.select$euCompEc <- ordered(raw.data.select$euCompEc)
raw.data.select$euCompInAf <- ordered(raw.data.select$euCompInAf)
raw.data.select$euCompForAf <- ordered(raw.data.select$euCompForAf)
raw.data.select$euCompOv <- ordered(raw.data.select$euCompOv)
raw.data.select$snaCompSocpol <- ordered(raw.data.select$snaCompSocpol)
raw.data.select$snaCompEc <- ordered(raw.data.select$snaCompEc)
raw.data.select$snaCompInAf <- ordered(raw.data.select$snaCompInAf)
raw.data.select$snaCompForAf <- ordered(raw.data.select$snaCompForAf)
raw.data.select$snaCompOv <- ordered(raw.data.select$snaCompOv)
raw.data.select$snaCompC1 <- ordered(raw.data.select$snaCompC1)
raw.data.select$snaCompC2 <- ordered(raw.data.select$snaCompC2)
raw.data.select$effectiveIntOrg <- factor(raw.data.select$effectiveIntOrg)
raw.data.select$effectiveDirRel <- factor(raw.data.select$effectiveDirRel)
raw.data.select$effectiveNatDel <- factor(raw.data.select$effectiveNatDel)
raw.data.select$euFundImp.bin <- factor(raw.data.select$euFundImp.bin)
raw.data.select$represent <- factor(raw.data.select$represent)
raw.data.select$represent.bin <- factor(raw.data.select$represent.bin)
raw.data.select$norepMotiv <- ordered(raw.data.select$norepMotiv)
raw.data.select$gender <- factor(raw.data.select$gender)
raw.data.select$eduLev <- ordered(raw.data.select$eduLev)
raw.data.select$polLeftRight <- ordered(raw.data.select$polLeftRight)
raw.data.select$polIdLoc <- factor(raw.data.select$polIdLoc)
raw.data.select$polIdNat <- factor(raw.data.select$polIdNat)
raw.data.select$polIdEu <- factor(raw.data.select$polIdEu)
raw.data.select$polIdCul <- factor(raw.data.select$polIdCul)
raw.data.select$polIdIdeo <- factor(raw.data.select$polIdIdeo)
#imputations
require(Amelia)
a.out <- amelia(raw.data.select, empri=6,
idvars=c("surveyId", "lang", "norepMotiv", "weight",
"countryPop", "represent"),
cs="country",
noms=c("sneType", "effectiveIntOrg", "effectiveDirRel",
"effectiveNatDel", "euFundImp.bin", "represent.bin",
"polIdNat", "polIdLoc", "polIdEu", "polIdCul",
"polIdIdeo", "gender"),
ords=c("mlgS1", "mlgS2", "mlgS3", "mlgS4", "mlgS5", "mlgC1",
"mlgC2", "euCompSocpol", "euCompEc", "euCompInAf",
"euCompForAf", "euCompOv", "snaCompSocpol", "snaCompEc",
"snaCompInAf", "snaCompForAf", "snaCompOv", "snaCompC1",
"snaCompC2", "eduLev", "polLeftRight"))
#save imputed datasets for future replications
#dat1 <- a.out$imputations[[1]]
#save(dat1, file="SNWdata_imp1.RData")
#dat2 <- a.out$imputations[[2]]
#save(dat2, file="SNWdata_imp2.RData")
#dat3 <- a.out$imputations[[3]]
#save(dat3, file="SNWdata_imp3.RData")
#dat4 <- a.out$imputations[[4]]
#save(dat4, file="SNWdata_imp4.RData")
#dat5 <- a.out$imputations[[5]]
#save(dat5, file="SNWdata_imp5.RData")
#for replication of exact results, retrieve data from csv files
#a.out$imputations[[1]] <- load("SNWdata_imp1.RData")
#a.out$imputations[[2]] <- load("SNWdata_imp2.RData")
#a.out$imputations[[3]] <- load("SNWdata_imp3.RData")
#a.out$imputations[[4]] <- load("SNWdata_imp4.RData")
#a.out$imputations[[5]] <- load("SNWdata_imp5.RData")
#construct index variables in each imputed dataset, normalised and standardised
require(bestNormalize)
for (i in 1:5) {
a.out$imputations[[i]]$mlgInd <- rep(NA, 649)
a.out$imputations[[i]]$mlgInd <-
as.numeric(a.out$imputations[[i]]$mlgC1)*mlgFA$loadings[1] +
as.numeric(a.out$imputations[[i]]$mlgC2)*mlgFA$loadings[2] +
as.numeric(a.out$imputations[[i]]$mlgS1)*mlgFA$loadings[3] +
as.numeric(a.out$imputations[[i]]$mlgS2)*mlgFA$loadings[4] +
as.numeric(a.out$imputations[[i]]$mlgS3)*mlgFA$loadings[5] +
as.numeric(a.out$imputations[[i]]$mlgS4)*mlgFA$loadings[6] +
as.numeric(a.out$imputations[[i]]$mlgS5)*mlgFA$loadings[7]
a.out$imputations[[i]]$euInd <- rep(NA, 649)
a.out$imputations[[i]]$euInd <-
as.numeric(a.out$imputations[[i]]$euCompEc)*euFA$loadings[1] +
as.numeric(a.out$imputations[[i]]$euCompForAf)*euFA$loadings[2] +
as.numeric(a.out$imputations[[i]]$euCompInAf)*euFA$loadings[3] +
as.numeric(a.out$imputations[[i]]$euCompOv)*euFA$loadings[4] +
as.numeric(a.out$imputations[[i]]$euCompSocpol)*euFA$loadings[5]
a.out$imputations[[i]]$snaInd <- rep(NA, 649)
a.out$imputations[[i]]$snaInd <-
as.numeric(a.out$imputations[[i]]$snaCompEc)*snaFA$loadings[1] +
as.numeric(a.out$imputations[[i]]$snaCompForAf)*snaFA$loadings[2] +
as.numeric(a.out$imputations[[i]]$snaCompInAf)*snaFA$loadings[3] +
as.numeric(a.out$imputations[[i]]$snaCompOv)*snaFA$loadings[4] +
as.numeric(a.out$imputations[[i]]$snaCompSocpol)*snaFA$loadings[5]
mlgNormObj <- orderNorm(a.out$imputations[[i]]$mlgInd)
euNormObj <- orderNorm(a.out$imputations[[i]]$euInd)
snaNormObj <- orderNorm(a.out$imputations[[i]]$snaInd)
a.out$imputations[[i]]$mlgInd.norm <- scale(mlgNormObj$x.t)
a.out$imputations[[i]]$euInd.norm <- scale(euNormObj$x.t)
a.out$imputations[[i]]$snaInd.norm <- scale(snaNormObj$x.t)
}
rm(euFA, mlgFA, snaFA, euNormObj, mlgNormObj, snaNormObj)
#categorical left-right-centre variable, binary education var, relevel inst. type
for (i in 1:5) {
a.out$imputations[[i]]$LRC <- rep(NA, 649)
a.out$imputations[[i]][a.out$imputations[[i]]$polLeftRight<4, ]$LRC <- "Left"
a.out$imputations[[i]][a.out$imputations[[i]]$polLeftRight>3 &
a.out$imputations[[i]]$polLeftRight<7, ]$LRC <- "Centre"
a.out$imputations[[i]][a.out$imputations[[i]]$polLeftRight>6, ]$LRC <- "Right"
a.out$imputations[[i]]$LRC <- factor(a.out$imputations[[i]]$LRC)
a.out$imputations[[i]]$edu.bin <- ifelse(a.out$imputations[[i]]$eduLev==1, 0, 1)
a.out$imputations[[i]]$edu.bin <- factor(a.out$imputations[[i]]$edu.bin)
a.out$imputations[[i]]$represent <- factor(a.out$imputations[[i]]$represent,
levels=c("NONE", "COR", "CLRAE", "OTHER"))
}
#descriptives, using 5th imputation
attach(a.out$imputations[[5]])
mean(mlgInd)
median(mlgInd)
min(mlgInd)
max(mlgInd)
sd(mlgInd)
prop.table(table(represent.bin))
mean(euInd)
median(euInd)
sd(euInd)
min(euInd)
max(euInd)
mean(snaInd)
median(snaInd)
sd(snaInd)
min(snaInd)
max(snaInd)
prop.table(table(effectiveIntOrg))
prop.table(table(effectiveDirRel))
prop.table(table(euFundImp.bin))
prop.table(table(polIdEu))
prop.table(table(polIdCul))
prop.table(table(polIdIdeo))
prop.table(table(LRC))
mean(age)
median(age)
sd(age)
min(age)
max(age)
prop.table(table(gender))
prop.table(table(sneType))
mean(as.numeric(na.omit(norepMotiv)))
median(as.numeric(na.omit(norepMotiv)))
sd(as.numeric(na.omit(norepMotiv)))
min(as.numeric(na.omit(norepMotiv)))
max(as.numeric(na.omit(norepMotiv)))
prop.table(table(eduLev))
detach(a.out$imputations[[5]])
#run regressions in each imputed dataset
mainFormula <- as.formula("mlgInd.norm ~ country - 1 + represent.bin +
euInd.norm + snaInd.norm + effectiveIntOrg +
effectiveDirRel + euFundImp.bin + polIdEu +
polIdCul + polIdIdeo + LRC + age + gender + eduLev")
require(plm)
resultsList <- vector("list", 5)
resultsListPanel <- vector("list", 5)
for (i in 1:5) {
resultsList[[i]] <- lm(mainFormula, weights=weight,
data=a.out$imputations[[i]])
resultsListPanel[[i]] <- plm(mlgInd.norm ~ represent.bin +
euInd.norm + snaInd.norm + effectiveIntOrg +
effectiveDirRel + euFundImp.bin + polIdEu +
polIdCul + polIdIdeo + LRC + age + gender + eduLev,
effect="individual", model="within", index="country",
data=a.out$imputations[[i]])
}
#compare results from different imputations
sumTable1 <- summary(resultsList[[1]])
sumTable2 <- summary(resultsList[[2]])
sumTable3 <- summary(resultsList[[3]])
sumTable4 <- summary(resultsList[[4]])
sumTable5 <- summary(resultsList[[5]])
coefTable <- data.frame(cbind(sumTable1$coefficients[,1], sumTable2$coefficients[,1],
sumTable3$coefficients[,1], sumTable4$coefficients[,1],
sumTable5$coefficients[,1]))
coefTable$average <- (coefTable$X1+coefTable$X2+coefTable$X3+coefTable$X4+coefTable$X5)/5
pvalTable <- data.frame(cbind(sumTable1$coefficients[,4], sumTable2$coefficients[,4],
sumTable3$coefficients[,4], sumTable4$coefficients[,4],
sumTable5$coefficients[,4]))
pvalTable$average <- (pvalTable$X1+pvalTable$X2+pvalTable$X3+pvalTable$X4+pvalTable$X5)/5
pvalTable$star <- ifelse(pvalTable$average<0.001, "***",
ifelse(pvalTable$average>=0.001 & pvalTable$average<0.01, "**",
ifelse(pvalTable$average>=0.01 & pvalTable$average<0.05, "*",
ifelse(pvalTable$average>=0.05 & pvalTable$average<0.1, ".", ""))))
#imputation no.4 gives the most conservative results
#imputation no.2 gives the median conservative results
rm(sumTable1, sumTable2, sumTable3, sumTable4, sumTable5)
summary(resultsList[[2]])
summary(resultsList[[4]])
#robustness
require(lmtest)
require(sandwich)
require(car)
resids <- resultsList[[4]]$residuals
plot(resids ~ a.out$imputations[[4]]$mlgInd.norm) #heteroskedasticity, few outliers
outlierTest(resultsList[[4]]) #check obs.s no 692, 497, 196
bptest(resultsList[[4]]) #check rob. std. ettots
vif(resultsList[[4]])
coeftest(resultsList[[4]], vcov=vcovHC(resultsList[[4]], type="HC3")) #HC3 most cons.
coeftest(resultsListPanel[[4]], vcov=vcovHC(resultsListPanel[[1]], type="HC3", cluster="group"))
dataCons <- a.out$imputations[[4]]
dataCons <- dataCons[!(rownames(dataCons)=="692" |
rownames(dataCons)=="497" |
rownames(dataCons)=="196"), ]
consModel <- lm(mainFormula, weights=weight, data=dataCons)
summary(consModel) #smaller but signf. coef.
rm(resids, dataCons, consModel)
#model with institution type
modelInstType <- lm(mlgInd.norm ~ country - 1 + represent +
euInd.norm + snaInd.norm + effectiveIntOrg +
effectiveDirRel + euFundImp.bin + polIdEu +
polIdCul + polIdIdeo + LRC +
age + gender + eduLev, weights=weight,
data=a.out$imputations[[2]])
summary(modelInstType)
#subsamples with different levels of motivation
motivResults <- vector("list", 4)
for (i in 1:4) {
motivData <- a.out$imputations[[2]]
motivData <- subset(motivData, norepMotiv>i | represent.bin==1)
motivResults[[i]] <- lm(mainFormula, data=motivData) #weights dropped, small samples
}
rm(motivData)
summary(resultsList[[2]])
summary(motivResults[[1]])
summary(motivResults[[2]])
summary(motivResults[[3]])
summary(motivResults[[4]])
#simulating and plotting, using the median-conservative imputed data
require(Zelig)
z.out <- zelig(mlgInd.norm ~ country - 1 + represent.bin +
euInd.norm + snaInd.norm + effectiveIntOrg +
effectiveDirRel + euFundImp.bin + polIdEu +
polIdCul + polIdIdeo + LRC +
age + gender, model="ls",
weights=a.out$imputations[[2]]$weight,
data=a.out$imputations[[2]])
z.out <- setx(z.out, represent.bin=0)
z.out <- setx1(z.out, represent.bin=1)
z.out <- sim(z.out)
evX0 <- get_qi(z.out, xvalue="x", qi="ev")
evX1 <- get_qi(z.out, xvalue="x1", qi="ev")
expVal <- rbind(evX0, evX1)
rm(evX0, evX1)
#same for motivation differences
motivSim1 <- vector("list", 2)
motivSim2 <- vector("list", 2)
motivSim3 <- vector("list", 2)
motivSim4 <- vector("list", 2)
motivSimAll <- list(motivSim1, motivSim2, motivSim3, motivSim4)
rm(motivSim1, motivSim2, motivSim3, motivSim4)
for(i in 1:4) {
motivData <- a.out$imputations[[2]]
motivData <- subset(motivData, norepMotiv>i | represent.bin==1)
z.out <- zelig(mlgInd.norm ~ country - 1 + represent.bin +
euInd.norm + snaInd.norm + effectiveIntOrg +
effectiveDirRel + euFundImp.bin + polIdEu +
polIdCul + polIdIdeo + LRC +
age + gender, model="ls", data=motivData)
z.out <- setx(z.out, represent.bin=0)
z.out <- setx1(z.out, represent.bin=1)
z.out <- sim(z.out)
motivSimAll[[i]][[1]] <- get_qi(z.out, xvalue="x", qi="ev")
motivSimAll[[i]][[2]] <- get_qi(z.out, xvalue="x1", qi="ev")
}
expVal1 <- rbind(motivSimAll[[1]][[1]], motivSimAll[[1]][[2]])
expVal2 <- rbind(motivSimAll[[2]][[1]], motivSimAll[[2]][[2]])
expVal3 <- rbind(motivSimAll[[3]][[1]], motivSimAll[[3]][[2]])
expVal4 <- rbind(motivSimAll[[4]][[1]], motivSimAll[[4]][[2]])
comparison <- rbind(matrix(rep("w/out experience", 1000)),
matrix(rep("with experience", 1000)))
plotData <- data.frame(cbind(expVal, expVal1, expVal2, expVal3, expVal4, comparison))
colnames(plotData) <-c("expVal", "expVal1", "expVal2", "expVal3", "expVal4", "XP")
plotData$expVal <- as.numeric(plotData$expVal)
plotData$expVal1 <- as.numeric(plotData$expVal1)
plotData$expVal2 <- as.numeric(plotData$expVal2)
plotData$expVal3 <- as.numeric(plotData$expVal3)
plotData$expVal4 <- as.numeric(plotData$expVal4)
plotData$XP <- factor(plotData$XP)
rm(comparison, expVal, expVal1, expVal2, expVal3, expVal4, z.out)
require(ggplot2)
plot0box <- ggplot(plotData, aes(x=XP, y=expVal, fill=XP)) + geom_boxplot() +
labs(x="Experience", y="Expected values",
title="Boxplot") +
guides(fill=FALSE)
plot0dens <- ggplot(plotData, aes(x=expVal, fill=XP)) + geom_density(alpha=.3) +
labs(x="Expected values", y="Density",
title="Density plot") +
guides(fill=guide_legend(title="Experience")) +
theme(axis.text.y=element_blank(), axis.ticks=element_blank(),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(),
legend.title = element_text(size=9), legend.text = element_text(size=7),
legend.position="bottom")
plot1dens <- ggplot(plotData, aes(x=expVal1, fill=XP)) + geom_density(alpha=.3) +
labs(x="Expected values", y="Density",
subtitle="Motivation>1") +
guides(fill=guide_legend(title="Experience")) + xlim(-0.25, 1) +
theme(axis.text.y=element_blank(), axis.ticks=element_blank(),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(),
legend.title = element_text(size=9), legend.text = element_text(size=7))
plot2dens <- ggplot(plotData, aes(x=expVal2, fill=XP)) + geom_density(alpha=.3) +
labs(x="Expected values", y="Density",
subtitle="Motivation>2") +
guides(fill=guide_legend(title="Experience")) + xlim(-0.25, 1) +
theme(axis.text.y=element_blank(), axis.ticks=element_blank(),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(),
legend.title = element_text(size=9), legend.text = element_text(size=7))
plot3dens <- ggplot(plotData, aes(x=expVal3, fill=XP)) + geom_density(alpha=.3) +
labs(x="Expected values", y="Density",
subtitle="Motivation>3") +
guides(fill=guide_legend(title="Experience")) + xlim(-0.25, 1) +
theme(axis.text.y=element_blank(), axis.ticks=element_blank(),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(),
legend.title = element_text(size=9), legend.text = element_text(size=7))
plot4dens <- ggplot(plotData, aes(x=expVal4, fill=XP)) + geom_density(alpha=.3) +
labs(x="Expected values", y="Density",
subtitle="Motivation>4") +
guides(fill=guide_legend(title="Experience")) + xlim(-0.25, 1) +
theme(axis.text.y=element_blank(), axis.ticks=element_blank(),
panel.grid.major=element_blank(), panel.grid.minor=element_blank(),
legend.title = element_text(size=9), legend.text = element_text(size=7))
require(gridExtra)
grid.arrange(plot0box, plot0dens, ncol=2, nrow=1)
grid.arrange(plot1dens, plot2dens, plot3dens, plot4dens, nrow=4, ncol=1)
# plot regression coefficients for presentation
require(jtools)
coefPlotVars <- c("represent.bin1", "euInd.norm", "snaInd.norm",
"effectiveIntOrg1", "effectiveDirRel1", "euFundImp.bin1",
"polIdEu1", "polIdCul1", "polIdIdeo1",
"LRCLeft", "LRCRight")
names(coefPlotVars) <- c("Experience in institutions",
"Views on European institutions",
"Views on decentralisation",
"Effectiveness of IOs",
"Effectiveness of dir. rel.",
"Importance of EU funds",
"Pol. identity: European",
"Pol. identity: Ethnocultural",
"Pol. identity: Ideological",
"Pol. ideology: Left",
"Pol. ideology: Right")
plot_summs(resultsList[[2]], coefs=coefPlotVars)
coefPlotVars2 <- c("representCOR", "representCLRAE", "representOTHER",
"euInd.norm", "snaInd.norm",
"effectiveIntOrg1", "effectiveDirRel1", "euFundImp.bin1",
"polIdEu1", "polIdCul1", "polIdIdeo1",
"LRCLeft", "LRCRight")
names(coefPlotVars2) <- c("Experience in CoR",
"Experience in CLRAE",
"Experience in Others",
"Views on European institutions",
"Views on decentralisation",
"Effectiveness of IOs",
"Effectiveness of dir. rel.",
"Importance of EU funds",
"Pol. identity: European",
"Pol. identity: Ethnocultural",
"Pol. identity: Ideological",
"Pol. ideology: Left",
"Pol. ideology: Right")
plot_summs(modelInstType, coefs=coefPlotVars2)