#---
#title: "IV Regression Analysis - Case Study Attitudes toward Justice"
#author: "Conrad Ziller, University of Duisburg-Essen"
#---

packages <- c("haven", "readxl" , "dplyr", "descr", "lattice", "knitr", "ggplot2", "Hmisc", "stargazer", "interactions", "car", "lmtest", "kableExtra")
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages], repos = "https://cran.r-project.org/")
}
invisible(lapply(packages, library, character.only = TRUE))


# Data preparation ----
# Load the reduced ALLBUS 2021 extract (Stata file) and clean it in one pass:
#   * na.omit() removes every row that has a missing value on ANY variable.
#     This is only advisable for reduced data sets with few variables;
#     otherwise observations would be "thrown out" because of variables that
#     are irrelevant to the analysis.
#   * subset() removes the observations coded sex == 3 ("diverse"), which
#     simplifies hypothesis generation.
data_allbus <- read_dta("data/allbus2021_reduziert.dta") %>%
  na.omit() %>%
  subset(sex != 3)

# Recode variables in a single step:
#   income     - copy of incc under a more readable name
#   female     - 0 when sex == 1, 1 when sex == 2
#   east       - 0 when eastwest == 1, 1 when eastwest == 2
#   unemployed - 1 when dw18 == 1, 0 when dw18 == 2
# Any other code falls through case_when() and becomes NA.
data_allbus <- data_allbus %>%
  mutate(
    income = incc,
    female = case_when(
      sex == 1 ~ 0,
      sex == 2 ~ 1
    ),
    east = case_when(
      eastwest == 1 ~ 0,
      eastwest == 2 ~ 1
    ),
    unemployed = case_when(
      dw18 == 1 ~ 1,
      dw18 == 2 ~ 0
    )
  )

# Overview of the prepared data: per-variable summaries and the first 10 rows
summary(data_allbus)
head(data_allbus, n = 10)


# Correlation matrix im19-21
# Check how strongly the three items im19, im20 and im21 are related before
# combining them into one index. rcorr() (Hmisc) expects a matrix, hence the
# as.matrix() conversion of the selected columns.
vars <- c("im19", "im20", "im21")
cor.vars <- data_allbus[vars]
rcorr(as.matrix(cor.vars))


# Index creation
# "morejustice" is the unweighted mean of the three items, so it stays on the
# same scale as the original items.
data_allbus$morejustice <- (data_allbus$im19 + data_allbus$im20 + data_allbus$im21)/3
# Inspect the distribution of the new index
hist(data_allbus$morejustice)


# NRW example ----
# Read the regional NRW data (Excel file) and regress the crime rate on the
# unemployment variable; the `1` in the formula denotes the intercept.
data_nrw <- read_excel(path = "data/inkar_nrw.xls")
model1 <- lm(
  formula = crimerate ~ 1 + unemp,
  data = data_nrw
)
# Print the regression table as plain text to the console
stargazer(model1, type = "text")


# Bivariate regression ----
# Regress the justice index on income.
model_biv <- lm(morejustice ~ 1 + income, data = data_allbus) # 1 refers to the constant or intercept
summary(model_biv)
stargazer(model_biv, type = "text")


# Standardized regression coefficient by hand
# beta_std = b * sd(x) / sd(y)
sdx <- sd(data_allbus$income)
sdy <- sd(data_allbus$morejustice)

# The unstandardized slope is taken from the fitted model rather than typed in
# as a rounded number, so the result stays correct if the data or the model
# specification changes.
beta_s <- (coef(model_biv)[["income"]] * sdx) / sdy
beta_s

# Standardized regression coefficient with model
# scale() z-standardizes both variables, so the slope of this model is the
# standardized coefficient and should match beta_s.
model_biv_s <- lm(scale(morejustice) ~ 1 + scale(income), data = data_allbus)
summary(model_biv_s)

# Calculate correlation
# With a single predictor the standardized slope equals the Pearson
# correlation between predictor and outcome, which serves as a cross-check.
vars <- c("income", "morejustice")
cor.vars <- data_allbus[vars]
rcorr(as.matrix(cor.vars))


# Estimation and result interpretation
# Multiple regression: income plus the controls female, unemployed and east.
model_mult <- lm(morejustice ~ 1 + income + female + unemployed + east, data = data_allbus)
# Show the bivariate and the multiple model side by side
stargazer(model_biv, model_mult, type = "text")


# VIF
# Variance inflation factors (car) as a multicollinearity check
vif(model_mult)


# Normality of the residuals
# Extract fitted values and residuals once; both are reused in the checks below
predict.model <- fitted(model_mult)
error.model <- residuals(model_mult)
hist(error.model)

# Quantile-comparison plot of the residuals (car)
qqPlot(error.model)


# Homoscedasticity
# Residuals against fitted values with a regression line only
# (no smoother, no marginal boxplots)
scatterplot(error.model~predict.model, regLine=TRUE, smooth=FALSE, boxplots=FALSE)

# Breusch-Pagan test for heteroscedasticity (lmtest)
bptest(model_mult)


# Non-linear effects ----
# Plain scatter plot of the justice index against income
sc1 <- ggplot(data = data_allbus, aes(income, morejustice)) +
  geom_point()
sc1

# Jittered points (to reduce overplotting) plus a smoothed trend line.
# No layer-specific mapping is needed: geom_jitter() inherits x and y from
# ggplot(). "income" is not a ggplot2 aesthetic, so mapping it inside aes()
# would only be ignored with a warning.
sc2 <- ggplot(data = data_allbus, aes(income, morejustice)) +
  geom_jitter(size = 0.5) +
  geom_smooth()
sc2


# Test for non-linearity ----
# Add the squared income term as its own column, then extend the multiple
# regression by that term to test for a curvilinear income effect.
data_allbus <- data_allbus %>%
  mutate(income_squared = income * income)

model_nlin <- lm(
  morejustice ~ 1 + income + income_squared + female + unemployed + east,
  data = data_allbus
)

# Compare the bivariate, multiple and non-linear specifications side by side
stargazer(model_biv, model_mult, model_nlin, type = "text")


# Interaction effects ----
# "female" and "east" are nominal variables. Declaring them as factors matters
# for the graphical representation of the interaction further down.
data_allbus <- data_allbus %>%
  mutate(
    female = factor(female),
    east = factor(east)
  )

# Interactions can be written with ":" (interaction term only) or "*"
# (main effects plus interaction). Here the main effects are listed
# explicitly and the interaction is added with ":".
model_interaktion <- lm(
  morejustice ~ 1 + income + unemployed + female + east + female : east,
  data = data_allbus
)

stargazer(model_interaktion, type = "text")


# Interaction plot ----
# Predicted values by "female", with separate lines (and line types) for the
# levels of the moderator "east".
cat_plot(
  model_interaktion,
  pred = female,
  modx = east,
  geom = "line",
  point.shape = TRUE,
  vary.lty = TRUE
)