# Use the code below to solve the questions. Expected deliverables: an R file and a Word document.
# Install the 'mlba' companion package from GitHub when it is not yet
# available, then attach it so it is on the search path either way.
if (!requireNamespace("mlba", quietly = TRUE)) {
  # Plain ASCII quotes are required; typographic quotes are a parse error in R.
  devtools::install_github("gedeck/mlba/mlba", force = TRUE)
}
library(mlba)
# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# Logistic Regression
## The Logistic Regression Model
library(ggplot2)
library(gridExtra)
# NOTE(review): this line is corrupted. Everything between `<-` and a later
# `%>%` appears to have been stripped (the `name="name">` residue looks like
# HTML-tag sanitisation -- confirm against the original script). The head of the
# pipeline continued on the following lines is therefore missing and must be
# restored before this file can be parsed.
p <- seq="seq">%
# Continuation of a pipeline whose head is on the preceding line: drop the
# identifier columns and convert the coded columns into labelled factors.
# `select` is namespaced because other packages (e.g. MASS) export a function
# of the same name and can mask dplyr's.
dplyr::select(-c(ID, ZIP.Code)) %>% # Drop ID and zip code columns.
  dplyr::mutate(
    # Education codes 1-3 become descriptive labels.
    Education = factor(Education, levels = 1:3,
                       labels = c("Undergrad", "Graduate", "Advanced/Professional")),
    # Outcome codes 0/1 become "No"/"Yes".
    Personal.Loan = factor(Personal.Loan, levels = c(0, 1),
                           labels = c("No", "Yes"))
  )
# partition data
# Fix the RNG seed so the partition is reproducible.
set.seed(2)
# NOTE(review): the next two lines are corrupted. The arguments of
# createDataPartition() and every statement up to a trailing
# `... > 0.5, 1, 0)` / `)` have been stripped. `train.df` and `holdout.df`,
# which are used further down, are not defined anywhere in the surviving text
# -- presumably they were created here; restore from the original script.
idx <- caret::createDataPartition="caret::createDataPartition"> 0.5, 1, 0)
)
# Evaluate performance ##################################################################
# predict training set
# NOTE(review): this line is corrupted. The predict() call and whatever followed
# it were stripped; only the tail of what looks like a
# factor(ifelse(<prob> > 0.5, 1, 0), ...) call survives. `predicted.train`, used
# below, is presumably the object that tail was assigned to -- confirm. The
# typographic quotes around No/Yes must also become ASCII quotes to parse.
logit.reg.pred.train <- predict="predict"> 0.5, 1, 0), levels = c(0, 1), labels = c(“No”, “Yes”))
# accuracy train
# caret::confusionMatrix() takes the predicted classes first (`data`) and the
# observed classes second (`reference`). Naming both prevents the two from
# being swapped, which would transpose the table and exchange
# sensitivity/specificity with the predictive values.
confusionMatrix(data = predicted.train, reference = train.df$Personal.Loan)
# predict holdout
# NOTE(review): this line is corrupted. The first argument of ifelse() (the
# predicted probability expression for the holdout set) was stripped between
# `factor` and `> 0.5`. What survives thresholds at 0.5 and labels the result
# No/Yes. Restore the probability expression and replace the typographic quotes
# with ASCII quotes.
predicted.holdout <- factor="factor"> 0.5, 1, 0), levels = c(0, 1), labels = c(“No”, “Yes”))
# accuracy holdout
# caret::confusionMatrix() takes the predicted classes first (`data`) and the
# observed classes second (`reference`). Naming both prevents the two from
# being swapped, which would transpose the table and exchange
# sensitivity/specificity with the predictive values.
confusionMatrix(data = predicted.holdout, reference = holdout.df$Personal.Loan)
#########################################################################################
library(gains)
# NOTE(review): this line is corrupted. Everything between `<-` and a later
# `%>%` was stripped. `predicted_probs`, `residuals` and
# `binned_residuals_summary` are used below but not defined in the surviving
# text -- presumably they were created in the lost span; restore from the
# original script.
actual <- ifelse="ifelse">%
# Continuation of a pipeline whose head is on the preceding line: average the
# residual (actual outcome minus predicted probability) within each bin.
# The subtraction must use the ASCII hyphen-minus; an en dash does not parse.
group_by(Bin) %>%
  summarize(MeanResidual = mean(ActualOutcome - PredictedProb))
# Visualize mean residuals: one bar per bin, labelled with the bin identifier.
barplot(
  binned_residuals_summary$MeanResidual,
  names.arg = binned_residuals_summary$Bin,
  xlab = "Bin", ylab = "Mean Residual", main = "Mean Residuals by Bin"
)
# Load the ggplot2 package if not already loaded
library(ggplot2)
# Assuming you have calculated binned residuals as described earlier
# Plot the mean residual of every bin.
# NOTE(review): the title says "Scatter Plot" but geom_col() draws bars, and the
# x axis is labelled "Predicted Probabilities" while it maps `Bin`. Switch to
# geom_point() or adjust the labels once the intent is confirmed.
binned_residuals_summary %>%
  ggplot(aes(x = Bin, y = MeanResidual)) +
  geom_col() +
  labs(x = "Predicted Probabilities", y = "Binned Residuals") +
  ggtitle("Scatter Plot of Binned Residuals") +
  theme_minimal()
# Binned residual plot from the arm package (confidence bands).
# NOTE(review): `residuals` resolves to the stats::residuals function unless an
# object of that name was created earlier -- verify it is defined.
library(arm)
binnedplot(x = predicted_probs, y = residuals)
# SIMULATION:
library(ggplot2)
library(reshape)
library(glmnet)
# Simulate a logistic data-generating process: draw a predictor, map it to a
# success probability through the inverse logit, then draw a 0/1 response.
# The earlier rm(list = ls()) call was dropped: wiping the global environment
# in the middle of a script silently destroys every object created before it.
set.seed(1)
n_obs <- 1000 # number of simulated observations
x <- rnorm(n_obs, sd = 3) # A random variable
hist(x)
summary(x)
# Inverse logit of the linear predictor 1 + 10 * x.
p <- 1 / (1 + exp(-(1 + 10 * x)))
hist(p)
plot(x, p)
y <- rbinom(n_obs, 1, p) # bernoulli response variable
plot(x, y)
# two approaches to set outcome categories
# 1 cutoff
#y = ifelse(p>=0.5,1,0)
# bernoulli response
#set.seed(1)
# NOTE(review): the next comment line is corrupted. Text between `<-` and
# `% ggplot(aes(x)) +` was stripped, and the surviving `ggplot(aes(x)) +` head
# now sits inside the comment. The three layer lines below are therefore
# orphaned (no ggplot object to add them to), and `data.plot`, used by the later
# plots, is not defined in the surviving text -- presumably a data frame holding
# x, p and y; restore from the original script.
#prob <- c="c">% ggplot(aes(x)) +
geom_line(aes(y=p)) +
geom_point(aes(y=y,color=factor(y)))+
theme_bw()
# plot category
# Scatter of the response against the predictor, coloured by the response.
ggplot(data = data.plot, mapping = aes(x = x, y = y, colour = y)) +
  geom_point() +
  theme_bw()
# plot linear regression
# Overlay an ordinary least-squares fit on the 0/1 response; the method name
# must be written with ASCII quotes to parse.
data.plot %>%
  ggplot(aes(x, y, color = y)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_bw()
# plot glm
# Overlay a binomial GLM (logistic) fit on the 0/1 response; string arguments
# must be written with ASCII quotes to parse.
data.plot %>%
  ggplot(aes(x, y, color = y)) +
  geom_point() +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  ) +
  theme_bw()
# get coefficients through linear regression
# Regress the log-odds of p on x with an ordinary linear model.
logit <- log(p / (1 - p))
plot(x, logit)
# Build the frame directly from the two vectors (no matrix round-trip needed).
data <- data.frame(logit = logit, x = x)
summary(data)
# remove infinite in all columns:
# the log-odds are infinite wherever p is exactly 0 or 1, and lm() rejects
# them. if_all() replaces the superseded filter_all(all_vars(...)) form.
data <- data %>%
  dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.infinite(.x)))
summary(data)
logit.model <- lm(logit ~ x, data = data)
logit.model
# get coefficients through logistic regression via glm
# Assemble the modelling frame with named columns in one step; this replaces
# the transpose-of-rbind round-trip and the separate names<- call, which did
# not parse because of its typographic quotes.
data.glm <- data.frame(y = as.numeric(y), x = as.numeric(x))
# The response becomes a two-level factor.
data.glm$y <- as.factor(data.glm$y)
summary(data.glm)
plot(data.glm$x, data.glm$y)
set.seed(1)
glm.model <- glm="glm">