Data

We’ll use ebeer and telco data sets from before. We’ll drop the ID column, and select customers that received a mailing only.

rm(list=ls())
library(tree)
library(dplyr)
library(janitor)
library(car)
library(pROC)
library(ranger)
library(glmnet)
library(readr)

options("scipen"=200, "digits"=3)

Ebeer

# set working directory using however you want to folder where data is stored.  I'll use 
ebeer <- read_csv("ebeer.csv")

# load ebeer, remove account number column
ebeer<-ebeer[-c(1)]

Training-Test Samples:

# drop the ID column, select customers that received a mailing only
ebeer_test<-subset(ebeer, mailing ==1)

# create ebeer rollout data
ebeer_rollout<-subset(ebeer, mailing ==0)

# rename ebeer_test ebeer
ebeer<-ebeer_test

Telco

# load telco
telco <- read_csv("telco.csv")
telco <- strings2factors(telco)
## 
## The following character variables were converted to factors
##  gender Partner Dependents PhoneService MultipleLines InternetService OnlineSecurity OnlineBackup DeviceProtection TechSupport StreamingTV StreamingMovies Contract PaperlessBilling PaymentMethod Churn
# drop ID column, divide Total charges by 1000
telco<-subset(telco, select=-customerID)
telco$TotalCharges<-telco$TotalCharges/1000

Training-Test:

# create 70% test and 30% holdout sample
set.seed(19103)
n <- nrow(telco)
sample <- sample(c(TRUE, FALSE), n, replace=TRUE, prob=c(0.7, 0.3))
telco.test <- telco[sample, ]
telco.holdout <- telco[!sample, ]

#call test telco, and full data set telco.all
telco.all<-telco
telco<-telco.test

Forward selection

The biggest model we consider is with all of the variables in churn plus all the two-way interactions with the variable tenure.

full <- glm(Churn ~ . + tenure:(.), data=telco, family = "binomial")

summary(full)
## 
## Call:
## glm(formula = Churn ~ . + tenure:(.), family = "binomial", data = telco)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.041  -0.679  -0.270   0.663   3.389  
## 
## Coefficients:
##                                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                  2.11968    1.60886    1.32  0.18767    
## genderMale                                  -0.19315    0.11501   -1.68  0.09307 .  
## SeniorCitizen                                0.28498    0.15931    1.79  0.07364 .  
## PartnerYes                                  -0.03859    0.14764   -0.26  0.79383    
## DependentsYes                               -0.30125    0.16618   -1.81  0.06987 .  
## tenure                                      -0.07764    0.04432   -1.75  0.07980 .  
## PhoneServiceYes                              1.23723    1.29950    0.95  0.34106    
## MultipleLinesYes                             0.77555    0.35704    2.17  0.02984 *  
## InternetServiceFiber optic                   2.77993    1.60230    1.73  0.08275 .  
## InternetServiceNo                           -2.76954    1.61687   -1.71  0.08673 .  
## OnlineSecurityYes                            0.02269    0.35709    0.06  0.94933    
## OnlineBackupYes                              0.07578    0.35252    0.21  0.82979    
## DeviceProtectionYes                          0.67997    0.35579    1.91  0.05599 .  
## TechSupportYes                               0.04285    0.35803    0.12  0.90474    
## StreamingTVYes                               0.89952    0.65311    1.38  0.16842    
## StreamingMoviesYes                           0.92049    0.65598    1.40  0.16055    
## ContractOne year                            -1.22765    0.29419   -4.17  0.00003 ***
## ContractTwo year                            -2.96166    0.85564   -3.46  0.00054 ***
## PaperlessBillingYes                          0.35041    0.12769    2.74  0.00606 ** 
## PaymentMethodCredit card (automatic)        -0.02210    0.23438   -0.09  0.92488    
## PaymentMethodElectronic check                0.12510    0.18612    0.67  0.50147    
## PaymentMethodMailed check                   -0.06152    0.20751   -0.30  0.76689    
## MonthlyCharges                              -0.07838    0.06377   -1.23  0.21907    
## TotalCharges                                 0.13415    0.65472    0.20  0.83765    
## genderMale:tenure                            0.00755    0.00370    2.04  0.04133 *  
## SeniorCitizen:tenure                        -0.00335    0.00465   -0.72  0.47063    
## PartnerYes:tenure                            0.00554    0.00459    1.21  0.22744    
## DependentsYes:tenure                         0.00437    0.00479    0.91  0.36116    
## tenure:PhoneServiceYes                      -0.03561    0.03594   -0.99  0.32178    
## tenure:MultipleLinesYes                     -0.00858    0.01024   -0.84  0.40193    
## tenure:InternetServiceFiber optic           -0.03176    0.04395   -0.72  0.46984    
## tenure:InternetServiceNo                     0.02563    0.04552    0.56  0.57345    
## tenure:OnlineSecurityYes                    -0.00703    0.00997   -0.71  0.48047    
## tenure:OnlineBackupYes                      -0.00321    0.00987   -0.32  0.74531    
## tenure:DeviceProtectionYes                  -0.01604    0.00987   -1.63  0.10406    
## tenure:TechSupportYes                       -0.00870    0.00989   -0.88  0.37902    
## tenure:StreamingTVYes                       -0.00543    0.01807   -0.30  0.76387    
## tenure:StreamingMoviesYes                   -0.00726    0.01802   -0.40  0.68687    
## tenure:ContractOne year                      0.01247    0.00692    1.80  0.07134 .  
## tenure:ContractTwo year                      0.02349    0.01438    1.63  0.10229    
## tenure:PaperlessBillingYes                  -0.00424    0.00437   -0.97  0.33244    
## tenure:PaymentMethodCredit card (automatic) -0.00270    0.00609   -0.44  0.65695    
## tenure:PaymentMethodElectronic check         0.00386    0.00514    0.75  0.45336    
## tenure:PaymentMethodMailed check            -0.01138    0.00869   -1.31  0.19039    
## tenure:MonthlyCharges                        0.00105    0.00186    0.56  0.57317    
## tenure:TotalCharges                          0.00369    0.00183    2.02  0.04315 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 5697.6  on 4948  degrees of freedom
## Residual deviance: 3997.5  on 4903  degrees of freedom
## AIC: 4090
## 
## Number of Fisher Scoring iterations: 7

We do forward selection. The algorithm uses AIC, but we could modify it by including k=0 to consider deviance (with no penalty).

You can see it going through all the models. The <none> in the output below shows the best model (lowest AIC) at the current step.

# intercept only
null <- glm(Churn ~ 1, data=telco, family = "binomial")
start_time <- Sys.time()
fwd.model <- step(null, direction = 'forward', scope=formula(full), keep = function(model, aic) list(model = model, aic = aic))
## Start:  AIC=5700
## Churn ~ 1
## 
##                    Df Deviance  AIC
## + Contract          2     4693 4699
## + tenure            1     5020 5024
## + InternetService   2     5160 5166
## + PaymentMethod     3     5275 5283
## + TotalCharges      1     5483 5487
## + MonthlyCharges    1     5512 5516
## + PaperlessBilling  1     5519 5523
## + TechSupport       1     5536 5540
## + OnlineSecurity    1     5539 5543
## + Dependents        1     5561 5565
## + SeniorCitizen     1     5590 5594
## + Partner           1     5599 5603
## + OnlineBackup      1     5655 5659
## + StreamingTV       1     5673 5677
## + StreamingMovies   1     5678 5682
## + DeviceProtection  1     5679 5683
## + MultipleLines     1     5690 5694
## <none>                    5698 5700
## + gender            1     5697 5701
## + PhoneService      1     5697 5701
## 
## Step:  AIC=4699
## Churn ~ Contract
## 
##                    Df Deviance  AIC
## + InternetService   2     4413 4423
## + MonthlyCharges    1     4503 4511
## + PaymentMethod     3     4548 4560
## + PaperlessBilling  1     4611 4619
## + StreamingTV       1     4614 4622
## + StreamingMovies   1     4618 4626
## + tenure            1     4630 4638
## + SeniorCitizen     1     4649 4657
## + MultipleLines     1     4655 4663
## + OnlineSecurity    1     4658 4666
## + Dependents        1     4661 4669
## + TechSupport       1     4671 4679
## + DeviceProtection  1     4686 4694
## + OnlineBackup      1     4690 4698
## + Partner           1     4691 4699
## <none>                    4693 4699
## + TotalCharges      1     4692 4700
## + PhoneService      1     4693 4701
## + gender            1     4693 4701
## 
## Step:  AIC=4423
## Churn ~ Contract + InternetService
## 
##                    Df Deviance  AIC
## + tenure            1     4226 4238
## + TotalCharges      1     4286 4298
## + OnlineSecurity    1     4358 4370
## + PaymentMethod     3     4360 4376
## + TechSupport       1     4373 4385
## + OnlineBackup      1     4375 4387
## + Dependents        1     4395 4407
## + PaperlessBilling  1     4398 4410
## + MonthlyCharges    1     4401 4413
## + Partner           1     4402 4414
## + PhoneService      1     4403 4415
## + StreamingTV       1     4405 4417
## + SeniorCitizen     1     4406 4418
## + StreamingMovies   1     4406 4418
## + DeviceProtection  1     4410 4422
## <none>                    4413 4423
## + MultipleLines     1     4412 4424
## + gender            1     4413 4425
## 
## Step:  AIC=4238
## Churn ~ Contract + InternetService + tenure
## 
##                          Df Deviance  AIC
## + PaymentMethod           3     4190 4208
## + StreamingMovies         1     4197 4211
## + OnlineSecurity          1     4198 4212
## + StreamingTV             1     4198 4212
## + TechSupport             1     4203 4217
## + tenure:Contract         2     4202 4218
## + PaperlessBilling        1     4205 4219
## + TotalCharges            1     4209 4223
## + SeniorCitizen           1     4210 4224
## + PhoneService            1     4211 4225
## + Dependents              1     4214 4228
## + MultipleLines           1     4215 4229
## + tenure:InternetService  2     4219 4235
## + OnlineBackup            1     4221 4235
## <none>                          4226 4238
## + DeviceProtection        1     4224 4238
## + MonthlyCharges          1     4225 4239
## + Partner                 1     4226 4240
## + gender                  1     4226 4240
## 
## Step:  AIC=4208
## Churn ~ Contract + InternetService + tenure + PaymentMethod
## 
##                          Df Deviance  AIC
## + OnlineSecurity          1     4166 4186
## + StreamingMovies         1     4168 4188
## + StreamingTV             1     4168 4188
## + tenure:Contract         2     4167 4189
## + TechSupport             1     4171 4191
## + TotalCharges            1     4172 4192
## + PaperlessBilling        1     4173 4193
## + PhoneService            1     4178 4198
## + SeniorCitizen           1     4178 4198
## + tenure:PaymentMethod    3     4175 4199
## + MultipleLines           1     4180 4200
## + Dependents              1     4181 4201
## + tenure:InternetService  2     4181 4203
## + OnlineBackup            1     4186 4206
## <none>                          4190 4208
## + DeviceProtection        1     4188 4208
## + MonthlyCharges          1     4189 4209
## + gender                  1     4190 4210
## + Partner                 1     4190 4210
## 
## Step:  AIC=4186
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity
## 
##                          Df Deviance  AIC
## + TotalCharges            1     4144 4166
## + tenure:Contract         2     4143 4167
## + StreamingMovies         1     4145 4167
## + StreamingTV             1     4146 4168
## + TechSupport             1     4150 4172
## + PaperlessBilling        1     4151 4173
## + SeniorCitizen           1     4156 4178
## + PhoneService            1     4156 4178
## + tenure:PaymentMethod    3     4152 4178
## + MultipleLines           1     4156 4178
## + tenure:InternetService  2     4156 4180
## + Dependents              1     4158 4180
## + MonthlyCharges          1     4163 4185
## + OnlineBackup            1     4163 4185
## <none>                          4166 4186
## + tenure:OnlineSecurity   1     4165 4187
## + DeviceProtection        1     4165 4187
## + Partner                 1     4166 4188
## + gender                  1     4166 4188
## 
## Step:  AIC=4166
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges
## 
##                          Df Deviance  AIC
## + TechSupport             1     4121 4145
## + PhoneService            1     4126 4150
## + tenure:TotalCharges     1     4129 4153
## + PaperlessBilling        1     4129 4153
## + tenure:Contract         2     4128 4154
## + StreamingMovies         1     4133 4157
## + SeniorCitizen           1     4133 4157
## + StreamingTV             1     4133 4157
## + Dependents              1     4136 4160
## + OnlineBackup            1     4137 4161
## + MultipleLines           1     4138 4162
## + tenure:PaymentMethod    3     4137 4165
## <none>                          4144 4166
## + tenure:InternetService  2     4140 4166
## + tenure:OnlineSecurity   1     4143 4167
## + MonthlyCharges          1     4144 4168
## + DeviceProtection        1     4144 4168
## + Partner                 1     4144 4168
## + gender                  1     4144 4168
## 
## Step:  AIC=4145
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport
## 
##                          Df Deviance  AIC
## + PhoneService            1     4103 4129
## + PaperlessBilling        1     4106 4132
## + tenure:Contract         2     4106 4134
## + StreamingMovies         1     4108 4134
## + StreamingTV             1     4109 4135
## + tenure:TotalCharges     1     4110 4136
## + SeniorCitizen           1     4112 4138
## + OnlineBackup            1     4114 4140
## + Dependents              1     4114 4140
## + tenure:InternetService  2     4114 4142
## + MultipleLines           1     4116 4142
## <none>                          4121 4145
## + tenure:PaymentMethod    3     4115 4145
## + MonthlyCharges          1     4120 4146
## + tenure:OnlineSecurity   1     4120 4146
## + DeviceProtection        1     4120 4146
## + tenure:TechSupport      1     4120 4146
## + gender                  1     4120 4146
## + Partner                 1     4121 4147
## 
## Step:  AIC=4129
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService
## 
##                          Df Deviance  AIC
## + tenure:PhoneService     1     4088 4116
## + PaperlessBilling        1     4089 4117
## + tenure:Contract         2     4089 4119
## + MonthlyCharges          1     4092 4120
## + StreamingTV             1     4093 4121
## + tenure:InternetService  2     4091 4121
## + StreamingMovies         1     4094 4122
## + MultipleLines           1     4095 4123
## + OnlineBackup            1     4095 4123
## + tenure:TotalCharges     1     4095 4123
## + SeniorCitizen           1     4096 4124
## + Dependents              1     4097 4125
## <none>                          4103 4129
## + tenure:PaymentMethod    3     4098 4130
## + tenure:OnlineSecurity   1     4103 4131
## + DeviceProtection        1     4103 4131
## + tenure:TechSupport      1     4103 4131
## + Partner                 1     4103 4131
## + gender                  1     4103 4131
## 
## Step:  AIC=4116
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     tenure:PhoneService
## 
##                          Df Deviance  AIC
## + PaperlessBilling        1     4075 4105
## + tenure:Contract         2     4076 4108
## + MultipleLines           1     4079 4109
## + OnlineBackup            1     4079 4109
## + SeniorCitizen           1     4082 4112
## + tenure:TotalCharges     1     4082 4112
## + tenure:InternetService  2     4080 4112
## + StreamingMovies         1     4083 4113
## + Dependents              1     4083 4113
## + StreamingTV             1     4083 4113
## + MonthlyCharges          1     4083 4113
## <none>                          4088 4116
## + DeviceProtection        1     4088 4118
## + tenure:TechSupport      1     4088 4118
## + Partner                 1     4088 4118
## + tenure:OnlineSecurity   1     4088 4118
## + gender                  1     4088 4118
## + tenure:PaymentMethod    3     4085 4119
## 
## Step:  AIC=4105
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + tenure:PhoneService
## 
##                           Df Deviance  AIC
## + OnlineBackup             1     4066 4098
## + tenure:Contract          2     4064 4098
## + MultipleLines            1     4067 4099
## + tenure:TotalCharges      1     4069 4101
## + SeniorCitizen            1     4070 4102
## + StreamingMovies          1     4071 4103
## + Dependents               1     4071 4103
## + tenure:InternetService   2     4069 4103
## + StreamingTV              1     4071 4103
## + MonthlyCharges           1     4071 4103
## <none>                           4075 4105
## + tenure:PaperlessBilling  1     4074 4106
## + DeviceProtection         1     4075 4107
## + tenure:TechSupport       1     4075 4107
## + Partner                  1     4075 4107
## + tenure:OnlineSecurity    1     4075 4107
## + gender                   1     4075 4107
## + tenure:PaymentMethod     3     4072 4108
## 
## Step:  AIC=4098
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + tenure:PhoneService
## 
##                           Df Deviance  AIC
## + tenure:Contract          2     4055 4091
## + MultipleLines            1     4057 4091
## + MonthlyCharges           1     4058 4092
## + tenure:InternetService   2     4057 4093
## + SeniorCitizen            1     4060 4094
## + tenure:TotalCharges      1     4060 4094
## + Dependents               1     4061 4095
## + StreamingMovies          1     4062 4096
## + StreamingTV              1     4062 4096
## <none>                           4066 4098
## + tenure:OnlineBackup      1     4065 4099
## + tenure:PaperlessBilling  1     4065 4099
## + tenure:TechSupport       1     4065 4099
## + DeviceProtection         1     4066 4100
## + Partner                  1     4066 4100
## + gender                   1     4066 4100
## + tenure:OnlineSecurity    1     4066 4100
## + tenure:PaymentMethod     3     4063 4101
## 
## Step:  AIC=4091
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + tenure:PhoneService + Contract:tenure
## 
##                           Df Deviance  AIC
## + MonthlyCharges           1     4043 4081
## + MultipleLines            1     4045 4083
## + StreamingMovies          1     4049 4087
## + SeniorCitizen            1     4049 4087
## + StreamingTV              1     4049 4087
## + tenure:InternetService   2     4049 4089
## + Dependents               1     4051 4089
## <none>                           4055 4091
## + tenure:TechSupport       1     4054 4092
## + tenure:PaperlessBilling  1     4054 4092
## + tenure:TotalCharges      1     4054 4092
## + tenure:OnlineBackup      1     4055 4093
## + tenure:OnlineSecurity    1     4055 4093
## + Partner                  1     4055 4093
## + gender                   1     4055 4093
## + DeviceProtection         1     4055 4093
## + tenure:PaymentMethod     3     4051 4093
## 
## Step:  AIC=4081
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + MonthlyCharges + tenure:PhoneService + 
##     Contract:tenure
## 
##                           Df Deviance  AIC
## + SeniorCitizen            1     4038 4078
## + tenure:TotalCharges      1     4038 4078
## + MultipleLines            1     4039 4079
## + Dependents               1     4040 4080
## <none>                           4043 4081
## + DeviceProtection         1     4041 4081
## + tenure:OnlineBackup      1     4042 4082
## + tenure:PaymentMethod     3     4039 4083
## + tenure:TechSupport       1     4043 4083
## + tenure:PaperlessBilling  1     4043 4083
## + Partner                  1     4043 4083
## + tenure:OnlineSecurity    1     4043 4083
## + tenure:MonthlyCharges    1     4043 4083
## + gender                   1     4043 4083
## + StreamingMovies          1     4043 4083
## + StreamingTV              1     4043 4083
## + tenure:InternetService   2     4043 4085
## 
## Step:  AIC=4078
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + MonthlyCharges + SeniorCitizen + 
##     tenure:PhoneService + Contract:tenure
## 
##                           Df Deviance  AIC
## + tenure:TotalCharges      1     4033 4075
## + MultipleLines            1     4034 4076
## + Dependents               1     4036 4078
## <none>                           4038 4078
## + DeviceProtection         1     4036 4078
## + tenure:OnlineBackup      1     4036 4078
## + tenure:PaymentMethod     3     4033 4079
## + SeniorCitizen:tenure     1     4037 4079
## + tenure:TechSupport       1     4037 4079
## + tenure:PaperlessBilling  1     4037 4079
## + tenure:OnlineSecurity    1     4038 4080
## + Partner                  1     4038 4080
## + StreamingTV              1     4038 4080
## + gender                   1     4038 4080
## + tenure:MonthlyCharges    1     4038 4080
## + StreamingMovies          1     4038 4080
## + tenure:InternetService   2     4037 4081
## 
## Step:  AIC=4075
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + MonthlyCharges + SeniorCitizen + 
##     tenure:PhoneService + Contract:tenure + tenure:TotalCharges
## 
##                           Df Deviance  AIC
## + MultipleLines            1     4028 4072
## + Dependents               1     4030 4074
## <none>                           4033 4075
## + DeviceProtection         1     4031 4075
## + tenure:PaymentMethod     3     4028 4076
## + SeniorCitizen:tenure     1     4032 4076
## + tenure:OnlineBackup      1     4032 4076
## + tenure:TechSupport       1     4032 4076
## + tenure:PaperlessBilling  1     4032 4076
## + tenure:OnlineSecurity    1     4032 4076
## + Partner                  1     4033 4077
## + StreamingTV              1     4033 4077
## + gender                   1     4033 4077
## + tenure:MonthlyCharges    1     4033 4077
## + StreamingMovies          1     4033 4077
## + tenure:InternetService   2     4032 4078
## 
## Step:  AIC=4072
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + MonthlyCharges + SeniorCitizen + 
##     MultipleLines + tenure:PhoneService + Contract:tenure + tenure:TotalCharges
## 
##                           Df Deviance  AIC
## + Dependents               1     4026 4072
## <none>                           4028 4072
## + DeviceProtection         1     4027 4073
## + SeniorCitizen:tenure     1     4027 4073
## + StreamingTV              1     4027 4073
## + tenure:PaymentMethod     3     4023 4073
## + tenure:TechSupport       1     4028 4074
## + tenure:OnlineBackup      1     4028 4074
## + tenure:PaperlessBilling  1     4028 4074
## + StreamingMovies          1     4028 4074
## + tenure:MultipleLines     1     4028 4074
## + tenure:OnlineSecurity    1     4028 4074
## + Partner                  1     4028 4074
## + gender                   1     4028 4074
## + tenure:MonthlyCharges    1     4028 4074
## + tenure:InternetService   2     4028 4076
## 
## Step:  AIC=4072
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + MonthlyCharges + SeniorCitizen + 
##     MultipleLines + Dependents + tenure:PhoneService + Contract:tenure + 
##     tenure:TotalCharges
## 
##                           Df Deviance  AIC
## + Dependents:tenure        1     4024 4072
## <none>                           4026 4072
## + DeviceProtection         1     4025 4073
## + Partner                  1     4025 4073
## + SeniorCitizen:tenure     1     4025 4073
## + StreamingTV              1     4025 4073
## + tenure:PaymentMethod     3     4021 4073
## + tenure:TechSupport       1     4025 4073
## + tenure:OnlineBackup      1     4026 4074
## + tenure:PaperlessBilling  1     4026 4074
## + tenure:MultipleLines     1     4026 4074
## + StreamingMovies          1     4026 4074
## + tenure:OnlineSecurity    1     4026 4074
## + gender                   1     4026 4074
## + tenure:MonthlyCharges    1     4026 4074
## + tenure:InternetService   2     4026 4076
## 
## Step:  AIC=4072
## Churn ~ Contract + InternetService + tenure + PaymentMethod + 
##     OnlineSecurity + TotalCharges + TechSupport + PhoneService + 
##     PaperlessBilling + OnlineBackup + MonthlyCharges + SeniorCitizen + 
##     MultipleLines + Dependents + tenure:PhoneService + Contract:tenure + 
##     tenure:TotalCharges + tenure:Dependents
## 
##                           Df Deviance  AIC
## <none>                           4024 4072
## + Partner                  1     4023 4073
## + DeviceProtection         1     4023 4073
## + tenure:PaymentMethod     3     4019 4073
## + StreamingTV              1     4023 4073
## + tenure:TechSupport       1     4023 4073
## + tenure:PaperlessBilling  1     4023 4073
## + tenure:OnlineBackup      1     4023 4073
## + SeniorCitizen:tenure     1     4023 4073
## + StreamingMovies          1     4023 4073
## + tenure:OnlineSecurity    1     4023 4073
## + tenure:MultipleLines     1     4024 4074
## + gender                   1     4024 4074
## + tenure:MonthlyCharges    1     4024 4074
## + tenure:InternetService   2     4024 4076
end_time <- Sys.time()
t <- end_time-start_time

Here is the table showing the variables added per step.

fwd.model$anova
##                     Step Df Deviance Resid. Df Resid. Dev  AIC
## 1                        NA       NA      4948       5698 5700
## 2             + Contract -2  1004.41      4946       4693 4699
## 3      + InternetService -2   279.93      4944       4413 4423
## 4               + tenure -1   187.48      4943       4226 4238
## 5        + PaymentMethod -3    35.71      4940       4190 4208
## 6       + OnlineSecurity -1    23.61      4939       4166 4186
## 7         + TotalCharges -1    22.61      4938       4144 4166
## 8          + TechSupport -1    23.36      4937       4121 4145
## 9         + PhoneService -1    17.64      4936       4103 4129
## 10 + tenure:PhoneService -1    14.74      4935       4088 4116
## 11    + PaperlessBilling -1    12.70      4934       4075 4105
## 12        + OnlineBackup -1     9.76      4933       4066 4098
## 13     + tenure:Contract -2    10.69      4931       4055 4091
## 14      + MonthlyCharges -1    11.57      4930       4043 4081
## 15       + SeniorCitizen -1     5.59      4929       4038 4078
## 16 + tenure:TotalCharges -1     5.24      4928       4033 4075
## 17       + MultipleLines -1     4.51      4927       4028 4072
## 18          + Dependents -1     2.01      4926       4026 4072
## 19   + Dependents:tenure -1     2.25      4925       4024 4072

Note not all coefficients were added.

## Coeficients included in Forward Selection: 24
## Total number of coeficients: 46

Now let’s test the sequence of models with varying numbers of covariates using cross validation. We focus on OOS R2:

M <- dim(fwd.model$keep)[2]

OOS=data.frame(R2=rep(NA,M), rank=rep(NA, M))


## pred must be probabilities (0<pred<1) for binomial
deviance <- function(y, pred, family=c("gaussian","binomial")){
    family <- match.arg(family)
    if(family=="gaussian"){
      return( sum( (y-pred)^2 ) )
    }else{
      if(is.factor(y)) y <- as.numeric(y)>1
      return( -2*sum( y*log(pred) + (1-y)*log(1-pred) ) )
    }
  }

## get null devaince too, and return R2
  R2 <- function(y, pred, family=c("gaussian","binomial")){
  fam <- match.arg(family)
  if(fam=="binomial"){
    if(is.factor(y)){ y <- as.numeric(y)>1 }
  }
  dev <- deviance(y, pred, family=fam)
  dev0 <- deviance(y, mean(y), family=fam)
  return(1-dev/dev0)
  }  

for(k in 1:M){

pred = predict(fwd.model$keep[["model",k]], newdata=telco.holdout, type = "response")

OOS$R2[k]<-R2(y = telco.holdout$Churn,pred=pred, family="binomial")
OOS$rank[k]<-fwd.model$keep[["model",k]]$rank
  
  
}
ax=c(1:max(OOS$rank))
par(mai=c(.9,.8,.2,.2))
plot(x=OOS$rank, y = OOS$R2, type="b", ylab=expression(paste("Out-of-Sample R"^"2")), xlab="# of model parameters estimated (rank)", xaxt="n")
axis(1, at=ax, labels=ax)

max.idx <- which.max(OOS$R2)

OOS$rank[max.idx]
## [1] 23
abline(v=OOS$rank[max.idx], lty=3)

model <- fwd.model$keep[["model",max.idx]]

From the forward selection model path, the model with 23 coefficients has the best OOS R2.

We then “choose” this model, and re-estimate it using the entire dataset. The resulting coefficients are:

model_full_data<-glm(model$formula, data = telco.all, family = binomial(link = "logit"))

summary(model_full_data)
## 
## Call:
## glm(formula = model$formula, family = binomial(link = "logit"), 
##     data = telco.all)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.001  -0.691  -0.285   0.684   3.404  
## 
## Coefficients:
##                                      Estimate Std. Error z value        Pr(>|z|)    
## (Intercept)                          -0.52328    0.23680   -2.21          0.0271 *  
## ContractOne year                     -1.31286    0.24392   -5.38 0.0000000735218 ***
## ContractTwo year                     -2.58618    0.61182   -4.23 0.0000236777268 ***
## InternetServiceFiber optic            0.34007    0.15409    2.21          0.0273 *  
## InternetServiceNo                    -0.24861    0.19895   -1.25          0.2114    
## tenure                               -0.04705    0.00662   -7.11 0.0000000000012 ***
## PaymentMethodCredit card (automatic) -0.09320    0.11373   -0.82          0.4125    
## PaymentMethodElectronic check         0.29825    0.09460    3.15          0.0016 ** 
## PaymentMethodMailed check            -0.06041    0.11586   -0.52          0.6021    
## OnlineSecurityYes                    -0.47927    0.08646   -5.54 0.0000000297124 ***
## TotalCharges                         -0.00294    0.14259   -0.02          0.9836    
## TechSupportYes                       -0.44905    0.09063   -4.96 0.0000007232129 ***
## PhoneServiceYes                      -0.81891    0.20049   -4.08 0.0000441469786 ***
## PaperlessBillingYes                   0.33734    0.07491    4.50 0.0000066918015 ***
## OnlineBackupYes                      -0.25933    0.08017   -3.23          0.0012 ** 
## MonthlyCharges                        0.02049    0.00520    3.94 0.0000810908098 ***
## SeniorCitizen                         0.22900    0.08408    2.72          0.0065 ** 
## MultipleLinesYes                      0.22056    0.08502    2.59          0.0095 ** 
## DependentsYes                        -0.12190    0.08184   -1.49          0.1363    
## tenure:PhoneServiceYes               -0.01037    0.00658   -1.58          0.1150    
## ContractOne year:tenure               0.01695    0.00566    3.00          0.0027 ** 
## ContractTwo year:tenure               0.02054    0.01045    1.97          0.0492 *  
## tenure:TotalCharges                   0.00354    0.00134    2.64          0.0084 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8143.4  on 7031  degrees of freedom
## Residual deviance: 5791.8  on 7009  degrees of freedom
## AIC: 5838
## 
## Number of Fisher Scoring iterations: 6

LASSO in R

We have to create our own matrix of dummy variables for factors. So we do that using the model.matrix() command.

# all the factor variables
xfactors<- model.matrix(Churn ~ SeniorCitizen + Partner + Dependents + PhoneService + MultipleLines + InternetService + OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport + StreamingTV + StreamingMovies + Contract + PaperlessBilling + PaymentMethod, data = telco)

# remove intercept
xfactors<-xfactors[,-1]

# all continuous variables
x<-as.matrix(data.frame(telco$tenure, telco$MonthlyCharges, telco$TotalCharges, xfactors))                        

We then attach the continuous variables for that and run the model. \(alpha = 1\) means that we are running LASSO. \(nlambda = 200\) means we are selecting 200 grid points for \(\lambda\).We will decrease \(\lambda\) slowly until changes are small or reach 200.

lasso_telco<-glmnet(x, y=as.factor(telco$Churn), alpha = 1, family = "binomial", nlambda = 100)   

LASSO gives us a path of possible models.

par(mai=c(.9,.8,.8,.8))
par(mfrow=c(1,1))
plot(lasso_telco, xvar="lambda", label = TRUE, )

Here are the dimnames to interpret the graph. We can see that 9 is Fiber optic cable, for example, which is one of the first variables with a non-zero coefficient.

dimnames(x)[2]
## [[1]]
##  [1] "telco.tenure"                         "telco.MonthlyCharges"                 "telco.TotalCharges"                   "SeniorCitizen"                        "PartnerYes"                          
##  [6] "DependentsYes"                        "PhoneServiceYes"                      "MultipleLinesYes"                     "InternetServiceFiber.optic"           "InternetServiceNo"                   
## [11] "OnlineSecurityYes"                    "OnlineBackupYes"                      "DeviceProtectionYes"                  "TechSupportYes"                       "StreamingTVYes"                      
## [16] "StreamingMoviesYes"                   "ContractOne.year"                     "ContractTwo.year"                     "PaperlessBillingYes"                  "PaymentMethodCredit.card..automatic."
## [21] "PaymentMethodElectronic.check"        "PaymentMethodMailed.check"

Here’s the printed sequence of non-zero coefficients, \(R^2\) in terms of deviance, and \(\lambda\)

print(lasso_telco)
## 
## Call:  glmnet(x = x, y = as.factor(telco$Churn), family = "binomial",      alpha = 1, nlambda = 100) 
## 
##    Df  %Dev Lambda
## 1   0  0.00 0.1550
## 2   1  1.85 0.1420
## 3   2  3.95 0.1290
## 4   2  6.61 0.1180
## 5   3  9.04 0.1070
## 6   3 11.13 0.0976
## 7   3 12.90 0.0889
## 8   3 14.40 0.0810
## 9   4 15.71 0.0738
## 10  5 16.96 0.0673
## 11  5 18.10 0.0613
## 12  5 19.08 0.0558
## 13  5 19.94 0.0509
## 14  7 20.82 0.0464
## 15  7 21.65 0.0422
## 16  7 22.37 0.0385
## 17  9 22.99 0.0351
## 18 10 23.66 0.0320
## 19 10 24.23 0.0291
## 20 10 24.71 0.0265
## 21 10 25.13 0.0242
## 22 13 25.56 0.0220
## 23 13 25.95 0.0201
## 24 13 26.29 0.0183
## 25 13 26.57 0.0167
## 26 13 26.82 0.0152
## 27 14 27.04 0.0138
## 28 15 27.25 0.0126
## 29 15 27.44 0.0115
## 30 16 27.61 0.0105
## 31 16 27.75 0.0095
## 32 16 27.88 0.0087
## 33 16 27.98 0.0079
## 34 16 28.07 0.0072
## 35 16 28.14 0.0066
## 36 16 28.21 0.0060
## 37 16 28.26 0.0055
## 38 16 28.30 0.0050
## 39 17 28.34 0.0045
## 40 17 28.37 0.0041
## 41 17 28.40 0.0038
## 42 17 28.42 0.0034
## 43 17 28.44 0.0031
## 44 18 28.46 0.0028
## 45 20 28.50 0.0026
## 46 20 28.56 0.0024
## 47 20 28.60 0.0022
## 48 20 28.65 0.0020
## 49 20 28.68 0.0018
## 50 20 28.71 0.0016
## 51 20 28.74 0.0015
## 52 20 28.76 0.0014
## 53 20 28.78 0.0012
## 54 20 28.79 0.0011
## 55 20 28.80 0.0010
## 56 20 28.81 0.0009
## 57 20 28.82 0.0008
## 58 20 28.83 0.0008
## 59 20 28.84 0.0007
## 60 20 28.84 0.0006
## 61 21 28.85 0.0006
## 62 21 28.85 0.0005
## 63 21 28.85 0.0005
## 64 21 28.86 0.0004
## 65 21 28.86 0.0004
## 66 21 28.86 0.0004
## 67 21 28.86 0.0003
## 68 21 28.86 0.0003
## 69 21 28.87 0.0003
## 70 21 28.87 0.0003

You can also look at this in terms of \(R^2\), or deviance explained.

plot(lasso_telco, xvar = "dev", label = TRUE)

Choosing \(\lambda\)

We use K-fold cross-validation to “tune” \(\lambda\), in other words, to choose the right penalty weight \(\lambda\) that minimizes validation error.

lasso_cv <- cv.glmnet(x, y=telco$Churn, family = "binomial", type.measure = "deviance")
plot(lasso_cv)

The coefficients associated with the \(\lambda\) that minimizes error are:

coef(lasso_cv, s = "lambda.min")
## 23 x 1 sparse Matrix of class "dgCMatrix"
##                                            s1
## (Intercept)                           0.19753
## telco.tenure                         -0.05534
## telco.MonthlyCharges                  .      
## telco.TotalCharges                    0.27960
## SeniorCitizen                         0.18331
## PartnerYes                            0.06915
## DependentsYes                        -0.20694
## PhoneServiceYes                      -0.57916
## MultipleLinesYes                      0.23567
## InternetServiceFiber.optic            0.69623
## InternetServiceNo                    -0.82512
## OnlineSecurityYes                    -0.41733
## OnlineBackupYes                      -0.22499
## DeviceProtectionYes                  -0.00803
## TechSupportYes                       -0.45742
## StreamingTVYes                        0.22753
## StreamingMoviesYes                    0.19424
## ContractOne.year                     -0.76049
## ContractTwo.year                     -1.46237
## PaperlessBillingYes                   0.28493
## PaymentMethodCredit.card..automatic. -0.09086
## PaymentMethodElectronic.check         0.24163
## PaymentMethodMailed.check            -0.15133

Same but with telco: Here we can apply that model to a holdout data set

# use holdout telco data
xfactors<- model.matrix(Churn ~ SeniorCitizen + Partner + Dependents + PhoneService + MultipleLines + InternetService + OnlineSecurity + OnlineBackup + DeviceProtection + TechSupport + StreamingTV + StreamingMovies + Contract + PaperlessBilling + PaymentMethod, data = telco.holdout)
# remove intercept
xfactors<-xfactors[,-1]

# all continuous variables

x<-as.matrix(data.frame(telco.holdout$tenure, telco.holdout$MonthlyCharges, telco.holdout$TotalCharges, xfactors))
pred <- predict(lasso_cv, newx=x,  s = "lambda.min", type = "response")
churn <- as.numeric(telco.holdout$Churn)-1

head(cbind(churn, pred))
##   churn lambda.min
## 1     0    0.04427
## 2     0    0.30151
## 3     0    0.20451
## 4     1    0.43055
## 5     0    0.00892
## 6     0    0.54018
par(mfrow=c(1,1))
par(mai=c(.9,.8,.2,.2))
plot(roc(churn, pred), print.auc=TRUE, ylim=c(0,1), levels=c("Churn", "Stay"),
     col="black", lwd=1, main="ROC curve", xlab="Specificity: true negative rate",      ylab="Sensitivity: true positive rate", xlim=c(1,0), direction="<")

LS0tDQp0aXRsZTogIlR1dG9yaWFsIDQ6IFN1YnNldCBTZWxlY3Rpb24gJiBMQVNTTyINCmRhdGU6ICIyMDIzLTAxLTMwIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IFRSVUUNCiAgICB0b2NfZmxvYXQ6IFRSVUUNCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFDQotLS0NCg0KIyMjIERhdGENCg0KV2UnbGwgdXNlIGViZWVyIGFuZCB0ZWxjbyBkYXRhIHNldHMgZnJvbSBiZWZvcmUuIFdlJ2xsIGRyb3AgdGhlIElEIGNvbHVtbiwgYW5kIHNlbGVjdCBjdXN0b21lcnMgdGhhdCByZWNlaXZlZCBhIG1haWxpbmcgb25seS4NCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpybShsaXN0PWxzKCkpDQpsaWJyYXJ5KHRyZWUpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShjYXIpDQpsaWJyYXJ5KHBST0MpDQpsaWJyYXJ5KHJhbmdlcikNCmxpYnJhcnkoZ2xtbmV0KQ0KbGlicmFyeShyZWFkcikNCg0Kb3B0aW9ucygic2NpcGVuIj0yMDAsICJkaWdpdHMiPTMpDQpgYGANCg0KRWJlZXINCg0KYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQojIHNldCB3b3JraW5nIGRpcmVjdG9yeSB1c2luZyBob3dldmVyIHlvdSB3YW50IHRvIGZvbGRlciB3aGVyZSBkYXRhIGlzIHN0b3JlZC4gIEknbGwgdXNlIA0KZWJlZXIgPC0gcmVhZF9jc3YoImViZWVyLmNzdiIpDQoNCiMgbG9hZCBlYmVlciwgcmVtb3ZlIGFjY291bnQgbnVtYmVyIGNvbHVtbg0KZWJlZXI8LWViZWVyWy1jKDEpXQ0KYGBgDQoNClRyYWluaW5nLVRlc3QgU2FtcGxlczoNCg0KYGBge3J9DQojIGRyb3AgdGhlIElEIGNvbHVtbiwgc2VsZWN0IGN1c3RvbWVycyB0aGF0IHJlY2VpdmVkIGEgbWFpbGluZyBvbmx5DQplYmVlcl90ZXN0PC1zdWJzZXQoZWJlZXIsIG1haWxpbmcgPT0xKQ0KDQojIGNyZWF0ZSBlYmVlciByb2xsb3V0IGRhdGENCmViZWVyX3JvbGxvdXQ8LXN1YnNldChlYmVlciwgbWFpbGluZyA9PTApDQoNCiMgcmVuYW1lIGViZWVyX3Rlc3QgZWJlZXINCmViZWVyPC1lYmVlcl90ZXN0DQpgYGANCg0KVGVsY28NCg0KYGBge3IsIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0V9DQojIGxvYWQgdGVsY28NCnRlbGNvIDwtIHJlYWRfY3N2KCJ0ZWxjby5jc3YiKQ0KdGVsY28gPC0gc3RyaW5nczJmYWN0b3JzKHRlbGNvKQ0KDQojIGRyb3AgSUQgY29sdW1uLCBkaXZpZGUgVG90YWwgY2hhcmdlcyBieSAxMDAwDQp0ZWxjbzwtc3Vic2V0KHRlbGNvLCBzZWxlY3Q9LWN1c3RvbWVySUQpDQp0ZWxjbyRUb3RhbENoYXJnZXM8LXRlbGNvJFRvdGFsQ2hhcmdlcy8xMDAwDQpgYGANCg0KVHJhaW5pbmctVGVzdDoNCg0KYGBge3J9DQojIGNyZWF0ZSA3MCUgdGVzdCBhbmQgMzAlIGhvbGRvdXQgc2FtcGxlDQpzZXQuc2VlZCgxOTEwMykNCm4gPC0gbnJvdyh0ZWxjbykNCnNhbXBsZSA8LSBzYW1wbGUoYyhUUlVFLCBGQUxTRSksIG4sIHJlcGxhY2U9VFJVRSwgcHJvYj1jKDAuNywgMC4zKSkNCnRlbGNvLnRlc3QgPC0gdGVsY29bc2FtcGxlLCBdDQp0ZWxjby5ob2xkb3V0IDwtIHRlbGNvWyFzYW1wbGUsIF0NCg0KI2NhbGwgdGVzdCB0ZWxjbywgYW5kIGZ1bGwgZGF0YSBzZXQgdGVsY28uYWxsDQp0ZWxjby5hbGw8LXRlbGNvDQp0ZWxjbzwtdGVsY28udGVzdA0KYGBgDQoNCiMjIyBGb3J3YXJkIHNlbGVjdGlvbg0KDQpUaGUgYmlnZ2VzdCBtb2RlbCB3ZSBjb25zaWRlciBpcyB3aXRoIGFsbCBvZiB0aGUgdmFyaWFibGVzIGluIGNodXJuIHBsdXMgYWxsIHRoZSB0d28td2F5IGludGVyYWN0aW9ucyB3aXRoIHRoZSB2YXJpYWJsZSBgdGVudXJlYC4NCg0KYGBge3J9DQpmdWxsIDwtIGdsbShDaHVybiB+IC4gKyB0ZW51cmU6KC4pLCBkYXRhPXRlbGNvLCBmYW1pbHkgPSAiYmlub21pYWwiKQ0KDQpzdW1tYXJ5KGZ1bGwpDQpgYGANCg0KV2UgZG8gZm9yd2FyZCBzZWxlY3Rpb24uIFRoZSBhbGdvcml0aG0gdXNlcyBBSUMsIGJ1dCB3ZSBjb3VsZCBtb2RpZnkgaXQgYnkgaW5jbHVkaW5nIGBrPTBgIHRvIGNvbnNpZGVyIGRldmlhbmNlICh3aXRoIG5vIHBlbmFsdHkpLg0KDQpZb3UgY2FuIHNlZSBpdCBnb2luZyB0aHJvdWdoIGFsbCB0aGUgbW9kZWxzLiBUaGUgYDxub25lPmAgaW4gdGhlIG91dHB1dCBiZWxvdyBzaG93cyB0aGUgYmVzdCBtb2RlbCAobG93ZXN0IEFJQykgYXQgdGhlIGN1cnJlbnQgc3RlcC4NCg0KYGBge3IsIGNhY2hlPVRSVUV9DQojIGludGVyY2VwdCBvbmx5DQpudWxsIDwtIGdsbShDaHVybiB+IDEsIGRhdGE9dGVsY28sIGZhbWlseSA9ICJiaW5vbWlhbCIpDQpzdGFydF90aW1lIDwtIFN5cy50aW1lKCkNCmZ3ZC5tb2RlbCA8LSBzdGVwKG51bGwsIGRpcmVjdGlvbiA9ICdmb3J3YXJkJywgc2NvcGU9Zm9ybXVsYShmdWxsKSwga2VlcCA9IGZ1bmN0aW9uKG1vZGVsLCBhaWMpIGxpc3QobW9kZWwgPSBtb2RlbCwgYWljID0gYWljKSkNCmVuZF90aW1lIDwtIFN5cy50aW1lKCkNCnQgPC0gZW5kX3RpbWUtc3RhcnRfdGltZQ0KYGBgDQoNCkhlcmUgaXMgdGhlIHRhYmxlIHNob3dpbmcgdGhlIHZhcmlhYmxlcyBhZGRlZCBwZXIgc3RlcC4NCg0KYGBge3J9DQpmd2QubW9kZWwkYW5vdmENCmBgYA0KDQpOb3RlIG5vdCBhbGwgY29lZmZpY2llbnRzIHdlcmUgYWRkZWQuDQoNCmBgYHtyLCBlY2hvPUZBTFNFfQ0KY2F0KCJDb2VmaWNpZW50cyBpbmNsdWRlZCBpbiBGb3J3YXJkIFNlbGVjdGlvbjoiLCBsZW5ndGgoZndkLm1vZGVsJGNvZWZmaWNpZW50cyksICJcbiIpDQpjYXQoIlRvdGFsIG51bWJlciBvZiBjb2VmaWNpZW50czoiLGxlbmd0aChmdWxsJGNvZWZmaWNpZW50cykpDQpgYGANCg0KTm93IGxldCdzIHRlc3QgdGhlIHNlcXVlbmNlIG9mIG1vZGVscyB3aXRoIHZhcnlpbmcgbnVtYmVycyBvZiBjb3ZhcmlhdGVzIHVzaW5nIGNyb3NzIHZhbGlkYXRpb24uIFdlIGZvY3VzIG9uIE9PUyBSMjoNCg0KYGBge3IsIGNhY2hlPVRSVUV9DQpNIDwtIGRpbShmd2QubW9kZWwka2VlcClbMl0NCg0KT09TPWRhdGEuZnJhbWUoUjI9cmVwKE5BLE0pLCByYW5rPXJlcChOQSwgTSkpDQoNCg0KIyMgcHJlZCBtdXN0IGJlIHByb2JhYmlsaXRpZXMgKDA8cHJlZDwxKSBmb3IgYmlub21pYWwNCmRldmlhbmNlIDwtIGZ1bmN0aW9uKHksIHByZWQsIGZhbWlseT1jKCJnYXVzc2lhbiIsImJpbm9taWFsIikpew0KICAgIGZhbWlseSA8LSBtYXRjaC5hcmcoZmFtaWx5KQ0KICAgIGlmKGZhbWlseT09ImdhdXNzaWFuIil7DQogICAgICByZXR1cm4oIHN1bSggKHktcHJlZCleMiApICkNCiAgICB9ZWxzZXsNCiAgICAgIGlmKGlzLmZhY3Rvcih5KSkgeSA8LSBhcy5udW1lcmljKHkpPjENCiAgICAgIHJldHVybiggLTIqc3VtKCB5KmxvZyhwcmVkKSArICgxLXkpKmxvZygxLXByZWQpICkgKQ0KICAgIH0NCiAgfQ0KDQojIyBnZXQgbnVsbCBkZXZhaW5jZSB0b28sIGFuZCByZXR1cm4gUjINCiAgUjIgPC0gZnVuY3Rpb24oeSwgcHJlZCwgZmFtaWx5PWMoImdhdXNzaWFuIiwiYmlub21pYWwiKSl7DQogIGZhbSA8LSBtYXRjaC5hcmcoZmFtaWx5KQ0KICBpZihmYW09PSJiaW5vbWlhbCIpew0KICAgIGlmKGlzLmZhY3Rvcih5KSl7IHkgPC0gYXMubnVtZXJpYyh5KT4xIH0NCiAgfQ0KICBkZXYgPC0gZGV2aWFuY2UoeSwgcHJlZCwgZmFtaWx5PWZhbSkNCiAgZGV2MCA8LSBkZXZpYW5jZSh5LCBtZWFuKHkpLCBmYW1pbHk9ZmFtKQ0KICByZXR1cm4oMS1kZXYvZGV2MCkNCiAgfSAgDQoNCmZvcihrIGluIDE6TSl7DQoNCnByZWQgPSBwcmVkaWN0KGZ3ZC5tb2RlbCRrZWVwW1sibW9kZWwiLGtdXSwgbmV3ZGF0YT10ZWxjby5ob2xkb3V0LCB0eXBlID0gInJlc3BvbnNlIikNCg0KT09TJFIyW2tdPC1SMih5ID0gdGVsY28uaG9sZG91dCRDaHVybixwcmVkPXByZWQsIGZhbWlseT0iYmlub21pYWwiKQ0KT09TJHJhbmtba108LWZ3ZC5tb2RlbCRrZWVwW1sibW9kZWwiLGtdXSRyYW5rDQogIA0KICANCn0NCmF4PWMoMTptYXgoT09TJHJhbmspKQ0KcGFyKG1haT1jKC45LC44LC4yLC4yKSkNCnBsb3QoeD1PT1MkcmFuaywgeSA9IE9PUyRSMiwgdHlwZT0iYiIsIHlsYWI9ZXhwcmVzc2lvbihwYXN0ZSgiT3V0LW9mLVNhbXBsZSBSIl4iMiIpKSwgeGxhYj0iIyBvZiBtb2RlbCBwYXJhbWV0ZXJzIGVzdGltYXRlZCAocmFuaykiLCB4YXh0PSJuIikNCmF4aXMoMSwgYXQ9YXgsIGxhYmVscz1heCkNCg0KbWF4LmlkeCA8LSB3aGljaC5tYXgoT09TJFIyKQ0KDQpPT1MkcmFua1ttYXguaWR4XQ0KYWJsaW5lKHY9T09TJHJhbmtbbWF4LmlkeF0sIGx0eT0zKQ0KDQptb2RlbCA8LSBmd2QubW9kZWwka2VlcFtbIm1vZGVsIixtYXguaWR4XV0NCg0KYGBgDQoNCkZyb20gdGhlIGZvcndhcmQgc2VsZWN0aW9uIG1vZGVsIHBhdGgsIHRoZSBtb2RlbCB3aXRoIGByIE9PUyRyYW5rW21heC5pZHhdYCBjb2VmZmljaWVudHMgaGFzIHRoZSBiZXN0IE9PUyBSMi4NCg0KV2UgdGhlbiAiY2hvb3NlIiB0aGlzIG1vZGVsLCBhbmQgcmUtZXN0aW1hdGUgaXQgdXNpbmcgdGhlIGVudGlyZSBkYXRhc2V0LiBUaGUgcmVzdWx0aW5nIGNvZWZmaWNpZW50cyBhcmU6DQoNCmBgYHtyfQ0KbW9kZWxfZnVsbF9kYXRhPC1nbG0obW9kZWwkZm9ybXVsYSwgZGF0YSA9IHRlbGNvLmFsbCwgZmFtaWx5ID0gYmlub21pYWwobGluayA9ICJsb2dpdCIpKQ0KDQpzdW1tYXJ5KG1vZGVsX2Z1bGxfZGF0YSkNCmBgYA0KDQojIyMgTEFTU08gaW4gUg0KDQpXZSBoYXZlIHRvIGNyZWF0ZSBvdXIgb3duIG1hdHJpeCBvZiBkdW1teSB2YXJpYWJsZXMgZm9yIGZhY3RvcnMuIFNvIHdlIGRvIHRoYXQgdXNpbmcgdGhlIG1vZGVsLm1hdHJpeCgpIGNvbW1hbmQuDQoNCmBgYHtyfQ0KIyBhbGwgdGhlIGZhY3RvciB2YXJpYWJsZXMNCnhmYWN0b3JzPC0gbW9kZWwubWF0cml4KENodXJuIH4gU2VuaW9yQ2l0aXplbiArIFBhcnRuZXIgKyBEZXBlbmRlbnRzICsgUGhvbmVTZXJ2aWNlICsgTXVsdGlwbGVMaW5lcyArIEludGVybmV0U2VydmljZSArIE9ubGluZVNlY3VyaXR5ICsgT25saW5lQmFja3VwICsgRGV2aWNlUHJvdGVjdGlvbiArIFRlY2hTdXBwb3J0ICsgU3RyZWFtaW5nVFYgKyBTdHJlYW1pbmdNb3ZpZXMgKyBDb250cmFjdCArIFBhcGVybGVzc0JpbGxpbmcgKyBQYXltZW50TWV0aG9kLCBkYXRhID0gdGVsY28pDQoNCiMgcmVtb3ZlIGludGVyY2VwdA0KeGZhY3RvcnM8LXhmYWN0b3JzWywtMV0NCg0KIyBhbGwgY29udGludW91cyB2YXJpYWJsZXMNCng8LWFzLm1hdHJpeChkYXRhLmZyYW1lKHRlbGNvJHRlbnVyZSwgdGVsY28kTW9udGhseUNoYXJnZXMsIHRlbGNvJFRvdGFsQ2hhcmdlcywgeGZhY3RvcnMpKSAgICAgICAgICAgICAgICAgICAgICAgIA0KYGBgDQoNCldlIHRoZW4gYXR0YWNoIHRoZSBjb250aW51b3VzIHZhcmlhYmxlcyBmb3IgdGhhdCBhbmQgcnVuIHRoZSBtb2RlbC4gJGFscGhhID0gMSQgbWVhbnMgdGhhdCB3ZSBhcmUgcnVubmluZyBMQVNTTy4gJG5sYW1iZGEgPSAyMDAkIG1lYW5zIHdlIGFyZSBzZWxlY3RpbmcgMjAwIGdyaWQgcG9pbnRzIGZvciAkXGxhbWJkYSQuV2Ugd2lsbCBkZWNyZWFzZSAkXGxhbWJkYSQgc2xvd2x5IHVudGlsIGNoYW5nZXMgYXJlIHNtYWxsIG9yIHJlYWNoIDIwMC4NCg0KYGBge3J9DQpsYXNzb190ZWxjbzwtZ2xtbmV0KHgsIHk9YXMuZmFjdG9yKHRlbGNvJENodXJuKSwgYWxwaGEgPSAxLCBmYW1pbHkgPSAiYmlub21pYWwiLCBubGFtYmRhID0gMTAwKSAgIA0KYGBgDQoNCkxBU1NPIGdpdmVzIHVzIGEgKipwYXRoKiogb2YgcG9zc2libGUgbW9kZWxzLg0KDQpgYGB7cn0NCnBhcihtYWk9YyguOSwuOCwuOCwuOCkpDQpwYXIobWZyb3c9YygxLDEpKQ0KcGxvdChsYXNzb190ZWxjbywgeHZhcj0ibGFtYmRhIiwgbGFiZWwgPSBUUlVFLCApDQpgYGANCg0KSGVyZSBhcmUgdGhlIGRpbW5hbWVzIHRvIGludGVycHJldCB0aGUgZ3JhcGguIFdlIGNhbiBzZWUgdGhhdCA5IGlzIEZpYmVyIG9wdGljIGNhYmxlLCBmb3IgZXhhbXBsZSwgd2hpY2ggaXMgb25lIG9mIHRoZSBmaXJzdCB2YXJpYWJsZXMgd2l0aCBhIG5vbi16ZXJvIGNvZWZmaWNpZW50Lg0KDQpgYGB7cn0NCmRpbW5hbWVzKHgpWzJdDQpgYGANCg0KSGVyZSdzIHRoZSBwcmludGVkIHNlcXVlbmNlIG9mIG5vbi16ZXJvIGNvZWZmaWNpZW50cywgJFJeMiQgaW4gdGVybXMgb2YgZGV2aWFuY2UsIGFuZCAkXGxhbWJkYSQNCg0KYGBge3J9DQpwcmludChsYXNzb190ZWxjbykNCmBgYA0KDQpZb3UgY2FuIGFsc28gbG9vayBhdCB0aGlzIGluIHRlcm1zIG9mICRSXjIkLCBvciBkZXZpYW5jZSBleHBsYWluZWQuDQoNCmBgYHtyfQ0KcGxvdChsYXNzb190ZWxjbywgeHZhciA9ICJkZXYiLCBsYWJlbCA9IFRSVUUpDQpgYGANCg0KIyMjIENob29zaW5nICRcbGFtYmRhJA0KDQpXZSB1c2UgSy1mb2xkIGNyb3NzLXZhbGlkYXRpb24gdG8gInR1bmUiICRcbGFtYmRhJCwgaW4gb3RoZXIgd29yZHMsIHRvIGNob29zZSB0aGUgcmlnaHQgcGVuYWx0eSB3ZWlnaHQgJFxsYW1iZGEkIHRoYXQgbWluaW1pemVzIHZhbGlkYXRpb24gZXJyb3IuDQoNCmBgYHtyfQ0KbGFzc29fY3YgPC0gY3YuZ2xtbmV0KHgsIHk9dGVsY28kQ2h1cm4sIGZhbWlseSA9ICJiaW5vbWlhbCIsIHR5cGUubWVhc3VyZSA9ICJkZXZpYW5jZSIpDQpwbG90KGxhc3NvX2N2KQ0KYGBgDQoNClRoZSBjb2VmZmljaWVudHMgYXNzb2NpYXRlZCB3aXRoIHRoZSAkXGxhbWJkYSQgdGhhdCBtaW5pbWl6ZXMgZXJyb3IgYXJlOg0KDQpgYGB7cn0NCmNvZWYobGFzc29fY3YsIHMgPSAibGFtYmRhLm1pbiIpDQpgYGANCg0KU2FtZSBidXQgd2l0aCAqKnRlbGNvKio6IEhlcmUgd2UgY2FuIGFwcGx5IHRoYXQgbW9kZWwgdG8gYSBob2xkb3V0IGRhdGEgc2V0DQoNCmBgYHtyfQ0KIyB1c2UgaG9sZG91dCB0ZWxjbyBkYXRhDQp4ZmFjdG9yczwtIG1vZGVsLm1hdHJpeChDaHVybiB+IFNlbmlvckNpdGl6ZW4gKyBQYXJ0bmVyICsgRGVwZW5kZW50cyArIFBob25lU2VydmljZSArIE11bHRpcGxlTGluZXMgKyBJbnRlcm5ldFNlcnZpY2UgKyBPbmxpbmVTZWN1cml0eSArIE9ubGluZUJhY2t1cCArIERldmljZVByb3RlY3Rpb24gKyBUZWNoU3VwcG9ydCArIFN0cmVhbWluZ1RWICsgU3RyZWFtaW5nTW92aWVzICsgQ29udHJhY3QgKyBQYXBlcmxlc3NCaWxsaW5nICsgUGF5bWVudE1ldGhvZCwgZGF0YSA9IHRlbGNvLmhvbGRvdXQpDQojIHJlbW92ZSBpbnRlcmNlcHQNCnhmYWN0b3JzPC14ZmFjdG9yc1ssLTFdDQoNCiMgYWxsIGNvbnRpbnVvdXMgdmFyaWFibGVzDQoNCng8LWFzLm1hdHJpeChkYXRhLmZyYW1lKHRlbGNvLmhvbGRvdXQkdGVudXJlLCB0ZWxjby5ob2xkb3V0JE1vbnRobHlDaGFyZ2VzLCB0ZWxjby5ob2xkb3V0JFRvdGFsQ2hhcmdlcywgeGZhY3RvcnMpKQ0KYGBgDQoNCmBgYHtyfQ0KcHJlZCA8LSBwcmVkaWN0KGxhc3NvX2N2LCBuZXd4PXgsICBzID0gImxhbWJkYS5taW4iLCB0eXBlID0gInJlc3BvbnNlIikNCmNodXJuIDwtIGFzLm51bWVyaWModGVsY28uaG9sZG91dCRDaHVybiktMQ0KDQpoZWFkKGNiaW5kKGNodXJuLCBwcmVkKSkNCmBgYA0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KcGFyKG1mcm93PWMoMSwxKSkNCnBhcihtYWk9YyguOSwuOCwuMiwuMikpDQpwbG90KHJvYyhjaHVybiwgcHJlZCksIHByaW50LmF1Yz1UUlVFLCB5bGltPWMoMCwxKSwgbGV2ZWxzPWMoIkNodXJuIiwgIlN0YXkiKSwNCiAgICAgY29sPSJibGFjayIsIGx3ZD0xLCBtYWluPSJST0MgY3VydmUiLCB4bGFiPSJTcGVjaWZpY2l0eTogdHJ1ZSBuZWdhdGl2ZSByYXRlIiwgICAgICB5bGFiPSJTZW5zaXRpdml0eTogdHJ1ZSBwb3NpdGl2ZSByYXRlIiwgeGxpbT1jKDEsMCksIGRpcmVjdGlvbj0iPCIpDQpgYGANCg==