# Import packages
import os
import sys
import warnings
import pandas as pd
import numpy as np
import pyspark.pandas as ps
import matplotlib.pyplot as plt
import seaborn as sns

# Set environment
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"

# Remove warnings from rendered output
warnings.filterwarnings("ignore")


# Set figure size
plt.rcParams["figure.figsize"] = (10,7)

# Spark Session builder
from pyspark.sql import SparkSession
spark = SparkSession.builder.config("spark.debug.maxToStringFields", "100").getOrCreate()


# Read in data as pandas-on-Spark data frame
psdf_heart = ps.read_csv("heart.csv")
# Checking if import was successful
psdf_heart.head()


psdf_heart.info()

<class 'pyspark.pandas.frame.DataFrame'>
Int64Index: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int32  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int32  
 4   Cholesterol     918 non-null    int32  
 5   FastingBS       918 non-null    int32  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int32  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int32  
dtypes: float64(1), int32(6), object(5)


# Check for null values
print(psdf_heart.isnull().sum())

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64


# Make a Pyspark SQL dataframe from pandas-on-Spark df
sql_heart = psdf_heart.to_spark()


train, test = sql_heart.randomSplit([0.75, 0.25], seed = 1234)
print(train.count(), test.count())

686 232


# Make a pandas DF from PySpark SQL df
pd_train = train.toPandas()


# Make a pandas-on-Spark DF from PySpark SQL df
psdf_train = train.to_pandas_on_spark()


psdf_train[['Age','RestingBP','Cholesterol','MaxHR','Oldpeak']].describe()


# Display counts for RestingBP of 0
print(train.filter(train.RestingBP == 0).count())

1


# Display counts for Cholesterol of 0 and respective ratios for training and test data
print(train.filter(train.Cholesterol == 0).count(),
      train.filter(train.Cholesterol == 0)
      .filter(train.HeartDisease == 1).count()/
      train.filter(train.HeartDisease == 1).count())

135 0.3151041666666667


psdf_train.corr().style.background_gradient(cmap='coolwarm').set_precision(3)


cat_data = pd_train[['HeartDisease', 'Sex', 'ChestPainType', 'FastingBS','RestingECG', 'ExerciseAngina', 'ST_Slope']]


num_data = [col for col in pd_train.columns if col not in cat_data]
num_data

['Age', 'RestingBP', 'Cholesterol', 'MaxHR', 'Oldpeak']


sns.pairplot(pd_train[num_data+['HeartDisease']],
             plot_kws={'alpha': 0.5});


psdf_summ_group = psdf_train.groupby('HeartDisease').agg(
    {'Age': ['mean', 'std'], 'RestingBP': ['mean', 'std'], 'Cholesterol': ['mean', 'std'],
     'MaxHR': ['mean', 'std'], 'Oldpeak': ['mean', 'std']})
psdf_summ_group


psdf_summ_sex = psdf_train.groupby('Sex').agg(
    {'Age': ['mean', 'std'], 'RestingBP': ['mean', 'std'], 'Cholesterol': ['mean', 'std'],
     'MaxHR': ['mean', 'std'], 'Oldpeak': ['mean', 'std']})
psdf_summ_sex


pd_train.Age.hist(bins = 10)
plt.xlabel("Age")
plt.title("Histogram of Age of Participants")
plt.show()


pd_train.RestingBP.hist(bins = 20)
plt.xlabel("RestingBP")
plt.title("Histogram of Participant Resting Blood Pressure")
plt.show()


pd_train.Cholesterol.hist(bins = 20)
plt.xlabel("Cholesterol")
plt.title("Histogram of Participant Cholesterol Measurement")
plt.show()


pd_train.MaxHR.hist(bins = 20)
plt.xlabel("MaxHR")
plt.title("Histogram of Participant Maximum Heart Rate")
plt.show()

Exception ignored in: <function JavaWrapper.__del__ at 0x0000028BD71B7D30>
Traceback (most recent call last):
  File "C:\Users\kar_d\anaconda3\lib\site-packages\pyspark\ml\wrapper.py", line 39, in __del__
    if SparkContext._active_spark_context and self._java_obj is not None:
AttributeError: 'OneHotEncoder' object has no attribute '_java_obj'


pd_train.Oldpeak.hist(bins = 50)
plt.xlabel("Oldpeak")
plt.title("Histogram of Participant Oldpeak")
plt.show()


pd_train.boxplot(column = ['Age'], by = 'HeartDisease').legend(('0=heart Disease Non-Carriers',"1=heart Disease Carriers"),loc='upper center')
plt.show()


pd_train.boxplot(column = ['RestingBP'], by = 'HeartDisease').legend(('0=heart Disease Non-Carriers',"1=heart Disease Carriers"),loc='upper center')
plt.show()


pd_train.boxplot(column = ['Cholesterol'], by = 'HeartDisease').legend(('0=heart Disease Non-Carriers',"1=heart Disease Carriers"),loc='upper center')
plt.show()


pd_train.boxplot(column = ['MaxHR'], by = 'HeartDisease').legend(('0=heart Disease Non-Carriers',"1=heart Disease Carriers"),loc='upper center')
plt.show()


pd_train.boxplot(column = ['Oldpeak'], by = 'HeartDisease').legend(('0=heart Disease Non-Carriers',"1=heart Disease Carriers"),loc='upper center')
plt.show()


train.crosstab('HeartDisease', 'Sex').show()

+----------------+---+---+
|HeartDisease_Sex|  F|  M|
+----------------+---+---+
|               1| 38|346|
|               0|104|198|
+----------------+---+---+


train.crosstab('HeartDisease', 'ChestPainType').show()

+--------------------------+---+---+---+---+
|HeartDisease_ChestPainType|ASY|ATA|NAP| TA|
+--------------------------+---+---+---+---+
|                         1|299| 18| 53| 14|
|                         0| 78|106| 97| 21|
+--------------------------+---+---+---+---+


train.crosstab('HeartDisease', 'FastingBS').show()

+----------------------+---+---+
|HeartDisease_FastingBS|  0|  1|
+----------------------+---+---+
|                     1|250|134|
|                     0|269| 33|
+----------------------+---+---+


train.crosstab('HeartDisease', 'RestingECG').show()

+-----------------------+---+------+---+
|HeartDisease_RestingECG|LVH|Normal| ST|
+-----------------------+---+------+---+
|                      1| 69|   219| 96|
|                      0| 65|   194| 43|
+-----------------------+---+------+---+


train.crosstab('HeartDisease', 'ExerciseAngina').show()

+---------------------------+---+---+
|HeartDisease_ExerciseAngina|  N|  Y|
+---------------------------+---+---+
|                          1|144|240|
|                          0|262| 40|
+---------------------------+---+---+


train.crosstab('HeartDisease', 'ST_Slope').show()

+---------------------+----+----+---+
|HeartDisease_ST_Slope|Down|Flat| Up|
+---------------------+----+----+---+
|                    1|  36| 282| 66|
|                    0|   9|  59|234|
+---------------------+----+----+---+


table = pd.crosstab(cat_data.HeartDisease, cat_data.Sex)
table.plot.bar()
plt.title("Bar Plot of Heart Disease by Sex")
plt.show()


table = pd.crosstab(cat_data.HeartDisease, cat_data.ChestPainType)
table.plot.bar()
plt.title("Bar Plot of Heart Disease by Chest Pain Type")
plt.show()


table = pd.crosstab(cat_data.HeartDisease, cat_data.FastingBS)
table.plot.bar()
plt.title("Bar Plot of Heart Disease by Fasting BS")
plt.show()


table = pd.crosstab(cat_data.HeartDisease, cat_data.RestingECG)
table.plot.bar()
plt.title("Bar Plot of Heart Disease by Resting ECG")
plt.show()


plt.style.use('fivethirtyeight')
table = pd.crosstab(cat_data.HeartDisease, cat_data.ExerciseAngina)
table.plot.bar()
plt.title("Bar Plot of Heart Disease by Exercise Angina")
plt.legend(loc="upper center")
plt.show()


plt.style.use('fivethirtyeight')
table = pd.crosstab(cat_data.HeartDisease, cat_data.ST_Slope)
table.plot.bar()
plt.title("Bar Plot of Heart Disease by ST Slope")
plt.show()


import seaborn as sns
sns.scatterplot(pd_train['Age'],pd_train['Oldpeak'], hue=pd_train['HeartDisease'])

<AxesSubplot:xlabel='Age', ylabel='Oldpeak'>


sns.scatterplot(pd_train['MaxHR'],pd_train['ChestPainType'], hue=pd_train['HeartDisease'])

<AxesSubplot:xlabel='MaxHR', ylabel='ChestPainType'>


sns.violinplot(x=cat_data["ChestPainType"],y=pd_train["MaxHR"],hue=cat_data["HeartDisease"],palette="viridis")
plt.xlabel("Chest Pain Type")
plt.ylabel("Maximum heart rate achieved")
plt.title("Maximum heart rate achieved vs Chest Pain Type vs Heart Disease Carrier")
plt.legend(loc=4);


#import required library
from pyspark.ml.feature import StringIndexer, OneHotEncoder, SQLTransformer, VectorAssembler


sex_indexer = StringIndexer(inputCol = "Sex", outputCol="SexIndex")

chestPain_indexer = StringIndexer(inputCol = "ChestPainType", outputCol="ChestPainTypeIndex")

RestingECG_indexer = StringIndexer(inputCol = "RestingECG", outputCol="RestingECGIndex")

ExerciseAngina_indexer = StringIndexer(inputCol = "ExerciseAngina", outputCol="ExerciseAnginaIndex")

ST_Slope_indexer  = StringIndexer(inputCol = "ST_Slope", outputCol="ST_SlopeIndex")


encoder = OneHotEncoder().setInputCols(["SexIndex", "ChestPainTypeIndex", "RestingECGIndex",
                                        "ExerciseAnginaIndex", "ST_SlopeIndex"])\
                                 .setOutputCols(["Sex_encoded", "ChestPainType_encoded",
                                                 "RestingECG_encoded", "ExerciseAngina_encoded",
                                                 "ST_Slope_encoded"])


sqlTrans1 = SQLTransformer(
    statement = "SELECT Age, Sex_encoded, ChestPainType_encoded,"+
                "log(RestingBP+1) as log_RestingBP," + 
                "log(Cholesterol+1) as log_Cholesterol, FastingBS, RestingECG_encoded," +
                "log(MaxHR) as log_MaxHR, ExerciseAngina_encoded, Oldpeak," +
                "ST_Slope_encoded," +
                "HeartDisease as label FROM __THIS__"
)


assembler = VectorAssembler(inputCols = ["Age","Sex_encoded", "ChestPainType_encoded","FastingBS","RestingECG_encoded",
                                        "ExerciseAngina_encoded","Oldpeak","ST_Slope_encoded","log_MaxHR","log_RestingBP","log_Cholesterol"],
                            outputCol = "features",
                            handleInvalid = 'keep')


from pyspark.ml.classification import LogisticRegression
lr = LogisticRegression()


from pyspark.ml import Pipeline
pipeline = Pipeline(stages = [sex_indexer, chestPain_indexer, RestingECG_indexer,
                              ExerciseAngina_indexer, ST_Slope_indexer, encoder,
                              sqlTrans1,  assembler, lr])
model = pipeline.fit(sql_heart).transform(sql_heart)
model.select("label", "features").show()

+-----+--------------------+
|label|            features|
+-----+--------------------+
|    0|[40.0,1.0,0.0,0.0...|
|    1|[49.0,0.0,0.0,1.0...|
|    0|(15,[0,1,4,8,11,1...|
|    1|(15,[0,2,6,9,10,1...|
|    0|[54.0,1.0,0.0,1.0...|
|    0|[39.0,1.0,0.0,1.0...|
|    0|(15,[0,4,6,8,11,1...|
|    0|[54.0,1.0,0.0,0.0...|
|    1|[37.0,1.0,1.0,0.0...|
|    0|(15,[0,4,6,8,11,1...|
|    0|(15,[0,3,6,8,11,1...|
|    1|(15,[0,1,4,9,10,1...|
|    0|[39.0,1.0,0.0,0.0...|
|    1|[49.0,1.0,1.0,0.0...|
|    0|(15,[0,3,8,11,12,...|
|    0|[54.0,0.0,0.0,0.0...|
|    1|[38.0,1.0,1.0,0.0...|
|    0|(15,[0,4,6,8,11,1...|
|    1|[60.0,1.0,1.0,0.0...|
|    1|[36.0,1.0,0.0,0.0...|
+-----+--------------------+
only showing top 20 rows


from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.evaluation import BinaryClassificationEvaluator

paramGrid = (ParamGridBuilder()
             .addGrid(lr.regParam, [0.01, 0.1, 0.5, 1.0, 2.0])
             .addGrid(lr.elasticNetParam, [0.0, 0.25, 0.5, 0.75, 1.0])
             .addGrid(lr.maxIter, [1, 5, 10, 20, 50])
             .build())


# Evaluate model
lrevaluator = BinaryClassificationEvaluator()

# Create 5-fold CrossValidator
crossval = CrossValidator(estimator = pipeline,
                          estimatorParamMaps = paramGrid,
                          evaluator = lrevaluator,
                          numFolds = 5)


cvmodel = crossval.fit(train)


# check which model is best
list(zip(cvmodel.avgMetrics, paramGrid))

[(0.921698236655667,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.921876533412266,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9253820866178969,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9231798740796417,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9228313885886124,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9217520666549949,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9199346225920013,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9206359851919063,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9229306982354585,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9221178894247135,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9215982780450014,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9200155520121336,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9207239794636595,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9235616160296888,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9227068378545418,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9216927707575282,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9205062484893496,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9206764960828429,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9217284815436905,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9224004483985779,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9216990345602316,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9213131743075629,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.920880294362944,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9238808776379728,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9241391446709429,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.01,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.921698236655667,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9218508175035522,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9244825533299477,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9243307067255726,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9249160927743874,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9220537329751081,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.919317068002891,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.92019071555804,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9256304374225555,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9241867701942522,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9207711003828134,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9177818330553108,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9187030441093909,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9219464268113003,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9203431651041946,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.919006094267317,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9149693355269335,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.915611839718522,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9155929586882892,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9175641589905652,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9157544718287107,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.909549044425528,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9052989932359772,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9035818069180428,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9055935834347858,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.1,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.921698236655667,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9214301633822402,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9235642257233458,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9237412199002757,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9248698194438002,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.9084701515090448,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9032914986959106,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.901107403245561,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8995250455244649,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9008647731590198,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.8400116447064009,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.8373774271437777,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.8391005740423633,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8241198861175963,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.8241198861175963,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.921698236655667,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9212270623180709,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9230339581265148,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.923257114480347,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9241171933977016,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.8400116447064009,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.8400116447064009,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.8400116447064009,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8374548265245827,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.8400116447064009,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.921698236655667,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.9217135653603661,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.9222601536649523,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9219214918776323,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9228879198999813,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.25,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.5,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.75,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 1}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 5}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.5,
  {Param(parent='LogisticRegression_09daa73064c4', name='regParam', doc='regularization parameter (>= 0).'): 2.0,
   Param(parent='LogisticRegression_09daa73064c4', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 1.0,
   Param(parent='LogisticRegression_09daa73064c4', name='maxIter', doc='max number of iterations (>= 0).'): 50})]


# use the best model

cvmodel.transform(test).show(5)

+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
|Age|  Sex_encoded|ChestPainType_encoded|     log_RestingBP|  log_Cholesterol|FastingBS|RestingECG_encoded|        log_MaxHR|ExerciseAngina_encoded|Oldpeak|ST_Slope_encoded|label|            features|       rawPrediction|         probability|prediction|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
| 29|(1,[0],[1.0])|        (3,[2],[1.0])| 4.795790545596741|5.497168225293202|        0|     (2,[0],[1.0])|5.075173815233827|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|[29.0,1.0,0.0,0.0...|[2.08694552674690...|[0.88962786326173...|       0.0|
| 30|    (1,[],[])|            (3,[],[])|  5.14166355650266|5.472270673671475|        0|     (2,[1],[1.0])|5.135798437050262|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|(15,[0,7,8,11,12,...|[2.36858881296343...|[0.91440046835339...|       0.0|
| 35|    (1,[],[])|        (3,[0],[1.0])|4.9344739331306915|5.214935757608986|        0|     (2,[0],[1.0])|5.204006687076795|         (1,[0],[1.0])|    1.4|   (2,[1],[1.0])|    0|[35.0,0.0,1.0,0.0...|[1.26141899229498...|[0.77927028268269...|       0.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|              0.0|        1|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.2|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[-2.5756088964456...|[0.07072478302241...|       1.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|5.293304824724492|        0|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.6|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[-1.4607612263304...|[0.18835092524094...|       1.0|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
only showing top 5 rows


lrROC = lrevaluator.evaluate(cvmodel.transform(test))
print(lrROC)

0.9363052568697732


### Random Forest
from pyspark.ml.classification import RandomForestClassifier

rf = RandomForestClassifier(featuresCol = 'features', labelCol = 'label')
pipeline = Pipeline(stages = [sex_indexer, chestPain_indexer, RestingECG_indexer,
                              ExerciseAngina_indexer, ST_Slope_indexer, encoder,
                              sqlTrans1,  assembler, rf])
model = pipeline.fit(sql_heart).transform(sql_heart)
model.select("label", "features").show(2)

+-----+--------------------+
|label|            features|
+-----+--------------------+
|    0|[40.0,1.0,0.0,0.0...|
|    1|[49.0,0.0,0.0,1.0...|
+-----+--------------------+
only showing top 2 rows


paramGrid = ParamGridBuilder() \
    .addGrid(rf.numTrees, [int(x) for x in np.linspace(start = 10, stop = 50, num = 3)]) \
    .addGrid(rf.maxDepth, [int(x) for x in np.linspace(start = 5, stop = 25, num = 3)]) \
    .build()
# Evaluate model
rfevaluator = BinaryClassificationEvaluator()

# Create 5-fold CrossValidator
crossval = CrossValidator(estimator = pipeline,
                          estimatorParamMaps = paramGrid,
                          evaluator = rfevaluator,
                          numFolds = 5)

cvmodel = crossval.fit(train)


# check which model is best
list(zip(cvmodel.avgMetrics, paramGrid))

[(0.9111788690211999,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 10,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 5}),
 (0.8972880540427625,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 10,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 15}),
 (0.8969978063647439,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 10,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 25}),
 (0.921390158382225,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 30,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 5}),
 (0.9194762757480442,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 30,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 15}),
 (0.9194762757480442,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 30,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 25}),
 (0.9226541832127487,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 50,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 5}),
 (0.9246970601500079,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 50,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 15}),
 (0.9248342566413711,
  {Param(parent='RandomForestClassifier_1f369599bd84', name='numTrees', doc='Number of trees to train (>= 1).'): 50,
   Param(parent='RandomForestClassifier_1f369599bd84', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 25})]


# use the best model

cvmodel.transform(test).show(5)

+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
|Age|  Sex_encoded|ChestPainType_encoded|     log_RestingBP|  log_Cholesterol|FastingBS|RestingECG_encoded|        log_MaxHR|ExerciseAngina_encoded|Oldpeak|ST_Slope_encoded|label|            features|       rawPrediction|         probability|prediction|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
| 29|(1,[0],[1.0])|        (3,[2],[1.0])| 4.795790545596741|5.497168225293202|        0|     (2,[0],[1.0])|5.075173815233827|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|[29.0,1.0,0.0,0.0...|[49.8957446062709...|[0.99791489212541...|       0.0|
| 30|    (1,[],[])|            (3,[],[])|  5.14166355650266|5.472270673671475|        0|     (2,[1],[1.0])|5.135798437050262|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|(15,[0,7,8,11,12,...|[43.9101496768557...|[0.87820299353711...|       0.0|
| 35|    (1,[],[])|        (3,[0],[1.0])|4.9344739331306915|5.214935757608986|        0|     (2,[0],[1.0])|5.204006687076795|         (1,[0],[1.0])|    1.4|   (2,[1],[1.0])|    0|[35.0,0.0,1.0,0.0...|[31.9546509845149...|[0.63909301969029...|       0.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|              0.0|        1|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.2|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[3.58490108022818...|[0.07169802160456...|       1.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|5.293304824724492|        0|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.6|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[3.07678419711129...|[0.06153568394222...|       1.0|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
only showing top 5 rows


rfROC = lrevaluator.evaluate(cvmodel.transform(test))
print(rfROC)

0.9318623058542413


from pyspark.ml.classification import GBTClassifier
gbt = GBTClassifier(maxIter=10)


pipeline = Pipeline(stages = [sex_indexer, chestPain_indexer, RestingECG_indexer,
                              ExerciseAngina_indexer, ST_Slope_indexer, encoder,
                              sqlTrans1,  assembler, gbt])
model = pipeline.fit(sql_heart).transform(sql_heart)
model.select("label", "features").show(2)

+-----+--------------------+
|label|            features|
+-----+--------------------+
|    0|[40.0,1.0,0.0,0.0...|
|    1|[49.0,0.0,0.0,1.0...|
+-----+--------------------+
only showing top 2 rows


# Create 5-fold CrossValidator

paramGrid = (ParamGridBuilder()
             .addGrid(gbt.maxDepth, [2, 4, 6])
             .addGrid(gbt.maxBins, [20, 60])
             .addGrid(gbt.maxIter, [10, 20])
             .build())

crossval = CrossValidator(estimator = pipeline,
                          estimatorParamMaps = paramGrid,
                          evaluator = evaluator,
                          numFolds = 5)

cvmodel = crossval.fit(train)


# check which model is best
list(zip(cvmodel.avgMetrics, paramGrid))

[(0.9096798679414457,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9156012613588078,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9085977987092215,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9135681150201598,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9114822513993694,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9171626983143366,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.9047018979423522,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.9098797841912523,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.8851504202139524,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.886531155652834,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.8704223486989228,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8803257386042476,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20})]


# use the best model

cvmodel.transform(test).show(5)

+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
|Age|  Sex_encoded|ChestPainType_encoded|     log_RestingBP|  log_Cholesterol|FastingBS|RestingECG_encoded|        log_MaxHR|ExerciseAngina_encoded|Oldpeak|ST_Slope_encoded|label|            features|       rawPrediction|         probability|prediction|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
| 29|(1,[0],[1.0])|        (3,[2],[1.0])| 4.795790545596741|5.497168225293202|        0|     (2,[0],[1.0])|5.075173815233827|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|[29.0,1.0,0.0,0.0...|[1.29550740802887...|[0.93028106501865...|       0.0|
| 30|    (1,[],[])|            (3,[],[])|  5.14166355650266|5.472270673671475|        0|     (2,[1],[1.0])|5.135798437050262|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|(15,[0,7,8,11,12,...|[0.84109146598563...|[0.84319337096572...|       0.0|
| 35|    (1,[],[])|        (3,[0],[1.0])|4.9344739331306915|5.214935757608986|        0|     (2,[0],[1.0])|5.204006687076795|         (1,[0],[1.0])|    1.4|   (2,[1],[1.0])|    0|[35.0,0.0,1.0,0.0...|[0.96705524005898...|[0.87370369024086...|       0.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|              0.0|        1|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.2|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[-1.1716780737366...|[0.08759531203936...|       1.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|5.293304824724492|        0|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.6|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[-1.2415565571794...|[0.07705052522522...|       1.0|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
only showing top 5 rows


gbtROC = evaluator.evaluate(cvmodel.transform(test))
print(gbtROC)

0.9171146953405022


from pyspark.ml.classification import DecisionTreeClassifier
dt = DecisionTreeClassifier(featuresCol = 'features', labelCol = 'label',  maxDepth = 3)


pipeline = Pipeline(stages = [sex_indexer, chestPain_indexer, RestingECG_indexer,
                              ExerciseAngina_indexer, ST_Slope_indexer, encoder,
                              sqlTrans1,  assembler, dt])
model = pipeline.fit(sql_heart).transform(sql_heart)
model.select("label", "features").show(2)

+-----+--------------------+
|label|            features|
+-----+--------------------+
|    0|[40.0,1.0,0.0,0.0...|
|    1|[49.0,0.0,0.0,1.0...|
+-----+--------------------+
only showing top 2 rows


dtevaluator = BinaryClassificationEvaluator()

# Create ParamGrid for Cross Validation
dtparamGrid = (ParamGridBuilder()
             .addGrid(dt.maxDepth, [2, 5, 10])
             .addGrid(dt.maxBins, [10, 20, 40, 80, 100])
             .build())


# Create 5-fold CrossValidator
crossval = CrossValidator(estimator = pipeline,
                          estimatorParamMaps = dtparamGrid,
                          evaluator = dtevaluator,
                          numFolds = 5)

cvmodel = crossval.fit(train)


# check which model is best
list(zip(cvmodel.avgMetrics, paramGrid))

[(0.7792443486530496,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.7792443486530496,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.7792443486530496,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.7792443486530496,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 2,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.7792443486530496,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8434679871865438,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.8204128078958767,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8073469584329684,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 4,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.7986465938728258,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.7945293721569198,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 20,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20}),
 (0.8307560098307648,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 10}),
 (0.8255715819650933,
  {Param(parent='GBTClassifier_5c51dd42dc44', name='maxDepth', doc='Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. Must be in range [0, 30].'): 6,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxBins', doc='Max number of bins for discretizing continuous features.  Must be >=2 and >= number of categories for any categorical feature.'): 60,
   Param(parent='GBTClassifier_5c51dd42dc44', name='maxIter', doc='max number of iterations (>= 0).'): 20})]


# use the best model

dtpred = cvmodel.transform(test)
dtpred.show(5)

+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
|Age|  Sex_encoded|ChestPainType_encoded|     log_RestingBP|  log_Cholesterol|FastingBS|RestingECG_encoded|        log_MaxHR|ExerciseAngina_encoded|Oldpeak|ST_Slope_encoded|label|            features|       rawPrediction|         probability|prediction|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
| 29|(1,[0],[1.0])|        (3,[2],[1.0])| 4.795790545596741|5.497168225293202|        0|     (2,[0],[1.0])|5.075173815233827|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|[29.0,1.0,0.0,0.0...|[49.8957446062709...|[0.99791489212541...|       0.0|
| 30|    (1,[],[])|            (3,[],[])|  5.14166355650266|5.472270673671475|        0|     (2,[1],[1.0])|5.135798437050262|         (1,[0],[1.0])|    0.0|   (2,[1],[1.0])|    0|(15,[0,7,8,11,12,...|[43.9101496768557...|[0.87820299353711...|       0.0|
| 35|    (1,[],[])|        (3,[0],[1.0])|4.9344739331306915|5.214935757608986|        0|     (2,[0],[1.0])|5.204006687076795|         (1,[0],[1.0])|    1.4|   (2,[1],[1.0])|    0|[35.0,0.0,1.0,0.0...|[31.9546509845149...|[0.63909301969029...|       0.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|              0.0|        1|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.2|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[3.58490108022818...|[0.07169802160456...|       1.0|
| 35|(1,[0],[1.0])|        (3,[0],[1.0])| 4.795790545596741|5.293304824724492|        0|     (2,[0],[1.0])|4.867534450455582|             (1,[],[])|    1.6|   (2,[0],[1.0])|    1|[35.0,1.0,1.0,0.0...|[3.07678419711129...|[0.06153568394222...|       1.0|
+---+-------------+---------------------+------------------+-----------------+---------+------------------+-----------------+----------------------+-------+----------------+-----+--------------------+--------------------+--------------------+----------+
only showing top 5 rows


dtROC = dtevaluator.evaluate(cvmodel.transform(test))
print(dtROC)

0.8903076463560335


sns.set_style("whitegrid")
plt.figure(figsize=(16,5))
plt.yticks(np.arange(0,100,10))
plt.ylabel("AreaUnderROC")

plt.xlabel("Algorithms")
sns.lineplot(data=range(1,100), x=["Logistic",'RandomForest','GradientBosting','DecisionTree'], 
             y=[0.9363052568697732,0.9318623058542413,0.9171146953405022,0.8903076463560335])
plt.show()

	Age	RestingBP	Cholesterol	MaxHR	Oldpeak
count	686.000000	686.000000	686.000000	686.000000	686.000000
mean	53.690962	132.091837	196.749271	136.677843	0.907289
std	9.282468	18.546025	111.301411	25.526051	1.074837
min	28.000000	0.000000	0.000000	60.000000	-2.000000
25%	48.000000	120.000000	170.000000	120.000000	0.000000
50%	54.000000	130.000000	222.000000	138.000000	0.600000
75%	60.000000	140.000000	265.000000	156.000000	1.500000
max	76.000000	200.000000	603.000000	202.000000	6.200000

	Age	RestingBP	Cholesterol	FastingBS	MaxHR	Oldpeak	HeartDisease
Age	1.000	0.263	-0.114	0.217	-0.418	0.240	0.298
RestingBP	0.263	1.000	0.093	0.044	-0.113	0.151	0.103
Cholesterol	-0.114	0.093	1.000	-0.275	0.250	0.057	-0.246
FastingBS	0.217	0.044	-0.275	1.000	-0.169	0.049	0.277
MaxHR	-0.418	-0.113	0.250	-0.169	1.000	-0.185	-0.430
Oldpeak	0.240	0.151	0.057	0.049	-0.185	1.000	0.406
HeartDisease	0.298	0.103	-0.246	0.277	-0.430	0.406	1.000

	Age		RestingBP		Cholesterol		MaxHR		Oldpeak
	mean	std	mean	std	mean	std	mean	std	mean	std
HeartDisease
1	56.140625	8.400299	133.783854	20.245559	172.460938	128.289952	126.945312	23.208998	1.294271	1.161353
0	50.576159	9.429181	129.940397	15.903251	227.632450	74.434305	149.052980	22.867752	0.415232	0.692076

	Age		RestingBP		Cholesterol		MaxHR		Oldpeak
	mean	std	mean	std	mean	std	mean	std	mean	std
Sex
F	52.514085	9.956455	131.873239	19.182904	238.049296	92.569570	145.556338	22.463374	0.687324	0.978119
M	53.998162	9.082900	132.148897	18.393837	185.968750	113.313598	134.360294	25.786608	0.964706	1.092248

Algorithm	AreaUnderROC
Logistic Regression	93.63%
Random Forest	93.18%
Gradient Bosting	91.71%
Decision Tree	89.03%

Supervised Learning Model Selection
¶

MLlib Pipline and Cross-Validation Utilizing Heart Failure Data¶

Pramodini Karwande and Ashley Ko ¶

Introduction¶

Supervised Learning Idea and Data Split¶

Data Splitting¶

Exploratory Data Analysis¶

Histogram***¶

Box Plot**¶

Contingency Tables¶

Bar Plot**¶

Scatter Plot**¶

Modeling¶

Logistic Regression¶

MLlib Pipeline¶

Cross Validation¶

Evaluator¶

Random Forest Classifier¶

Gradient-Boosted Tree Classifier¶

Decision Tree Classifier¶

Conclusion¶

Whether the patient is having heart disease or not?¶

	Age	Sex	ChestPainType	RestingBP	Cholesterol	RestingECG	MaxHR	ExerciseAngina	Oldpeak	ST_Slope	HeartDisease
0	40	M	ATA	140	289	Normal	172	N	0.0	Up	0
1	49	F	NAP	160	180	Normal	156	N	1.0	Flat	1
2	37	M	ATA	130	283	ST	98	N	0.0	Up	0
3	48	F	ASY	138	214	Normal	108	Y	1.5	Flat	1
4	54	M	NAP	150	195	Normal	122	N	0.0	Up	0

Supervised Learning Model Selection ¶

MLlib Pipline and Cross-Validation Utilizing Heart Failure Data¶

Pramodini Karwande and Ashley Ko ¶

Introduction¶

Supervised Learning Idea and Data Split¶

Data Splitting¶

Exploratory Data Analysis¶

Histogram***¶

Box Plot**¶

Contingency Tables¶

Bar Plot**¶

Scatter Plot**¶

Modeling¶

Logistic Regression¶

MLlib Pipeline¶

Cross Validation¶

Evaluator¶

Random Forest Classifier¶

Gradient-Boosted Tree Classifier¶

Decision Tree Classifier¶

Conclusion¶

Whether the patient is having heart disease or not?¶

Supervised Learning Model Selection
¶