Diffstat (limited to 'backend/microservice/api/newmlservice.py')
-rw-r--r-- | backend/microservice/api/newmlservice.py | 128
1 file changed, 63 insertions(+), 65 deletions(-)
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 6a863013..99e3cae5 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -179,6 +179,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     ###CATEGORICAL COLUMNS
     kategorijskekolone=[]
     ###CONVERT NUMERIC COLUMNS TO CATEGORICAL IF THE USER MARKED THEM SO
+    columnInfo=paramsDataset['columnInfo']
     columnTypes=paramsExperiment['columnTypes']
     for i in range(len(columnInfo)):
@@ -187,25 +188,27 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
             data[col['columnName']]=data[col['columnName']].apply(str)
             kategorijskekolone.append(col['columnName'])
     #kategorijskekolone=data.select_dtypes(include=['object']).columns
-    print(kategorijskekolone)
+    #print(kategorijskekolone)
     ###NULL
-    null_value_options = paramsExperiment["nullValues"]
-    null_values_replacers = paramsExperiment["nullValuesReplacers"]
+    #null_value_options = paramsExperiment["nullValues"] #
+    null_values_replacers = paramsExperiment["nullValuesReplacers"] #{"column":"name","opt":"change type","value":"replacement value"}
-    if(null_value_options=='replace'):
+    #if(null_value_options=='replace'):
         #print("replace null")
-        dict=null_values_replacers
-        while(len(dict)>0):
-            replace=dict.pop()
-            col=replace['column']
-            opt=replace['option']
-            if(opt=='replace'):
-                replacevalue=replace['value']
-                data[col]=data[col].fillna(replacevalue)
-    elif(null_value_options=='delete_rows'):
-        data=data.dropna()
-    elif(null_value_options=='delete_columns'):
-        data=data.dropna(axis=1)
+    dict=null_values_replacers
+    while(len(dict)>0):
+        replace=dict.pop()
+        col=replace['column']
+        opt=replace['option']
+
+        if(opt=='replace'):
+            replacevalue=replace['value']
+            data[col]=data[col].fillna(replacevalue)
+
+    data=data.dropna()
+
+    #print(data)
+    #print(data.shape)
     #
@@ -237,11 +240,12 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         elif(encoding=='onehot'):
-            category_columns=[]
-            for col in data.columns:
-                if(data[col].dtype==np.object_):
-                    category_columns.append(col)
-            data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+            if((len(pd.unique(data[kolona]))>20)or (kolona==output_column)):
+                encoder=LabelEncoder()
+                data[kolona]=encoder.fit_transform(data[kolona])
+
+            else:
+                data=pd.get_dummies(data, columns=kolona, prefix=kolona)
         elif(encoding=='ordinal'):
             encoder = OrdinalEncoder()
@@ -291,18 +295,19 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         random=123
     else:
         random=0
+
+    #x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=random)
     #print(x_train,x_test)
     x, x_test, y, y_test = train_test_split(x, y, test_size=test, random_state=random, shuffle=True)
-    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, shuffle=True)
-    #
+    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=(1.0-paramsModel['validationSize']))
     # Model training
     #
     #
     ###OPTIMIZERS
     print(paramsModel['optimizer'])
     if(paramsModel['optimizer']=='Adam'):
-        opt=tf.keras.optimizers.Adam(learning_rate=3)
+        opt=tf.keras.optimizers.Adam(learning_rate=float(paramsModel['learningRate']))
     elif(paramsModel['optimizer']=='Adadelta'):
         opt=tf.keras.optimizers.Adadelta(learning_rate=float(paramsModel['learningRate']))
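Two behavioral changes land in the hunks above: the null-handling options collapse into a single path (apply any per-column replacements, then drop every remaining row containing a NaN), and the 'onehot' branch now picks an encoder per column instead of dummying every object column. Columns with more than 20 distinct values, and the output column, fall back to a LabelEncoder; everything else is one-hot encoded. A minimal standalone sketch of that heuristic, with an illustrative DataFrame and column names not taken from the service (note pandas documents a list-like columns argument for get_dummies, hence columns=[col] here):

    import pandas as pd
    from sklearn.preprocessing import LabelEncoder

    data = pd.DataFrame({
        "city":  ["NY", "LA", "NY", "SF"],    # low cardinality -> one-hot
        "label": ["yes", "no", "yes", "no"],  # output column -> label-encoded
    })
    output_column = "label"

    for col in ["city", "label"]:
        if len(pd.unique(data[col])) > 20 or col == output_column:
            data[col] = LabelEncoder().fit_transform(data[col])
        else:
            data = pd.get_dummies(data, columns=[col], prefix=col)

    print(data.columns.tolist())  # ['label', 'city_LA', 'city_NY', 'city_SF']

The Adam branch also stops hard-coding learning_rate=3 and reads paramsModel['learningRate'], matching the other optimizers.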
@@ -382,9 +387,9 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         scores = classifier.evaluate(x_test, y_test)
         #print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
-
+        '''
         classifier.save(filepath, save_format='h5')
-        metrics={}
+
         macro_averaged_precision=sm.precision_score(y_test, y_pred, average = 'macro')
         micro_averaged_precision=sm.precision_score(y_test, y_pred, average = 'micro')
         macro_averaged_recall=sm.recall_score(y_test, y_pred, average = 'macro')
@@ -392,20 +397,20 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         macro_averaged_f1=sm.f1_score(y_test, y_pred, average = 'macro')
         micro_averaged_f1=sm.f1_score(y_test, y_pred, average = 'micro')
-        metrics= {
-            "macro_averaged_precision" : float(macro_averaged_precision),
-            "micro_averaged_precision" : float(micro_averaged_precision),
-            "macro_averaged_recall" : float(macro_averaged_recall),
-            "micro_averaged_recall" : float(micro_averaged_recall),
-            "macro_averaged_f1" : float(macro_averaged_f1),
-            "micro_averaged_f1" : float(micro_averaged_f1)
-        }
-
+        metrics= [
+            {"Name":"macro_averaged_precision", "JsonValue":str(macro_averaged_precision)},
+            {"Name":"micro_averaged_precision", "JsonValue":str(micro_averaged_precision)},
+            {"Name":"macro_averaged_recall", "JsonValue":str(macro_averaged_recall)},
+            {"Name":"micro_averaged_recall", "JsonValue":str(micro_averaged_recall)},
+            {"Name":"macro_averaged_f1", "JsonValue":str(macro_averaged_f1)},
+            {"Name":"micro_averaged_f1", "JsonValue":str(micro_averaged_f1)}
+        ]
+        '''
         #visualization in Python
         #from ann_visualizer.visualize import ann_viz;
         #ann_viz(classifier, title="My neural network")
-        return filepath,hist,metrics
+        return filepath,[hist['loss'],hist['val_loss'],hist['accuracy'],hist['val_accuracy'],hist['mae'],hist['val_mae'],hist['mse'],hist['val_mse']]
     elif(problem_type=='binarni-klasifikacioni'):
         #print('*************************************************************************binarni')
@@ -443,6 +448,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
         hist=history.history
+
         y_pred=classifier.predict(x_test)
         y_pred=(y_pred>=0.5).astype('int')
@@ -451,7 +457,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         # ann_viz(classifier, title="My neural network")
         classifier.save(filepath, save_format='h5')
-
+        """
         accuracy = float(sm.accuracy_score(y_test,y_pred))
         precision = float(sm.precision_score(y_test,y_pred))
         recall = float(sm.recall_score(y_test,y_pred))
@@ -460,22 +466,9 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         f1 = float(sm.f1_score(y_test,y_pred))
         fpr, tpr, _ = sm.roc_curve(y_test,y_pred)
         logloss = float(sm.log_loss(y_test, y_pred))
-        metrics= {
-            "accuracy" : accuracy,
-            "precision" : precision,
-            "recall" : recall,
-            "specificity" : specificity,
-            "f1" : f1,
-            "tn" : float(tn),
-            "fp" : float(fp),
-            "fn" : float(fn),
-            "tp" : float(tp),
-            "fpr" : fpr.tolist(),
-            "tpr" : tpr.tolist(),
-            "logloss" : logloss
-        }
+        """
-        return filepath,hist,metrics
+        return filepath,[hist['loss'],hist['val_loss'],hist['accuracy'],hist['val_accuracy'],hist['mae'],hist['val_mae'],hist['mse'],hist['val_mse']]
     elif(problem_type=='regresioni'):
         reg=paramsModel['layers'][0]['regularisation']
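Both classification branches now disable their sklearn metrics blocks (via triple-quoted strings) and return the raw Keras training curves instead: a fixed-order list of eight series from history.history (loss, val_loss, accuracy, val_accuracy, mae, val_mae, mse, val_mse). The commented-out code also shows the metrics payload migrating from a plain dict to a list of {"Name", "JsonValue"} records, presumably to match a typed JSON model on the consuming backend (an assumption; the diff does not say). A minimal sketch of that record format, with made-up predictions:

    import sklearn.metrics as sm

    y_test = [0, 1, 1, 0, 1]
    y_pred = [0, 1, 0, 0, 1]

    # Each metric becomes a {"Name", "JsonValue"} record; the value is
    # stringified rather than sent as a number.
    metrics = [
        {"Name": "accuracy",  "JsonValue": str(sm.accuracy_score(y_test, y_pred))},
        {"Name": "precision", "JsonValue": str(sm.precision_score(y_test, y_pred))},
        {"Name": "recall",    "JsonValue": str(sm.recall_score(y_test, y_pred))},
        {"Name": "f1",        "JsonValue": str(sm.f1_score(y_test, y_pred))},
    ]
    print(metrics[0])  # {'Name': 'accuracy', 'JsonValue': '0.8'}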
@@ -507,19 +500,21 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
             classifier.add(tf.keras.layers.Dense(units=paramsModel['layers'][i+1]['neurons'],
                 activation=paramsModel['layers'][i+1]['activationFunction'],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-th hidden layer
-        classifier.add(tf.keras.layers.Dense(units=1,activation=paramsModel['outputLayerActivationFunction']))
+        classifier.add(tf.keras.layers.Dense(units=1))
-        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics = ['accuracy','mae','mse'])
+        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics = ['mae','mse'])
         history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
         hist=history.history
+
         y_pred=classifier.predict(x_test)
         #print(classifier.evaluate(x_test, y_test))
         classifier.save(filepath, save_format='h5')
-
+        '''
         mse = float(sm.mean_squared_error(y_test,y_pred))
+
         mae = float(sm.mean_absolute_error(y_test,y_pred))
         mape = float(sm.mean_absolute_percentage_error(y_test,y_pred))
         rmse = float(np.sqrt(sm.mean_squared_error(y_test,y_pred)))
@@ -530,16 +525,19 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         n = 40
         k = 2
         adj_r2 = float(1 - ((1-r2)*(n-1)/(n-k-1)))
-        metrics= {"mse" : mse,
-            "mae" : mae,
-            "mape" : mape,
-            "rmse" : rmse,
-            "rmsle" : rmsle,
-            "r2" : r2,
-            "adj_r2" : adj_r2
-        }
-
-        return filepath,hist,metrics
+
+        metrics= [
+            {"Name":"mse", "JsonValue":str(mse)},
+            {"Name":"mae", "JsonValue":str(mae)},
+            {"Name":"mape", "JsonValue":str(mape)},
+            {"Name":"rmse", "JsonValue":str(rmse)},
+            {"Name":"rmsle", "JsonValue":str(rmsle)},
+            {"Name":"r2", "JsonValue":str(r2)},
+            {"Name":"adj_r2", "JsonValue":str(adj_r2)}
+        ]
+        '''
+        return filepath,[hist['loss'],hist['val_loss'],[],[],hist['mae'],hist['val_mae'],hist['mse'],hist['val_mse']]
 def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
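The regression branch drops the configurable output activation (a single linear unit is the standard head for continuous targets) and removes 'accuracy' from the compile metrics, which is why its return value pads the two accuracy slots with empty lists. A minimal sketch of the resulting setup, assuming TensorFlow 2.x, with illustrative layer sizes and synthetic data:

    import numpy as np
    import tensorflow as tf

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(8, activation='relu', input_shape=(4,)),
        tf.keras.layers.Dense(1),  # no activation: linear output for regression
    ])
    model.compile(loss='mean_squared_error', optimizer='adam',
                  metrics=['mae', 'mse'])

    x = np.random.rand(32, 4).astype('float32')
    y = np.random.rand(32).astype('float32')
    history = model.fit(x, y, validation_split=0.25, epochs=2, verbose=0)
    print(sorted(history.history))  # ['loss', 'mae', 'mse', 'val_loss', 'val_mae', 'val_mse']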