about | summary | refs | log | tree | commit | diff
path: root/backend/microservice/api/newmlservice.py
diff options
context:
space:
mode:
author Danijel Andjelkovic <adanijel99@gmail.com> 2022-04-20 00:12:42 +0000
committer Danijel Andjelkovic <adanijel99@gmail.com> 2022-04-20 00:12:42 +0000
commit b814ef17d31dca80a3f23b3fbe4ce56885192a4c (patch)
tree d7a297db46d57267b5516a8c20ee906dd39571ed /backend/microservice/api/newmlservice.py
parent 9a480b28ac9b93dee082925b9cb4beef3244b135 (diff)
parent e6d9e3fd2dcf83c90db8560e749544dfd9910d07 (diff)
Merge branch 'dev' into 'master'
Merge master See merge request igrannonica/neuronstellar!27
Diffstat (limited to 'backend/microservice/api/newmlservice.py')
-rw-r--r-- backend/microservice/api/newmlservice.py 217
1 files changed, 136 insertions, 81 deletions
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index d19a4e44..6e65c876 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,5 +1,6 @@
from enum import unique
from itertools import count
+import os
import pandas as pd
from sklearn import datasets, multiclass
import tensorflow as tf
@@ -21,6 +22,7 @@ from sklearn.model_selection import train_test_split
from dataclasses import dataclass
import statistics as s
from sklearn.metrics import roc_auc_score
+import matplotlib.pyplot as plt
#from ann_visualizer.visualize import ann_viz;
def returnColumnsInfo(dataset):
dict=[]
@@ -112,35 +114,48 @@ class TrainingResult:
metrics: dict
'''
-def train(dataset, params, callback):
- problem_type = params["type"]
+def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
+ problem_type = paramsModel["type"]
#print(problem_type)
data = pd.DataFrame()
#print(data)
- for col in params["inputColumns"]:
+ for col in paramsExperiment["inputColumns"]:
#print(col)
data[col]=dataset[col]
- output_column = params["columnToPredict"]
+ output_column = paramsExperiment["outputColumn"]
data[output_column] = dataset[output_column]
#print(data)
###NULL
- null_value_options = params["nullValues"]
- null_values_replacers = params["nullValuesReplacers"]
-
+ null_value_options = paramsExperiment["nullValues"]
+ null_values_replacers = paramsExperiment["nullValuesReplacers"]
+ kategorijskekolone=data.select_dtypes(include=['object']).columns.copy()
+ #print(kategorijskekolone)
if(null_value_options=='replace'):
#print("replace null") #
- dict=params['null_values_replacers']
+ dict=null_values_replacers
while(len(dict)>0):
replace=dict.pop()
col=replace['column']
opt=replace['option']
if(opt=='replace'):
- replacevalue=replace['value']
- data[col]=data[col].fillna(replacevalue)
+ val = replace['value']
+ if(data[col].dtype == 'int64'):
+ val = np.int64(val)
+ elif(data[col].dtype == 'float64'):
+ val = np.float64(val)
+ data[col]=data[col].fillna(val)
elif(null_value_options=='delete_rows'):
data=data.dropna()
elif(null_value_options=='delete_columns'):
+ if(data[output_column].isnull().sum()>0):
+ if(output_column in kategorijskekolone):
+ replace=data[output_column].value_counts().index[0]
+ #print(replace)
+ else:
+ replace=data[output_column].mean()
+ data[output_column]=data[output_column].fillna(replace)
+ #print(data[output_column].isnull().sum())
data=data.dropna(axis=1)
#print(data.shape)
@@ -153,49 +168,74 @@ def train(dataset, params, callback):
data.pop(col)
#
### Enkodiranje
- encoding=params["encoding"]
- if(encoding=='label'):
- encoder=LabelEncoder()
- for col in data.columns:
- if(data[col].dtype==np.object_):
- data[col]=encoder.fit_transform(data[col])
+ '''
+ encodings=paramsExperiment["encodings"]
+
+ from sklearn.preprocessing import LabelEncoder
+ kategorijskekolone=data.select_dtypes(include=['object']).columns
+ encoder=LabelEncoder()
+ for kolona in data.columns:
+ if(kolona in kategorijskekolone):
+ data[kolona]=encoder.fit_transform(data[kolona])
+ '''
- elif(encoding=='onehot'):
- category_columns=[]
- for col in data.columns:
- if(data[col].dtype==np.object_):
- category_columns.append(col)
- data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+ encodings=paramsExperiment["encodings"]
+ datafront=dataset.copy()
+ svekolone=datafront.columns
- elif(encoding=='ordinal'):
- encoder = OrdinalEncoder()
- for col in data.columns:
- if(data[col].dtype==np.object_):
- data[col]=encoder.fit_transform(data[col])
-
- elif(encoding=='hashing'):
- category_columns=[]
- for col in data.columns:
- if(data[col].dtype==np.object_):
- category_columns.append(col)
- encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
- encoder.fit_transform(data)
- elif(encoding=='binary'):
- category_columns=[]
- for col in data.columns:
- if(data[col].dtype==np.object_):
- category_columns.append(col)
- encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
- encoder.fit_transform(data)
-
- elif(encoding=='baseN'):
- category_columns=[]
- for col in data.columns:
- if(data[col].dtype==np.object_):
- category_columns.append(col)
- encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
- encoder.fit_transform(data)
+
+ for kolonaEncoding in encodings:
+
+ kolona = kolonaEncoding["columnName"]
+ if kolona in data.columns:
+ encoding = kolonaEncoding["encoding"]
+
+ if(kolona in kategorijskekolone):
+ if(encoding=='label'):
+ encoder=LabelEncoder()
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ data[col]=encoder.fit_transform(data[col])
+
+
+ elif(encoding=='onehot'):
+ category_columns=[]
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ category_columns.append(col)
+ data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+
+ elif(encoding=='ordinal'):
+ encoder = OrdinalEncoder()
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ data[col]=encoder.fit_transform(data[col])
+
+ elif(encoding=='hashing'):
+ category_columns=[]
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ category_columns.append(col)
+ encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
+ encoder.fit_transform(data)
+ elif(encoding=='binary'):
+ category_columns=[]
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ category_columns.append(col)
+ encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
+ encoder.fit_transform(data)
+
+ elif(encoding=='baseN'):
+ category_columns=[]
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ category_columns.append(col)
+ encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
+ encoder.fit_transform(data)
+
+
#
# Input - output
#
@@ -210,8 +250,8 @@ def train(dataset, params, callback):
#
# Podela na test i trening skupove
#
- test=params["randomTestSetDistribution"]
- randomOrder = params["randomOrder"]
+ test=paramsExperiment["randomTestSetDistribution"]
+ randomOrder = paramsExperiment["randomOrder"]
if(randomOrder):
random=123
else:
@@ -224,7 +264,7 @@ def train(dataset, params, callback):
#
#
###OPTIMIZATORI
-
+ """
if(params['optimizer']=='adam'):
opt=tf.keras.optimizers.Adam(learning_rate=params['learningRate'])
@@ -250,7 +290,7 @@ def train(dataset, params, callback):
opt=tf.keras.optimizers.RMSprop(learning_rate=params['learningRate'])
###REGULARIZACIJA
- #regularisation={'kernelType':'l1 ili l2 ili l1_l2','krenelRate':default=0.01 ili jedna od vrednosti(0.0001,0.001,0.1,1,2,3) ili neka koju je korisnik zadao,'biasType':'','biasRate':'','activityType','activityRate'}
+ #regularisation={'kernelType':'l1 ili l2 ili l1_l2','kernelRate':default=0.01 ili jedna od vrednosti(0.0001,0.001,0.1,1,2,3) ili neka koju je korisnik zadao,'biasType':'','biasRate':'','activityType','activityRate'}
reg=params['regularisation']
###Kernel
@@ -276,49 +316,56 @@ def train(dataset, params, callback):
activityreg=tf.keras.regularizers.l2(reg['activityRate'])
elif(reg['kernelType']=='l1l2'):
activityreg=tf.keras.regularizers.l1_l2(l1=reg['activityRate'][0],l2=reg['activityRate'][1])
-
-
+ """
+ filepath=os.path.join("temp/",paramsExperiment['_id']+"_"+paramsModel['_id']+".h5")
if(problem_type=='multi-klasifikacioni'):
#print('multi')
classifier=tf.keras.Sequential()
- classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
- for i in range(params['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
+ for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
#print(i)
- classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
- classifier.add(tf.keras.layers.Dense(units=5, activation=params['outputLayerActivationFunction']))#izlazni sloj
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
+ classifier.add(tf.keras.layers.Dense(units=5, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj
- classifier.compile(loss =params["lossFunction"] , optimizer = params['optimizer'] , metrics =params['metrics'])
-
- history=classifier.fit(x_train, y_train, epochs = params['epochs'],batch_size=params['batchSize'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =['accuracy','mae','mse'])
+ history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id']))
+
+ hist=history.history
+ #plt.plot(hist['accuracy'])
+ #plt.show()
y_pred=classifier.predict(x_test)
y_pred=np.argmax(y_pred,axis=1)
- #print(y_pred.flatten())
- #print(y_test)
+
scores = classifier.evaluate(x_test, y_test)
#print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
- classifier.save("temp/"+params['name'], save_format='h5')
+
+
+ classifier.save(filepath, save_format='h5')
+
#vizuelizacija u python-u
#from ann_visualizer.visualize import ann_viz;
#ann_viz(classifier, title="My neural network")
+
+ return filepath,hist
elif(problem_type=='binarni-klasifikacioni'):
#print('*************************************************************************binarni')
classifier=tf.keras.Sequential()
- classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
- for i in range(params['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
+ for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
#print(i)
- classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
- classifier.add(tf.keras.layers.Dense(units=1, activation=params['outputLayerActivationFunction']))#izlazni sloj
-
- classifier.compile(loss =params["lossFunction"] , optimizer = params['optimizer'] , metrics =params['metrics'])
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
+ classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj
- history=classifier.fit(x_train, y_train, epochs = params['epochs'],batch_size=params['batchSize'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =['accuracy'])
+ history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id']))
+ hist=history.history
y_pred=classifier.predict(x_test)
y_pred=(y_pred>=0.5).astype('int')
@@ -329,23 +376,26 @@ def train(dataset, params, callback):
#print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
#ann_viz(classifier, title="My neural network")
- classifier.save("temp/"+params['name'], save_format='h5')
+ classifier.save(filepath, save_format='h5')
+ return filepath,hist
elif(problem_type=='regresioni'):
classifier=tf.keras.Sequential()
- classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
- for i in range(params['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
+ for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
#print(i)
- classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
classifier.add(tf.keras.layers.Dense(units=1))
- classifier.compile(loss =params["lossFunction"] , optimizer = params['optimizer'] , metrics =params['metrics'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =['accuracy','mae','mse'])
- history=classifier.fit(x_train, y_train, epochs = params['epochs'],batch_size=params['batchSize'])
+ history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id']))
+ hist=history.history
y_pred=classifier.predict(x_test)
#print(classifier.evaluate(x_test, y_test))
-
+ classifier.save(filepath, save_format='h5')
+ return filepath,hist
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
#creating a set of all the unique classes using the actual class list
@@ -427,6 +477,11 @@ def train(dataset, params, callback):
micro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'micro')
roc_auc_dict=roc_auc_score_multiclass(y_test, y_pred)
'''
+def predict(experiment, predictor, model):
+ #model.predict()
+ # this was previously manageH5
+ return "TODO"
+
def manageH5(dataset,params,h5model):
problem_type = params["type"]
@@ -503,7 +558,7 @@ def manageH5(dataset,params,h5model):
h5model.summary()
#ann_viz(h5model, title="My neural network")
- h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics'])
+ h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['accuracy',''])
history=h5model.fit(x2, y2, epochs = params['epochs'],batch_size=params['batchSize'])