about | summary | refs | log | tree | commit | diff
path: root/backend/microservice
diff options
context:
space:
mode:
Diffstat (limited to 'backend/microservice')
-rw-r--r-- backend/microservice/api/controller.py | 57
-rw-r--r-- backend/microservice/api/newmlservice.py | 231
2 files changed, 178 insertions, 110 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 988ad987..7852b63d 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -54,14 +54,22 @@ class train_callback(tf.keras.callbacks.Callback):
@app.route('/train', methods = ['POST'])
def train():
print("******************************TRAIN*************************************************")
-
- f = request.files.get("file")
- data = pd.read_csv(f)
paramsModel = json.loads(request.form["model"])
paramsExperiment = json.loads(request.form["experiment"])
paramsDataset = json.loads(request.form["dataset"])
+ f = request.files.get("file")
+ if(paramsDataset['delimiter']=='novi red'):
+ separation='\n'
+
+ elif(paramsDataset['delimiter']=='razmak'):
+ separation=' '
+ else:
+ separation=paramsDataset['delimiter']
+ data = pd.read_csv(f,sep=separation)
+
+
#dataset, paramsModel, paramsExperiment, callback)
- filepath,result = newmlservice.train(data, paramsModel, paramsExperiment,paramsDataset, train_callback)
+ filepath,result,finalMetrics= newmlservice.train(data, paramsModel, paramsExperiment,paramsDataset, train_callback)
"""
f = request.json['filepath']
dataset = pd.read_csv(f)
@@ -69,6 +77,7 @@ def train():
print(result)
"""
+
url = config.api_url + "/file/h5"
files = {'file': open(filepath, 'rb')}
r=requests.post(url, files=files,data={"uploaderId":paramsExperiment['uploaderId']})
@@ -86,9 +95,11 @@ def train():
"experimentId" : paramsExperiment["_id"],
"modelId" : paramsModel["_id"],
"h5FileId" : fileId,
- "metrics" : m
+ "metrics" : m,
+ "finalMetrics":finalMetrics
+
}
- print(predictor)
+ #print(predictor)
url = config.api_url + "/Predictor/add"
r = requests.post(url, json=predictor).text
print(r)
@@ -101,16 +112,46 @@ def predict():
paramsExperiment = json.loads(request.form["experiment"])
paramsPredictor = json.loads(request.form["predictor"])
print("********************************model loaded*******************************")
- result = newmlservice.predict(paramsExperiment, paramsPredictor, model)
+ result = newmlservice.predict(paramsExperiment, paramsPredictor, model,train_callback)
+ return result
+
+@app.route('/manageH5', methods = ['POST'])
+def manageH5():
+ h5 = request.files.get("h5file")
+ model = tf.keras.models.load_model(h5)
+
+ paramsExperiment = json.loads(request.form["experiment"])
+ paramsModel = json.loads(request.form["model"])
+ paramsDataset = json.loads(request.form["dataset"])
+
+ f = request.files.get("file")
+ if(paramsDataset['delimiter']=='novi red'):
+ separation='\n'
+ elif(paramsDataset['delimiter']=='razmak'):
+ separation=' '
+ else:
+ separation=paramsDataset['delimiter']
+
+ data = pd.read_csv(f,sep=separation)
+
+ result = newmlservice.manageH5(data,paramsModel,paramsExperiment,paramsDataset,model,train_callback)
return result
+
@app.route('/preprocess',methods=['POST'])
def returnColumnsInfo():
print("********************************PREPROCESS*******************************")
dataset = json.loads(request.form["dataset"])
file = request.files.get("file")
- data=pd.read_csv(file)
+ if(dataset['delimiter']=='novi red'):
+ separation='\n'
+
+ elif(dataset['delimiter']=='razmak'):
+ separation=' '
+ else:
+ separation=dataset['delimiter']
+ data=pd.read_csv(file,sep=separation)
'''
#f = request.json['filepath']
#data=pd.read_csv(f)
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 2f08d4b4..6a863013 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -291,17 +291,18 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
random=123
else:
random=0
- x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=random)
+ #x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=random)
#print(x_train,x_test)
-
+ x, x_test, y, y_test = train_test_split(x, y, test_size=test, random_state=random, shuffle=True)
+ x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, shuffle=True)
#
# Treniranje modela
#
#
###OPTIMIZATORI
-
+ print(paramsModel['optimizer'])
if(paramsModel['optimizer']=='Adam'):
- opt=tf.keras.optimizers.Adam(learning_rate=float(paramsModel['learningRate']))
+ opt=tf.keras.optimizers.Adam(learning_rate=3)
elif(paramsModel['optimizer']=='Adadelta'):
opt=tf.keras.optimizers.Adadelta(learning_rate=float(paramsModel['learningRate']))
@@ -315,13 +316,16 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
elif(paramsModel['optimizer']=='Nadam'):
opt=tf.keras.optimizers.Nadam(learning_rate=float(paramsModel['learningRate']))
- elif(paramsModel['optimizer']=='Sgd'):
+ elif(paramsModel['optimizer']=='SGD'):
+ opt=tf.keras.optimizers.SGD(learning_rate=float(paramsModel['learningRate']))
+
+ if(paramsModel['optimizer']=='SGDMomentum'):
opt=tf.keras.optimizers.SGD(learning_rate=float(paramsModel['learningRate']))
elif(paramsModel['optimizer']=='Ftrl'):
opt=tf.keras.optimizers.Ftrl(learning_rate=float(paramsModel['learningRate']))
- elif(paramsModel['optimizer']=='Rmsprop'):
+ elif(paramsModel['optimizer']=='RMSprop'):
opt=tf.keras.optimizers.RMSprop(learning_rate=float(paramsModel['learningRate']))
###REGULARIZACIJA
@@ -331,7 +335,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
filepath=os.path.join("temp/",paramsExperiment['_id']+"_"+paramsModel['_id']+".h5")
if(problem_type=='multi-klasifikacioni'):
#print('multi')
-
+ #print(paramsModel)
reg=paramsModel['layers'][0]['regularisation']
regRate=float(paramsModel['layers'][0]['regularisationRate'])
if(reg=='l1'):
@@ -365,10 +369,10 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
- classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt, metrics = ['accuracy','mae','mse'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer =opt, metrics = ['accuracy','mae','mse'])
+
+ history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
- history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
-
hist=history.history
#plt.plot(hist['accuracy'])
#plt.show()
@@ -380,12 +384,28 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
classifier.save(filepath, save_format='h5')
-
+ metrics={}
+ macro_averaged_precision=sm.precision_score(y_test, y_pred, average = 'macro')
+ micro_averaged_precision=sm.precision_score(y_test, y_pred, average = 'micro')
+ macro_averaged_recall=sm.recall_score(y_test, y_pred, average = 'macro')
+ micro_averaged_recall=sm.recall_score(y_test, y_pred, average = 'micro')
+ macro_averaged_f1=sm.f1_score(y_test, y_pred, average = 'macro')
+ micro_averaged_f1=sm.f1_score(y_test, y_pred, average = 'micro')
+
+ metrics= {
+ "macro_averaged_precision" :float(macro_averaged_precision),
+ "micro_averaged_precision" : float(micro_averaged_precision),
+ "macro_averaged_recall" : float(macro_averaged_recall),
+ "micro_averaged_recall" : float(micro_averaged_recall),
+ "macro_averaged_f1" : float(macro_averaged_f1),
+ "micro_averaged_f1" : float(micro_averaged_f1)
+ }
+
#vizuelizacija u python-u
#from ann_visualizer.visualize import ann_viz;
#ann_viz(classifier, title="My neural network")
- return filepath,hist
+ return filepath,hist,metrics
elif(problem_type=='binarni-klasifikacioni'):
#print('*************************************************************************binarni')
@@ -415,26 +435,47 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
kernelreg=tf.keras.regularizers.l2(regRate)
biasreg=tf.keras.regularizers.l2(regRate)
activityreg=tf.keras.regularizers.l2(regRate)
- classifier.add(tf.keras.layers.Dense(units=paramsModel['layers'][i+1]['neurons'], activation=paramsModel['layers'][i+1]['activationFunction'],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['layers'][i+1]['neurons'], activation=paramsModel['layers'][i+1]['activationFunction']))#i-ti skriveni sloj
classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj
- classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics = ['accuracy','mae','mse'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer =opt , metrics = ['accuracy','mae','mse'])
- history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
+ history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
hist=history.history
y_pred=classifier.predict(x_test)
y_pred=(y_pred>=0.5).astype('int')
-
- #print(y_pred.flatten())
- #print(y_test)
scores = classifier.evaluate(x_test, y_test)
#print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
- #ann_viz(classifier, title="My neural network")
+ # ann_viz(classifier, title="My neural network")
classifier.save(filepath, save_format='h5')
- return filepath,hist
+
+ accuracy = float(sm.accuracy_score(y_test,y_pred))
+ precision = float(sm.precision_score(y_test,y_pred))
+ recall = float(sm.recall_score(y_test,y_pred))
+ tn, fp, fn, tp = sm.confusion_matrix(y_test,y_pred).ravel()
+ specificity = float(tn / (tn+fp))
+ f1 = float(sm.f1_score(y_test,y_pred))
+ fpr, tpr, _ = sm.roc_curve(y_test,y_pred)
+ logloss = float(sm.log_loss(y_test, y_pred))
+ metrics= {
+ "accuracy" : accuracy,
+ "precision" : precision,
+ "recall" : recall,
+ "specificity" : specificity,
+ "f1" : f1,
+ "tn" : float(tn),
+ "fp" : float(fp),
+ "fn" : float(fn),
+ "tp" : float(tp),
+ "fpr" : fpr.tolist(),
+ "tpr" : tpr.tolist(),
+ "logloss" : logloss
+ }
+
+ return filepath,hist,metrics
elif(problem_type=='regresioni'):
reg=paramsModel['layers'][0]['regularisation']
@@ -470,12 +511,36 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics = ['accuracy','mae','mse'])
- history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
+ history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
hist=history.history
y_pred=classifier.predict(x_test)
#print(classifier.evaluate(x_test, y_test))
+
classifier.save(filepath, save_format='h5')
- return filepath,hist
+
+
+ mse = float(sm.mean_squared_error(y_test,y_pred))
+ mae = float(sm.mean_absolute_error(y_test,y_pred))
+ mape = float(sm.mean_absolute_percentage_error(y_test,y_pred))
+ rmse = float(np.sqrt(sm.mean_squared_error(y_test,y_pred)))
+ rmsle = float(np.sqrt(sm.mean_squared_error(y_test, y_pred)))
+ r2 = float(sm.r2_score(y_test, y_pred))
+ # n - num of observations
+ # k - num of independent variables
+ n = 40
+ k = 2
+ adj_r2 = float(1 - ((1-r2)*(n-1)/(n-k-1)))
+ metrics= {"mse" : mse,
+ "mae" : mae,
+ "mape" : mape,
+ "rmse" : rmse,
+ "rmsle" : rmsle,
+ "r2" : r2,
+ "adj_r2" : adj_r2
+ }
+
+ return filepath,hist,metrics
+
def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
#creating a set of all the unique classes using the actual class list
@@ -498,30 +563,8 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
#
# Metrike
#
-
- if(problem_type=="binarni-klasifikacioni"):
- accuracy = float(sm.accuracy_score(y_test,y_pred))
- precision = float(sm.precision_score(y_test,y_pred))
- recall = float(sm.recall_score(y_test,y_pred))
- tn, fp, fn, tp = sm.confusion_matrix(y_test,y_pred).ravel()
- specificity = float(tn / (tn+fp))
- f1 = float(sm.f1_score(y_test,y_pred))
- fpr, tpr, _ = sm.roc_curve(y_test,y_pred)
- logloss = float(sm.log_loss(y_test, y_pred))
- metrics= {"accuracy" : accuracy,
- "precision" : precision,
- "recall" : recall,
- "specificity" : specificity,
- "f1" : f1,
- "tn" : float(tn),
- "fp" : float(fp),
- "fn" : float(fn),
- "tp" : float(tp),
- "fpr" : fpr.tolist(),
- "tpr" : tpr.tolist(),
- "logloss" : logloss
- }
- elif(problem_type=="regresioni"):
+ '''
+ if(problem_type=="regresioni"):
# https://www.analyticsvidhya.com/blog/2021/05/know-the-best-evaluation-metrics-for-your-regression-model/
mse = float(sm.mean_squared_error(y_test,y_pred))
mae = float(sm.mean_absolute_error(y_test,y_pred))
@@ -542,7 +585,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
"r2" : r2,
"adj_r2" : adj_r2
}
- '''
+
elif(problem_type=="multi-klasifikacioni"):
cr=sm.classification_report(y_test, y_pred)
@@ -556,32 +599,36 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
macro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'macro')
micro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'micro')
roc_auc_dict=roc_auc_score_multiclass(y_test, y_pred)
- '''
+'''
def predict(experiment, predictor, model):
#model.predict()
# ovo je pre bilo manageH5
return "TODO"
-def manageH5(dataset,params,h5model):
- problem_type = params["type"]
- #print(problem_type)
+def manageH5(dataset,paramsModel,paramsExperiment,paramsDataset,h5model,callback):
+ problem_type = paramsModel["type"]
data = pd.DataFrame()
- #print(data)
- for col in params["inputColumns"]:
- #print(col)
- data[col]=dataset[col]
- output_column = params["columnToPredict"]
+ for col in paramsExperiment["inputColumns"]:
+ if(col!=paramsExperiment["outputColumn"]):
+ data[col]=dataset[col]
+ output_column = paramsExperiment["outputColumn"]
data[output_column] = dataset[output_column]
- #print(data)
- ###NULL
- null_value_options = params["nullValues"]
- null_values_replacers = params["nullValuesReplacers"]
+ kategorijskekolone=[]
+ columnInfo=paramsDataset['columnInfo']
+ columnTypes=paramsExperiment['columnTypes']
+ for i in range(len(columnInfo)):
+ col=columnInfo[i]
+ if(columnTypes[i]=="categorical" and col['columnName'] in paramsExperiment['inputColumns']):
+ data[col['columnName']]=data[col['columnName']].apply(str)
+ kategorijskekolone.append(col['columnName'])
+
+ null_value_options = paramsExperiment["nullValues"]
+ null_values_replacers = paramsExperiment["nullValuesReplacers"]
if(null_value_options=='replace'):
- #print("replace null") # TODO
- dict=params['null_values_replacers']
+ dict=null_values_replacers
while(len(dict)>0):
replace=dict.pop()
col=replace['column']
@@ -592,58 +639,38 @@ def manageH5(dataset,params,h5model):
elif(null_value_options=='delete_rows'):
data=data.dropna()
elif(null_value_options=='delete_columns'):
- data=data.dropna()
-
- #print(data.shape)
-
- #
- # Brisanje kolona koje ne uticu na rezultat
- #
- num_rows=data.shape[0]
- for col in data.columns:
- if((data[col].nunique()==(num_rows)) and (data[col].dtype==np.object_)):
- data.pop(col)
- #
- ### Enkodiranje
- encoding=params["encoding"]
- if(encoding=='label'):
- encoder=LabelEncoder()
- for col in data.columns:
- if(data[col].dtype==np.object_):
- data[col]=encoder.fit_transform(data[col])
-
-
- elif(encoding=='onehot'):
- category_columns=[]
- for col in data.columns:
- if(data[col].dtype==np.object_):
- category_columns.append(col)
- data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
- #print(data)
+ data=data.dropna(axis=1)
+
+ encodings=paramsExperiment["encodings"]
+ for kolonaEncoding in encodings:
+ kolona = kolonaEncoding["columnName"]
+ encoding = kolonaEncoding["encoding"]
+ if(kolona in kategorijskekolone):
+ if(encoding=='label'):
+ encoder=LabelEncoder()
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ data[col]=encoder.fit_transform(data[col])
+ elif(encoding=='onehot'):
+ category_columns=[]
+ for col in data.columns:
+ if(data[col].dtype==np.object_):
+ category_columns.append(col)
+ data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
- #
- # Input - output
- #
x_columns = []
for col in data.columns:
if(col!=output_column):
x_columns.append(col)
- #print(x_columns)
x2 = data[x_columns]
- #print(x2)
- #print(x2.values)
x2 = data[x_columns].values
- #print(x2)
y2 = data[output_column].values
- h5model.summary()
+ #h5model.summary()
#ann_viz(h5model, title="My neural network")
- h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics = ['accuracy','mae','mse'])
- history=h5model.fit(x2, y2, epochs = params['epochs'],batch_size=int(params['batchSize']))
-
+ history=h5model.fit(x2, y2, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x2, y2,paramsModel['_id']))
y_pred2=h5model.predict(x2)
-
y_pred2=np.argmax(y_pred2,axis=1)
#y_pred=h5model.predict_classes(x)
score = h5model.evaluate(x2,y_pred2, verbose=0)