Diffstat (limited to 'backend/microservice/api')
-rw-r--r--  backend/microservice/api/config.py         2
-rw-r--r--  backend/microservice/api/controller.py     94
-rw-r--r--  backend/microservice/api/newmlservice.py   217
3 files changed, 213 insertions(+), 100 deletions(-)
diff --git a/backend/microservice/api/config.py b/backend/microservice/api/config.py
index 2b6fbe89..8fb088a7 100644
--- a/backend/microservice/api/config.py
+++ b/backend/microservice/api/config.py
@@ -1,2 +1,2 @@
api_url = "http://localhost:5283/api"
-
+hostIP = "127.0.0.1:5543"
\ No newline at end of file
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 8e12c41d..f870b2b1 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 import flask
 from flask import request, jsonify
 import newmlservice
@@ -9,47 +13,101 @@ import config
app = flask.Flask(__name__)
app.config["DEBUG"] = True
-app.config["SERVER_NAME"] = "127.0.0.1:5543"
-
+app.config["SERVER_NAME"] = config.hostIP
+
+#@dataclass
+#class Predictor:
+#    _id: str
+#    username: str
+#    inputs: list
+#    output: str
+#    isPublic: bool
+#    accessibleByLink: bool
+#    dateCreated: DateTime
+#    experimentId: str
+#    modelId: str
+#    h5FileId: str
+#    metrics: list
+
 class train_callback(tf.keras.callbacks.Callback):
-    def __init__(self, x_test, y_test):
+    def __init__(self, x_test, y_test, modelId):
         self.x_test = x_test
         self.y_test = y_test
+        self.modelId = modelId
 #
     def on_epoch_end(self, epoch, logs=None):
-        print(epoch)
+        #print('Evaluation: ', self.model.evaluate(self.x_test, self.y_test), "\n")
+        #print(epoch)
+        #print(logs)
         #ml_socket.send(epoch)
         #file = request.files.get("file")
         url = config.api_url + "/Model/epoch"
-        requests.post(url, epoch).text
+        r = requests.post(url, json={"Stat": str(logs), "ModelId": str(self.modelId), "EpochNum": epoch}).text
+        #print(r)
         #print('Evaluation: ', self.model.evaluate(self.x_test, self.y_test), "\n")  # how many values come back depends on the selected metrics; loss is the default
 @app.route('/train', methods = ['POST'])
 def train():
-    print("******************************TRAIN*************************************************")
+    #print("******************************TRAIN*************************************************")
     f = request.files.get("file")
     data = pd.read_csv(f)
     paramsModel = json.loads(request.form["model"])
     paramsExperiment = json.loads(request.form["experiment"])
     paramsDataset = json.loads(request.form["dataset"])
     #dataset, paramsModel, paramsExperiment, callback)
-    result = newmlservice.train(data, paramsModel, paramsExperiment, paramsDataset, train_callback)
+    filepath, result = newmlservice.train(data, paramsModel, paramsExperiment, paramsDataset, train_callback)
+    """
+    f = request.json['filepath']
+    dataset = pd.read_csv(f)
+    filepath, result = newmlservice.train(dataset, request.json['model'], train_callback)
    print(result)
-    return jsonify(result)
+    """
+    url = config.api_url + "/file/h5"
+    files = {'file': open(filepath, 'rb')}
+    r = requests.post(url, files=files, data={"uploaderId": paramsExperiment['uploaderId']})
+    fileId = r.text
+    m = []
+    for attribute, value in result.items():
+        m.append({"Name": attribute, "JsonValue": value})
+    predictor = {
+        "_id": "",
+        "uploaderId": paramsModel["uploaderId"],
+        "inputs": paramsExperiment["inputColumns"],
+        "output": paramsExperiment["outputColumn"],
+        "isPublic": False,
+        "accessibleByLink": False,
+        "experimentId": paramsExperiment["_id"],
+        "modelId": paramsModel["_id"],
+        "h5FileId": fileId,
+        "metrics": m
+    }
+    #print(predictor)
+    url = config.api_url + "/Predictor/add"
+    r = requests.post(url, json=predictor).text
+    #print(r)
+    return r
 @app.route('/predict', methods = ['POST'])
 def predict():
-    f = request.json['filepath']
-    dataset = pd.read_csv(f)
-    m = request.json['modelpath']
-    model = tf.keras.models.load_model(m)
-    print("********************************model loaded*******************************")
-    newmlservice.manageH5(dataset, request.json['model'], model)
-    return "done"
+    h5 = request.files.get("h5file")
+    model = tf.keras.models.load_model(h5)
+    paramsExperiment = json.loads(request.form["experiment"])
+    paramsPredictor = json.loads(request.form["predictor"])
+    #print("********************************model loaded*******************************")
+    result = newmlservice.predict(paramsExperiment, paramsPredictor, model)
+    return result
 @app.route('/preprocess', methods=['POST'])
 def returnColumnsInfo():
-    print("********************************PREPROCESS*******************************")
+    #print("********************************PREPROCESS*******************************")
     dataset = json.loads(request.form["dataset"])
     file = request.files.get("file")
     data = pd.read_csv(file)
@@ -69,8 +127,8 @@ def returnColumnsInfo():
     dataset["colCount"] = preprocess["colCount"]
     dataset["rowCount"] = preprocess["rowCount"]
     dataset["isPreProcess"] = True
-    print(dataset)
+    #print(dataset)
     return jsonify(dataset)
-print("App loaded.")
+#print("App loaded.")
 app.run()
\ No newline at end of file
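
For reference, a minimal client-side sketch of the reworked /train contract, matching the multipart fields the handler above reads (a "file" upload plus "model", "experiment", and "dataset" JSON form fields). Only the field names come from the code; the column names, IDs, and hyperparameter values here are hypothetical.

import json
import requests

# Hypothetical payloads -- in the real system these come from the web backend.
model = {
    "_id": "m1", "uploaderId": "u1", "type": "binarni-klasifikacioni",
    "hiddenLayers": 2, "hiddenLayerNeurons": 8,
    "hiddenLayerActivationFunctions": ["relu", "relu"],
    "outputLayerActivationFunction": "sigmoid",
    "lossFunction": "binary_crossentropy", "optimizer": "adam",
    "epochs": 5, "batchSize": 32,
}
experiment = {
    "_id": "e1", "uploaderId": "u1",
    "inputColumns": ["age", "income"], "outputColumn": "churn",
    "nullValues": "delete_rows", "nullValuesReplacers": [],
    "encodings": [{"columnName": "churn", "encoding": "label"}],
    "randomTestSetDistribution": 0.2, "randomOrder": True,
}

with open("churn.csv", "rb") as f:
    r = requests.post(
        "http://127.0.0.1:5543/train",   # app.config["SERVER_NAME"] = config.hostIP
        files={"file": f},
        data={
            "model": json.dumps(model),
            "experiment": json.dumps(experiment),
            "dataset": json.dumps({}),
        },
    )
print(r.text)  # response of the backend /Predictor/add call, forwarded by the microservice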
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index d19a4e44..6e65c876 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,5 +1,6 @@
from enum import unique
from itertools import count
+import os
import pandas as pd
from sklearn import datasets, multiclass
import tensorflow as tf
@@ -21,6 +22,7 @@ from sklearn.model_selection import train_test_split
from dataclasses import dataclass
import statistics as s
from sklearn.metrics import roc_auc_score
+import matplotlib.pyplot as plt
#from ann_visualizer.visualize import ann_viz;
 def returnColumnsInfo(dataset):
     dict=[]
@@ -112,35 +114,48 @@ class TrainingResult:
metrics: dict
'''
-def train(dataset, params, callback):
-    problem_type = params["type"]
+def train(dataset, paramsModel, paramsExperiment, paramsDataset, callback):
+    problem_type = paramsModel["type"]
     #print(problem_type)
     data = pd.DataFrame()
     #print(data)
-    for col in params["inputColumns"]:
+    for col in paramsExperiment["inputColumns"]:
         #print(col)
         data[col]=dataset[col]
-    output_column = params["columnToPredict"]
+    output_column = paramsExperiment["outputColumn"]
     data[output_column] = dataset[output_column]
     #print(data)
     ###NULL
-    null_value_options = params["nullValues"]
-    null_values_replacers = params["nullValuesReplacers"]
-
+    null_value_options = paramsExperiment["nullValues"]
+    null_values_replacers = paramsExperiment["nullValuesReplacers"]
+    kategorijskekolone=data.select_dtypes(include=['object']).columns.copy()
+    #print(kategorijskekolone)
     if(null_value_options=='replace'):
         #print("replace null")
-        dict=params['null_values_replacers']
+        dict=null_values_replacers
         while(len(dict)>0):
             replace=dict.pop()
             col=replace['column']
             opt=replace['option']
             if(opt=='replace'):
-                replacevalue=replace['value']
-                data[col]=data[col].fillna(replacevalue)
+                val = replace['value']
+                if(data[col].dtype == 'int64'):
+                    val = np.int64(val)
+                elif(data[col].dtype == 'float64'):
+                    val = np.float64(val)
+                data[col]=data[col].fillna(val)
     elif(null_value_options=='delete_rows'):
         data=data.dropna()
     elif(null_value_options=='delete_columns'):
+        if(data[output_column].isnull().sum()>0):
+            if(output_column in kategorijskekolone):
+                replace=data[output_column].value_counts().index[0]
+                #print(replace)
+            else:
+                replace=data[output_column].mean()
+            data[output_column]=data[output_column].fillna(replace)
+        #print(data[output_column].isnull().sum())
         data=data.dropna(axis=1)
     #print(data.shape)
@@ -153,49 +168,74 @@ def train(dataset, params, callback):
data.pop(col)
#
     ### Encoding
-    encoding=params["encoding"]
-    if(encoding=='label'):
-        encoder=LabelEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
+    '''
+    encodings=paramsExperiment["encodings"]
+
+    from sklearn.preprocessing import LabelEncoder
+    kategorijskekolone=data.select_dtypes(include=['object']).columns
+    encoder=LabelEncoder()
+    for kolona in data.columns:
+        if(kolona in kategorijskekolone):
+            data[kolona]=encoder.fit_transform(data[kolona])
+    '''
-    elif(encoding=='onehot'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+    encodings=paramsExperiment["encodings"]
+    datafront=dataset.copy()
+    svekolone=datafront.columns
-    elif(encoding=='ordinal'):
-        encoder = OrdinalEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
-
-    elif(encoding=='hashing'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
-        encoder.fit_transform(data)
-    elif(encoding=='binary'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
-        encoder.fit_transform(data)
-
-    elif(encoding=='baseN'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
-        encoder.fit_transform(data)
+
+    for kolonaEncoding in encodings:
+        kolona = kolonaEncoding["columnName"]
+        if kolona in data.columns:
+            encoding = kolonaEncoding["encoding"]
+            if(kolona in kategorijskekolone):
+                if(encoding=='label'):
+                    encoder=LabelEncoder()
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            data[col]=encoder.fit_transform(data[col])
+                elif(encoding=='onehot'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+                elif(encoding=='ordinal'):
+                    encoder = OrdinalEncoder()
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            data[col]=encoder.fit_transform(data[col])
+                elif(encoding=='hashing'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
+                    encoder.fit_transform(data)
+                elif(encoding=='binary'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
+                    encoder.fit_transform(data)
+                elif(encoding=='baseN'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
+                    encoder.fit_transform(data)
#
# Input - output
#
@@ -210,8 +250,8 @@ def train(dataset, params, callback):
     #
     # Split into training and test sets
     #
-    test=params["randomTestSetDistribution"]
-    randomOrder = params["randomOrder"]
+    test=paramsExperiment["randomTestSetDistribution"]
+    randomOrder = paramsExperiment["randomOrder"]
     if(randomOrder):
         random=123
     else:
@@ -224,7 +264,7 @@ def train(dataset, params, callback):
     #
    #
    ###OPTIMIZERS
-
+    """
     if(params['optimizer']=='adam'):
         opt=tf.keras.optimizers.Adam(learning_rate=params['learningRate'])
@@ -250,7 +290,7 @@ def train(dataset, params, callback):
         opt=tf.keras.optimizers.RMSprop(learning_rate=params['learningRate'])
     ###REGULARISATION
-    #regularisation={'kernelType':'l1 or l2 or l1_l2','krenelRate':default=0.01 or one of the values (0.0001,0.001,0.1,1,2,3) or a user-supplied value,'biasType':'','biasRate':'','activityType','activityRate'}
+    #regularisation={'kernelType':'l1 or l2 or l1_l2','kernelRate':default=0.01 or one of the values (0.0001,0.001,0.1,1,2,3) or a user-supplied value,'biasType':'','biasRate':'','activityType','activityRate'}
     reg=params['regularisation']
     ###Kernel
@@ -276,49 +316,56 @@ def train(dataset, params, callback):
         activityreg=tf.keras.regularizers.l2(reg['activityRate'])
     elif(reg['kernelType']=='l1l2'):
         activityreg=tf.keras.regularizers.l1_l2(l1=reg['activityRate'][0],l2=reg['activityRate'][1])
-
-
+    """
+    filepath=os.path.join("temp/", paramsExperiment['_id']+"_"+paramsModel['_id']+".h5")
     if(problem_type=='multi-klasifikacioni'):
         #print('multi')
         classifier=tf.keras.Sequential()
-        classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0], input_dim=x_train.shape[1]))  # first hidden layer + defines the preceding (input) layer
-        for i in range(params['hiddenLayers']-1):  # if there is more than one hidden layer
+        classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0], input_dim=x_train.shape[1]))  # first hidden layer + defines the preceding (input) layer
+        for i in range(paramsModel['hiddenLayers']-1):  # if there is more than one hidden layer
             #print(i)
-            classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))  # i-th hidden layer
-        classifier.add(tf.keras.layers.Dense(units=5, activation=params['outputLayerActivationFunction']))  # output layer
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))  # i-th hidden layer
+        classifier.add(tf.keras.layers.Dense(units=5, activation=paramsModel['outputLayerActivationFunction']))  # output layer
-        classifier.compile(loss=params["lossFunction"], optimizer=params['optimizer'], metrics=params['metrics'])
-
-        history=classifier.fit(x_train, y_train, epochs=params['epochs'], batch_size=params['batchSize'])
+        classifier.compile(loss=paramsModel["lossFunction"], optimizer=paramsModel['optimizer'], metrics=['accuracy','mae','mse'])
+        history=classifier.fit(x_train, y_train, epochs=paramsModel['epochs'], batch_size=paramsModel['batchSize'], callbacks=callback(x_test, y_test, paramsModel['_id']))
+        hist=history.history
+        #plt.plot(hist['accuracy'])
+        #plt.show()
         y_pred=classifier.predict(x_test)
         y_pred=np.argmax(y_pred,axis=1)
-        #print(y_pred.flatten())
-        #print(y_test)
         scores = classifier.evaluate(x_test, y_test)
         #print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
-        classifier.save("temp/"+params['name'], save_format='h5')
+        classifier.save(filepath, save_format='h5')
         # visualization in Python
         #from ann_visualizer.visualize import ann_viz;
         #ann_viz(classifier, title="My neural network")
+        return filepath, hist
     elif(problem_type=='binarni-klasifikacioni'):
         #print('*************************************************************************binarni')
         classifier=tf.keras.Sequential()
-        classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0], input_dim=x_train.shape[1]))  # first hidden layer + defines the preceding (input) layer
-        for i in range(params['hiddenLayers']-1):  # if there is more than one hidden layer
+        classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0], input_dim=x_train.shape[1]))  # first hidden layer + defines the preceding (input) layer
+        for i in range(paramsModel['hiddenLayers']-1):  # if there is more than one hidden layer
             #print(i)
-            classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))  # i-th hidden layer
-        classifier.add(tf.keras.layers.Dense(units=1, activation=params['outputLayerActivationFunction']))  # output layer
-
-        classifier.compile(loss=params["lossFunction"], optimizer=params['optimizer'], metrics=params['metrics'])
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))  # i-th hidden layer
+        classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))  # output layer
-        history=classifier.fit(x_train, y_train, epochs=params['epochs'], batch_size=params['batchSize'])
+        classifier.compile(loss=paramsModel["lossFunction"], optimizer=paramsModel['optimizer'], metrics=['accuracy'])
+        history=classifier.fit(x_train, y_train, epochs=paramsModel['epochs'], batch_size=paramsModel['batchSize'], callbacks=callback(x_test, y_test, paramsModel['_id']))
+        hist=history.history
         y_pred=classifier.predict(x_test)
         y_pred=(y_pred>=0.5).astype('int')
@@ -329,23 +376,26 @@ def train(dataset, params, callback):
         #print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
         #ann_viz(classifier, title="My neural network")
-        classifier.save("temp/"+params['name'], save_format='h5')
+        classifier.save(filepath, save_format='h5')
+        return filepath, hist
     elif(problem_type=='regresioni'):
         classifier=tf.keras.Sequential()
-        classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0], input_dim=x_train.shape[1]))  # first hidden layer + defines the preceding (input) layer
-        for i in range(params['hiddenLayers']-1):  # if there is more than one hidden layer
+        classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0], input_dim=x_train.shape[1]))  # first hidden layer + defines the preceding (input) layer
+        for i in range(paramsModel['hiddenLayers']-1):  # if there is more than one hidden layer
             #print(i)
-            classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))  # i-th hidden layer
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))  # i-th hidden layer
         classifier.add(tf.keras.layers.Dense(units=1))
-        classifier.compile(loss=params["lossFunction"], optimizer=params['optimizer'], metrics=params['metrics'])
+        classifier.compile(loss=paramsModel["lossFunction"], optimizer=paramsModel['optimizer'], metrics=['accuracy','mae','mse'])
-        history=classifier.fit(x_train, y_train, epochs=params['epochs'], batch_size=params['batchSize'])
+        history=classifier.fit(x_train, y_train, epochs=paramsModel['epochs'], batch_size=paramsModel['batchSize'], callbacks=callback(x_test, y_test, paramsModel['_id']))
+        hist=history.history
         y_pred=classifier.predict(x_test)
         #print(classifier.evaluate(x_test, y_test))
+        classifier.save(filepath, save_format='h5')
+        return filepath, hist
 def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
     #creating a set of all the unique classes using the actual class list
@@ -427,6 +477,11 @@ def train(dataset, params, callback):
     micro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'micro')
     roc_auc_dict=roc_auc_score_multiclass(y_test, y_pred)
     '''
+def predict(experiment, predictor, model):
+    #model.predict()
+    # this used to be manageH5
+    return "TODO"
+
 def manageH5(dataset,params,h5model):
     problem_type = params["type"]
@@ -503,7 +558,7 @@ def manageH5(dataset,params,h5model):
     h5model.summary()
     #ann_viz(h5model, title="My neural network")
-    h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics'])
+    h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=['accuracy'])
     history=h5model.fit(x2, y2, epochs=params['epochs'], batch_size=params['batchSize'])
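
Since predict() above is committed as a stub, here is one plausible shape for it -- an assumption, not the committed implementation. It uses only what the new /predict route passes in (the parsed experiment and predictor JSON plus a loaded Keras model) and mirrors the post-processing the training branches apply; the "inputValues" field and the type lookup are hypothetical.

import numpy as np
import pandas as pd

def predict(experiment, predictor, model):
    # One row of raw input values keyed by column name; "inputValues" is a
    # hypothetical field, not confirmed by this commit.
    row = pd.DataFrame([experiment["inputValues"]], columns=predictor["inputs"])
    y = model.predict(row)
    # Problem type is assumed to travel with the experiment/predictor metadata.
    problem_type = experiment.get("type")
    if problem_type == 'binarni-klasifikacioni':
        y = (y >= 0.5).astype('int')   # same thresholding as the training branch
    elif problem_type == 'multi-klasifikacioni':
        y = np.argmax(y, axis=1)       # same argmax as the training branch
    return {"prediction": y.flatten().tolist()}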