Diffstat (limited to 'backend/microservice/api')
-rw-r--r--  backend/microservice/api/config.py        |   2
-rw-r--r--  backend/microservice/api/controller.py    |  94
-rw-r--r--  backend/microservice/api/newmlservice.py  | 217
3 files changed, 213 insertions(+), 100 deletions(-)
diff --git a/backend/microservice/api/config.py b/backend/microservice/api/config.py
index 2b6fbe89..8fb088a7 100644
--- a/backend/microservice/api/config.py
+++ b/backend/microservice/api/config.py
@@ -1,2 +1,2 @@
 api_url = "http://localhost:5283/api"
-
+hostIP = "127.0.0.1:5543"
\ No newline at end of file
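Both new settings are consumed by controller.py in the next diff; as a minimal orientation sketch (the Flask app object is the one created there):

    import flask
    import config

    app = flask.Flask(__name__)
    app.config["SERVER_NAME"] = config.hostIP    # where this microservice listens
    epoch_url = config.api_url + "/Model/epoch"  # backend endpoint it reports to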
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 8e12c41d..f870b2b1 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -1,3 +1,7 @@
+from cmath import log
+from dataclasses import dataclass
+from distutils.command.upload import upload
+from gc import callbacks
 import flask
 from flask import request, jsonify
 import newmlservice
@@ -9,47 +13,101 @@ import config
 
 app = flask.Flask(__name__)
 app.config["DEBUG"] = True
-app.config["SERVER_NAME"] = "127.0.0.1:5543"
-
+app.config["SERVER_NAME"] = config.hostIP
+
+#@dataclass
+#class Predictor:
+#    _id : str
+#    username: str
+#    inputs : list
+#    output : str
+#    isPublic: bool
+#    accessibleByLink: bool
+#    dateCreated: DateTime
+#    experimentId: str
+#    modelId: str
+#    h5FileId: str
+#    metrics: list
+
 class train_callback(tf.keras.callbacks.Callback):
-    def __init__(self, x_test, y_test):
+    def __init__(self, x_test, y_test, modelId):
         self.x_test = x_test
         self.y_test = y_test
+        self.modelId = modelId
 
     #
     def on_epoch_end(self, epoch, logs=None):
-        print(epoch)
+        #print('Evaluation: ', self.model.evaluate(self.x_test,self.y_test),"\n")
+        #print(epoch)
+        #print(logs)
         #ml_socket.send(epoch)
         #file = request.files.get("file")
         url = config.api_url + "/Model/epoch"
-        requests.post(url, epoch).text
+        r = requests.post(url, json={"Stat": str(logs), "ModelId": str(self.modelId), "EpochNum": epoch}).text
+        #print(r)
         #print('Evaluation: ', self.model.evaluate(self.x_test,self.y_test),"\n")
 
 #the number of parameters depends on the chosen metrics; loss is the default
 @app.route('/train', methods = ['POST'])
 def train():
-    print("******************************TRAIN*************************************************")
+    #print("******************************TRAIN*************************************************")
+
     f = request.files.get("file")
     data = pd.read_csv(f)
     paramsModel = json.loads(request.form["model"])
     paramsExperiment = json.loads(request.form["experiment"])
     paramsDataset = json.loads(request.form["dataset"])
     #dataset, paramsModel, paramsExperiment, callback)
-    result = newmlservice.train(data, paramsModel, paramsExperiment, paramsDataset, train_callback)
+    filepath, result = newmlservice.train(data, paramsModel, paramsExperiment, paramsDataset, train_callback)
+    """
+    f = request.json['filepath']
+    dataset = pd.read_csv(f)
+    filepath,result=newmlservice.train(dataset,request.json['model'],train_callback)
     print(result)
-    return jsonify(result)
+    """
+
+    url = config.api_url + "/file/h5"
+    files = {'file': open(filepath, 'rb')}
+    r = requests.post(url, files=files, data={"uploaderId": paramsExperiment['uploaderId']})
+    fileId = r.text
+    m = []
+    for attribute, value in result.items():
+        m.append({"Name": attribute, "JsonValue": value})
+    predictor = {
+        "_id": "",
+        "uploaderId": paramsModel["uploaderId"],
+        "inputs": paramsExperiment["inputColumns"],
+        "output": paramsExperiment["outputColumn"],
+        "isPublic": False,
+        "accessibleByLink": False,
+        "experimentId": paramsExperiment["_id"],
+        "modelId": paramsModel["_id"],
+        "h5FileId": fileId,
+        "metrics": m
+    }
+    #print(predictor)
+    #print('\n')
+    url = config.api_url + "/Predictor/add"
+    r = requests.post(url, json=predictor).text
+    #print(r)
+    return r
 
 @app.route('/predict', methods = ['POST'])
 def predict():
-    f = request.json['filepath']
-    dataset = pd.read_csv(f)
-    m = request.json['modelpath']
-    model = tf.keras.models.load_model(m)
-    print("********************************model loaded*******************************")
-    newmlservice.manageH5(dataset,request.json['model'],model)
-    return "done"
+    h5 = request.files.get("h5file")
+    model = tf.keras.models.load_model(h5)
+    paramsExperiment = json.loads(request.form["experiment"])
+    paramsPredictor = json.loads(request.form["predictor"])
+    #print("********************************model loaded*******************************")
+    result = newmlservice.predict(paramsExperiment, paramsPredictor, model)
+    return result
 
 @app.route('/preprocess',methods=['POST'])
 def returnColumnsInfo():
-    print("********************************PREPROCESS*******************************")
+    #print("********************************PREPROCESS*******************************")
    dataset = json.loads(request.form["dataset"])
     file = request.files.get("file")
     data=pd.read_csv(file)
@@ -69,8 +127,8 @@ def returnColumnsInfo():
     dataset["colCount"] = preprocess["colCount"]
     dataset["rowCount"] = preprocess["rowCount"]
     dataset["isPreProcess"] = True
-    print(dataset)
+    #print(dataset)
     return jsonify(dataset)
 
-print("App loaded.")
+#print("App loaded.")
 app.run()
\ No newline at end of file
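The rewritten train_callback above pushes per-epoch training stats to the backend instead of printing them. A minimal, self-contained sketch of that pattern (the endpoint path and payload keys are taken from the diff; the surrounding names are illustrative):

    import requests
    import tensorflow as tf

    API_URL = "http://localhost:5283/api"  # mirrors config.api_url

    class EpochReporter(tf.keras.callbacks.Callback):
        """POST one JSON document to <api>/Model/epoch after every epoch."""
        def __init__(self, model_id):
            super().__init__()
            self.model_id = model_id

        def on_epoch_end(self, epoch, logs=None):
            requests.post(API_URL + "/Model/epoch",
                          json={"Stat": str(logs),
                                "ModelId": str(self.model_id),
                                "EpochNum": epoch})

    # usage: model.fit expects a list of callback instances
    # model.fit(x_train, y_train, epochs=10, callbacks=[EpochReporter(model_id)])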
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index d19a4e44..6e65c876 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,5 +1,6 @@
 from enum import unique
 from itertools import count
+import os
 import pandas as pd
 from sklearn import datasets, multiclass
 import tensorflow as tf
@@ -21,6 +22,7 @@ from sklearn.model_selection import train_test_split
 from dataclasses import dataclass
 import statistics as s
 from sklearn.metrics import roc_auc_score
+import matplotlib.pyplot as plt
 #from ann_visualizer.visualize import ann_viz;
 def returnColumnsInfo(dataset):
     dict=[]
@@ -112,35 +114,48 @@ class TrainingResult:
     metrics: dict
 '''
-def train(dataset, params, callback):
-    problem_type = params["type"]
+def train(dataset, paramsModel, paramsExperiment, paramsDataset, callback):
+    problem_type = paramsModel["type"]
     #print(problem_type)
     data = pd.DataFrame()
     #print(data)
-    for col in params["inputColumns"]:
+    for col in paramsExperiment["inputColumns"]:
         #print(col)
         data[col]=dataset[col]
-    output_column = params["columnToPredict"]
+    output_column = paramsExperiment["outputColumn"]
     data[output_column] = dataset[output_column]
     #print(data)
     ###NULL
-    null_value_options = params["nullValues"]
-    null_values_replacers = params["nullValuesReplacers"]
-
+    null_value_options = paramsExperiment["nullValues"]
+    null_values_replacers = paramsExperiment["nullValuesReplacers"]
+    kategorijskekolone=data.select_dtypes(include=['object']).columns.copy()
+    #print(kategorijskekolone)
     if(null_value_options=='replace'):
         #print("replace null")
-        dict=params['null_values_replacers']
+        dict=null_values_replacers
         while(len(dict)>0):
             replace=dict.pop()
             col=replace['column']
             opt=replace['option']
             if(opt=='replace'):
-                replacevalue=replace['value']
-                data[col]=data[col].fillna(replacevalue)
+                val = replace['value']
+                if(data[col].dtype == 'int64'):
+                    val = np.int64(val)
+                elif(data[col].dtype == 'float64'):
+                    val = np.float64(val)
+                data[col]=data[col].fillna(val)
     elif(null_value_options=='delete_rows'):
         data=data.dropna()
     elif(null_value_options=='delete_columns'):
+        if(data[output_column].isnull().sum()>0):
+            if(output_column in kategorijskekolone):
+                replace=data[output_column].value_counts().index[0]
+                #print(replace)
+            else:
+                replace=data[output_column].mean()
+            data[output_column]=data[output_column].fillna(replace)
+        #print(data[output_column].isnull().sum())
         data=data.dropna(axis=1)
     #print(data.shape)
@@ -153,49 +168,74 @@ def train(dataset, params, callback):
             data.pop(col)
     #
     ### Encoding
-    encoding=params["encoding"]
-    if(encoding=='label'):
-        encoder=LabelEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
+    '''
+    encodings=paramsExperiment["encodings"]
+
+    from sklearn.preprocessing import LabelEncoder
+    kategorijskekolone=data.select_dtypes(include=['object']).columns
+    encoder=LabelEncoder()
+    for kolona in data.columns:
+        if(kolona in kategorijskekolone):
+            data[kolona]=encoder.fit_transform(data[kolona])
+    '''
 
-    elif(encoding=='onehot'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+    encodings=paramsExperiment["encodings"]
+    datafront=dataset.copy()
+    svekolone=datafront.columns
 
-    elif(encoding=='ordinal'):
-        encoder = OrdinalEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
-
-    elif(encoding=='hashing'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
-        encoder.fit_transform(data)
-    elif(encoding=='binary'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
-        encoder.fit_transform(data)
-
-    elif(encoding=='baseN'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
-        encoder.fit_transform(data)
+
+    for kolonaEncoding in encodings:
+
+        kolona = kolonaEncoding["columnName"]
+        if kolona in data.columns:
+            encoding = kolonaEncoding["encoding"]
+
+            if(kolona in kategorijskekolone):
+                if(encoding=='label'):
+                    encoder=LabelEncoder()
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            data[col]=encoder.fit_transform(data[col])
+
+                elif(encoding=='onehot'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+
+                elif(encoding=='ordinal'):
+                    encoder = OrdinalEncoder()
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            data[col]=encoder.fit_transform(data[col])
+
+                elif(encoding=='hashing'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
+                    data=encoder.fit_transform(data)
+                elif(encoding=='binary'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
+                    data=encoder.fit_transform(data)
+
+                elif(encoding=='baseN'):
+                    category_columns=[]
+                    for col in data.columns:
+                        if(data[col].dtype==np.object_):
+                            category_columns.append(col)
+                    encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
+                    data=encoder.fit_transform(data)
 
     #
     # Input - output
     #
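The per-column loop above consumes an `encodings` list posted by the backend; a hypothetical example of that payload (field names per the diff, values illustrative):

    encodings = [
        {"columnName": "gender", "encoding": "label"},
        {"columnName": "city",   "encoding": "onehot"},
    ]

Note that as written, each matched branch re-encodes every remaining object-typed column, not only the named one, so the first entry effectively picks the encoding for all categorical columns.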
@@ -210,8 +250,8 @@ def train(dataset, params, callback):
     #
     # Split into test and training sets
     #
-    test=params["randomTestSetDistribution"]
-    randomOrder = params["randomOrder"]
+    test=paramsExperiment["randomTestSetDistribution"]
+    randomOrder = paramsExperiment["randomOrder"]
     if(randomOrder):
         random=123
     else:
@@ -224,7 +264,7 @@ def train(dataset, params, callback):
     #
     #
     ###OPTIMIZERS
-
+    """
     if(params['optimizer']=='adam'):
         opt=tf.keras.optimizers.Adam(learning_rate=params['learningRate'])
 
@@ -250,7 +290,7 @@ def train(dataset, params, callback):
         opt=tf.keras.optimizers.RMSprop(learning_rate=params['learningRate'])
 
     ###REGULARIZATION
-    #regularisation={'kernelType':'l1 or l2 or l1_l2','krenelRate':default=0.01 or one of (0.0001,0.001,0.1,1,2,3) or a user-supplied value,'biasType':'','biasRate':'','activityType','activityRate'}
+    #regularisation={'kernelType':'l1 or l2 or l1_l2','kernelRate':default=0.01 or one of (0.0001,0.001,0.1,1,2,3) or a user-supplied value,'biasType':'','biasRate':'','activityType','activityRate'}
     reg=params['regularisation']
 
     ###Kernel
@@ -276,49 +316,56 @@ def train(dataset, params, callback):
         activityreg=tf.keras.regularizers.l2(reg['activityRate'])
     elif(reg['kernelType']=='l1l2'):
         activityreg=tf.keras.regularizers.l1_l2(l1=reg['activityRate'][0],l2=reg['activityRate'][1])
-
-
+    """
+    filepath=os.path.join("temp/",paramsExperiment['_id']+"_"+paramsModel['_id']+".h5")
     if(problem_type=='multi-klasifikacioni'):
         #print('multi')
         classifier=tf.keras.Sequential()
-        classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#first hidden layer + defines the input layer
-        for i in range(params['hiddenLayers']-1):#if there is more than one hidden layer
+        classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#first hidden layer + defines the input layer
+        for i in range(paramsModel['hiddenLayers']-1):#if there is more than one hidden layer
             #print(i)
-            classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))#i-th hidden layer
-        classifier.add(tf.keras.layers.Dense(units=5, activation=params['outputLayerActivationFunction']))#output layer
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-th hidden layer
+        classifier.add(tf.keras.layers.Dense(units=5, activation=paramsModel['outputLayerActivationFunction']))#output layer
 
-        classifier.compile(loss =params["lossFunction"] , optimizer = params['optimizer'] , metrics =params['metrics'])
-
-        history=classifier.fit(x_train, y_train, epochs = params['epochs'],batch_size=params['batchSize'])
+        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =['accuracy','mae','mse'])
+        history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=[callback(x_test, y_test, paramsModel['_id'])])
+
+        hist=history.history
+        #plt.plot(hist['accuracy'])
+        #plt.show()
         y_pred=classifier.predict(x_test)
         y_pred=np.argmax(y_pred,axis=1)
-        #print(y_pred.flatten())
-        #print(y_test)
+
         scores = classifier.evaluate(x_test, y_test)
         #print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
-        classifier.save("temp/"+params['name'], save_format='h5')
+
+        classifier.save(filepath, save_format='h5')
+
         #visualization in Python
         #from ann_visualizer.visualize import ann_viz;
        #ann_viz(classifier, title="My neural network")
+
+        return filepath,hist
 
     elif(problem_type=='binarni-klasifikacioni'):
         #print('*************************************************************************binarni')
         classifier=tf.keras.Sequential()
-        classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#first hidden layer + defines the input layer
-        for i in range(params['hiddenLayers']-1):#if there is more than one hidden layer
+        classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#first hidden layer + defines the input layer
+        for i in range(paramsModel['hiddenLayers']-1):#if there is more than one hidden layer
             #print(i)
-            classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))#i-th hidden layer
-        classifier.add(tf.keras.layers.Dense(units=1, activation=params['outputLayerActivationFunction']))#output layer
-
-        classifier.compile(loss =params["lossFunction"] , optimizer = params['optimizer'] , metrics =params['metrics'])
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-th hidden layer
+        classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))#output layer
 
-        history=classifier.fit(x_train, y_train, epochs = params['epochs'],batch_size=params['batchSize'])
+        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =['accuracy'])
+        history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=[callback(x_test, y_test, paramsModel['_id'])])
+        hist=history.history
 
         y_pred=classifier.predict(x_test)
         y_pred=(y_pred>=0.5).astype('int')
@@ -329,23 +376,26 @@
         #print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
         #ann_viz(classifier, title="My neural network")
-        classifier.save("temp/"+params['name'], save_format='h5')
+        classifier.save(filepath, save_format='h5')
+        return filepath,hist
 
     elif(problem_type=='regresioni'):
         classifier=tf.keras.Sequential()
-        classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#first hidden layer + defines the input layer
-        for i in range(params['hiddenLayers']-1):#if there is more than one hidden layer
+        classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#first hidden layer + defines the input layer
+        for i in range(paramsModel['hiddenLayers']-1):#if there is more than one hidden layer
             #print(i)
-            classifier.add(tf.keras.layers.Dense(units=params['hiddenLayerNeurons'], activation=params['hiddenLayerActivationFunctions'][i+1]))#i-th hidden layer
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-th hidden layer
         classifier.add(tf.keras.layers.Dense(units=1))
 
-        classifier.compile(loss =params["lossFunction"] , optimizer = params['optimizer'] , metrics =params['metrics'])
+        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =['accuracy','mae','mse'])
 
-        history=classifier.fit(x_train, y_train, epochs = params['epochs'],batch_size=params['batchSize'])
+        history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=[callback(x_test, y_test, paramsModel['_id'])])
+        hist=history.history
         y_pred=classifier.predict(x_test)
         #print(classifier.evaluate(x_test, y_test))
-
+        classifier.save(filepath, save_format='h5')
+        return filepath,hist
 
 def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
     #creating a set of all the unique classes using the actual class list
@@ -427,6 +477,11 @@
         micro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'micro')
         roc_auc_dict=roc_auc_score_multiclass(y_test, y_pred)
 '''
+def predict(experiment, predictor, model):
+    #model.predict()
+    # this was previously manageH5
+    return "TODO"
+
 def manageH5(dataset,params,h5model):
     problem_type = params["type"]
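The /predict endpoint now defers to this freshly added stub. A minimal sketch of what it might grow into, assuming the predictor document carries the raw input values and the problem type travels with the experiment (both of those field names are hypothetical; "inputColumns" and the type strings are from the diff):

    import numpy as np
    import pandas as pd

    def predict(experiment, predictor, model):
        # one raw input row, keyed by the experiment's input columns
        row = pd.DataFrame([predictor["inputValues"]],          # hypothetical field
                           columns=experiment["inputColumns"])
        y = model.predict(row)
        t = experiment["type"]                                  # hypothetical field
        if t == 'binarni-klasifikacioni':
            return str(int(y.flatten()[0] >= 0.5))
        if t == 'multi-klasifikacioni':
            return str(int(np.argmax(y, axis=1)[0]))
        return str(float(y.flatten()[0]))                       # regresioni

A production version would also have to replay the training-time preprocessing (null handling, encodings) on the incoming row before calling model.predict.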
@@ -503,7 +558,7 @@ def manageH5(dataset,params,h5model):
     h5model.summary()
     #ann_viz(h5model, title="My neural network")
 
-    h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics'])
+    h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=['accuracy'])
 
     history=h5model.fit(x2, y2, epochs = params['epochs'],batch_size=params['batchSize'])