# backend/microservice/api/controller.py
# Flask entry point for the ML microservice: exposes /train and /predict,
# delegating the actual work to ml_service and progress reporting to ml_socket.
import flask
from flask import request, jsonify
import ml_socket
import ml_service
import tensorflow as tf
import pandas as pd

app = flask.Flask(__name__)
app.config["DEBUG"] = True
app.config["SERVER_NAME"] = "127.0.0.1:5543"


class train_callback(tf.keras.callbacks.Callback):
    """Keras callback used to report training progress per epoch.

    Instantiated by ml_service.train() with the held-out test split so the
    commented-out evaluation line can be re-enabled later.
    """

    def __init__(self, x_test, y_test):
        # FIX: Keras callbacks must call super().__init__() so the framework
        # can attach self.model / self.params before training starts.
        super().__init__()
        self.x_test = x_test
        self.y_test = y_test

    def on_epoch_end(self, epoch, logs=None):
        # TODO: push progress over the websocket instead of stdout.
        print(epoch)
        # print('Evaluation: ', self.model.evaluate(self.x_test, self.y_test), "\n")
        # (number of returned values depends on the chosen metrics; loss is default)


@app.route('/train', methods=['POST'])
def train():
    """Train a model.

    Expects JSON body: {"dataset": <path to CSV>, "model": <params dict>}.
    Returns the serialized TrainingResult produced by ml_service.train().
    """
    print("******************************TRAIN*************************************************")
    f = request.json["dataset"]
    dataset = pd.read_csv(f)
    result = ml_service.train(dataset, request.json["model"], train_callback)
    print(result)
    return jsonify(result)


@app.route('/predict', methods=['POST'])
def predict():
    """Run a previously saved model over a CSV file.

    Expects JSON body: {"filepath": <path to CSV>, "modelpath": <saved model>}.
    FIX: the original handler loaded the CSV and then fell off the end without
    returning anything, which makes Flask raise "view function did not return
    a valid response" on every request.
    """
    f = request.json['filepath']
    dataset = pd.read_csv(f)
    m = request.json['modelpath']
    model = tf.keras.models.load_model(m)
    # NOTE(review): assumes the CSV columns already match the model's input
    # layout (same preprocessing/scaling as training) — TODO confirm with the
    # caller, ml_service.train() scales and encodes before fitting.
    predictions = model.predict(dataset.values)
    return jsonify(predictions.tolist())
# ---- controller.py (module entry point) ----
print("App loaded.")
ml_socket.start()
app.run()


# ---- backend/microservice/api/ml_service.py ----
# Trains a binary classifier from a pandas DataFrame according to a params
# dict supplied by the backend, and returns the evaluation metrics.
import pandas as pd
import tensorflow as tf
import keras
import numpy as np
import csv
import json
import h5py
import sklearn.metrics as sm
from statistics import mode
from typing_extensions import Self
from copyreg import constructor
from flask import request, jsonify, render_template
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from dataclasses import dataclass


@dataclass
class TrainingResult:
    """Evaluation metrics computed on the held-out test split after training."""
    accuracy: float
    precision: float
    recall: float
    tn: float   # confusion-matrix counts (binary classification)
    fp: float
    fn: float
    tp: float
    specificity: float
    f1: float
    mse: float
    mae: float
    mape: float
    rmse: float
    fpr: float  # ROC curve points (lists, see train() return)
    tpr: float


def train(dataset, params, callback):
    """Train a binary classifier on `dataset` as described by `params`.

    Args:
        dataset: pandas DataFrame holding the raw CSV data.
        params: dict with keys "inputColumns", "columnToPredict", "nullValues",
            "nullValuesReplacers", "encoding", "randomTestSetDistribution",
            "randomOrder", "hiddenLayerNeurons", "hiddenLayerActivationFunctions",
            "outputLayerActivationFunction", "optimizer", "metrics",
            "lossFunction", "batchSize", "epochs", "_id".
        callback: tf.keras.callbacks.Callback subclass, constructed with
            (x_test, y_test) and passed to Model.fit.

    Returns:
        TrainingResult with metrics computed on the test split.

    Side effects: saves the trained model to "temp/<params['_id']>" in H5 format.
    """
    # Keep only the requested input columns plus the target column.
    data = pd.DataFrame()
    for col in params["inputColumns"]:
        data[col] = dataset[col]
    output_column = params["columnToPredict"]
    data[output_column] = dataset[output_column]

    # Null handling. Example replacer structure (from the backend):
    #   [{"column": "Embarked", "value": "C", "deleteRow": false, "deleteCol": true}, ...]
    null_value_options = params["nullValues"]
    null_values_replacers = params["nullValuesReplacers"]
    if null_value_options == 'replace':
        print("replace null")  # TODO: apply null_values_replacers
    elif null_value_options == 'delete_rows':
        data = data.dropna()
    elif null_value_options == 'delete_columns':
        # FIX: this branch previously ran data.dropna() (drops *rows*),
        # identical to 'delete_rows'. Drop the columns containing nulls.
        data = data.dropna(axis=1)

    # Drop ID-like text columns: a string column with one unique value per
    # row carries no predictive signal and would explode one-hot encoding.
    num_rows = data.shape[0]
    for col in data.columns:
        if (data[col].nunique() == num_rows) and (data[col].dtype == np.object_):
            data.pop(col)

    # Encode remaining categorical (string) columns.
    encoding = params["encoding"]
    if encoding == 'label':
        encoder = LabelEncoder()
        for col in data.columns:
            if data[col].dtype == np.object_:
                data[col] = encoder.fit_transform(data[col])
    elif encoding == 'onehot':
        category_columns = [col for col in data.columns if data[col].dtype == np.object_]
        data = pd.get_dummies(data, columns=category_columns, prefix=category_columns)

    # Split features / target.
    x_columns = [col for col in data.columns if col != output_column]
    x = data[x_columns].values
    y = data[output_column].values

    # Train/test split. randomOrder toggles between two fixed seeds so runs
    # stay reproducible either way.
    test = params["randomTestSetDistribution"]
    random = 50 if params["randomOrder"] else 0
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test, random_state=random)

    # Scale features; fit the scaler on the training split only (no leakage).
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_test = scaler.transform(x_test)
    x_train = scaler.transform(x_train)

    # Build and train the model: a stack of Dense hidden layers plus a single
    # output unit (binary classification — see the 0.5 threshold below).
    classifier = tf.keras.Sequential()
    hidden_layer_neurons = params["hiddenLayerNeurons"]
    for func in params["hiddenLayerActivationFunctions"]:
        classifier.add(tf.keras.layers.Dense(units=hidden_layer_neurons, activation=func))
    output_func = params["outputLayerActivationFunction"]
    classifier.add(tf.keras.layers.Dense(units=1, activation=output_func))
    optimizer = params["optimizer"]
    metrics = params['metrics']
    loss_func = params["lossFunction"]
    classifier.compile(optimizer=optimizer, loss=loss_func, metrics=metrics)
    batch_size = params["batchSize"]
    epochs = params["epochs"]
    history = classifier.fit(
        x_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        # FIX: Model.fit expects a *list* of callbacks, not a bare instance.
        callbacks=[callback(x_test, y_test)],
        validation_split=0.2)  # TODO: params["validationSplit"]

    # Predict on the test split and threshold probabilities at 0.5.
    y_pred = classifier.predict(x_test)
    y_pred = (y_pred >= 0.5).astype('int')
    y_pred = y_pred.flatten()
    result = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
    model_name = params['_id']
    classifier.save("temp/" + model_name, save_format='h5')

    # Metrics on the test split.
    print(result)
    accuracy = float(sm.accuracy_score(y_test, y_pred))
    precision = float(sm.precision_score(y_test, y_pred))
    recall = float(sm.recall_score(y_test, y_pred))
    tn, fp, fn, tp = sm.confusion_matrix(y_test, y_pred).ravel()
    specificity = float(tn / (tn + fp))
    f1 = float(sm.f1_score(y_test, y_pred))
    mse = float(sm.mean_squared_error(y_test, y_pred))
    mae = float(sm.mean_absolute_error(y_test, y_pred))
    mape = float(sm.mean_absolute_percentage_error(y_test, y_pred))
    rmse = float(np.sqrt(sm.mean_squared_error(y_test, y_pred)))
    fpr, tpr, _ = sm.roc_curve(y_test, y_pred)
    # TODO: upload the trained model back to the backend.
    return TrainingResult(accuracy, precision, recall, float(tn), float(fp),
                          float(fn), float(tp), specificity, f1, mse, mae,
                          mape, rmse, fpr.tolist(), tpr.tolist())


# ---- backend/microservice/api/ml_socket.py ----
# Minimal websocket server used to push training progress to the frontend.
import asyncio
import websockets
import json

# Most recently connected client; written by handler(), read by send().
_client = None


def get_or_create_eventloop():
    """Return the current thread's event loop, creating one if missing."""
    try:
        return asyncio.get_event_loop()
    except RuntimeError as ex:
        if "There is no current event loop in thread" in str(ex):
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return asyncio.get_event_loop()


async def handler(websocket, path):
    """Per-connection handler: remember the client and log its first message."""
    global _client
    _client = websocket
    # data = json.loads(await websocket.recv())
    # print(data['test'])
    msg = await websocket.recv()
    print(msg)


def start():
    """Start the websocket server on localhost:5027 in the background.

    FIX: this was `async def`, but the controller invokes it as a plain
    `ml_socket.start()`, which only created a never-awaited coroutine — the
    server never started. It also called run_until_complete/run_forever from
    inside a coroutine, which is invalid. Now: start the server on this
    thread's loop, then run the loop in a daemon thread so the Flask
    app.run() call that follows can proceed.
    """
    import threading
    loop = get_or_create_eventloop()
    start_server = websockets.serve(handler, "localhost", 5027)
    print('Websocket starting...')
    loop.run_until_complete(start_server)
    threading.Thread(target=loop.run_forever, daemon=True).start()


async def send(msg):
    """Send `msg` to the most recently connected client, if any.

    FIX: the original referenced an undefined global `websocket`, so every
    call raised NameError. The connection is now tracked in `_client`.
    """
    if _client is not None:
        await _client.send(msg)
No newline at end of file -- cgit v1.2.3