diff options
Diffstat (limited to 'backend/microservice/api')
-rw-r--r-- | backend/microservice/api/controller.py | 23 | ||||
-rw-r--r-- | backend/microservice/api/newmlservice.py | 12 |
2 files changed, 21 insertions, 14 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index 1b17f727..ff803358 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -4,6 +4,7 @@ import ml_socket import newmlservice import tensorflow as tf import pandas as pd +import json app = flask.Flask(__name__) app.config["DEBUG"] = True @@ -41,14 +42,20 @@ def predict(): @app.route('/preprocess',methods=['POST']) def returnColumnsInfo(): - f=request.json['filepathcolinfo'] - dataset=pd.read_csv(f) - - result=newmlservice.returnColumnsInfo(dataset) - - return jsonify(result) - - + print("********************************PREPROCESS*******************************") + dataset = json.loads(request.form["dataset"]) + file = request.files.get("file") + data=pd.read_csv(file) + preprocess = newmlservice.returnColumnsInfo(data) + #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti + for col in preprocess["columnInfo"]: + col["uniqueValues"] = col["uniqueValues"][0:10] + dataset["columnInfo"] = preprocess["columnInfo"] + dataset["nullCols"] = preprocess["allNullColl"] + dataset["nullRows"] = preprocess["allNullRows"] + dataset["isPreProcess"] = True + print(dataset) + return jsonify(dataset) print("App loaded.") ml_socket.start() diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py index 2ea31702..02f2ad6d 100644 --- a/backend/microservice/api/newmlservice.py +++ b/backend/microservice/api/newmlservice.py @@ -21,7 +21,7 @@ from sklearn.model_selection import train_test_split from dataclasses import dataclass import statistics as s from sklearn.metrics import roc_auc_score -from ann_visualizer.visualize import ann_viz; +#from ann_visualizer.visualize import ann_viz; def returnColumnsInfo(dataset): dict=[] datafront=dataset.copy() @@ -43,7 +43,7 @@ def returnColumnsInfo(dataset): 'uniqueValues':uniquevalues.tolist(), 'median':float(mean), 'mean':float(median), - 'numNulls':float(nullCount), + 'numNulls':int(nullCount), 'min':float(minimum), 'max':float(maximum) } @@ -52,7 +52,7 @@ def returnColumnsInfo(dataset): minimum=min(datafront[kolona]) maximum=max(datafront[kolona]) mean=datafront[kolona].mean() - median=s.median(datafront[kolona]) + median=s.median(datafront[kolona].copy().dropna()) nullCount=datafront[kolona].isnull().sum() if(nullCount>0): allNullCols=allNullCols+1 @@ -61,7 +61,7 @@ def returnColumnsInfo(dataset): 'uniqueValues':[], 'mean':float(mean), 'median':float(median), - 'numNulls':float(nullCount), + 'numNulls':int(nullCount), 'min':float(minimum), 'max':float(maximum) } @@ -71,7 +71,7 @@ def returnColumnsInfo(dataset): #print(len(NullRows)) allNullRows=len(NullRows) - return {'columnInfo':dict,'allNullColl':allNullCols,'allNullRows':allNullRows} + return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows)} @dataclass class TrainingResultClassification: @@ -433,7 +433,7 @@ def manageH5(dataset,params,h5model): #print(x2) y2 = data[output_column].values h5model.summary() - ann_viz(h5model, title="My neural network") + #ann_viz(h5model, title="My neural network") h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics']) |