diff options
author | Danijel Andjelkovic <adanijel99@gmail.com> | 2022-04-07 13:25:02 +0200 |
---|---|---|
committer | Danijel Andjelkovic <adanijel99@gmail.com> | 2022-04-07 13:25:02 +0200 |
commit | 724000d1dc30f456d77d39a233a309bb9e36f5a9 (patch) | |
tree | 3e77444701c1def532ddbbb2905e20fc2d09303c /backend | |
parent | ba4eba6116cba39fab60a7ade8cb9f436dee0bca (diff) |
Ispravio ML kontroler backend i frontend tako da je dataset sinhronizovan i osposobio preprocesiranje.
Diffstat (limited to 'backend')
-rw-r--r-- | backend/api/api/Controllers/DatasetController.cs | 2 | ||||
-rw-r--r-- | backend/api/api/Models/ColumnInfo.cs | 2 | ||||
-rw-r--r-- | backend/api/api/Models/Dataset.cs | 4 | ||||
-rw-r--r-- | backend/api/api/Services/MlConnectionService.cs | 3 | ||||
-rw-r--r-- | backend/microservice/api/controller.py | 23 | ||||
-rw-r--r-- | backend/microservice/api/newmlservice.py | 12 |
6 files changed, 28 insertions, 18 deletions
diff --git a/backend/api/api/Controllers/DatasetController.cs b/backend/api/api/Controllers/DatasetController.cs index 6eb1b9e6..5f01c867 100644 --- a/backend/api/api/Controllers/DatasetController.cs +++ b/backend/api/api/Controllers/DatasetController.cs @@ -148,8 +148,6 @@ namespace api.Controllers /*za pretragu vratiti dataset koji je public public ActionResult<Dataset> Get(string name) { - - var dataset = _datasetService.GetOneDataset(username, name); if (dataset == null) diff --git a/backend/api/api/Models/ColumnInfo.cs b/backend/api/api/Models/ColumnInfo.cs index ee4cee0d..99418732 100644 --- a/backend/api/api/Models/ColumnInfo.cs +++ b/backend/api/api/Models/ColumnInfo.cs @@ -6,6 +6,8 @@ public bool isNumber { get; set; } public int numNulls { get; set; } public float mean { get; set; } + public float min { get; set; } + public float max { get; set; } public float median { get; set; } public string[] uniqueValues { get; set; } diff --git a/backend/api/api/Models/Dataset.cs b/backend/api/api/Models/Dataset.cs index 2b3efa3c..12dcfa08 100644 --- a/backend/api/api/Models/Dataset.cs +++ b/backend/api/api/Models/Dataset.cs @@ -6,6 +6,7 @@ namespace api.Models { public class Dataset { + public Dataset() { } public string username { get; set; } [BsonId] @@ -24,7 +25,8 @@ namespace api.Models public bool hasHeader { get; set; } public ColumnInfo[] columnInfo { get; set; } - public int totalNulls; + public int nullCols { get; set; } + public int nullRows { get; set; } public bool isPreProcess { get; set; } } diff --git a/backend/api/api/Services/MlConnectionService.cs b/backend/api/api/Services/MlConnectionService.cs index 66f7882a..3df22c4f 100644 --- a/backend/api/api/Services/MlConnectionService.cs +++ b/backend/api/api/Services/MlConnectionService.cs @@ -26,12 +26,13 @@ namespace api.Services } public async Task PreProcess(Dataset dataset,string filePath)//(Dataset dataset,byte[] file,string filename) { - var request=new RestRequest("preprocess", 
Method.Post);//USKLADITI SA ML API + var request=new RestRequest("preprocess", Method.Post); request.AddParameter("dataset", JsonConvert.SerializeObject(dataset)); //request.AddFile("file", file,filename); request.AddFile("file", filePath); request.AddHeader("Content-Type", "multipart/form-data"); var result=await this.client.ExecuteAsync(request); + Dataset newDataset = JsonConvert.DeserializeObject<Dataset>(result.Content); newDataset.isPreProcess = true; _datasetService.Update(newDataset); diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index 1b17f727..ff803358 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -4,6 +4,7 @@ import ml_socket import newmlservice import tensorflow as tf import pandas as pd +import json app = flask.Flask(__name__) app.config["DEBUG"] = True @@ -41,14 +42,20 @@ def predict(): @app.route('/preprocess',methods=['POST']) def returnColumnsInfo(): - f=request.json['filepathcolinfo'] - dataset=pd.read_csv(f) - - result=newmlservice.returnColumnsInfo(dataset) - - return jsonify(result) - - + print("********************************PREPROCESS*******************************") + dataset = json.loads(request.form["dataset"]) + file = request.files.get("file") + data=pd.read_csv(file) + preprocess = newmlservice.returnColumnsInfo(data) + #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti + for col in preprocess["columnInfo"]: + col["uniqueValues"] = col["uniqueValues"][0:10] + dataset["columnInfo"] = preprocess["columnInfo"] + dataset["nullCols"] = preprocess["allNullColl"] + dataset["nullRows"] = preprocess["allNullRows"] + dataset["isPreProcess"] = True + print(dataset) + return jsonify(dataset) print("App loaded.") ml_socket.start() diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py index 2ea31702..02f2ad6d 100644 --- 
a/backend/microservice/api/newmlservice.py +++ b/backend/microservice/api/newmlservice.py @@ -21,7 +21,7 @@ from sklearn.model_selection import train_test_split from dataclasses import dataclass import statistics as s from sklearn.metrics import roc_auc_score -from ann_visualizer.visualize import ann_viz; +#from ann_visualizer.visualize import ann_viz; def returnColumnsInfo(dataset): dict=[] datafront=dataset.copy() @@ -43,7 +43,7 @@ def returnColumnsInfo(dataset): 'uniqueValues':uniquevalues.tolist(), 'median':float(mean), 'mean':float(median), - 'numNulls':float(nullCount), + 'numNulls':int(nullCount), 'min':float(minimum), 'max':float(maximum) } @@ -52,7 +52,7 @@ def returnColumnsInfo(dataset): minimum=min(datafront[kolona]) maximum=max(datafront[kolona]) mean=datafront[kolona].mean() - median=s.median(datafront[kolona]) + median=s.median(datafront[kolona].copy().dropna()) nullCount=datafront[kolona].isnull().sum() if(nullCount>0): allNullCols=allNullCols+1 @@ -61,7 +61,7 @@ def returnColumnsInfo(dataset): 'uniqueValues':[], 'mean':float(mean), 'median':float(median), - 'numNulls':float(nullCount), + 'numNulls':int(nullCount), 'min':float(minimum), 'max':float(maximum) } @@ -71,7 +71,7 @@ def returnColumnsInfo(dataset): #print(len(NullRows)) allNullRows=len(NullRows) - return {'columnInfo':dict,'allNullColl':allNullCols,'allNullRows':allNullRows} + return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows)} @dataclass class TrainingResultClassification: @@ -433,7 +433,7 @@ def manageH5(dataset,params,h5model): #print(x2) y2 = data[output_column].values h5model.summary() - ann_viz(h5model, title="My neural network") + #ann_viz(h5model, title="My neural network") h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics']) |