aboutsummaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
authorDanijel Andjelkovic <adanijel99@gmail.com>2022-04-07 13:25:02 +0200
committerDanijel Andjelkovic <adanijel99@gmail.com>2022-04-07 13:25:02 +0200
commit724000d1dc30f456d77d39a233a309bb9e36f5a9 (patch)
tree3e77444701c1def532ddbbb2905e20fc2d09303c /backend
parentba4eba6116cba39fab60a7ade8cb9f436dee0bca (diff)
Ispravio mlkontroler backend i frontend tako da je dataset sinhronizovan i osposobio preprocesiranje.
Diffstat (limited to 'backend')
-rw-r--r--backend/api/api/Controllers/DatasetController.cs2
-rw-r--r--backend/api/api/Models/ColumnInfo.cs2
-rw-r--r--backend/api/api/Models/Dataset.cs4
-rw-r--r--backend/api/api/Services/MlConnectionService.cs3
-rw-r--r--backend/microservice/api/controller.py23
-rw-r--r--backend/microservice/api/newmlservice.py12
6 files changed, 28 insertions, 18 deletions
diff --git a/backend/api/api/Controllers/DatasetController.cs b/backend/api/api/Controllers/DatasetController.cs
index 6eb1b9e6..5f01c867 100644
--- a/backend/api/api/Controllers/DatasetController.cs
+++ b/backend/api/api/Controllers/DatasetController.cs
@@ -148,8 +148,6 @@ namespace api.Controllers
/*za pretragu vratiti dataset koji je public
public ActionResult<Dataset> Get(string name)
{
-
-
var dataset = _datasetService.GetOneDataset(username, name);
if (dataset == null)
diff --git a/backend/api/api/Models/ColumnInfo.cs b/backend/api/api/Models/ColumnInfo.cs
index ee4cee0d..99418732 100644
--- a/backend/api/api/Models/ColumnInfo.cs
+++ b/backend/api/api/Models/ColumnInfo.cs
@@ -6,6 +6,8 @@
public bool isNumber { get; set; }
public int numNulls { get; set; }
public float mean { get; set; }
+ public float min { get; set; }
+ public float max { get; set; }
public float median { get; set; }
public string[] uniqueValues { get; set; }
diff --git a/backend/api/api/Models/Dataset.cs b/backend/api/api/Models/Dataset.cs
index 2b3efa3c..12dcfa08 100644
--- a/backend/api/api/Models/Dataset.cs
+++ b/backend/api/api/Models/Dataset.cs
@@ -6,6 +6,7 @@ namespace api.Models
{
public class Dataset
{
+ public Dataset() { }
public string username { get; set; }
[BsonId]
@@ -24,7 +25,8 @@ namespace api.Models
public bool hasHeader { get; set; }
public ColumnInfo[] columnInfo { get; set; }
- public int totalNulls;
+ public int nullCols { get; set; }
+ public int nullRows { get; set; }
public bool isPreProcess { get; set; }
}
diff --git a/backend/api/api/Services/MlConnectionService.cs b/backend/api/api/Services/MlConnectionService.cs
index 66f7882a..3df22c4f 100644
--- a/backend/api/api/Services/MlConnectionService.cs
+++ b/backend/api/api/Services/MlConnectionService.cs
@@ -26,12 +26,13 @@ namespace api.Services
}
public async Task PreProcess(Dataset dataset,string filePath)//(Dataset dataset,byte[] file,string filename)
{
- var request=new RestRequest("preprocess", Method.Post);//USKLADITI SA ML API
+ var request=new RestRequest("preprocess", Method.Post);
request.AddParameter("dataset", JsonConvert.SerializeObject(dataset));
//request.AddFile("file", file,filename);
request.AddFile("file", filePath);
request.AddHeader("Content-Type", "multipart/form-data");
var result=await this.client.ExecuteAsync(request);
+
Dataset newDataset = JsonConvert.DeserializeObject<Dataset>(result.Content);
newDataset.isPreProcess = true;
_datasetService.Update(newDataset);
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 1b17f727..ff803358 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -4,6 +4,7 @@ import ml_socket
import newmlservice
import tensorflow as tf
import pandas as pd
+import json
app = flask.Flask(__name__)
app.config["DEBUG"] = True
@@ -41,14 +42,20 @@ def predict():
@app.route('/preprocess',methods=['POST'])
def returnColumnsInfo():
- f=request.json['filepathcolinfo']
- dataset=pd.read_csv(f)
-
- result=newmlservice.returnColumnsInfo(dataset)
-
- return jsonify(result)
-
-
+ print("********************************PREPROCESS*******************************")
+ dataset = json.loads(request.form["dataset"])
+ file = request.files.get("file")
+ data=pd.read_csv(file)
+ preprocess = newmlservice.returnColumnsInfo(data)
+ #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti
+ for col in preprocess["columnInfo"]:
+ col["uniqueValues"] = col["uniqueValues"][0:10]
+ dataset["columnInfo"] = preprocess["columnInfo"]
+ dataset["nullCols"] = preprocess["allNullColl"]
+ dataset["nullRows"] = preprocess["allNullRows"]
+ dataset["isPreProcess"] = True
+ print(dataset)
+ return jsonify(dataset)
print("App loaded.")
ml_socket.start()
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 2ea31702..02f2ad6d 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -21,7 +21,7 @@ from sklearn.model_selection import train_test_split
from dataclasses import dataclass
import statistics as s
from sklearn.metrics import roc_auc_score
-from ann_visualizer.visualize import ann_viz;
+#from ann_visualizer.visualize import ann_viz;
def returnColumnsInfo(dataset):
dict=[]
datafront=dataset.copy()
@@ -43,7 +43,7 @@ def returnColumnsInfo(dataset):
'uniqueValues':uniquevalues.tolist(),
'median':float(mean),
'mean':float(median),
- 'numNulls':float(nullCount),
+ 'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum)
}
@@ -52,7 +52,7 @@ def returnColumnsInfo(dataset):
minimum=min(datafront[kolona])
maximum=max(datafront[kolona])
mean=datafront[kolona].mean()
- median=s.median(datafront[kolona])
+ median=s.median(datafront[kolona].copy().dropna())
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
@@ -61,7 +61,7 @@ def returnColumnsInfo(dataset):
'uniqueValues':[],
'mean':float(mean),
'median':float(median),
- 'numNulls':float(nullCount),
+ 'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum)
}
@@ -71,7 +71,7 @@ def returnColumnsInfo(dataset):
#print(len(NullRows))
allNullRows=len(NullRows)
- return {'columnInfo':dict,'allNullColl':allNullCols,'allNullRows':allNullRows}
+ return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows)}
@dataclass
class TrainingResultClassification:
@@ -433,7 +433,7 @@ def manageH5(dataset,params,h5model):
#print(x2)
y2 = data[output_column].values
h5model.summary()
- ann_viz(h5model, title="My neural network")
+ #ann_viz(h5model, title="My neural network")
h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics'])