about | summary | refs | log | tree | commit | diff
path: root/backend/microservice
diff options
context:
space:
mode:
author: TAMARA JERINIC <tamara.jerinic@gmail.com> 2022-05-02 00:56:32 +0200
committer: TAMARA JERINIC <tamara.jerinic@gmail.com> 2022-05-02 18:40:22 +0200
commit: 61d8c3e8a88d0787f34b03fcd4fe2b533c571e1b (patch)
tree: 940ee9e2c4123ae8e61f7a9a36aafc1adcc1ae06 /backend/microservice
parent: 7cabfa3d4220d840b47f268ffbc31901cae52167 (diff)
Dodato računanje novih statističkih podataka u mlservice fajl, usklađeno sa frontom.
Diffstat (limited to 'backend/microservice')
-rw-r--r-- backend/microservice/api/controller.py 18
-rw-r--r-- backend/microservice/api/newmlservice.py 49
2 files changed, 53 insertions, 14 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 9b83b8e7..fad6e181 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -107,26 +107,32 @@ def predict():
@app.route('/preprocess',methods=['POST'])
def returnColumnsInfo():
print("********************************PREPROCESS*******************************")
+
dataset = json.loads(request.form["dataset"])
file = request.files.get("file")
data=pd.read_csv(file)
-
- #dataset={}
+ '''
#f = request.json['filepath']
#data=pd.read_csv(f)
-
+ dataset={}
+ '''
preprocess = newmlservice.returnColumnsInfo(data)
#samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti
+
for col in preprocess["columnInfo"]:
- col["uniqueValues"] = col["uniqueValues"][0:10]
- col["uniqueValuesCount"] = col["uniqueValuesCount"][0:10]
+ col["uniqueValues"] = col["uniqueValues"][0:5]
+ col["uniqueValuesCount"] = col["uniqueValuesCount"][0:5]
+ col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:5]
dataset["columnInfo"] = preprocess["columnInfo"]
dataset["nullCols"] = preprocess["allNullColl"]
dataset["nullRows"] = preprocess["allNullRows"]
dataset["colCount"] = preprocess["colCount"]
dataset["rowCount"] = preprocess["rowCount"]
dataset["isPreProcess"] = True
- print(dataset)
+ #print(dataset)
+
+
+
return jsonify(dataset)
print("App loaded.")
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 604e4d3c..f74f8386 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,6 +1,7 @@
from enum import unique
from itertools import count
import os
+from sys import breakpointhook
import pandas as pd
from sklearn import datasets, multiclass
import tensorflow as tf
@@ -38,27 +39,38 @@ def returnColumnsInfo(dataset):
unique=datafront[kolona].value_counts()
uniquevalues=[]
uniquevaluescount=[]
+ uniquevaluespercent=[]
for val, count in unique.iteritems():
- uniquevalues.append(val)
- uniquevaluescount.append(count)
+ if(val):
+ uniquevalues.append(val)
+ uniquevaluescount.append(count)
+ percent=count/rowCount
+ uniquevaluespercent.append(percent)
#print(uniquevalues)
#print(uniquevaluescount)
mean=0
median=0
minimum=0
maximum=0
+ q1=0
+ q3=0
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+ frontreturn={
+ 'columnName':kolona,
'isNumber':False,
'uniqueValues':uniquevalues,
'uniqueValuesCount':uniquevaluescount,
- 'median':float(mean),
- 'mean':float(median),
+ 'uniqueValuesPercent':uniquevaluespercent,
+ 'mean':float(mean),
+ 'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
else:
@@ -66,18 +78,39 @@ def returnColumnsInfo(dataset):
maximum=max(datafront[kolona])
mean=datafront[kolona].mean()
median=s.median(datafront[kolona].copy().dropna())
+ q1= np.percentile(datafront[kolona].copy().dropna(), 25)
+ q3= np.percentile(datafront[kolona].copy().dropna(), 75)
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+
+ #pretvaranje u kategorijsku
+ datafront = datafront.astype({kolona: str})
+ print(datafront.dtypes)
+ unique=datafront[kolona].value_counts()
+ uniquevaluesn=[]
+ uniquevaluescountn=[]
+ uniquevaluespercentn=[]
+ for val, count in unique.iteritems():
+ if(val):
+ uniquevaluesn.append(val)
+ uniquevaluescountn.append(count)
+ percent=count/rowCount
+ uniquevaluespercentn.append(percent)
+ frontreturn={
+ 'columnName':kolona,
'isNumber':1,
- 'uniqueValues':[],
- 'uniqueValuesCount':[],
+ 'uniqueValues':uniquevaluesn,
+ 'uniqueValuesCount':uniquevaluescountn,
+ 'uniqueValuesPercent':uniquevaluespercentn,
'mean':float(mean),
'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
NullRows = datafront[datafront.isnull().any(axis=1)]