about summary refs log tree commit diff
path: root/backend
diff options
context:
space:
mode:
Diffstat (limited to 'backend')
-rw-r--r-- backend/api/api/Models/ColumnInfo.cs 12
-rw-r--r-- backend/microservice/api/controller.py 18
-rw-r--r-- backend/microservice/api/newmlservice.py 49
3 files changed, 64 insertions, 15 deletions
diff --git a/backend/api/api/Models/ColumnInfo.cs b/backend/api/api/Models/ColumnInfo.cs
index be3c7251..f2cae104 100644
--- a/backend/api/api/Models/ColumnInfo.cs
+++ b/backend/api/api/Models/ColumnInfo.cs
@@ -4,7 +4,7 @@
{
public ColumnInfo() { }
- public ColumnInfo(string columnName, string columnType, bool isNumber, int numNulls, float mean, float min, float max, float median, string[] uniqueValues)
+ public ColumnInfo(string columnName, string columnType, bool isNumber, int numNulls, float mean, float min, float max, float median,float q1,float q3, string[] uniqueValues, int[]uniqueValuesCount, float[] uniqueValuesPercent)
{
this.columnName = columnName;
this.columnType = columnType;
@@ -13,8 +13,12 @@
this.mean = mean;
this.min = min;
this.max = max;
+ this.q1 = q1;
+ this.q3 = q3;
this.median = median;
this.uniqueValues = uniqueValues;
+ this.uniqueValuesPercent = uniqueValuesPercent;
+ this.uniqueValuesCount = uniqueValuesCount;
}
public string columnName { get; set; }
@@ -25,7 +29,13 @@
public float min { get; set; }
public float max { get; set; }
public float median { get; set; }
+ public float q1 { get; set; }
+ public float q3 { get; set; }
+
public string[] uniqueValues { get; set; }
+ public int[] uniqueValuesCount { get; set; }
+ public float[] uniqueValuesPercent { get; set; }
+
}
}
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 9b83b8e7..fad6e181 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -107,26 +107,32 @@ def predict():
@app.route('/preprocess',methods=['POST'])
def returnColumnsInfo():
print("********************************PREPROCESS*******************************")
+
dataset = json.loads(request.form["dataset"])
file = request.files.get("file")
data=pd.read_csv(file)
-
- #dataset={}
+ '''
#f = request.json['filepath']
#data=pd.read_csv(f)
-
+ dataset={}
+ '''
preprocess = newmlservice.returnColumnsInfo(data)
#samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti
+
for col in preprocess["columnInfo"]:
- col["uniqueValues"] = col["uniqueValues"][0:10]
- col["uniqueValuesCount"] = col["uniqueValuesCount"][0:10]
+ col["uniqueValues"] = col["uniqueValues"][0:5]
+ col["uniqueValuesCount"] = col["uniqueValuesCount"][0:5]
+ col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:5]
dataset["columnInfo"] = preprocess["columnInfo"]
dataset["nullCols"] = preprocess["allNullColl"]
dataset["nullRows"] = preprocess["allNullRows"]
dataset["colCount"] = preprocess["colCount"]
dataset["rowCount"] = preprocess["rowCount"]
dataset["isPreProcess"] = True
- print(dataset)
+ #print(dataset)
+
+
+
return jsonify(dataset)
print("App loaded.")
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 604e4d3c..f74f8386 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,6 +1,7 @@
from enum import unique
from itertools import count
import os
+from sys import breakpointhook
import pandas as pd
from sklearn import datasets, multiclass
import tensorflow as tf
@@ -38,27 +39,38 @@ def returnColumnsInfo(dataset):
unique=datafront[kolona].value_counts()
uniquevalues=[]
uniquevaluescount=[]
+ uniquevaluespercent=[]
for val, count in unique.iteritems():
- uniquevalues.append(val)
- uniquevaluescount.append(count)
+ if(val):
+ uniquevalues.append(val)
+ uniquevaluescount.append(count)
+ percent=count/rowCount
+ uniquevaluespercent.append(percent)
#print(uniquevalues)
#print(uniquevaluescount)
mean=0
median=0
minimum=0
maximum=0
+ q1=0
+ q3=0
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+ frontreturn={
+ 'columnName':kolona,
'isNumber':False,
'uniqueValues':uniquevalues,
'uniqueValuesCount':uniquevaluescount,
- 'median':float(mean),
- 'mean':float(median),
+ 'uniqueValuesPercent':uniquevaluespercent,
+ 'mean':float(mean),
+ 'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
else:
@@ -66,18 +78,39 @@ def returnColumnsInfo(dataset):
maximum=max(datafront[kolona])
mean=datafront[kolona].mean()
median=s.median(datafront[kolona].copy().dropna())
+ q1= np.percentile(datafront[kolona].copy().dropna(), 25)
+ q3= np.percentile(datafront[kolona].copy().dropna(), 75)
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+
+ #pretvaranje u kategorijsku
+ datafront = datafront.astype({kolona: str})
+ print(datafront.dtypes)
+ unique=datafront[kolona].value_counts()
+ uniquevaluesn=[]
+ uniquevaluescountn=[]
+ uniquevaluespercentn=[]
+ for val, count in unique.iteritems():
+ if(val):
+ uniquevaluesn.append(val)
+ uniquevaluescountn.append(count)
+ percent=count/rowCount
+ uniquevaluespercentn.append(percent)
+ frontreturn={
+ 'columnName':kolona,
'isNumber':1,
- 'uniqueValues':[],
- 'uniqueValuesCount':[],
+ 'uniqueValues':uniquevaluesn,
+ 'uniqueValuesCount':uniquevaluescountn,
+ 'uniqueValuesPercent':uniquevaluespercentn,
'mean':float(mean),
'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
NullRows = datafront[datafront.isnull().any(axis=1)]