about | summary | refs | log | tree | commit | diff
path: root/backend/microservice/api/newmlservice.py
diff options
context:
space:
mode:
author Sonja Galovic <galovicsonja@gmail.com> 2022-05-02 20:02:16 +0200
committer Sonja Galovic <galovicsonja@gmail.com> 2022-05-02 20:02:16 +0200
commit 3cea9915b981c8848b206acb2dad2b4c8a448462 (patch)
tree 5f5b913a26557eab4341fd29bc7f54e1b53ddf6c /backend/microservice/api/newmlservice.py
parent 763eaf066e3b831facd278c32f5c6241c57fa639 (diff)
parent a344f3d937e6984350b06b9ee6aa7da48b757961 (diff)
Merge branch 'redesign' of http://gitlab.pmf.kg.ac.rs/igrannonica/neuronstellar into redesign
Diffstat (limited to 'backend/microservice/api/newmlservice.py')
-rw-r--r-- backend/microservice/api/newmlservice.py 49
1 files changed, 41 insertions, 8 deletions
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 604e4d3c..f74f8386 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,6 +1,7 @@
from enum import unique
from itertools import count
import os
+from sys import breakpointhook
import pandas as pd
from sklearn import datasets, multiclass
import tensorflow as tf
@@ -38,27 +39,38 @@ def returnColumnsInfo(dataset):
unique=datafront[kolona].value_counts()
uniquevalues=[]
uniquevaluescount=[]
+ uniquevaluespercent=[]
for val, count in unique.iteritems():
- uniquevalues.append(val)
- uniquevaluescount.append(count)
+ if(val):
+ uniquevalues.append(val)
+ uniquevaluescount.append(count)
+ percent=count/rowCount
+ uniquevaluespercent.append(percent)
#print(uniquevalues)
#print(uniquevaluescount)
mean=0
median=0
minimum=0
maximum=0
+ q1=0
+ q3=0
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+ frontreturn={
+ 'columnName':kolona,
'isNumber':False,
'uniqueValues':uniquevalues,
'uniqueValuesCount':uniquevaluescount,
- 'median':float(mean),
- 'mean':float(median),
+ 'uniqueValuesPercent':uniquevaluespercent,
+ 'mean':float(mean),
+ 'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
else:
@@ -66,18 +78,39 @@ def returnColumnsInfo(dataset):
maximum=max(datafront[kolona])
mean=datafront[kolona].mean()
median=s.median(datafront[kolona].copy().dropna())
+ q1= np.percentile(datafront[kolona].copy().dropna(), 25)
+ q3= np.percentile(datafront[kolona].copy().dropna(), 75)
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+
+ #conversion to categorical
+ datafront = datafront.astype({kolona: str})
+ print(datafront.dtypes)
+ unique=datafront[kolona].value_counts()
+ uniquevaluesn=[]
+ uniquevaluescountn=[]
+ uniquevaluespercentn=[]
+ for val, count in unique.iteritems():
+ if(val):
+ uniquevaluesn.append(val)
+ uniquevaluescountn.append(count)
+ percent=count/rowCount
+ uniquevaluespercentn.append(percent)
+ frontreturn={
+ 'columnName':kolona,
'isNumber':1,
- 'uniqueValues':[],
- 'uniqueValuesCount':[],
+ 'uniqueValues':uniquevaluesn,
+ 'uniqueValuesCount':uniquevaluescountn,
+ 'uniqueValuesPercent':uniquevaluespercentn,
'mean':float(mean),
'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
NullRows = datafront[datafront.isnull().any(axis=1)]