about | summary | refs | log | tree | commit | diff
path: root/backend/microservice/api
diff options
context:
space:
mode:
author TAMARA JERINIC <tamara.jerinic@gmail.com> 2022-04-11 10:40:23 +0200
committer TAMARA JERINIC <tamara.jerinic@gmail.com> 2022-04-11 10:41:04 +0200
commit 65acafb6c148e270f557454fff69d96bd0c95456 (patch)
tree 770967f2296f650477aec406be7af430618c607d /backend/microservice/api
parent 9d22837d13af697e6650247a4d6b758e283fcfc1 (diff)
Dodati su podaci o broju redova i kolona.
Diffstat (limited to 'backend/microservice/api')
-rw-r--r-- backend/microservice/api/controller.py 7
-rw-r--r-- backend/microservice/api/newmlservice.py 12
2 files changed, 14 insertions, 5 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index ff803358..83741ce1 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -46,6 +46,11 @@ def returnColumnsInfo():
dataset = json.loads(request.form["dataset"])
file = request.files.get("file")
data=pd.read_csv(file)
+
+ #dataset={}
+ #f = request.json['filepath']
+ #data=pd.read_csv(f)
+
preprocess = newmlservice.returnColumnsInfo(data)
#samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti
for col in preprocess["columnInfo"]:
@@ -53,6 +58,8 @@ def returnColumnsInfo():
dataset["columnInfo"] = preprocess["columnInfo"]
dataset["nullCols"] = preprocess["allNullColl"]
dataset["nullRows"] = preprocess["allNullRows"]
+ dataset["colCount"] = preprocess["colCount"]
+ dataset["rowCount"] = preprocess["rowCount"]
dataset["isPreProcess"] = True
print(dataset)
return jsonify(dataset)
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index ea51b58e..77cc59d0 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -28,6 +28,9 @@ def returnColumnsInfo(dataset):
svekolone=datafront.columns
kategorijskekolone=datafront.select_dtypes(include=['object']).columns
allNullCols=0
+ rowCount=datafront.shape[0]#ukupan broj redova
+ colCount=len(datafront.columns)#ukupan broj kolona
+
for kolona in svekolone:
if(kolona in kategorijskekolone):
uniquevalues=datafront[kolona].unique()
@@ -45,7 +48,7 @@ def returnColumnsInfo(dataset):
'mean':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
- 'max':float(maximum)
+ 'max':float(maximum),
}
dict.append(frontreturn)
else:
@@ -63,15 +66,14 @@ def returnColumnsInfo(dataset):
'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
- 'max':float(maximum)
+ 'max':float(maximum),
}
dict.append(frontreturn)
NullRows = datafront[datafront.isnull().any(axis=1)]
#print(NullRows)
#print(len(NullRows))
allNullRows=len(NullRows)
-
- return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows)}
+ return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows),'rowCount':int(rowCount),'colCount':int(colCount)}
@dataclass
class TrainingResultClassification:
@@ -118,7 +120,7 @@ def train(dataset, params, callback):
null_values_replacers = params["nullValuesReplacers"]
if(null_value_options=='replace'):
- #print("replace null") # TODO
+ #print("replace null") #
dict=params['null_values_replacers']
while(len(dict)>0):
replace=dict.pop()