diff options
Diffstat (limited to 'backend/microservice')
-rw-r--r-- | backend/microservice/api/controller.py | 7 | ||||
-rw-r--r-- | backend/microservice/api/newmlservice.py | 12 |
2 files changed, 14 insertions, 5 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index ff803358..83741ce1 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -46,6 +46,11 @@ def returnColumnsInfo(): dataset = json.loads(request.form["dataset"]) file = request.files.get("file") data=pd.read_csv(file) + + #dataset={} + #f = request.json['filepath'] + #data=pd.read_csv(f) + preprocess = newmlservice.returnColumnsInfo(data) #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti for col in preprocess["columnInfo"]: @@ -53,6 +58,8 @@ def returnColumnsInfo(): dataset["columnInfo"] = preprocess["columnInfo"] dataset["nullCols"] = preprocess["allNullColl"] dataset["nullRows"] = preprocess["allNullRows"] + dataset["colCount"] = preprocess["colCount"] + dataset["rowCount"] = preprocess["rowCount"] dataset["isPreProcess"] = True print(dataset) return jsonify(dataset) diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py index ea51b58e..77cc59d0 100644 --- a/backend/microservice/api/newmlservice.py +++ b/backend/microservice/api/newmlservice.py @@ -28,6 +28,9 @@ def returnColumnsInfo(dataset): svekolone=datafront.columns kategorijskekolone=datafront.select_dtypes(include=['object']).columns allNullCols=0 + rowCount=datafront.shape[0]#ukupan broj redova + colCount=len(datafront.columns)#ukupan broj kolona + for kolona in svekolone: if(kolona in kategorijskekolone): uniquevalues=datafront[kolona].unique() @@ -45,7 +48,7 @@ def returnColumnsInfo(dataset): 'mean':float(median), 'numNulls':int(nullCount), 'min':float(minimum), - 'max':float(maximum) + 'max':float(maximum), } dict.append(frontreturn) else: @@ -63,15 +66,14 @@ def returnColumnsInfo(dataset): 'median':float(median), 'numNulls':int(nullCount), 'min':float(minimum), - 'max':float(maximum) + 'max':float(maximum), } dict.append(frontreturn) NullRows = datafront[datafront.isnull().any(axis=1)] #print(NullRows) #print(len(NullRows)) allNullRows=len(NullRows) - - return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows)} + return {'columnInfo':dict,'allNullColl':int(allNullCols),'allNullRows':int(allNullRows),'rowCount':int(rowCount),'colCount':int(colCount)} @dataclass class TrainingResultClassification: @@ -118,7 +120,7 @@ def train(dataset, params, callback): null_values_replacers = params["nullValuesReplacers"] if(null_value_options=='replace'): - #print("replace null") # TODO + #print("replace null") # dict=params['null_values_replacers'] while(len(dict)>0): replace=dict.pop() |