From 61d8c3e8a88d0787f34b03fcd4fe2b533c571e1b Mon Sep 17 00:00:00 2001 From: TAMARA JERINIC Date: Mon, 2 May 2022 00:56:32 +0200 Subject: Dodato računanje novih statističkih podataka u mlservice fajl, usklađeno sa frontom. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/microservice/api/controller.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'backend/microservice/api/controller.py') diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index 9b83b8e7..fad6e181 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -107,26 +107,32 @@ def predict(): @app.route('/preprocess',methods=['POST']) def returnColumnsInfo(): print("********************************PREPROCESS*******************************") + dataset = json.loads(request.form["dataset"]) file = request.files.get("file") data=pd.read_csv(file) - - #dataset={} + ''' #f = request.json['filepath'] #data=pd.read_csv(f) - + dataset={} + ''' preprocess = newmlservice.returnColumnsInfo(data) #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti + for col in preprocess["columnInfo"]: - col["uniqueValues"] = col["uniqueValues"][0:10] - col["uniqueValuesCount"] = col["uniqueValuesCount"][0:10] + col["uniqueValues"] = col["uniqueValues"][0:5] + col["uniqueValuesCount"] = col["uniqueValuesCount"][0:5] + col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:5] dataset["columnInfo"] = preprocess["columnInfo"] dataset["nullCols"] = preprocess["allNullColl"] dataset["nullRows"] = preprocess["allNullRows"] dataset["colCount"] = preprocess["colCount"] dataset["rowCount"] = preprocess["rowCount"] dataset["isPreProcess"] = True - print(dataset) + #print(dataset) + + + return jsonify(dataset) print("App loaded.") -- cgit v1.2.3 From f32ec4fe8ae54f2154fa3393040a07cdb579b07f Mon Sep 17 00:00:00 2001 From: TAMARA JERINIC Date: Tue, 3 May 2022 21:28:57 +0200 Subject: Usklađen izbor kategorijskih kolona na frontu sa ml servisom. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/microservice/api/controller.py | 6 +++--- backend/microservice/api/newmlservice.py | 16 ++++++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'backend/microservice/api/controller.py') diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index fad6e181..41035cc7 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -120,9 +120,9 @@ def returnColumnsInfo(): #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti for col in preprocess["columnInfo"]: - col["uniqueValues"] = col["uniqueValues"][0:5] - col["uniqueValuesCount"] = col["uniqueValuesCount"][0:5] - col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:5] + col["uniqueValues"] = col["uniqueValues"][0:6] + col["uniqueValuesCount"] = col["uniqueValuesCount"][0:6] + col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:6] dataset["columnInfo"] = preprocess["columnInfo"] dataset["nullCols"] = preprocess["allNullColl"] dataset["nullRows"] = preprocess["allNullRows"] diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py index 647c3b79..631837e5 100644 --- a/backend/microservice/api/newmlservice.py +++ b/backend/microservice/api/newmlservice.py @@ -148,6 +148,7 @@ class TrainingResult: ''' def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): + ###UCITAVANJE SETA problem_type = paramsModel["type"] #print(problem_type) data = pd.DataFrame() @@ -159,6 +160,15 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): data[output_column] = dataset[output_column] #print(data) + ###KATEGORIJSKE KOLONE + kategorijskekolone=[] + ###PRETVARANJE NUMERICKIH U KATREGORIJSKE AKO JE KORISNIK TAKO OZNACIO + columnInfo=paramsDataset['columnInfo'] + for col in columnInfo: + if(col['columnType']=="Kategorijski"): + data[col['columnName']]=data[col['columnName']].apply(str) + kategorijskekolone.append(col['coumnName']) + ###NULL null_value_options = paramsExperiment["nullValues"] null_values_replacers = paramsExperiment["nullValuesReplacers"] @@ -182,16 +192,18 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): # # Brisanje kolona koje ne uticu na rezultat # + ''' num_rows=data.shape[0] for col in data.columns: if((data[col].nunique()==(num_rows)) and (data[col].dtype==np.object_)): data.pop(col) # + ''' ### Enkodiranje encodings=paramsExperiment["encodings"] datafront=dataset.copy() - svekolone=datafront.columns - kategorijskekolone=datafront.select_dtypes(include=['object']).columns + #svekolone=datafront.columns + #kategorijskekolone=datafront.select_dtypes(include=['object']).columns for kolonaEncoding in encodings: kolona = kolonaEncoding["columnName"] -- cgit v1.2.3