From ab2c4e59a7b187bf84cabeb03f5b9575de5fe0ee Mon Sep 17 00:00:00 2001 From: TAMARA JERINIC Date: Thu, 24 Mar 2022 15:17:50 +0100 Subject: Dodata mogučnost učitavanja i preprocesiranja fajla za predikciju, izmenjena je funkcija za obradu null vrednosti, izmenjen je api.py fajl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../microservice/PythonServer/project/api/api.py | 8 +- backend/microservice/mlservice.py | 196 +++++++++++++++------ 2 files changed, 147 insertions(+), 57 deletions(-) (limited to 'backend/microservice') diff --git a/backend/microservice/PythonServer/project/api/api.py b/backend/microservice/PythonServer/project/api/api.py index 75df26b8..4768f34c 100644 --- a/backend/microservice/PythonServer/project/api/api.py +++ b/backend/microservice/PythonServer/project/api/api.py @@ -9,7 +9,8 @@ import csv import json import mlservice import h5py -from mlservice import obuka +from mlservice2 import unositok + app = flask.Flask(__name__) app.config["DEBUG"] = True @@ -27,9 +28,12 @@ def data(): data1 = pd.read_csv(f) + d2=request.json['filepath2'] + data2=pd.read_csv(d2) + m=request.json['modelpath'] model=tf.keras.models.load_model(m) #print(data) - return obuka(data1,request.json,model) + return unositok(data1,data2,request.json,model) app.run() \ No newline at end of file diff --git a/backend/microservice/mlservice.py b/backend/microservice/mlservice.py index 9b6b3789..3385a18e 100644 --- a/backend/microservice/mlservice.py +++ b/backend/microservice/mlservice.py @@ -34,13 +34,14 @@ class fCallback(tf.keras.callbacks.Callback): def on_epoch_end(self, epoch, logs=None): print('Evaluation: ', self.model.evaluate(self.x_test,self.y_test),"\n")#broj parametara zavisi od izabranih metrika loss je default -def obuka(dataunos,params,modelunos): +def obuka(dataunos,params,modelunos,dataunosdrugog): import numpy as np import pandas as pd import tensorflow as tf import matplotlib.pyplot as plt import keras ### -1) Ucitavanje h5 modela PART3 + if(modelunos!=None): print("Model je unet") model=modelunos @@ -75,7 +76,7 @@ def obuka(dataunos,params,modelunos): ### 2)Proveravanje svih kolona za null vrednosti i popunjavanje medijanom ili srednjom vrednosti ili birisanje #####Part2 ##### - + ''' #brisanje=input("DA LI ZELITE DA IZBRSETE SVE KOLONE SA NULL VREDNOSTIMA? ") brisanje='ne' if(brisanje=='da'): @@ -108,39 +109,97 @@ def obuka(dataunos,params,modelunos): elif(tippodataka==np.object_): najcescavrednost=kolona.value_counts().index[0] data[kolone[i]]=data[kolone[i]].fillna(najcescavrednost) + ''' + + nullreplace=[ + {"column":"Embarked","value":"C","deleteRow":"0","deleteCol":"0"}, + {"column": "Cabin","value":"C123","deleteRow":"0","deleteCol":"0"}] + + + nullopt=params["NullValueOptions"] + + zamena=nullreplace + nulldf=pd.DataFrame(zamena) + nulldf=nulldf.transpose() + nredova=data.shape[0] + if(nullopt=='replace'): + + + p=0 + + while(1): + if(p in nulldf.columns): + print("3") + parametri=nulldf[p] + print(parametri) + #print(data[parametri['column']]) + col=parametri['column'] + print(col) + val=parametri['value'] + print(val) + if(data[col].isnull().any()): + + if(parametri['value']!='0'): + + print(data[parametri['column']]) + data[parametri['column']]=data[parametri['column']].fillna(val) + print(data[parametri['column']]) + elif(parametri['deleteRow']==1): + data=data.dropna(subset=[col]) + print("brisi") + + + elif(parametri['deleteCol']==1): + data.pop(col) + p+=1 + continue + else: + break + + elif(nullopt=='deleteRow'): + data=data.dropna() + + elif(nullopt=='deleteCol'): + data=data.dropna() + + print(data.isnull().any()) + + kolone=data.columns - + print("null done") ### 3)Izbacivanje kolona koje ne uticu na rezultat PART2 nredova=data.shape[0] + for i in range(len(kolone)): - if((data[kolone[i]].nunique()>(nredova/2)) and( data[kolone[i]].dtype==np.object_)): + if((data[kolone[i]].nunique()==(nredova)) and( data[kolone[i]].dtype==np.object_)): data.pop(kolone[i]) #print(data.head(10)) ### 4)izbor tipa enkodiranja kolone=data.columns ### Azuriranje postojecih kolona nakon moguceg brisanja - + #enc=input("UNETI TIP ENKODIRANJA ") enc=params["encoding"] - onehot=0 - + + ### 5)Enkodiranje svih kategorijskih promenjivih label-encode metodom - + if(enc=='label'): + from sklearn.preprocessing import LabelEncoder encoder=LabelEncoder() for k in range(len(kolone)): if(data[kolone[k]].dtype==np.object_): data[kolone[k]]=encoder.fit_transform(data[kolone[k]]) #print(data.head(20)) - + ### 6)Enkodiranje svih kategorijskih promenjivih onehot metodom elif(enc=='onehot'): - ### PART2### - onehot==1 + ### PART2 ### + kategorijskekolone=[] for k in range(len(kolone)): if(data[kolone[k]].dtype==np.object_): @@ -148,7 +207,7 @@ def obuka(dataunos,params,modelunos): kategorijskekolone.append(kolone[k]) ###U kategorijske kolone smestaju se nazivi svih kolona sa kategorijskim podacima #print(kategorijskekolone) - + ### Enkodiranje data=pd.get_dummies(data,columns=kategorijskekolone,prefix=kategorijskekolone) #print(data.head(10)) @@ -199,8 +258,10 @@ def obuka(dataunos,params,modelunos): x_test=scaler.transform(x_test) x_train=scaler.transform(x_train) + ### 9)CUVANJE IZLAZNIH PODATAKA PART3 + #####ZAVRSENA PRIPREMA PODATAKA##### - + #####OBUCAVANJE MODELA##### ### 9)Inicijalizacija vestacke neuronske mreze @@ -245,13 +306,7 @@ def obuka(dataunos,params,modelunos): metrike=params['metrics'] #metrike=[] lossf=params["lossFunction"] - ''' - while(1): - m=params['lossFunction'] - - if(m=='KRAJ'): - break - metrike.append(m)''' + classifier.compile(optimizer=optimizator, loss=lossf,metrics=metrike) performance_simple = fCallback(x_test, y_test) ### 14) @@ -260,7 +315,7 @@ def obuka(dataunos,params,modelunos): uzorci=params["batchSize"] epohe=params["epochs"] history=classifier.fit(x_train,y_train,batch_size=uzorci,epochs=epohe,callbacks=[performance_simple],validation_split=0.2) - + ### 14.1)Parametri grafika iz history PART2 metrikedf=pd.DataFrame() ###DataFrame u kom se nalaze podaci o rezultatima metrika za iscrtavanje na grafiku. Svaka kolona sadrzi vrednost metrike po epohama for i in range(len(metrike)): @@ -360,15 +415,16 @@ def obuka(dataunos,params,modelunos): plt.ylabel('True Positive Rate') plt.show() ''' - + + r=Response(float(tacnost),float(preciznost),float(recall),float(spec),float(f1),float(mse),float(mae),float(mape),float(rmse)) import jsonpickle return json.dumps(json.loads(jsonpickle.encode(r)), indent=2) - return "Done" +#####KRAJ OBUKE JEDNOG##### -##### UCITAVANJE DRUGOG SETA PODATAKA ##### PART3 +##### UCITAVANJE I OBUKA DRUGOG SETA PODATAKA ##### PART3 def ucitavanjeipreprocesiranjedrugog(dataunosdrugog,params): data2=dataunosdrugog.copy() @@ -383,38 +439,56 @@ def ucitavanjeipreprocesiranjedrugog(dataunosdrugog,params): kolone=data2.columns ### 3)NULL vrednosti - brisanje='ne' - if(brisanje=='da'): - data2=data2.dropna(axis=1) - elif(brisanje=='ne'): - # brisanjer=input("DA LI ZELITE DA IZBRISETE SVE REDOVE SA NULL VREDNOSTIMA? ") - brisanjer='ne' - if(brisanjer=='da'): - data2=data2.dropna() - elif(brisanjer=='ne'): - - for i in range(len(kolone)): - - if(data2[kolone[i]].isnull().any()): - tippodataka=data2[kolone[i]].dtype - kolona=data2[kolone[i]].copy() - - if(tippodataka==np.float64 or tippodataka==np.int64): - #popunjavanje=input("UNETI NACIN POPUNJAVANJA PROMENJIVIH SA NULL VREDNOSTIMA ") - popunjavanje='medijana' - if(popunjavanje=='medijana'): - medijana=kolona.mean() - data2[kolone[i]]=data2[kolone[i]].fillna(medijana) - if(popunjavanje=='srednjavrednost'): - sv=data2[kolone[i]].sum()/data2[kolone[i]].count() - data2[kolone[i]]=sv - if(popunjavanje=='brisanjekolone'): - data2=data2.dropna(axis=1) - - elif(tippodataka==np.object_): - najcescavrednost=kolona.value_counts().index[0] - data2[kolone[i]]=data2[kolone[i]].fillna(najcescavrednost) + nullreplace=[ + {"column":"Embarked","value":"C","deleteRow":"0","deleteCol":"0"}, + {"column": "Cabin","value":"C123","deleteRow":"0","deleteCol":"0"}] + + + nullopt=params["NullValueOptions"] + + zamena=nullreplace + + nulldf=pd.DataFrame(zamena) + nulldf=nulldf.transpose() + if(nullopt=='replace'): + + + p=0 + + while(1): + if(p in nulldf.columns): + print("3") + parametri=nulldf[p] + print(parametri) + #print(data[parametri['column']]) + col=parametri['column'] + print(col) + + if(data2[col].isnull().any()): + + #print(parametri['value']) + if(parametri['value']!=''): + data2[col]=data2[col].fillna(parametri["value"]) + + elif(parametri['deleteRow']==1): + data2=data2.dropna(subset=[col]) + print("brisi") + + + elif(parametri['deleteCol']==1): + data2.pop(col) + p+=1 + continue + else: + break + + elif(nullopt=='deleteRow'): + data2=data2.dropna() + + elif(nullopt=='deleteCol'): + data2=data2.dropna() + kolone=data2.columns ### 4)Enkodiranje @@ -454,3 +528,15 @@ def ucitavanjeipreprocesiranjedrugog(dataunosdrugog,params): xkolone.append(kolone[k]) x2=data2[xkolone].values() + print(x2) + return x2 + #####OBUCAVANJE MODELA##### + + +def unositok(dataunos,dataunosdrugi,params,model): + data=obuka(dataunos,params,model,dataunosdrugi) + return(data) + + + + \ No newline at end of file -- cgit v1.2.3