aboutsummaryrefslogtreecommitdiff
path: root/backend/microservice
diff options
context:
space:
mode:
Diffstat (limited to 'backend/microservice')
-rw-r--r-- backend/microservice/PythonServer/project/api/api.py | 8
-rw-r--r-- backend/microservice/mlservice.py | 196
2 files changed, 147 insertions, 57 deletions
diff --git a/backend/microservice/PythonServer/project/api/api.py b/backend/microservice/PythonServer/project/api/api.py
index 75df26b8..4768f34c 100644
--- a/backend/microservice/PythonServer/project/api/api.py
+++ b/backend/microservice/PythonServer/project/api/api.py
@@ -9,7 +9,8 @@ import csv
import json
import mlservice
import h5py
-from mlservice import obuka
+from mlservice2 import unositok
+
app = flask.Flask(__name__)
app.config["DEBUG"] = True
@@ -27,9 +28,12 @@ def data():
data1 = pd.read_csv(f)
+ d2=request.json['filepath2']
+ data2=pd.read_csv(d2)
+
m=request.json['modelpath']
model=tf.keras.models.load_model(m)
#print(data)
- return obuka(data1,request.json,model)
+ return unositok(data1,data2,request.json,model)
app.run() \ No newline at end of file
diff --git a/backend/microservice/mlservice.py b/backend/microservice/mlservice.py
index 9b6b3789..3385a18e 100644
--- a/backend/microservice/mlservice.py
+++ b/backend/microservice/mlservice.py
@@ -34,13 +34,14 @@ class fCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
print('Evaluation: ', self.model.evaluate(self.x_test,self.y_test),"\n")#broj parametara zavisi od izabranih metrika loss je default
-def obuka(dataunos,params,modelunos):
+def obuka(dataunos,params,modelunos,dataunosdrugog):
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
### -1) Ucitavanje h5 modela PART3
+
if(modelunos!=None):
print("Model je unet")
model=modelunos
@@ -75,7 +76,7 @@ def obuka(dataunos,params,modelunos):
### 2)Proveravanje svih kolona za null vrednosti i popunjavanje medijanom ili srednjom vrednosti ili birisanje
#####Part2 #####
-
+ '''
#brisanje=input("DA LI ZELITE DA IZBRSETE SVE KOLONE SA NULL VREDNOSTIMA? ")
brisanje='ne'
if(brisanje=='da'):
@@ -108,39 +109,97 @@ def obuka(dataunos,params,modelunos):
elif(tippodataka==np.object_):
najcescavrednost=kolona.value_counts().index[0]
data[kolone[i]]=data[kolone[i]].fillna(najcescavrednost)
+ '''
+
+ nullreplace=[
+ {"column":"Embarked","value":"C","deleteRow":"0","deleteCol":"0"},
+ {"column": "Cabin","value":"C123","deleteRow":"0","deleteCol":"0"}]
+
+
+ nullopt=params["NullValueOptions"]
+
+ zamena=nullreplace
+ nulldf=pd.DataFrame(zamena)
+ nulldf=nulldf.transpose()
+ nredova=data.shape[0]
+ if(nullopt=='replace'):
+
+
+ p=0
+
+ while(1):
+ if(p in nulldf.columns):
+ print("3")
+ parametri=nulldf[p]
+ print(parametri)
+ #print(data[parametri['column']])
+ col=parametri['column']
+ print(col)
+ val=parametri['value']
+ print(val)
+ if(data[col].isnull().any()):
+
+ if(parametri['value']!='0'):
+
+ print(data[parametri['column']])
+ data[parametri['column']]=data[parametri['column']].fillna(val)
+ print(data[parametri['column']])
+ elif(parametri['deleteRow']==1):
+ data=data.dropna(subset=[col])
+ print("brisi")
+
+
+ elif(parametri['deleteCol']==1):
+ data.pop(col)
+ p+=1
+ continue
+ else:
+ break
+
+ elif(nullopt=='deleteRow'):
+ data=data.dropna()
+
+ elif(nullopt=='deleteCol'):
+ data=data.dropna()
+
+ print(data.isnull().any())
+
+
kolone=data.columns
-
+ print("null done")
### 3)Izbacivanje kolona koje ne uticu na rezultat PART2
nredova=data.shape[0]
+
for i in range(len(kolone)):
- if((data[kolone[i]].nunique()>(nredova/2)) and( data[kolone[i]].dtype==np.object_)):
+ if((data[kolone[i]].nunique()==(nredova)) and( data[kolone[i]].dtype==np.object_)):
data.pop(kolone[i])
#print(data.head(10))
### 4)izbor tipa enkodiranja
kolone=data.columns ### Azuriranje postojecih kolona nakon moguceg brisanja
-
+
#enc=input("UNETI TIP ENKODIRANJA ")
enc=params["encoding"]
- onehot=0
-
+
+
### 5)Enkodiranje svih kategorijskih promenjivih label-encode metodom
-
+
if(enc=='label'):
+
from sklearn.preprocessing import LabelEncoder
encoder=LabelEncoder()
for k in range(len(kolone)):
if(data[kolone[k]].dtype==np.object_):
data[kolone[k]]=encoder.fit_transform(data[kolone[k]])
#print(data.head(20))
-
+
### 6)Enkodiranje svih kategorijskih promenjivih onehot metodom
elif(enc=='onehot'):
- ### PART2###
- onehot==1
+ ### PART2 ###
+
kategorijskekolone=[]
for k in range(len(kolone)):
if(data[kolone[k]].dtype==np.object_):
@@ -148,7 +207,7 @@ def obuka(dataunos,params,modelunos):
kategorijskekolone.append(kolone[k]) ###U kategorijske kolone smestaju se nazivi svih kolona sa kategorijskim podacima
#print(kategorijskekolone)
-
+
### Enkodiranje
data=pd.get_dummies(data,columns=kategorijskekolone,prefix=kategorijskekolone)
#print(data.head(10))
@@ -199,8 +258,10 @@ def obuka(dataunos,params,modelunos):
x_test=scaler.transform(x_test)
x_train=scaler.transform(x_train)
+ ### 9)CUVANJE IZLAZNIH PODATAKA PART3
+
#####ZAVRSENA PRIPREMA PODATAKA#####
-
+
#####OBUCAVANJE MODELA#####
### 9)Inicijalizacija vestacke neuronske mreze
@@ -245,13 +306,7 @@ def obuka(dataunos,params,modelunos):
metrike=params['metrics']
#metrike=[]
lossf=params["lossFunction"]
- '''
- while(1):
- m=params['lossFunction']
-
- if(m=='KRAJ'):
- break
- metrike.append(m)'''
+
classifier.compile(optimizer=optimizator, loss=lossf,metrics=metrike)
performance_simple = fCallback(x_test, y_test)
### 14)
@@ -260,7 +315,7 @@ def obuka(dataunos,params,modelunos):
uzorci=params["batchSize"]
epohe=params["epochs"]
history=classifier.fit(x_train,y_train,batch_size=uzorci,epochs=epohe,callbacks=[performance_simple],validation_split=0.2)
-
+
### 14.1)Parametri grafika iz history PART2
metrikedf=pd.DataFrame() ###DataFrame u kom se nalaze podaci o rezultatima metrika za iscrtavanje na grafiku. Svaka kolona sadrzi vrednost metrike po epohama
for i in range(len(metrike)):
@@ -360,15 +415,16 @@ def obuka(dataunos,params,modelunos):
plt.ylabel('True Positive Rate')
plt.show()
'''
-
+
+
r=Response(float(tacnost),float(preciznost),float(recall),float(spec),float(f1),float(mse),float(mae),float(mape),float(rmse))
import jsonpickle
return json.dumps(json.loads(jsonpickle.encode(r)), indent=2)
- return "Done"
+#####KRAJ OBUKE JEDNOG#####
-##### UCITAVANJE DRUGOG SETA PODATAKA ##### PART3
+##### UCITAVANJE I OBUKA DRUGOG SETA PODATAKA ##### PART3
def ucitavanjeipreprocesiranjedrugog(dataunosdrugog,params):
data2=dataunosdrugog.copy()
@@ -383,38 +439,56 @@ def ucitavanjeipreprocesiranjedrugog(dataunosdrugog,params):
kolone=data2.columns
### 3)NULL vrednosti
- brisanje='ne'
- if(brisanje=='da'):
- data2=data2.dropna(axis=1)
- elif(brisanje=='ne'):
- # brisanjer=input("DA LI ZELITE DA IZBRISETE SVE REDOVE SA NULL VREDNOSTIMA? ")
- brisanjer='ne'
- if(brisanjer=='da'):
- data2=data2.dropna()
- elif(brisanjer=='ne'):
-
- for i in range(len(kolone)):
-
- if(data2[kolone[i]].isnull().any()):
- tippodataka=data2[kolone[i]].dtype
- kolona=data2[kolone[i]].copy()
-
- if(tippodataka==np.float64 or tippodataka==np.int64):
- #popunjavanje=input("UNETI NACIN POPUNJAVANJA PROMENJIVIH SA NULL VREDNOSTIMA ")
- popunjavanje='medijana'
- if(popunjavanje=='medijana'):
- medijana=kolona.mean()
- data2[kolone[i]]=data2[kolone[i]].fillna(medijana)
- if(popunjavanje=='srednjavrednost'):
- sv=data2[kolone[i]].sum()/data2[kolone[i]].count()
- data2[kolone[i]]=sv
- if(popunjavanje=='brisanjekolone'):
- data2=data2.dropna(axis=1)
-
- elif(tippodataka==np.object_):
- najcescavrednost=kolona.value_counts().index[0]
- data2[kolone[i]]=data2[kolone[i]].fillna(najcescavrednost)
+ nullreplace=[
+ {"column":"Embarked","value":"C","deleteRow":"0","deleteCol":"0"},
+ {"column": "Cabin","value":"C123","deleteRow":"0","deleteCol":"0"}]
+
+
+ nullopt=params["NullValueOptions"]
+
+ zamena=nullreplace
+
+ nulldf=pd.DataFrame(zamena)
+ nulldf=nulldf.transpose()
+ if(nullopt=='replace'):
+
+
+ p=0
+
+ while(1):
+ if(p in nulldf.columns):
+ print("3")
+ parametri=nulldf[p]
+ print(parametri)
+ #print(data[parametri['column']])
+ col=parametri['column']
+ print(col)
+
+ if(data2[col].isnull().any()):
+
+ #print(parametri['value'])
+ if(parametri['value']!=''):
+ data2[col]=data2[col].fillna(parametri["value"])
+
+ elif(parametri['deleteRow']==1):
+ data2=data2.dropna(subset=[col])
+ print("brisi")
+
+
+ elif(parametri['deleteCol']==1):
+ data2.pop(col)
+ p+=1
+ continue
+ else:
+ break
+
+ elif(nullopt=='deleteRow'):
+ data2=data2.dropna()
+
+ elif(nullopt=='deleteCol'):
+ data2=data2.dropna()
+
kolone=data2.columns
### 4)Enkodiranje
@@ -454,3 +528,15 @@ def ucitavanjeipreprocesiranjedrugog(dataunosdrugog,params):
xkolone.append(kolone[k])
x2=data2[xkolone].values()
+ print(x2)
+ return x2
+ #####OBUCAVANJE MODELA#####
+
+
+def unositok(dataunos,dataunosdrugi,params,model):
+    """Run the training flow through obuka and return its result unchanged.
+
+    The arguments are reordered for the call: obuka takes the first dataset,
+    then params, then the model, and the second dataset as its last
+    positional argument.
+    """
+    return obuka(dataunos,params,model,dataunosdrugi)
+
+
+
+ \ No newline at end of file