aboutsummaryrefslogtreecommitdiff
path: root/backend/microservice/api
diff options
context:
space:
mode:
Diffstat (limited to 'backend/microservice/api')
-rw-r--r--backend/microservice/api/controller.py18
-rw-r--r--backend/microservice/api/newmlservice.py231
2 files changed, 173 insertions, 76 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 9b83b8e7..41035cc7 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -107,26 +107,32 @@ def predict():
@app.route('/preprocess',methods=['POST'])
def returnColumnsInfo():
print("********************************PREPROCESS*******************************")
+
dataset = json.loads(request.form["dataset"])
file = request.files.get("file")
data=pd.read_csv(file)
-
- #dataset={}
+ '''
#f = request.json['filepath']
#data=pd.read_csv(f)
-
+ dataset={}
+ '''
preprocess = newmlservice.returnColumnsInfo(data)
#samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti
+
for col in preprocess["columnInfo"]:
- col["uniqueValues"] = col["uniqueValues"][0:10]
- col["uniqueValuesCount"] = col["uniqueValuesCount"][0:10]
+ col["uniqueValues"] = col["uniqueValues"][0:6]
+ col["uniqueValuesCount"] = col["uniqueValuesCount"][0:6]
+ col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:6]
dataset["columnInfo"] = preprocess["columnInfo"]
dataset["nullCols"] = preprocess["allNullColl"]
dataset["nullRows"] = preprocess["allNullRows"]
dataset["colCount"] = preprocess["colCount"]
dataset["rowCount"] = preprocess["rowCount"]
dataset["isPreProcess"] = True
- print(dataset)
+ #print(dataset)
+
+
+
return jsonify(dataset)
print("App loaded.")
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 604e4d3c..9e26b03a 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -1,6 +1,7 @@
from enum import unique
from itertools import count
import os
+from sys import breakpointhook
import pandas as pd
from sklearn import datasets, multiclass
import tensorflow as tf
@@ -38,27 +39,38 @@ def returnColumnsInfo(dataset):
unique=datafront[kolona].value_counts()
uniquevalues=[]
uniquevaluescount=[]
+ uniquevaluespercent=[]
for val, count in unique.iteritems():
- uniquevalues.append(val)
- uniquevaluescount.append(count)
+ if(val):
+ uniquevalues.append(val)
+ uniquevaluescount.append(count)
+ percent=count/rowCount
+ uniquevaluespercent.append(percent)
#print(uniquevalues)
#print(uniquevaluescount)
mean=0
median=0
minimum=0
maximum=0
+ q1=0
+ q3=0
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+ frontreturn={
+ 'columnName':kolona,
'isNumber':False,
'uniqueValues':uniquevalues,
'uniqueValuesCount':uniquevaluescount,
- 'median':float(mean),
- 'mean':float(median),
+ 'uniqueValuesPercent':uniquevaluespercent,
+ 'mean':float(mean),
+ 'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
else:
@@ -66,18 +78,39 @@ def returnColumnsInfo(dataset):
maximum=max(datafront[kolona])
mean=datafront[kolona].mean()
median=s.median(datafront[kolona].copy().dropna())
+ q1= np.percentile(datafront[kolona].copy().dropna(), 25)
+ q3= np.percentile(datafront[kolona].copy().dropna(), 75)
nullCount=datafront[kolona].isnull().sum()
if(nullCount>0):
allNullCols=allNullCols+1
- frontreturn={'columnName':kolona,
+
+ #pretvaranje u kategorijsku
+ datafront = datafront.astype({kolona: str})
+ print(datafront.dtypes)
+ unique=datafront[kolona].value_counts()
+ uniquevaluesn=[]
+ uniquevaluescountn=[]
+ uniquevaluespercentn=[]
+ for val, count in unique.iteritems():
+ if(val):
+ uniquevaluesn.append(val)
+ uniquevaluescountn.append(count)
+ percent=count/rowCount
+ uniquevaluespercentn.append(percent)
+ frontreturn={
+ 'columnName':kolona,
'isNumber':1,
- 'uniqueValues':[],
- 'uniqueValuesCount':[],
+ 'uniqueValues':uniquevaluesn,
+ 'uniqueValuesCount':uniquevaluescountn,
+ 'uniqueValuesPercent':uniquevaluespercentn,
'mean':float(mean),
'median':float(median),
'numNulls':int(nullCount),
'min':float(minimum),
'max':float(maximum),
+ 'q1':float(q1),
+ 'q3':float(q3),
+
}
dict.append(frontreturn)
NullRows = datafront[datafront.isnull().any(axis=1)]
@@ -115,17 +148,28 @@ class TrainingResult:
'''
def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
+ ###UCITAVANJE SETA
problem_type = paramsModel["type"]
#print(problem_type)
data = pd.DataFrame()
#print(data)
for col in paramsExperiment["inputColumns"]:
#print(col)
- data[col]=dataset[col]
+ if(col!=paramsExperiment["outputColumn"]):
+ data[col]=dataset[col]
output_column = paramsExperiment["outputColumn"]
data[output_column] = dataset[output_column]
#print(data)
+ ###KATEGORIJSKE KOLONE
+ kategorijskekolone=[]
+ ###PRETVARANJE NUMERICKIH U KATREGORIJSKE AKO JE KORISNIK TAKO OZNACIO
+ columnInfo=paramsDataset['columnInfo']
+ for col in columnInfo:
+ if(col['columnType']=="Kategorijski"):
+ data[col['columnName']]=data[col['columnName']].apply(str)
+ kategorijskekolone.append(col['coumnName'])
+
###NULL
null_value_options = paramsExperiment["nullValues"]
null_values_replacers = paramsExperiment["nullValuesReplacers"]
@@ -149,16 +193,18 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
#
# Brisanje kolona koje ne uticu na rezultat
#
+ '''
num_rows=data.shape[0]
for col in data.columns:
if((data[col].nunique()==(num_rows)) and (data[col].dtype==np.object_)):
data.pop(col)
#
+ '''
### Enkodiranje
encodings=paramsExperiment["encodings"]
- datafront=dataset.copy()
- svekolone=datafront.columns
- kategorijskekolone=datafront.select_dtypes(include=['object']).columns
+ #datafront=dataset.copy()
+ #svekolone=datafront.columns
+ #kategorijskekolone=datafront.select_dtypes(include=['object']).columns
for kolonaEncoding in encodings:
kolona = kolonaEncoding["columnName"]
@@ -235,75 +281,75 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
#
#
###OPTIMIZATORI
- """
- if(params['optimizer']=='adam'):
- opt=tf.keras.optimizers.Adam(learning_rate=params['learningRate'])
+
+ if(paramsModel['optimizer']=='Adam'):
+ opt=tf.keras.optimizers.Adam(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='adadelta'):
- opt=tf.keras.optimizers.Adadelta(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Adadelta'):
+ opt=tf.keras.optimizers.Adadelta(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='adagrad'):
- opt=tf.keras.optimizers.Adagrad(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Adagrad'):
+ opt=tf.keras.optimizers.Adagrad(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='adamax'):
- opt=tf.keras.optimizers.Adamax(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Adamax'):
+ opt=tf.keras.optimizers.Adamax(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='nadam'):
- opt=tf.keras.optimizers.Nadam(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Nadam'):
+ opt=tf.keras.optimizers.Nadam(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='sgd'):
- opt=tf.keras.optimizers.SGD(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Sgd'):
+ opt=tf.keras.optimizers.SGD(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='ftrl'):
- opt=tf.keras.optimizers.Ftrl(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Ftrl'):
+ opt=tf.keras.optimizers.Ftrl(learning_rate=float(paramsModel['learningRate']))
- elif(params['optimizer']=='rmsprop'):
- opt=tf.keras.optimizers.RMSprop(learning_rate=params['learningRate'])
+ elif(paramsModel['optimizer']=='Rmsprop'):
+ opt=tf.keras.optimizers.RMSprop(learning_rate=float(paramsModel['learningRate']))
###REGULARIZACIJA
#regularisation={'kernelType':'l1 ili l2 ili l1_l2','kernelRate':default=0.01 ili jedna od vrednosti(0.0001,0.001,0.1,1,2,3) ili neka koju je korisnik zadao,'biasType':'','biasRate':'','activityType','activityRate'}
- reg=params['regularisation']
-
- ###Kernel
- if(reg['kernelType']=='l1'):
- kernelreg=tf.keras.regularizers.l1(reg['kernelRate'])
- elif(reg['kernelType']=='l2'):
- kernelreg=tf.keras.regularizers.l2(reg['kernelRate'])
- elif(reg['kernelType']=='l1l2'):
- kernelreg=tf.keras.regularizers.l1_l2(l1=reg['kernelRate'][0],l2=reg['kernelRate'][1])
-
- ###Bias
- if(reg['biasType']=='l1'):
- biasreg=tf.keras.regularizers.l1(reg['biasRate'])
- elif(reg['biasType']=='l2'):
- biasreg=tf.keras.regularizers.l2(reg['biasRate'])
- elif(reg['biasType']=='l1l2'):
- biasreg=tf.keras.regularizers.l1_l2(l1=reg['biasRate'][0],l2=reg['biasRate'][1])
-
- ###Activity
- if(reg['kernelType']=='l1'):
- activityreg=tf.keras.regularizers.l1(reg['activityRate'])
- elif(reg['kernelType']=='l2'):
- activityreg=tf.keras.regularizers.l2(reg['activityRate'])
- elif(reg['kernelType']=='l1l2'):
- activityreg=tf.keras.regularizers.l1_l2(l1=reg['activityRate'][0],l2=reg['activityRate'][1])
- """
+
+
filepath=os.path.join("temp/",paramsExperiment['_id']+"_"+paramsModel['_id']+".h5")
if(problem_type=='multi-klasifikacioni'):
#print('multi')
+
+ reg=paramsModel['regularisation'][0]
+ regRate=float(paramsModel['regularisationRate'][0])
+ if(reg=='l1'):
+ kernelreg=tf.keras.regularizers.l1(regRate)
+ biasreg=tf.keras.regularizers.l1(regRate)
+ activityreg=tf.keras.regularizers.l1(regRate)
+ elif(reg=='l2'):
+ kernelreg=tf.keras.regularizers.l2(regRate)
+ biasreg=tf.keras.regularizers.l2(regRate)
+ activityreg=tf.keras.regularizers.l2(regRate)
+
classifier=tf.keras.Sequential()
-
- classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1], kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#prvi skriveni + definisanje prethodnog-ulaznog
+
for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
- #print(i)
- classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
+ ###Kernel
+ reg=paramsModel['regularisation'][i+1]
+ regRate=float(paramsModel['regularisationRate'][i+1])
+ if(reg=='l1'):
+ kernelreg=tf.keras.regularizers.l1(regRate)
+ biasreg=tf.keras.regularizers.l1(regRate)
+ activityreg=tf.keras.regularizers.l1(regRate)
+ elif(reg=='l2'):
+ kernelreg=tf.keras.regularizers.l2(regRate)
+ biasreg=tf.keras.regularizers.l2(regRate)
+ activityreg=tf.keras.regularizers.l2(regRate)
+
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj
+
classifier.add(tf.keras.layers.Dense(units=5, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj
- classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =paramsModel['metrics'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt, metrics =paramsModel['metrics'])
- history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id']))
+ history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=float(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
hist=history.history
#plt.plot(hist['accuracy'])
@@ -325,17 +371,39 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
elif(problem_type=='binarni-klasifikacioni'):
#print('*************************************************************************binarni')
+ reg=paramsModel['regularisation'][0]
+ regRate=float(paramsModel['regularisationRate'][0])
+ if(reg=='l1'):
+ kernelreg=tf.keras.regularizers.l1(regRate)
+ biasreg=tf.keras.regularizers.l1(regRate)
+ activityreg=tf.keras.regularizers.l1(regRate)
+ elif(reg=='l2'):
+ kernelreg=tf.keras.regularizers.l2(regRate)
+ biasreg=tf.keras.regularizers.l2(regRate)
+ activityreg=tf.keras.regularizers.l2(regRate)
classifier=tf.keras.Sequential()
- classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#prvi skriveni + definisanje prethodnog-ulaznog
+
for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
#print(i)
- classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
+ reg=paramsModel['regularisation'][i+1]
+ regRate=float(paramsModel['regularisationRate'][i+1])
+ if(reg=='l1'):
+ kernelreg=tf.keras.regularizers.l1(regRate)
+ biasreg=tf.keras.regularizers.l1(regRate)
+ activityreg=tf.keras.regularizers.l1(regRate)
+ elif(reg=='l2'):
+ kernelreg=tf.keras.regularizers.l2(regRate)
+ biasreg=tf.keras.regularizers.l2(regRate)
+ activityreg=tf.keras.regularizers.l2(regRate)
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj
+
classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj
- classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =paramsModel['metrics'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics =paramsModel['metrics'])
- history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id']))
+ history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=float(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
hist=history.history
y_pred=classifier.predict(x_test)
y_pred=(y_pred>=0.5).astype('int')
@@ -351,17 +419,40 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
return filepath,hist
elif(problem_type=='regresioni'):
+ reg=paramsModel['regularisation'][0]
+ regRate=float(paramsModel['regularisationRate'][0])
+ if(reg=='l1'):
+ kernelreg=tf.keras.regularizers.l1(regRate)
+ biasreg=tf.keras.regularizers.l1(regRate)
+ activityreg=tf.keras.regularizers.l1(regRate)
+ elif(reg=='l2'):
+ kernelreg=tf.keras.regularizers.l2(regRate)
+ biasreg=tf.keras.regularizers.l2(regRate)
+ activityreg=tf.keras.regularizers.l2(regRate)
classifier=tf.keras.Sequential()
- classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#prvi skriveni + definisanje prethodnog-ulaznog
+
for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja
#print(i)
- classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj
- classifier.add(tf.keras.layers.Dense(units=1))
+ reg=paramsModel['regularisation'][i+1]
+ regRate=float(paramsModel['regularisationRate'][i+1])
+ if(reg=='l1'):
+ kernelreg=tf.keras.regularizers.l1(regRate)
+ biasreg=tf.keras.regularizers.l1(regRate)
+ activityreg=tf.keras.regularizers.l1(regRate)
+ elif(reg=='l2'):
+ kernelreg=tf.keras.regularizers.l2(regRate)
+ biasreg=tf.keras.regularizers.l2(regRate)
+ activityreg=tf.keras.regularizers.l2(regRate)
+
+ classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj
+
+ classifier.add(tf.keras.layers.Dense(units=1),activation=paramsModel['outputLayerActivationFunction'])
- classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =paramsModel['metrics'])
+ classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics =paramsModel['metrics'])
- history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id']))
+ history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=float(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
hist=history.history
y_pred=classifier.predict(x_test)
#print(classifier.evaluate(x_test, y_test))