diff options
author | Ognjen Cirkovic <ciraboxkg@gmail.com> | 2022-05-04 20:29:40 +0200 |
---|---|---|
committer | Ognjen Cirkovic <ciraboxkg@gmail.com> | 2022-05-04 20:29:40 +0200 |
commit | 2c852c832bf310feeb6045380a533bb4f832ccfd (patch) | |
tree | 6fa81c0c45a4e0e2550e10371050c07b32a74dd0 /backend/microservice | |
parent | 45c519d53fee1124c2882c7b353cd930fd311f9e (diff) | |
parent | 3ac7a37690765d6c116463dc8a6ef08b18afea50 (diff) |
Mergovana grana sa granom redesign. Sredjeni konflikti.
Diffstat (limited to 'backend/microservice')
-rw-r--r-- | backend/microservice/api/controller.py | 18 | ||||
-rw-r--r-- | backend/microservice/api/newmlservice.py | 231 |
2 files changed, 173 insertions, 76 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index 9b83b8e7..41035cc7 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -107,26 +107,32 @@ def predict(): @app.route('/preprocess',methods=['POST']) def returnColumnsInfo(): print("********************************PREPROCESS*******************************") + dataset = json.loads(request.form["dataset"]) file = request.files.get("file") data=pd.read_csv(file) - - #dataset={} + ''' #f = request.json['filepath'] #data=pd.read_csv(f) - + dataset={} + ''' preprocess = newmlservice.returnColumnsInfo(data) #samo 10 jedinstvenih posto ih ima previse, bilo bi dobro da promenimo ovo da to budu 10 najzastupljenijih vrednosti + for col in preprocess["columnInfo"]: - col["uniqueValues"] = col["uniqueValues"][0:10] - col["uniqueValuesCount"] = col["uniqueValuesCount"][0:10] + col["uniqueValues"] = col["uniqueValues"][0:6] + col["uniqueValuesCount"] = col["uniqueValuesCount"][0:6] + col['uniqueValuesPercent']=col['uniqueValuesPercent'][0:6] dataset["columnInfo"] = preprocess["columnInfo"] dataset["nullCols"] = preprocess["allNullColl"] dataset["nullRows"] = preprocess["allNullRows"] dataset["colCount"] = preprocess["colCount"] dataset["rowCount"] = preprocess["rowCount"] dataset["isPreProcess"] = True - print(dataset) + #print(dataset) + + + return jsonify(dataset) print("App loaded.") diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py index 604e4d3c..9e26b03a 100644 --- a/backend/microservice/api/newmlservice.py +++ b/backend/microservice/api/newmlservice.py @@ -1,6 +1,7 @@ from enum import unique from itertools import count import os +from sys import breakpointhook import pandas as pd from sklearn import datasets, multiclass import tensorflow as tf @@ -38,27 +39,38 @@ def returnColumnsInfo(dataset): unique=datafront[kolona].value_counts() uniquevalues=[] uniquevaluescount=[] + uniquevaluespercent=[] for val, count in unique.iteritems(): - uniquevalues.append(val) - uniquevaluescount.append(count) + if(val): + uniquevalues.append(val) + uniquevaluescount.append(count) + percent=count/rowCount + uniquevaluespercent.append(percent) #print(uniquevalues) #print(uniquevaluescount) mean=0 median=0 minimum=0 maximum=0 + q1=0 + q3=0 nullCount=datafront[kolona].isnull().sum() if(nullCount>0): allNullCols=allNullCols+1 - frontreturn={'columnName':kolona, + frontreturn={ + 'columnName':kolona, 'isNumber':False, 'uniqueValues':uniquevalues, 'uniqueValuesCount':uniquevaluescount, - 'median':float(mean), - 'mean':float(median), + 'uniqueValuesPercent':uniquevaluespercent, + 'mean':float(mean), + 'median':float(median), 'numNulls':int(nullCount), 'min':float(minimum), 'max':float(maximum), + 'q1':float(q1), + 'q3':float(q3), + } dict.append(frontreturn) else: @@ -66,18 +78,39 @@ def returnColumnsInfo(dataset): maximum=max(datafront[kolona]) mean=datafront[kolona].mean() median=s.median(datafront[kolona].copy().dropna()) + q1= np.percentile(datafront[kolona].copy().dropna(), 25) + q3= np.percentile(datafront[kolona].copy().dropna(), 75) nullCount=datafront[kolona].isnull().sum() if(nullCount>0): allNullCols=allNullCols+1 - frontreturn={'columnName':kolona, + + #pretvaranje u kategorijsku + datafront = datafront.astype({kolona: str}) + print(datafront.dtypes) + unique=datafront[kolona].value_counts() + uniquevaluesn=[] + uniquevaluescountn=[] + uniquevaluespercentn=[] + for val, count in unique.iteritems(): + if(val): + uniquevaluesn.append(val) + uniquevaluescountn.append(count) + percent=count/rowCount + uniquevaluespercentn.append(percent) + frontreturn={ + 'columnName':kolona, 'isNumber':1, - 'uniqueValues':[], - 'uniqueValuesCount':[], + 'uniqueValues':uniquevaluesn, + 'uniqueValuesCount':uniquevaluescountn, + 'uniqueValuesPercent':uniquevaluespercentn, 'mean':float(mean), 'median':float(median), 'numNulls':int(nullCount), 'min':float(minimum), 'max':float(maximum), + 'q1':float(q1), + 'q3':float(q3), + } dict.append(frontreturn) NullRows = datafront[datafront.isnull().any(axis=1)] @@ -115,17 +148,28 @@ class TrainingResult: ''' def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): + ###UCITAVANJE SETA problem_type = paramsModel["type"] #print(problem_type) data = pd.DataFrame() #print(data) for col in paramsExperiment["inputColumns"]: #print(col) - data[col]=dataset[col] + if(col!=paramsExperiment["outputColumn"]): + data[col]=dataset[col] output_column = paramsExperiment["outputColumn"] data[output_column] = dataset[output_column] #print(data) + ###KATEGORIJSKE KOLONE + kategorijskekolone=[] + ###PRETVARANJE NUMERICKIH U KATREGORIJSKE AKO JE KORISNIK TAKO OZNACIO + columnInfo=paramsDataset['columnInfo'] + for col in columnInfo: + if(col['columnType']=="Kategorijski"): + data[col['columnName']]=data[col['columnName']].apply(str) + kategorijskekolone.append(col['coumnName']) + ###NULL null_value_options = paramsExperiment["nullValues"] null_values_replacers = paramsExperiment["nullValuesReplacers"] @@ -149,16 +193,18 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): # # Brisanje kolona koje ne uticu na rezultat # + ''' num_rows=data.shape[0] for col in data.columns: if((data[col].nunique()==(num_rows)) and (data[col].dtype==np.object_)): data.pop(col) # + ''' ### Enkodiranje encodings=paramsExperiment["encodings"] - datafront=dataset.copy() - svekolone=datafront.columns - kategorijskekolone=datafront.select_dtypes(include=['object']).columns + #datafront=dataset.copy() + #svekolone=datafront.columns + #kategorijskekolone=datafront.select_dtypes(include=['object']).columns for kolonaEncoding in encodings: kolona = kolonaEncoding["columnName"] @@ -235,75 +281,75 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): # # ###OPTIMIZATORI - """ - if(params['optimizer']=='adam'): - opt=tf.keras.optimizers.Adam(learning_rate=params['learningRate']) + + if(paramsModel['optimizer']=='Adam'): + opt=tf.keras.optimizers.Adam(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='adadelta'): - opt=tf.keras.optimizers.Adadelta(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Adadelta'): + opt=tf.keras.optimizers.Adadelta(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='adagrad'): - opt=tf.keras.optimizers.Adagrad(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Adagrad'): + opt=tf.keras.optimizers.Adagrad(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='adamax'): - opt=tf.keras.optimizers.Adamax(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Adamax'): + opt=tf.keras.optimizers.Adamax(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='nadam'): - opt=tf.keras.optimizers.Nadam(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Nadam'): + opt=tf.keras.optimizers.Nadam(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='sgd'): - opt=tf.keras.optimizers.SGD(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Sgd'): + opt=tf.keras.optimizers.SGD(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='ftrl'): - opt=tf.keras.optimizers.Ftrl(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Ftrl'): + opt=tf.keras.optimizers.Ftrl(learning_rate=float(paramsModel['learningRate'])) - elif(params['optimizer']=='rmsprop'): - opt=tf.keras.optimizers.RMSprop(learning_rate=params['learningRate']) + elif(paramsModel['optimizer']=='Rmsprop'): + opt=tf.keras.optimizers.RMSprop(learning_rate=float(paramsModel['learningRate'])) ###REGULARIZACIJA #regularisation={'kernelType':'l1 ili l2 ili l1_l2','kernelRate':default=0.01 ili jedna od vrednosti(0.0001,0.001,0.1,1,2,3) ili neka koju je korisnik zadao,'biasType':'','biasRate':'','activityType','activityRate'} - reg=params['regularisation'] - - ###Kernel - if(reg['kernelType']=='l1'): - kernelreg=tf.keras.regularizers.l1(reg['kernelRate']) - elif(reg['kernelType']=='l2'): - kernelreg=tf.keras.regularizers.l2(reg['kernelRate']) - elif(reg['kernelType']=='l1l2'): - kernelreg=tf.keras.regularizers.l1_l2(l1=reg['kernelRate'][0],l2=reg['kernelRate'][1]) - - ###Bias - if(reg['biasType']=='l1'): - biasreg=tf.keras.regularizers.l1(reg['biasRate']) - elif(reg['biasType']=='l2'): - biasreg=tf.keras.regularizers.l2(reg['biasRate']) - elif(reg['biasType']=='l1l2'): - biasreg=tf.keras.regularizers.l1_l2(l1=reg['biasRate'][0],l2=reg['biasRate'][1]) - - ###Activity - if(reg['kernelType']=='l1'): - activityreg=tf.keras.regularizers.l1(reg['activityRate']) - elif(reg['kernelType']=='l2'): - activityreg=tf.keras.regularizers.l2(reg['activityRate']) - elif(reg['kernelType']=='l1l2'): - activityreg=tf.keras.regularizers.l1_l2(l1=reg['activityRate'][0],l2=reg['activityRate'][1]) - """ + + filepath=os.path.join("temp/",paramsExperiment['_id']+"_"+paramsModel['_id']+".h5") if(problem_type=='multi-klasifikacioni'): #print('multi') + + reg=paramsModel['regularisation'][0] + regRate=float(paramsModel['regularisationRate'][0]) + if(reg=='l1'): + kernelreg=tf.keras.regularizers.l1(regRate) + biasreg=tf.keras.regularizers.l1(regRate) + activityreg=tf.keras.regularizers.l1(regRate) + elif(reg=='l2'): + kernelreg=tf.keras.regularizers.l2(regRate) + biasreg=tf.keras.regularizers.l2(regRate) + activityreg=tf.keras.regularizers.l2(regRate) + classifier=tf.keras.Sequential() - - classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog + classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1], kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#prvi skriveni + definisanje prethodnog-ulaznog + for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja - #print(i) - classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj + ###Kernel + reg=paramsModel['regularisation'][i+1] + regRate=float(paramsModel['regularisationRate'][i+1]) + if(reg=='l1'): + kernelreg=tf.keras.regularizers.l1(regRate) + biasreg=tf.keras.regularizers.l1(regRate) + activityreg=tf.keras.regularizers.l1(regRate) + elif(reg=='l2'): + kernelreg=tf.keras.regularizers.l2(regRate) + biasreg=tf.keras.regularizers.l2(regRate) + activityreg=tf.keras.regularizers.l2(regRate) + + classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj + classifier.add(tf.keras.layers.Dense(units=5, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj - classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =paramsModel['metrics']) + classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt, metrics =paramsModel['metrics']) - history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id'])) + history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=float(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id'])) hist=history.history #plt.plot(hist['accuracy']) @@ -325,17 +371,39 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): elif(problem_type=='binarni-klasifikacioni'): #print('*************************************************************************binarni') + reg=paramsModel['regularisation'][0] + regRate=float(paramsModel['regularisationRate'][0]) + if(reg=='l1'): + kernelreg=tf.keras.regularizers.l1(regRate) + biasreg=tf.keras.regularizers.l1(regRate) + activityreg=tf.keras.regularizers.l1(regRate) + elif(reg=='l2'): + kernelreg=tf.keras.regularizers.l2(regRate) + biasreg=tf.keras.regularizers.l2(regRate) + activityreg=tf.keras.regularizers.l2(regRate) classifier=tf.keras.Sequential() - classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog + classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#prvi skriveni + definisanje prethodnog-ulaznog + for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja #print(i) - classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj + reg=paramsModel['regularisation'][i+1] + regRate=float(paramsModel['regularisationRate'][i+1]) + if(reg=='l1'): + kernelreg=tf.keras.regularizers.l1(regRate) + biasreg=tf.keras.regularizers.l1(regRate) + activityreg=tf.keras.regularizers.l1(regRate) + elif(reg=='l2'): + kernelreg=tf.keras.regularizers.l2(regRate) + biasreg=tf.keras.regularizers.l2(regRate) + activityreg=tf.keras.regularizers.l2(regRate) + classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj + classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj - classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =paramsModel['metrics']) + classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics =paramsModel['metrics']) - history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id'])) + history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=float(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id'])) hist=history.history y_pred=classifier.predict(x_test) y_pred=(y_pred>=0.5).astype('int') @@ -351,17 +419,40 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): return filepath,hist elif(problem_type=='regresioni'): + reg=paramsModel['regularisation'][0] + regRate=float(paramsModel['regularisationRate'][0]) + if(reg=='l1'): + kernelreg=tf.keras.regularizers.l1(regRate) + biasreg=tf.keras.regularizers.l1(regRate) + activityreg=tf.keras.regularizers.l1(regRate) + elif(reg=='l2'): + kernelreg=tf.keras.regularizers.l2(regRate) + biasreg=tf.keras.regularizers.l2(regRate) + activityreg=tf.keras.regularizers.l2(regRate) classifier=tf.keras.Sequential() - classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1]))#prvi skriveni + definisanje prethodnog-ulaznog + classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][0],input_dim=x_train.shape[1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#prvi skriveni + definisanje prethodnog-ulaznog + for i in range(paramsModel['hiddenLayers']-1):#ako postoji vise od jednog skrivenog sloja #print(i) - classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1]))#i-ti skriveni sloj - classifier.add(tf.keras.layers.Dense(units=1)) + reg=paramsModel['regularisation'][i+1] + regRate=float(paramsModel['regularisationRate'][i+1]) + if(reg=='l1'): + kernelreg=tf.keras.regularizers.l1(regRate) + biasreg=tf.keras.regularizers.l1(regRate) + activityreg=tf.keras.regularizers.l1(regRate) + elif(reg=='l2'): + kernelreg=tf.keras.regularizers.l2(regRate) + biasreg=tf.keras.regularizers.l2(regRate) + activityreg=tf.keras.regularizers.l2(regRate) + + classifier.add(tf.keras.layers.Dense(units=paramsModel['hiddenLayerNeurons'], activation=paramsModel['hiddenLayerActivationFunctions'][i+1],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj + + classifier.add(tf.keras.layers.Dense(units=1),activation=paramsModel['outputLayerActivationFunction']) - classifier.compile(loss =paramsModel["lossFunction"] , optimizer = paramsModel['optimizer'] , metrics =paramsModel['metrics']) + classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics =paramsModel['metrics']) - history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=paramsModel['batchSize'],callbacks=callback(x_test, y_test,paramsModel['_id'])) + history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=float(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id'])) hist=history.history y_pred=classifier.predict(x_test) #print(classifier.evaluate(x_test, y_test)) |