Diffstat (limited to 'backend/microservice')
-rw-r--r-- | backend/microservice/api/controller.py   |  57
-rw-r--r-- | backend/microservice/api/newmlservice.py | 231
2 files changed, 178 insertions, 110 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py
index 988ad987..7852b63d 100644
--- a/backend/microservice/api/controller.py
+++ b/backend/microservice/api/controller.py
@@ -54,14 +54,22 @@ class train_callback(tf.keras.callbacks.Callback):
 @app.route('/train', methods = ['POST'])
 def train():
     print("******************************TRAIN*************************************************")
-
-    f = request.files.get("file")
-    data = pd.read_csv(f)
     paramsModel = json.loads(request.form["model"])
     paramsExperiment = json.loads(request.form["experiment"])
     paramsDataset = json.loads(request.form["dataset"])
+    f = request.files.get("file")
+    if(paramsDataset['delimiter']=='novi red'):
+        separation='\n'
+
+    elif(paramsDataset['delimiter']=='razmak'):
+        separation=' '
+    else:
+        separation=paramsDataset['delimiter']
+    data = pd.read_csv(f,sep=separation)
+
+
     #dataset, paramsModel, paramsExperiment, callback)
-    filepath,result = newmlservice.train(data, paramsModel, paramsExperiment,paramsDataset, train_callback)
+    filepath,result,finalMetrics= newmlservice.train(data, paramsModel, paramsExperiment,paramsDataset, train_callback)
     """
     f = request.json['filepath']
     dataset = pd.read_csv(f)
@@ -69,6 +77,7 @@ def train():
     print(result)
     """
+
     url = config.api_url + "/file/h5"
     files = {'file': open(filepath, 'rb')}
     r=requests.post(url, files=files,data={"uploaderId":paramsExperiment['uploaderId']})
 
@@ -86,9 +95,11 @@ def train():
         "experimentId" : paramsExperiment["_id"],
         "modelId" : paramsModel["_id"],
         "h5FileId" : fileId,
-        "metrics" : m
+        "metrics" : m,
+        "finalMetrics":finalMetrics
+
     }
-    print(predictor)
+    #print(predictor)
     url = config.api_url + "/Predictor/add"
     r = requests.post(url, json=predictor).text
     print(r)
@@ -101,16 +112,46 @@ def predict():
     paramsExperiment = json.loads(request.form["experiment"])
     paramsPredictor = json.loads(request.form["predictor"])
     print("********************************model loaded*******************************")
-    result = newmlservice.predict(paramsExperiment, paramsPredictor, model)
+    result = newmlservice.predict(paramsExperiment, paramsPredictor, model,train_callback)
+    return result
+
+@app.route('/manageH5', methods = ['POST'])
+def manageH5():
+    h5 = request.files.get("h5file")
+    model = tf.keras.models.load_model(h5)
+
+    paramsExperiment = json.loads(request.form["experiment"])
+    paramsModel = json.loads(request.form["model"])
+    paramsDataset = json.loads(request.form["dataset"])
+
+    f = request.files.get("file")
+    if(paramsDataset['delimiter']=='novi red'):
+        separation='\n'
+    elif(paramsDataset['delimiter']=='razmak'):
+        separation=' '
+    else:
+        separation=paramsDataset['delimiter']
+
+    data = pd.read_csv(f,sep=separation)
+
+    result = newmlservice.manageH5(data,paramsModel,paramsExperiment,paramsDataset,model,train_callback)
     return result
+
 @app.route('/preprocess',methods=['POST'])
 def returnColumnsInfo():
     print("********************************PREPROCESS*******************************")
     dataset = json.loads(request.form["dataset"])
     file = request.files.get("file")
-    data=pd.read_csv(file)
+    if(dataset['delimiter']=='novi red'):
+        separation='\n'
+
+    elif(dataset['delimiter']=='razmak'):
+        separation=' '
+    else:
+        separation=dataset['delimiter']
+    data=pd.read_csv(file,sep=separation)
     '''
     #f = request.json['filepath']
     #data=pd.read_csv(f)
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index 2f08d4b4..6a863013 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -291,17 +291,18 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
         random=123
     else:
         random=0
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=random)
+    #x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=random)
     #print(x_train,x_test)
-
+    x, x_test, y, y_test = train_test_split(x, y, test_size=test, random_state=random, shuffle=True)
+    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.15, shuffle=True)
     #
     # Treniranje modela
     #
     #
     ###OPTIMIZATORI
-
+    print(paramsModel['optimizer'])
     if(paramsModel['optimizer']=='Adam'):
-        opt=tf.keras.optimizers.Adam(learning_rate=float(paramsModel['learningRate']))
+        opt=tf.keras.optimizers.Adam(learning_rate=3)
 
     elif(paramsModel['optimizer']=='Adadelta'):
         opt=tf.keras.optimizers.Adadelta(learning_rate=float(paramsModel['learningRate']))
@@ -315,13 +316,16 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     elif(paramsModel['optimizer']=='Nadam'):
         opt=tf.keras.optimizers.Nadam(learning_rate=float(paramsModel['learningRate']))
 
-    elif(paramsModel['optimizer']=='Sgd'):
+    elif(paramsModel['optimizer']=='SGD'):
+        opt=tf.keras.optimizers.SGD(learning_rate=float(paramsModel['learningRate']))
+
+    if(paramsModel['optimizer']=='SGDMomentum'):
         opt=tf.keras.optimizers.SGD(learning_rate=float(paramsModel['learningRate']))
 
     elif(paramsModel['optimizer']=='Ftrl'):
         opt=tf.keras.optimizers.Ftrl(learning_rate=float(paramsModel['learningRate']))
 
-    elif(paramsModel['optimizer']=='Rmsprop'):
+    elif(paramsModel['optimizer']=='RMSprop'):
         opt=tf.keras.optimizers.RMSprop(learning_rate=float(paramsModel['learningRate']))
 
     ###REGULARIZACIJA
@@ -331,7 +335,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     filepath=os.path.join("temp/",paramsExperiment['_id']+"_"+paramsModel['_id']+".h5")
     if(problem_type=='multi-klasifikacioni'):
         #print('multi')
-
+        #print(paramsModel)
         reg=paramsModel['layers'][0]['regularisation']
         regRate=float(paramsModel['layers'][0]['regularisationRate'])
         if(reg=='l1'):
@@ -365,10 +369,10 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
 
 
 
-        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt, metrics = ['accuracy','mae','mse'])
+        classifier.compile(loss =paramsModel["lossFunction"] , optimizer =opt, metrics = ['accuracy','mae','mse'])
+
+        history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
 
-        history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
-
         hist=history.history
         #plt.plot(hist['accuracy'])
         #plt.show()
@@ -380,12 +384,28 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
 
         classifier.save(filepath, save_format='h5')
 
-
+        metrics={}
+        macro_averaged_precision=sm.precision_score(y_test, y_pred, average = 'macro')
+        micro_averaged_precision=sm.precision_score(y_test, y_pred, average = 'micro')
+        macro_averaged_recall=sm.recall_score(y_test, y_pred, average = 'macro')
+        micro_averaged_recall=sm.recall_score(y_test, y_pred, average = 'micro')
+        macro_averaged_f1=sm.f1_score(y_test, y_pred, average = 'macro')
+        micro_averaged_f1=sm.f1_score(y_test, y_pred, average = 'micro')
+
+        metrics= {
+            "macro_averaged_precision" :float(macro_averaged_precision),
+            "micro_averaged_precision" : float(micro_averaged_precision),
+            "macro_averaged_recall" : float(macro_averaged_recall),
+            "micro_averaged_recall" : float(micro_averaged_recall),
+            "macro_averaged_f1" : float(macro_averaged_f1),
+            "micro_averaged_f1" : float(micro_averaged_f1)
+            }
+
         #vizuelizacija u python-u
         #from ann_visualizer.visualize import ann_viz;
         #ann_viz(classifier, title="My neural network")
-        return filepath,hist
+        return filepath,hist,metrics
 
     elif(problem_type=='binarni-klasifikacioni'):
         #print('*************************************************************************binarni')
@@ -415,26 +435,47 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
                 kernelreg=tf.keras.regularizers.l2(regRate)
                 biasreg=tf.keras.regularizers.l2(regRate)
                 activityreg=tf.keras.regularizers.l2(regRate)
-            classifier.add(tf.keras.layers.Dense(units=paramsModel['layers'][i+1]['neurons'], activation=paramsModel['layers'][i+1]['activationFunction'],kernel_regularizer=kernelreg, bias_regularizer=biasreg, activity_regularizer=activityreg))#i-ti skriveni sloj
+            classifier.add(tf.keras.layers.Dense(units=paramsModel['layers'][i+1]['neurons'], activation=paramsModel['layers'][i+1]['activationFunction']))#i-ti skriveni sloj
 
         classifier.add(tf.keras.layers.Dense(units=1, activation=paramsModel['outputLayerActivationFunction']))#izlazni sloj
 
-        classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics = ['accuracy','mae','mse'])
+        classifier.compile(loss =paramsModel["lossFunction"] , optimizer =opt , metrics = ['accuracy','mae','mse'])
 
-        history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
+        history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
 
         hist=history.history
 
         y_pred=classifier.predict(x_test)
         y_pred=(y_pred>=0.5).astype('int')
-
-        #print(y_pred.flatten())
-        #print(y_test)
 
         scores = classifier.evaluate(x_test, y_test)
         #print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
 
-        #ann_viz(classifier, title="My neural network")
+        # ann_viz(classifier, title="My neural network")
 
         classifier.save(filepath, save_format='h5')
-        return filepath,hist
+
+        accuracy = float(sm.accuracy_score(y_test,y_pred))
+        precision = float(sm.precision_score(y_test,y_pred))
+        recall = float(sm.recall_score(y_test,y_pred))
+        tn, fp, fn, tp = sm.confusion_matrix(y_test,y_pred).ravel()
+        specificity = float(tn / (tn+fp))
+        f1 = float(sm.f1_score(y_test,y_pred))
+        fpr, tpr, _ = sm.roc_curve(y_test,y_pred)
+        logloss = float(sm.log_loss(y_test, y_pred))
+        metrics= {
+            "accuracy" : accuracy,
+            "precision" : precision,
+            "recall" : recall,
+            "specificity" : specificity,
+            "f1" : f1,
+            "tn" : float(tn),
+            "fp" : float(fp),
+            "fn" : float(fn),
+            "tp" : float(tp),
+            "fpr" : fpr.tolist(),
+            "tpr" : tpr.tolist(),
+            "logloss" : logloss
+            }
+
+        return filepath,hist,metrics
 
     elif(problem_type=='regresioni'):
         reg=paramsModel['layers'][0]['regularisation']
@@ -470,12 +511,36 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
 
 
         classifier.compile(loss =paramsModel["lossFunction"] , optimizer = opt , metrics = ['accuracy','mae','mse'])
 
-        history=classifier.fit(x_train, y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']))
+        history=classifier.fit( x=x_train, y=y_train, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x_test, y_test,paramsModel['_id']),validation_data=(x_val, y_val))
         hist=history.history
 
         y_pred=classifier.predict(x_test)
         #print(classifier.evaluate(x_test, y_test))
+
         classifier.save(filepath, save_format='h5')
-        return filepath,hist
+
+
+        mse = float(sm.mean_squared_error(y_test,y_pred))
+        mae = float(sm.mean_absolute_error(y_test,y_pred))
+        mape = float(sm.mean_absolute_percentage_error(y_test,y_pred))
+        rmse = float(np.sqrt(sm.mean_squared_error(y_test,y_pred)))
+        rmsle = float(np.sqrt(sm.mean_squared_error(y_test, y_pred)))
+        r2 = float(sm.r2_score(y_test, y_pred))
+        # n - num of observations
+        # k - num of independent variables
+        n = 40
+        k = 2
+        adj_r2 = float(1 - ((1-r2)*(n-1)/(n-k-1)))
+        metrics= {"mse" : mse,
+                "mae" : mae,
+                "mape" : mape,
+                "rmse" : rmse,
+                "rmsle" : rmsle,
+                "r2" : r2,
+                "adj_r2" : adj_r2
+                }
+
+        return filepath,hist,metrics
+
 def roc_auc_score_multiclass(actual_class, pred_class, average = "macro"):
     #creating a set of all the unique classes using the actual class list
@@ -498,30 +563,8 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     #
     # Metrike
     #
-
-    if(problem_type=="binarni-klasifikacioni"):
-        accuracy = float(sm.accuracy_score(y_test,y_pred))
-        precision = float(sm.precision_score(y_test,y_pred))
-        recall = float(sm.recall_score(y_test,y_pred))
-        tn, fp, fn, tp = sm.confusion_matrix(y_test,y_pred).ravel()
-        specificity = float(tn / (tn+fp))
-        f1 = float(sm.f1_score(y_test,y_pred))
-        fpr, tpr, _ = sm.roc_curve(y_test,y_pred)
-        logloss = float(sm.log_loss(y_test, y_pred))
-        metrics= {"accuracy" : accuracy,
-                "precision" : precision,
-                "recall" : recall,
-                "specificity" : specificity,
-                "f1" : f1,
-                "tn" : float(tn),
-                "fp" : float(fp),
-                "fn" : float(fn),
-                "tp" : float(tp),
-                "fpr" : fpr.tolist(),
-                "tpr" : tpr.tolist(),
-                "logloss" : logloss
-                }
-    elif(problem_type=="regresioni"):
+    '''
+    if(problem_type=="regresioni"):
         # https://www.analyticsvidhya.com/blog/2021/05/know-the-best-evaluation-metrics-for-your-regression-model/
         mse = float(sm.mean_squared_error(y_test,y_pred))
         mae = float(sm.mean_absolute_error(y_test,y_pred))
@@ -542,7 +585,7 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
                 "r2" : r2,
                 "adj_r2" : adj_r2
                 }
-    '''
+
 
     elif(problem_type=="multi-klasifikacioni"):
         cr=sm.classification_report(y_test, y_pred)
@@ -556,32 +599,36 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     macro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'macro')
     micro_averaged_f1=metrics.f1_score(y_test, y_pred, average = 'micro')
     roc_auc_dict=roc_auc_score_multiclass(y_test, y_pred)
-    '''
+'''
 
 def predict(experiment, predictor, model):
     #model.predict()
     # ovo je pre bilo manageH5
     return "TODO"
 
-def manageH5(dataset,params,h5model):
-    problem_type = params["type"]
-    #print(problem_type)
+def manageH5(dataset,paramsModel,paramsExperiment,paramsDataset,h5model,callback):
+    problem_type = paramsModel["type"]
     data = pd.DataFrame()
-    #print(data)
-    for col in params["inputColumns"]:
-        #print(col)
-        data[col]=dataset[col]
-    output_column = params["columnToPredict"]
+    for col in paramsExperiment["inputColumns"]:
+        if(col!=paramsExperiment["outputColumn"]):
+            data[col]=dataset[col]
+    output_column = paramsExperiment["outputColumn"]
     data[output_column] = dataset[output_column]
-    #print(data)
-    ###NULL
-    null_value_options = params["nullValues"]
-    null_values_replacers = params["nullValuesReplacers"]
+    kategorijskekolone=[]
+    columnInfo=paramsDataset['columnInfo']
+    columnTypes=paramsExperiment['columnTypes']
+    for i in range(len(columnInfo)):
+        col=columnInfo[i]
+        if(columnTypes[i]=="categorical" and col['columnName'] in paramsExperiment['inputColumns']):
+            data[col['columnName']]=data[col['columnName']].apply(str)
+            kategorijskekolone.append(col['columnName'])
+
+    null_value_options = paramsExperiment["nullValues"]
+    null_values_replacers = paramsExperiment["nullValuesReplacers"]
 
     if(null_value_options=='replace'):
-        #print("replace null")
         # TODO
-        dict=params['null_values_replacers']
+        dict=null_values_replacers
         while(len(dict)>0):
             replace=dict.pop()
             col=replace['column']
@@ -592,58 +639,38 @@ def manageH5(dataset,params,h5model):
     elif(null_value_options=='delete_rows'):
         data=data.dropna()
     elif(null_value_options=='delete_columns'):
-        data=data.dropna()
-
-    #print(data.shape)
-
-    #
-    # Brisanje kolona koje ne uticu na rezultat
-    #
-    num_rows=data.shape[0]
-    for col in data.columns:
-        if((data[col].nunique()==(num_rows)) and (data[col].dtype==np.object_)):
-            data.pop(col)
-    #
-    ### Enkodiranje
-    encoding=params["encoding"]
-    if(encoding=='label'):
-        encoder=LabelEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
-
-
-    elif(encoding=='onehot'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
-        #print(data)
+        data=data.dropna(axis=1)
+
+    encodings=paramsExperiment["encodings"]
+    for kolonaEncoding in encodings:
+        kolona = kolonaEncoding["columnName"]
+        encoding = kolonaEncoding["encoding"]
+        if(kolona in kategorijskekolone):
+            if(encoding=='label'):
+                encoder=LabelEncoder()
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        data[col]=encoder.fit_transform(data[col])
+            elif(encoding=='onehot'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
 
-    #
-    # Input - output
-    #
     x_columns = []
     for col in data.columns:
         if(col!=output_column):
             x_columns.append(col)
-    #print(x_columns)
     x2 = data[x_columns]
-    #print(x2)
-    #print(x2.values)
     x2 = data[x_columns].values
-    #print(x2)
     y2 = data[output_column].values
 
-    h5model.summary()
+    #h5model.summary()
    #ann_viz(h5model, title="My neural network")
-    h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics = ['accuracy','mae','mse'])
-    history=h5model.fit(x2, y2, epochs = params['epochs'],batch_size=int(params['batchSize']))
-
+    history=h5model.fit(x2, y2, epochs = paramsModel['epochs'],batch_size=int(paramsModel['batchSize']),callbacks=callback(x2, y2,paramsModel['_id']))
     y_pred2=h5model.predict(x2)
-    y_pred2=np.argmax(y_pred2,axis=1)
     #y_pred=h5model.predict_classes(x)
     score = h5model.evaluate(x2,y_pred2, verbose=0)