diff options
author | TAMARA JERINIC <tamara.jerinic@gmail.com> | 2022-04-02 22:37:38 +0200 |
---|---|---|
committer | TAMARA JERINIC <tamara.jerinic@gmail.com> | 2022-04-02 22:37:38 +0200 |
commit | 94dcb65454e55caafa0a6e36e5766144cfb204c6 (patch) | |
tree | f1e86994a182a2c890faeb549b2ed47b0e3ee6c5 /backend/microservice/api | |
parent | 491db336204f911c8f0717b0b16ff345ca5ee355 (diff) |
Obrada statistike je dodata u ml_service.py fajl i kreirana je ruta u controller.py fajlu.
Diffstat (limited to 'backend/microservice/api')
-rw-r--r-- | backend/microservice/api/controller.py | 11 | ||||
-rw-r--r-- | backend/microservice/api/ml_service.py | 62 |
2 files changed, 65 insertions, 8 deletions
diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index 059af317..524b97b5 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -38,6 +38,17 @@ def predict(): # #model.predict? +@app.route('/preprocess',methods=['POST']) +def returnColumnsInfo(): + f=request.json['filepathcolinfo'] + dataset=pd.read_csv(f) + + result=ml_service.returnColumnsInfo(dataset) + + return jsonify(result) + + + print("App loaded.") ml_socket.start() app.run()
\ No newline at end of file diff --git a/backend/microservice/api/ml_service.py b/backend/microservice/api/ml_service.py index 7b950bcd..21ec8fa3 100644 --- a/backend/microservice/api/ml_service.py +++ b/backend/microservice/api/ml_service.py @@ -1,3 +1,6 @@ +from cmath import nan +from enum import unique +from itertools import count import pandas as pd from sklearn import datasets import tensorflow as tf @@ -13,12 +16,55 @@ from copyreg import constructor from flask import request, jsonify, render_template from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import OrdinalEncoder -#import category_encoders as ce +import category_encoders as ce from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split from dataclasses import dataclass +import statistics as s + +def returnColumnsInfo(dataset): + dict=[] + datafront=dataset.copy() + svekolone=datafront.columns + kategorijskekolone=datafront.select_dtypes(include=['object']).columns + allNullCols=0 + for kolona in svekolone: + if(kolona in kategorijskekolone): + uniquevalues=datafront[kolona].unique() + mean=0 + median=0 + nullCount=datafront[kolona].isnull().sum() + if(nullCount>0): + allNullCols=allNullCols+1 + frontreturn={'columnName':kolona, + 'isNumber':False, + 'uniqueValues':uniquevalues.tolist(), + 'median':float(mean), + 'mean':float(median), + 'numNulls':float(nullCount) + } + dict.append(frontreturn) + else: + mean=datafront[kolona].mean() + median=s.median(datafront[kolona]) + nullCount=datafront[kolona].isnull().sum() + if(nullCount>0): + allNullCols=allNullCols+1 + frontreturn={'columnName':kolona, + 'isNumber':1, + 'uniqueValues':[], + 'mean':float(mean), + 'median':float(median), + 'numNulls':float(nullCount) + } + dict.append(frontreturn) + NullRows = datafront[datafront.isnull().any(axis=1)] + #print(NullRows) + #print(len(NullRows)) + allNullRows=len(NullRows) + + return {'columnInfo':dict,'allNullColl':allNullCols,'allNullRows':allNullRows} -''' @dataclass class TrainingResultClassification: accuracy: float @@ -34,18 +80,18 @@ class TrainingResultClassification: fpr: float tpr: float metrics: dict - +''' @datasets class TrainingResultRegression: mse: float mae: float mape: float rmse: float -''' + @dataclass class TrainingResult: metrics: dict - +''' def train(dataset, params, callback): problem_type = params["type"] data = pd.DataFrame() @@ -98,7 +144,7 @@ def train(dataset, params, callback): for col in data.columns: if(data[col].dtype==np.object_): data[col]=encoder.fit_transform(data[col]) - ''' + elif(encoding=='hashing'): category_columns=[] for col in data.columns: @@ -120,7 +166,7 @@ def train(dataset, params, callback): if(data[col].dtype==np.object_): category_columns.append(col) encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5) - encoder.fit_transform(data)''' + encoder.fit_transform(data) # # Input - output # @@ -256,4 +302,4 @@ def train(dataset, params, callback): "adj_r2" : adj_r2 } # TODO upload trenirani model nazad na backend - return TrainingResult(metrics)
\ No newline at end of file +#return TrainingResult(metrics)
\ No newline at end of file |