diff options
Diffstat (limited to 'backend/microservice/api/newmlservice.py')
-rw-r--r-- | backend/microservice/api/newmlservice.py | 85 |
1 files changed, 45 insertions, 40 deletions
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py index a9bce3bb..9951c25f 100644 --- a/backend/microservice/api/newmlservice.py +++ b/backend/microservice/api/newmlservice.py @@ -156,48 +156,53 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback): # ### Enkodiranje encoding=paramsExperiment["encoding"] - if(encoding=='label'): - encoder=LabelEncoder() - for col in data.columns: - if(data[col].dtype==np.object_): - data[col]=encoder.fit_transform(data[col]) + datafront=dataset.copy() + svekolone=datafront.columns + kategorijskekolone=datafront.select_dtypes(include=['object']).columns + for kolona in svekolone: + if(kolona in kategorijskekolone): + if(encoding=='label'): + encoder=LabelEncoder() + for col in data.columns: + if(data[col].dtype==np.object_): + data[col]=encoder.fit_transform(data[col]) - elif(encoding=='onehot'): - category_columns=[] - for col in data.columns: - if(data[col].dtype==np.object_): - category_columns.append(col) - data=pd.get_dummies(data, columns=category_columns, prefix=category_columns) - - elif(encoding=='ordinal'): - encoder = OrdinalEncoder() - for col in data.columns: - if(data[col].dtype==np.object_): - data[col]=encoder.fit_transform(data[col]) - - elif(encoding=='hashing'): - category_columns=[] - for col in data.columns: - if(data[col].dtype==np.object_): - category_columns.append(col) - encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns)) - encoder.fit_transform(data) - elif(encoding=='binary'): - category_columns=[] - for col in data.columns: - if(data[col].dtype==np.object_): - category_columns.append(col) - encoder=ce.BinaryEncoder(cols=category_columns, return_df=True) - encoder.fit_transform(data) - - elif(encoding=='baseN'): - category_columns=[] - for col in data.columns: - if(data[col].dtype==np.object_): - category_columns.append(col) - encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5) - encoder.fit_transform(data) + elif(encoding=='onehot'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + data=pd.get_dummies(data, columns=category_columns, prefix=category_columns) + + elif(encoding=='ordinal'): + encoder = OrdinalEncoder() + for col in data.columns: + if(data[col].dtype==np.object_): + data[col]=encoder.fit_transform(data[col]) + + elif(encoding=='hashing'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns)) + encoder.fit_transform(data) + elif(encoding=='binary'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + encoder=ce.BinaryEncoder(cols=category_columns, return_df=True) + encoder.fit_transform(data) + + elif(encoding=='baseN'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5) + encoder.fit_transform(data) # # Input - output # |