author    | Sonja Galovic <galovicsonja@gmail.com> | 2022-04-18 00:20:50 +0200
committer | Sonja Galovic <galovicsonja@gmail.com> | 2022-04-18 00:20:50 +0200
commit    | fbf1a64ae46148754a2ca76170591b243fb8426e (patch)
tree      | 89b76899724d9d4c0031cd2f3ee09b3b409f33c2 /backend/microservice
parent    | 07b11e07fc62d9ea9765595812ab68209be99a3a (diff)
parent    | c3cc4c680bed2d2d00743bc03e6dc01501f90e25 (diff)
Merge branch 'dev' of http://gitlab.pmf.kg.ac.rs/igrannonica/neuronstellar into dev
Diffstat (limited to 'backend/microservice')
-rw-r--r-- | backend/microservice/api/newmlservice.py | 85
1 file changed, 45 insertions(+), 40 deletions(-)
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index a9bce3bb..9951c25f 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -156,48 +156,53 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     # ### Enkodiranje
     encoding=paramsExperiment["encoding"]
-    if(encoding=='label'):
-        encoder=LabelEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
+    datafront=dataset.copy()
+    svekolone=datafront.columns
+    kategorijskekolone=datafront.select_dtypes(include=['object']).columns
+    for kolona in svekolone:
+        if(kolona in kategorijskekolone):
+            if(encoding=='label'):
+                encoder=LabelEncoder()
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        data[col]=encoder.fit_transform(data[col])
 
-    elif(encoding=='onehot'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
-
-    elif(encoding=='ordinal'):
-        encoder = OrdinalEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
-
-    elif(encoding=='hashing'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
-        encoder.fit_transform(data)
-    elif(encoding=='binary'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
-        encoder.fit_transform(data)
-
-    elif(encoding=='baseN'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
-        encoder.fit_transform(data)
+            elif(encoding=='onehot'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+
+            elif(encoding=='ordinal'):
+                encoder = OrdinalEncoder()
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        data[col]=encoder.fit_transform(data[col])
+
+            elif(encoding=='hashing'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
+                encoder.fit_transform(data)
+            elif(encoding=='binary'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
+                encoder.fit_transform(data)
+
+            elif(encoding=='baseN'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
+                encoder.fit_transform(data)
     # # Input - output
     #
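A note on the hunk above: on both the old and the new side, the 'hashing', 'binary' and 'baseN' branches call `encoder.fit_transform(data)` without assigning the result, so the encoded frame is discarded, and the new version repeats the whole encoder dispatch once per categorical column of `datafront` while still encoding `data`. Below is a minimal, self-contained sketch of the same kind of categorical-encoding dispatch with the results assigned back; the helper name `encode_categoricals` and the sample DataFrame are illustrative assumptions, not code from this repository.

```python
# Illustrative sketch only (assumed helper name and sample data, not repository code):
# dispatch categorical-column encoding by the same string flags used in the hunk above.
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
import category_encoders as ce


def encode_categoricals(data: pd.DataFrame, encoding: str) -> pd.DataFrame:
    data = data.copy()
    category_columns = data.select_dtypes(include=['object']).columns.tolist()
    if not category_columns:
        return data

    if encoding == 'label':
        # LabelEncoder works column by column.
        for col in category_columns:
            data[col] = LabelEncoder().fit_transform(data[col])
    elif encoding == 'onehot':
        data = pd.get_dummies(data, columns=category_columns, prefix=category_columns)
    elif encoding == 'ordinal':
        # OrdinalEncoder accepts a 2-D selection, so all categorical columns go in at once.
        data[category_columns] = OrdinalEncoder().fit_transform(data[category_columns])
    elif encoding == 'hashing':
        # fit_transform returns a new frame; it must be assigned back to take effect.
        data = ce.HashingEncoder(cols=category_columns,
                                 n_components=len(category_columns)).fit_transform(data)
    elif encoding == 'binary':
        data = ce.BinaryEncoder(cols=category_columns, return_df=True).fit_transform(data)
    elif encoding == 'baseN':
        data = ce.BaseNEncoder(cols=category_columns, return_df=True, base=5).fit_transform(data)
    return data


if __name__ == '__main__':
    df = pd.DataFrame({'city': ['KG', 'BG', 'KG'], 'value': [1, 2, 3]})
    print(encode_categoricals(df, 'onehot'))
```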