author      Sonja Galovic <galovicsonja@gmail.com>    2022-04-18 00:20:50 +0200
committer   Sonja Galovic <galovicsonja@gmail.com>    2022-04-18 00:20:50 +0200
commit      fbf1a64ae46148754a2ca76170591b243fb8426e (patch)
tree        89b76899724d9d4c0031cd2f3ee09b3b409f33c2 /backend/microservice
parent      07b11e07fc62d9ea9765595812ab68209be99a3a (diff)
parent      c3cc4c680bed2d2d00743bc03e6dc01501f90e25 (diff)
Merge branch 'dev' of http://gitlab.pmf.kg.ac.rs/igrannonica/neuronstellar into dev
Diffstat (limited to 'backend/microservice')
-rw-r--r--    backend/microservice/api/newmlservice.py    85
1 file changed, 45 insertions, 40 deletions
diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py
index a9bce3bb..9951c25f 100644
--- a/backend/microservice/api/newmlservice.py
+++ b/backend/microservice/api/newmlservice.py
@@ -156,48 +156,53 @@ def train(dataset, paramsModel,paramsExperiment,paramsDataset,callback):
     #
     ### Encoding
     encoding=paramsExperiment["encoding"]
-    if(encoding=='label'):
-        encoder=LabelEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
+    datafront=dataset.copy()
+    svekolone=datafront.columns
+    kategorijskekolone=datafront.select_dtypes(include=['object']).columns
+    for kolona in svekolone:
+        if(kolona in kategorijskekolone):
+            if(encoding=='label'):
+                encoder=LabelEncoder()
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        data[col]=encoder.fit_transform(data[col])
-    elif(encoding=='onehot'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
-
-    elif(encoding=='ordinal'):
-        encoder = OrdinalEncoder()
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                data[col]=encoder.fit_transform(data[col])
-
-    elif(encoding=='hashing'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
-        encoder.fit_transform(data)
-    elif(encoding=='binary'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
-        encoder.fit_transform(data)
-
-    elif(encoding=='baseN'):
-        category_columns=[]
-        for col in data.columns:
-            if(data[col].dtype==np.object_):
-                category_columns.append(col)
-        encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
-        encoder.fit_transform(data)
+            elif(encoding=='onehot'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                data=pd.get_dummies(data, columns=category_columns, prefix=category_columns)
+
+            elif(encoding=='ordinal'):
+                encoder = OrdinalEncoder()
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        data[col]=encoder.fit_transform(data[col])
+
+            elif(encoding=='hashing'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
+                encoder.fit_transform(data)
+            elif(encoding=='binary'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                encoder=ce.BinaryEncoder(cols=category_columns, return_df=True)
+                encoder.fit_transform(data)
+
+            elif(encoding=='baseN'):
+                category_columns=[]
+                for col in data.columns:
+                    if(data[col].dtype==np.object_):
+                        category_columns.append(col)
+                encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
+                encoder.fit_transform(data)
     #
     # Input - output
     #
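
For reference, in both the old and the merged version of this block the frames returned by the category_encoders fit_transform calls are never assigned back, and the new per-column loop keeps operating on data rather than the datafront copy it builds. Below is a minimal standalone sketch of the same encoding dispatch with the results assigned back, assuming pandas, scikit-learn and category_encoders are available; the helper name encode_categoricals and the frame name df are illustrative, not taken from the repository.

import pandas as pd
import category_encoders as ce
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder

def encode_categoricals(df, encoding):
    # Sketch only: work on a copy and collect the categorical (object-dtype) columns once.
    df = df.copy()
    category_columns = df.select_dtypes(include=['object']).columns.tolist()
    if not category_columns:
        return df
    if encoding == 'label':
        # LabelEncoder is fit per column; each column gets its own integer codes.
        for col in category_columns:
            df[col] = LabelEncoder().fit_transform(df[col])
    elif encoding == 'onehot':
        df = pd.get_dummies(df, columns=category_columns, prefix=category_columns)
    elif encoding == 'ordinal':
        # OrdinalEncoder expects 2-D input, so all categorical columns are encoded together.
        df[category_columns] = OrdinalEncoder().fit_transform(df[category_columns])
    elif encoding == 'hashing':
        df = ce.HashingEncoder(cols=category_columns,
                               n_components=len(category_columns)).fit_transform(df)
    elif encoding == 'binary':
        df = ce.BinaryEncoder(cols=category_columns, return_df=True).fit_transform(df)
    elif encoding == 'baseN':
        df = ce.BaseNEncoder(cols=category_columns, return_df=True, base=5).fit_transform(df)
    else:
        raise ValueError(f"Unsupported encoding: {encoding}")
    return df

Inside train() this would reduce the whole branch to a single reassignment, e.g. data = encode_categoricals(data, paramsExperiment["encoding"]), placed just before the input/output split that follows.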