From 38eda7a2967d3d3739070a3b805511fac665a843 Mon Sep 17 00:00:00 2001 From: TAMARA JERINIC Date: Tue, 5 Apr 2022 00:55:56 +0200 Subject: Omogućena predikcija na osnovu h5 fajla. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/microservice/api/controller.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index 524b97b5..bbba4af9 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -34,9 +34,8 @@ def predict(): f = request.json['filepath'] dataset = pd.read_csv(f) m = request.json['modelpath'] - #model = tf.keras.models.load_model(m) - # - #model.predict? + model = tf.keras.models.load_model(m) + h5=ml_service.manageH5(dataset,request.json,model,train_callback) @app.route('/preprocess',methods=['POST']) def returnColumnsInfo(): -- cgit v1.2.3 From edc79f0cff16ce889a0691351d088a61c9d1c353 Mon Sep 17 00:00:00 2001 From: TAMARA JERINIC Date: Tue, 5 Apr 2022 00:57:25 +0200 Subject: Izmena ml_service, dodato učitavanje h5 fajla. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/microservice/api/ml_service.py | 128 ++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 11 deletions(-) diff --git a/backend/microservice/api/ml_service.py b/backend/microservice/api/ml_service.py index 73b191da..0aed3dc9 100644 --- a/backend/microservice/api/ml_service.py +++ b/backend/microservice/api/ml_service.py @@ -34,21 +34,27 @@ def returnColumnsInfo(dataset): uniquevalues=datafront[kolona].unique() mean=0 median=0 + min=0 + max=0 nullCount=datafront[kolona].isnull().sum() if(nullCount>0): allNullCols=allNullCols+1 frontreturn={'columnName':kolona, 'isNumber':False, 'uniqueValues':uniquevalues.tolist(), - 'median':float(mean), - 'mean':float(median), - 'numNulls':float(nullCount) + 'mean':float(mean), + 'median':float(median), + 'numNulls':float(nullCount), + 'min':min, + 'max':max } dict.append(frontreturn) else: mean=datafront[kolona].mean() median=s.median(datafront[kolona]) nullCount=datafront[kolona].isnull().sum() + min=min(datafront[kolona]) + max=max(datafront[kolona]) if(nullCount>0): allNullCols=allNullCols+1 frontreturn={'columnName':kolona, @@ -56,7 +62,9 @@ def returnColumnsInfo(dataset): 'uniqueValues':[], 'mean':float(mean), 'median':float(median), - 'numNulls':float(nullCount) + 'numNulls':float(nullCount), + 'min':min, + 'max':max } dict.append(frontreturn) NullRows = datafront[datafront.isnull().any(axis=1)] @@ -98,6 +106,8 @@ def train(dataset, params, callback): data = pd.DataFrame() for col in params["inputColumns"]: data[col]=dataset[col] + + print(data.head()) output_column = params["columnToPredict"] data[output_column] = dataset[output_column] # @@ -177,6 +187,8 @@ def train(dataset, params, callback): x_columns.append(col) x = data[x_columns].values y = data[output_column].values + print(x_columns) + print(x) # # Podela na test i trening skupove # @@ -186,7 +198,7 @@ def train(dataset, params, callback): 
random=123 else: random=0 - x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, shuffle=params["shuffle"], random_state=random) + x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5,random_state=0) # # Skaliranje vrednosti # @@ -212,17 +224,21 @@ def train(dataset, params, callback): batch_size = params["batchSize"] epochs = params["epochs"] inputDim = len(data.columns) - 1 - + ''' classifier=tf.keras.Sequential() - + classifier.add(tf.keras.layers.Dense(units=len(data.columns),input_dim=inputDim))#input layer for f in func:#hidden layers - classifier.add(tf.keras.layers.Dense(units=hidden_layer_neurons,activation=f)) + classifier.add(tf.keras.layers.Dense(hidden_layer_neurons,activation=f)) numberofclasses=len(output_column.unique()) - classifier.add(tf.keras.layers.Dense(units=numberofclasses,activation=output_func))#output layer - + classifier.add(tf.keras.layers.Dense(numberofclasses,activation=output_func))#output layer + ''' + model=tf.keras.Sequential() + model.add(tf.keras.layers.Dense(1,input_dim=x_train.shape[1]))#input layer + model.add(tf.keras.layers.Dense(1, activation='sigmoid')) + model.add(tf.keras.layers.Dense(len(output_column.unique())+1, activation='softmax')) classifier.compile(optimizer=optimizer, loss=loss_func,metrics=metrics) history=classifier.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, callbacks=callback(x_test, y_test)) @@ -348,4 +364,94 @@ def train(dataset, params, callback): # TODO upload trenirani model nazad na backend - #return TrainingResult(metrics) \ No newline at end of file + #return TrainingResult(metrics) + + +def manageH5(datain,params,h5model): + dataset=datain.copy() + problem_type = params["type"] + data = pd.DataFrame() + for col in params["inputColumns"]: + data[col]=dataset[col] + output_column = params["columnToPredict"] + data[output_column] = dataset[output_column] + # + # Brisanje null kolona / redova / zamena + #nullreplace=[ + # 
{"column":"Embarked","value":"C","deleteRow":false,"deleteCol":true}, + # {"column": "Cabin","value":"C123","deleteRow":"0","deleteCol":"0"}] + + null_value_options = params["nullValues"] + null_values_replacers = params["nullValuesReplacers"] + + if(null_value_options=='replace'): + print("replace null") # TODO + elif(null_value_options=='delete_rows'): + data=data.dropna() + elif(null_value_options=='delete_columns'): + data=data.dropna() + # + #print(data.isnull().any()) + # + # Brisanje kolona koje ne uticu na rezultat + # + num_rows=data.shape[0] + for col in data.columns: + if((data[col].nunique()==(num_rows)) and (data[col].dtype==np.object_)): + data.pop(col) + # + # Enkodiranje + # https://www.analyticsvidhya.com/blog/2020/08/types-of-categorical-data-encoding/ + # + encoding=params["encoding"] + if(encoding=='label'): + encoder=LabelEncoder() + for col in data.columns: + if(data[col].dtype==np.object_): + data[col]=encoder.fit_transform(data[col]) + elif(encoding=='onehot'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + data=pd.get_dummies(data, columns=category_columns, prefix=category_columns) + elif(encoding=='ordinal'): + encoder = OrdinalEncoder() + for col in data.columns: + if(data[col].dtype==np.object_): + data[col]=encoder.fit_transform(data[col]) + + elif(encoding=='hashing'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + encoder=ce.HashingEncoder(cols=category_columns, n_components=len(category_columns)) + encoder.fit_transform(data) + elif(encoding=='binary'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + category_columns.append(col) + encoder=ce.BinaryEncoder(cols=category_columns, return_df=True) + encoder.fit_transform(data) + + elif(encoding=='baseN'): + category_columns=[] + for col in data.columns: + if(data[col].dtype==np.object_): + 
category_columns.append(col) + encoder=ce.BaseNEncoder(cols=category_columns, return_df=True, base=5) + encoder.fit_transform(data) + # + # Input - output + # + x_columns = [] + for col in data.columns: + if(col!=output_column): + x_columns.append(col) + x = data[x_columns].values + y = data[output_column].values + + + y_pred=h5model.predict_classes(x) \ No newline at end of file -- cgit v1.2.3 From fef66d17b52568bb64a8857c3dd8feacfde57297 Mon Sep 17 00:00:00 2001 From: Sonja Galovic Date: Tue, 5 Apr 2022 17:33:43 +0200 Subject: Kompletna reorganizacija komponenti za izbor dataseta pri dodavanju modela. Dodata komponenta add-new-dataset, dataset-load.component izmenjena, preostala add-model da se uskladi. --- .../add-new-dataset/add-new-dataset.component.css | 0 .../add-new-dataset/add-new-dataset.component.html | 41 +++++++ .../add-new-dataset.component.spec.ts | 25 +++++ .../add-new-dataset/add-new-dataset.component.ts | 78 +++++++++++++ .../dataset-load/dataset-load.component.css | 15 ++- .../dataset-load/dataset-load.component.html | 70 ++++++------ .../dataset-load/dataset-load.component.ts | 121 ++++++++++++--------- .../app/_pages/add-model/add-model.component.css | 9 +- .../app/_pages/add-model/add-model.component.html | 39 +------ .../app/_pages/add-model/add-model.component.ts | 62 +++++------ frontend/src/app/app.module.ts | 4 +- 11 files changed, 300 insertions(+), 164 deletions(-) create mode 100644 frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.css create mode 100644 frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.html create mode 100644 frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.spec.ts create mode 100644 frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.ts diff --git a/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.css b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.css new file mode 100644 index 00000000..e69de29b 
diff --git a/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.html b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.html new file mode 100644 index 00000000..dfeb4f62 --- /dev/null +++ b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.html @@ -0,0 +1,41 @@ +
+
+
+
+ + + + +
+ +
+ + + + +
+
+
+
+ + + + + + + +
+ +
+
\ No newline at end of file diff --git a/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.spec.ts b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.spec.ts new file mode 100644 index 00000000..a9ea25b4 --- /dev/null +++ b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.spec.ts @@ -0,0 +1,25 @@ +import { ComponentFixture, TestBed } from '@angular/core/testing'; + +import { AddNewDatasetComponent } from './add-new-dataset.component'; + +describe('AddNewDatasetComponent', () => { + let component: AddNewDatasetComponent; + let fixture: ComponentFixture; + + beforeEach(async () => { + await TestBed.configureTestingModule({ + declarations: [ AddNewDatasetComponent ] + }) + .compileComponents(); + }); + + beforeEach(() => { + fixture = TestBed.createComponent(AddNewDatasetComponent); + component = fixture.componentInstance; + fixture.detectChanges(); + }); + + it('should create', () => { + expect(component).toBeTruthy(); + }); +}); diff --git a/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.ts b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.ts new file mode 100644 index 00000000..fceb53cf --- /dev/null +++ b/frontend/src/app/_elements/add-new-dataset/add-new-dataset.component.ts @@ -0,0 +1,78 @@ +import { Component, EventEmitter, Output, ViewChild } from '@angular/core'; +import { NgxCsvParser, NgxCSVParserError } from 'ngx-csv-parser'; +import Dataset from 'src/app/_data/Dataset'; + +@Component({ + selector: 'app-add-new-dataset', + templateUrl: './add-new-dataset.component.html', + styleUrls: ['./add-new-dataset.component.css'] +}) +export class AddNewDatasetComponent { + + @Output() loaded = new EventEmitter(); + + delimiterOptions: Array = [",", ";", "\t", "razmak", "|"]; //podrazumevano "," + + //hasHeader: boolean = true; + hasInput: boolean = false; + + csvRecords: any[] = []; + files: File[] = []; + rowsNumber: number = 0; + colsNumber: number = 0; + + 
dataset: Dataset; //dodaj ! potencijalno + + constructor(private ngxCsvParser: NgxCsvParser) { + this.dataset = new Dataset(); + } + + //@ViewChild('fileImportInput', { static: false }) fileImportInput: any; cemu je ovo sluzilo? + + changeListener($event: any): void { + this.files = $event.srcElement.files; + if (this.files.length == 0 || this.files[0] == null) { + //console.log("NEMA FAJLA"); + //this.loaded.emit("not loaded"); + this.hasInput = false; + return; + } + else + this.hasInput = true; + + this.update(); + } + + update() { + + if (this.files.length < 1) + return; + + this.ngxCsvParser.parse(this.files[0], { header: false, delimiter: (this.dataset.delimiter == "razmak") ? " " : (this.dataset.delimiter == "") ? "," : this.dataset.delimiter }) + .pipe().subscribe((result) => { + + console.log('Result', result); + if (result.constructor === Array) { + this.csvRecords = result; + if (this.dataset.hasHeader) + this.rowsNumber = this.csvRecords.length - 1; + else + this.rowsNumber = this.csvRecords.length; + this.colsNumber = this.csvRecords[0].length; + + if (this.dataset.hasHeader) //kasnije dodati opciju kada nema header da korisnik rucno unosi header-e + this.dataset.header = this.csvRecords[0]; + + this.loaded.emit("loaded"); + } + }, (error: NgxCSVParserError) => { + console.log('Error', error); + }); + } + + checkAccessible() { + if (this.dataset.isPublic) + this.dataset.accessibleByLink = true; + } + +} diff --git a/frontend/src/app/_elements/dataset-load/dataset-load.component.css b/frontend/src/app/_elements/dataset-load/dataset-load.component.css index 05819702..54e0738e 100644 --- a/frontend/src/app/_elements/dataset-load/dataset-load.component.css +++ b/frontend/src/app/_elements/dataset-load/dataset-load.component.css @@ -1,6 +1,13 @@ -#divInputs { - margin-left: 20px; +.btnType1 { + background-color: #003459; + color: white; } -#divOutputs { - margin-left: 20px; +.btnType2 { + background-color: white; + color: #003459; + border-color: #003459; 
+} +.selectedDatasetClass { + /*border-color: 2px solid #003459;*/ + background-color: lightblue; } \ No newline at end of file diff --git a/frontend/src/app/_elements/dataset-load/dataset-load.component.html b/frontend/src/app/_elements/dataset-load/dataset-load.component.html index 76e46092..674e5990 100644 --- a/frontend/src/app/_elements/dataset-load/dataset-load.component.html +++ b/frontend/src/app/_elements/dataset-load/dataset-load.component.html @@ -1,44 +1,42 @@
-
-
-
-
- - - - -
- -
+ - - - -
+
+

Izvor podataka:

-
- - - - - + +

ili

+ +
+
+ +
+
+
+
    +
  • + +
  • +
+
+
- -
- -
+ + +
+
+
\ No newline at end of file diff --git a/frontend/src/app/_elements/dataset-load/dataset-load.component.ts b/frontend/src/app/_elements/dataset-load/dataset-load.component.ts index f9343117..ed71dc3c 100644 --- a/frontend/src/app/_elements/dataset-load/dataset-load.component.ts +++ b/frontend/src/app/_elements/dataset-load/dataset-load.component.ts @@ -1,6 +1,12 @@ -import { Component, EventEmitter, Output, ViewChild } from '@angular/core'; -import { NgxCsvParser, NgxCSVParserError } from 'ngx-csv-parser'; +import { Component, OnInit, ViewChild } from '@angular/core'; +import { AddNewDatasetComponent } from '../add-new-dataset/add-new-dataset.component'; +import { ModelsService } from 'src/app/_services/models.service'; +import shared from 'src/app/Shared'; import Dataset from 'src/app/_data/Dataset'; +import { DatatableComponent } from 'src/app/_elements/datatable/datatable.component'; +import { DatasetsService } from 'src/app/_services/datasets.service'; +import { CsvParseService } from 'src/app/_services/csv-parse.service'; +import { Output, EventEmitter } from '@angular/core'; @Component({ selector: 'app-dataset-load', @@ -9,70 +15,77 @@ import Dataset from 'src/app/_data/Dataset'; }) export class DatasetLoadComponent { - @Output() loaded = new EventEmitter(); + @Output() selectedDatasetChangeEvent = new EventEmitter(); - delimiterOptions: Array = [",", ";", "\t", "razmak", "|"]; //podrazumevano "," + @ViewChild(AddNewDatasetComponent) addNewDatasetComponent?: AddNewDatasetComponent; + @ViewChild(AddNewDatasetComponent) datatable?: DatatableComponent; + datasetLoaded: boolean = false; + selectedDatasetLoaded: boolean = false; - //hasHeader: boolean = true; - hasInput: boolean = false; + showMyDatasets: boolean = true; + myDatasets?: Dataset[]; + existingDatasetSelected: boolean = false; + selectedDataset?: Dataset; + otherDataset?: Dataset; + otherDatasetFile?: any[]; + datasetFile?: any[]; + datasetHasHeader?: boolean = true; - csvRecords: any[] = []; - files: 
File[] = []; - rowsNumber: number = 0; - colsNumber: number = 0; + term: string = ""; - dataset: Dataset; //dodaj ! potencijalno - - constructor(private ngxCsvParser: NgxCsvParser) { - this.dataset = new Dataset(); + constructor(private models: ModelsService, private datasets: DatasetsService, private csv: CsvParseService) { + this.datasets.getMyDatasets().subscribe((datasets) => { + this.myDatasets = datasets; + }); } - @ViewChild('fileImportInput', { static: false }) fileImportInput: any; - - changeListener($event: any): void { - this.files = $event.srcElement.files; - if (this.files.length == 0 || this.files[0] == null) { - //console.log("NEMA FAJLA"); - //this.loaded.emit("not loaded"); - this.hasInput = false; - return; - } - else - this.hasInput = true; - - this.update(); + viewMyDatasetsForm() { + this.showMyDatasets = true; + this.resetSelectedDataset(); + //this.resetCbsAndRbs(); //TREBA DA SE DESI + } + viewNewDatasetForm() { + this.showMyDatasets = false; + this.resetSelectedDataset(); + //this.resetCbsAndRbs(); //TREBA DA SE DESI } - update() { - - if (this.files.length < 1) - return; - - this.ngxCsvParser.parse(this.files[0], { header: false, delimiter: (this.dataset.delimiter == "razmak") ? " " : (this.dataset.delimiter == "") ? "," : this.dataset.delimiter }) - .pipe().subscribe((result) => { - - console.log('Result', result); - if (result.constructor === Array) { - this.csvRecords = result; - if (this.dataset.hasHeader) - this.rowsNumber = this.csvRecords.length - 1; + selectThisDataset(dataset: Dataset) { + this.selectedDataset = dataset; + this.selectedDatasetLoaded = false; + this.existingDatasetSelected = true; + this.datasetHasHeader = this.selectedDataset.hasHeader; + + this.datasets.getDatasetFile(dataset.fileId).subscribe((file: string | undefined) => { + if (file) { + this.datasetFile = this.csv.csvToArray(file, (dataset.delimiter == "razmak") ? " " : (dataset.delimiter == "") ? 
"," : dataset.delimiter); + /*for (let i = this.datasetFile.length - 1; i >= 0; i--) { //moguce da je vise redova na kraju fajla prazno i sl. + if (this.datasetFile[i].length != this.datasetFile[0].length) + this.datasetFile[i].pop(); else - this.rowsNumber = this.csvRecords.length; - this.colsNumber = this.csvRecords[0].length; + break; //nema potrebe dalje + }*/ + //console.log(this.datasetFile); + //this.resetCbsAndRbs(); //TREBA DA SE DESI + //this.refreshThreeNullValueRadioOptions(); //TREBA DA SE DESI + this.selectedDatasetLoaded = true; + //this.scrollToNextForm(); + } + }); + } - if (this.dataset.hasHeader) //kasnije dodati opciju kada nema header da korisnik rucno unosi header-e - this.dataset.header = this.csvRecords[0]; + resetSelectedDataset(): boolean { + const temp = this.selectedDataset; + this.selectedDataset = this.otherDataset; + this.otherDataset = temp; + const tempFile = this.datasetFile; + this.datasetFile = this.otherDatasetFile; + this.otherDatasetFile = tempFile; - this.loaded.emit("loaded"); - } - }, (error: NgxCSVParserError) => { - console.log('Error', error); - }); - } + this.selectedDatasetChangeEvent.emit(this.selectedDataset); - checkAccessible() { - if (this.dataset.isPublic) - this.dataset.accessibleByLink = true; + return true; } + } diff --git a/frontend/src/app/_pages/add-model/add-model.component.css b/frontend/src/app/_pages/add-model/add-model.component.css index 6d961287..7f05af0f 100644 --- a/frontend/src/app/_pages/add-model/add-model.component.css +++ b/frontend/src/app/_pages/add-model/add-model.component.css @@ -32,4 +32,11 @@ } ul li:hover { background-color: lightblue; -} \ No newline at end of file +} + +#divInputs { + margin-left: 20px; +} +#divOutputs { + margin-left: 20px; +} diff --git a/frontend/src/app/_pages/add-model/add-model.component.html b/frontend/src/app/_pages/add-model/add-model.component.html index 97b35b7a..9d727236 100644 --- a/frontend/src/app/_pages/add-model/add-model.component.html +++ 
b/frontend/src/app/_pages/add-model/add-model.component.html @@ -26,48 +26,15 @@
-
-

Izvor podataka:

-
-
- -

ili

- -
-
- -
-
-
-
    -
  • - -
  • -
-
-
+ + - - -
- -
+ *ngIf="selectedDataset && ((showMyDatasets) || (!showMyDatasets))">
diff --git a/frontend/src/app/_pages/add-model/add-model.component.ts b/frontend/src/app/_pages/add-model/add-model.component.ts index 945a58b5..192fc6ff 100644 --- a/frontend/src/app/_pages/add-model/add-model.component.ts +++ b/frontend/src/app/_pages/add-model/add-model.component.ts @@ -5,9 +5,7 @@ import { DatasetLoadComponent } from 'src/app/_elements/dataset-load/dataset-loa import { ModelsService } from 'src/app/_services/models.service'; import shared from 'src/app/Shared'; import Dataset from 'src/app/_data/Dataset'; -import { DatatableComponent } from 'src/app/_elements/datatable/datatable.component'; import { DatasetsService } from 'src/app/_services/datasets.service'; -import { NgxCsvParser } from 'ngx-csv-parser'; import { CsvParseService } from 'src/app/_services/csv-parse.service'; @@ -18,11 +16,6 @@ import { CsvParseService } from 'src/app/_services/csv-parse.service'; }) export class AddModelComponent implements OnInit { - @ViewChild(DatasetLoadComponent) datasetLoadComponent?: DatasetLoadComponent; - @ViewChild(DatatableComponent) datatable?: DatatableComponent; - datasetLoaded: boolean = false; - selectedDatasetLoaded: boolean = false; - newModel: Model; ProblemType = ProblemType; @@ -71,8 +64,13 @@ export class AddModelComponent implements OnInit { (document.getElementById("btnMyDataset")).focus(); } + datasetHasChanged(selectedDataset: Dataset) { + this.selectedDataset = selectedDataset; + this.resetCbsAndRbs(); + this.refreshThreeNullValueRadioOptions(); + } - viewMyDatasetsForm() { + /*viewMyDatasetsForm() { this.showMyDatasets = true; this.resetSelectedDataset(); //this.datasetLoaded = false; @@ -82,7 +80,7 @@ export class AddModelComponent implements OnInit { this.showMyDatasets = false; this.resetSelectedDataset(); this.resetCbsAndRbs(); - } + }*/ addModel() { if (!this.showMyDatasets) @@ -117,20 +115,20 @@ export class AddModelComponent implements OnInit { if (this.validationInputsOutput()) { console.log('ADD MODEL: STEP 1 - UPLOAD 
FILE'); - if (this.datasetLoadComponent) { - console.log("this.datasetLoadComponent.files:", this.datasetLoadComponent.files); - this.models.uploadData(this.datasetLoadComponent.files[0]).subscribe((file) => { + if (this.selectedDataset) { + //console.log("this.datasetLoadComponent.files:", this.datasetLoadComponent.files); + /*this.models.uploadData(this.datasetLoadComponent.files[0]).subscribe((file) => { ZAKOMENTARISANO ZBOG KOMPAJLERSKE GRESKE TOKOM REORGANIZACIJE console.log('ADD MODEL: STEP 2 - ADD DATASET WITH FILE ID ' + file._id); - if (this.datasetLoadComponent) { - this.datasetLoadComponent.dataset.fileId = file._id; - this.datasetLoadComponent.dataset.username = shared.username; + if (this.selectedDataset) { + this.selectedDataset!.fileId = file._id; + this.selectedDataset!.username = shared.username; - this.datasets.addDataset(this.datasetLoadComponent.dataset).subscribe((dataset) => { + this.datasets.addDataset(this.selectedDataset!).subscribe((dataset) => { console.log('ADD MODEL: STEP 3 - ADD MODEL WITH DATASET ID ', dataset._id); this.newModel.datasetId = dataset._id; //da se doda taj dataset u listu postojecih, da bude izabran - this.refreshMyDatasetList(); + this.refreshMyDatasetList(); MORA OVO this.showMyDatasets = true; this.selectThisDataset(dataset); @@ -151,7 +149,7 @@ export class AddModelComponent implements OnInit { } //kraj treceg ifa }, (error) => { - }); //kraj uploadData subscribe + }); //kraj uploadData subscribe*/ } //kraj drugog ifa } //kraj prvog ifa @@ -234,30 +232,30 @@ export class AddModelComponent implements OnInit { return true; } - selectThisDataset(dataset: Dataset) { + /*selectThisDataset(dataset: Dataset) { this.selectedDataset = dataset; - this.selectedDatasetLoaded = false; + //this.selectedDatasetLoaded = false; this.existingDatasetSelected = true; this.datasetHasHeader = this.selectedDataset.hasHeader; this.datasets.getDatasetFile(dataset.fileId).subscribe((file: string | undefined) => { if (file) { this.datasetFile 
= this.csv.csvToArray(file, (dataset.delimiter == "razmak") ? " " : (dataset.delimiter == "") ? "," : dataset.delimiter); - /*for (let i = this.datasetFile.length - 1; i >= 0; i--) { //moguce da je vise redova na kraju fajla prazno i sl. - if (this.datasetFile[i].length != this.datasetFile[0].length) - this.datasetFile[i].pop(); - else - break; //nema potrebe dalje - }*/ + //for (let i = this.datasetFile.length - 1; i >= 0; i--) { //moguce da je vise redova na kraju fajla prazno i sl. + //if (this.datasetFile[i].length != this.datasetFile[0].length) + //this.datasetFile[i].pop(); + //else + // break; //nema potrebe dalje + //} //console.log(this.datasetFile); this.resetCbsAndRbs(); this.refreshThreeNullValueRadioOptions(); - this.selectedDatasetLoaded = true; + //this.selectedDatasetLoaded = true; this.scrollToNextForm(); } }); //this.datasetHasHeader = false; - } + }*/ scrollToNextForm() { (document.getElementById("selectInAndOuts")).scrollIntoView({ @@ -267,7 +265,7 @@ export class AddModelComponent implements OnInit { }); } - resetSelectedDataset(): boolean { + /*resetSelectedDataset(): boolean { const temp = this.selectedDataset; this.selectedDataset = this.otherDataset; this.otherDataset = temp; @@ -275,7 +273,7 @@ export class AddModelComponent implements OnInit { this.datasetFile = this.otherDatasetFile; this.otherDatasetFile = tempFile; return true; - } + }*/ resetCbsAndRbs(): boolean { this.uncheckRbs(); this.checkAllCbs(); @@ -345,7 +343,7 @@ export class AddModelComponent implements OnInit { let colIndex = this.findColIndexByName(colName); let sumOfNulls = 0; - let startValue = (this.datasetLoadComponent?.dataset.hasHeader) ? 1 : 0; + let startValue = (this.selectedDataset!.hasHeader) ? 
1 : 0; for (let i = startValue; i < this.datasetFile.length; i++) { if (this.datasetFile[i][colIndex] == "" || this.datasetFile[i][colIndex] == undefined) ++sumOfNulls; @@ -360,7 +358,7 @@ export class AddModelComponent implements OnInit { let sum = 0; let n = 0; - let startValue = (this.datasetLoadComponent?.dataset.hasHeader) ? 1 : 0; + let startValue = (this.selectedDataset!.hasHeader) ? 1 : 0; for (let i = startValue; i < this.datasetFile.length; i++) if (this.datasetFile[i][colIndex] != '') { sum += Number(this.datasetFile[i][colIndex]); diff --git a/frontend/src/app/app.module.ts b/frontend/src/app/app.module.ts index 8098df40..b9ad524f 100644 --- a/frontend/src/app/app.module.ts +++ b/frontend/src/app/app.module.ts @@ -40,6 +40,7 @@ import { ReactiveBackgroundComponent } from './_elements/reactive-background/rea import { ItemModelComponent } from './_elements/item-model/item-model.component'; import { AnnvisualComponent } from './_elements/annvisual/annvisual.component'; import { AlertDialogComponent } from './_modals/alert-dialog/alert-dialog.component'; +import { AddNewDatasetComponent } from './_elements/add-new-dataset/add-new-dataset.component'; @NgModule({ declarations: [ @@ -69,7 +70,8 @@ import { AlertDialogComponent } from './_modals/alert-dialog/alert-dialog.compon ReactiveBackgroundComponent, ItemModelComponent, AnnvisualComponent, - AlertDialogComponent + AlertDialogComponent, + AddNewDatasetComponent ], imports: [ BrowserModule, -- cgit v1.2.3 From 4dbaa82b1448a2cf7b69246ae20ebaae36d03f9b Mon Sep 17 00:00:00 2001 From: TAMARA JERINIC Date: Tue, 5 Apr 2022 22:15:30 +0200 Subject: Dodat je novi ml service fajl, uklonjeni su bagovi iz prethodnog. 
Izmenjen je i controller.py fajl --- backend/microservice/api/controller.py | 10 +- backend/microservice/api/newmlservice.py | 424 +++++++++++++++++++++++++++++++ 2 files changed, 430 insertions(+), 4 deletions(-) create mode 100644 backend/microservice/api/newmlservice.py diff --git a/backend/microservice/api/controller.py b/backend/microservice/api/controller.py index bbba4af9..1b17f727 100644 --- a/backend/microservice/api/controller.py +++ b/backend/microservice/api/controller.py @@ -1,7 +1,7 @@ import flask from flask import request, jsonify import ml_socket -import ml_service +import newmlservice import tensorflow as tf import pandas as pd @@ -25,7 +25,7 @@ def train(): f = request.json["dataset"] dataset = pd.read_csv(f) # - result = ml_service.train(dataset, request.json["model"], train_callback) + result = newmlservice.train(dataset, request.json["model"], train_callback) print(result) return jsonify(result) @@ -35,14 +35,16 @@ def predict(): dataset = pd.read_csv(f) m = request.json['modelpath'] model = tf.keras.models.load_model(m) - h5=ml_service.manageH5(dataset,request.json,model,train_callback) + print("********************************model loaded*******************************") + newmlservice.manageH5(dataset,request.json['model'],model) + return "done" @app.route('/preprocess',methods=['POST']) def returnColumnsInfo(): f=request.json['filepathcolinfo'] dataset=pd.read_csv(f) - result=ml_service.returnColumnsInfo(dataset) + result=newmlservice.returnColumnsInfo(dataset) return jsonify(result) diff --git a/backend/microservice/api/newmlservice.py b/backend/microservice/api/newmlservice.py new file mode 100644 index 00000000..50af15f8 --- /dev/null +++ b/backend/microservice/api/newmlservice.py @@ -0,0 +1,424 @@ +from enum import unique +from itertools import count +import pandas as pd +from sklearn import datasets, multiclass +import tensorflow as tf +import keras +import numpy as np +import csv +import json +import h5py +import sklearn.metrics as 
sm  # tail of an import statement that begins before this chunk (presumably "import sklearn.metrics as sm")
from statistics import mode
from typing_extensions import Self
from copyreg import constructor
from flask import request, jsonify, render_template
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.preprocessing import OrdinalEncoder
import category_encoders as ce
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from dataclasses import dataclass
import statistics as s
from sklearn.metrics import roc_auc_score
from ann_visualizer.visualize import ann_viz


def returnColumnsInfo(dataset):
    """Summarise every column of *dataset* for the front end.

    Args:
        dataset: pandas DataFrame to describe. It is copied, never mutated.

    Returns:
        dict with three keys:
          * ``columnInfo`` - one dict per column holding ``columnName``,
            ``isNumber``, ``uniqueValues`` (categorical columns only),
            ``mean``, ``median`` (numeric columns only, 0 otherwise) and
            ``numNulls``.
          * ``allNullColl`` - number of columns containing at least one null.
          * ``allNullRows`` - number of rows containing at least one null.
    """
    datafront = dataset.copy()
    categorical = set(datafront.select_dtypes(include=['object']).columns)
    column_info = []
    columns_with_nulls = 0

    for kolona in datafront.columns:
        column = datafront[kolona]
        null_count = column.isnull().sum()
        if null_count > 0:
            columns_with_nulls += 1

        if kolona in categorical:
            # Mean/median are meaningless for categorical data; report zeros.
            column_info.append({
                'columnName': kolona,
                'isNumber': False,
                'uniqueValues': column.unique().tolist(),
                'mean': 0.0,
                'median': 0.0,
                'numNulls': float(null_count),
            })
        else:
            column_info.append({
                'columnName': kolona,
                'isNumber': 1,
                'uniqueValues': [],
                'mean': float(column.mean()),
                # pandas' median skips NaN just like mean() above;
                # statistics.median gives undefined results on NaN data.
                'median': float(column.median()),
                'numNulls': float(null_count),
            })

    rows_with_nulls = int(datafront.isnull().any(axis=1).sum())
    return {
        'columnInfo': column_info,
        'allNullColl': columns_with_nulls,
        'allNullRows': rows_with_nulls,
    }


@dataclass
class TrainingResultClassification:
    accuracy: float
    precision: float
    recall: float
    tn: float
    fp: float
    fn: float
    tp: float
    specificity: float
    f1: float
    logloss: float
    fpr: float
    tpr: float
    metrics: dict


# Disabled draft result types, kept as an inert string literal.
# NOTE(review): "@datasets" is presumably a typo for "@dataclass" - confirm
# before re-enabling.
'''
@datasets
class TrainingResultRegression:
    mse: float
    mae: float
    mape: float
    rmse: float

@dataclass
class TrainingResult:
    metrics: dict
'''

# Encodings that may expand one feature into several columns. For these the
# target column is kept out of the feature encoder so it stays a single column.
_FEATURE_ENCODINGS = ('onehot', 'ordinal', 'hashing', 'binary', 'baseN')


def _object_columns(data, exclude=()):
    """Return the names of object-dtype columns of *data* not in *exclude*."""
    return [col for col in data.columns
            if col not in exclude and data[col].dtype == np.object_]


def _encode(data, encoding, output_column):
    """Encode the categorical columns of *data* with the requested scheme.

    ``label`` encodes every object column, target included. The other known
    schemes encode feature columns only and label-encode a categorical target
    separately. An unknown *encoding* leaves *data* untouched.
    """
    if encoding == 'label':
        for col in _object_columns(data):
            data[col] = LabelEncoder().fit_transform(data[col])
        return data
    if encoding not in _FEATURE_ENCODINGS:
        return data

    category_columns = _object_columns(data, exclude=(output_column,))
    if encoding == 'onehot':
        data = pd.get_dummies(data, columns=category_columns, prefix=category_columns)
    elif category_columns:
        if encoding == 'ordinal':
            # OrdinalEncoder needs 2-D input, so encode the columns as a frame.
            data[category_columns] = OrdinalEncoder().fit_transform(data[category_columns])
        elif encoding == 'hashing':
            encoder = ce.HashingEncoder(cols=category_columns, n_components=len(category_columns))
            data = encoder.fit_transform(data)
        elif encoding == 'binary':
            encoder = ce.BinaryEncoder(cols=category_columns, return_df=True)
            data = encoder.fit_transform(data)
        else:  # 'baseN'
            encoder = ce.BaseNEncoder(cols=category_columns, return_df=True, base=5)
            data = encoder.fit_transform(data)

    if output_column in data.columns and data[output_column].dtype == np.object_:
        data[output_column] = LabelEncoder().fit_transform(data[output_column])
    return data


def _prepare_data(dataset, params):
    """Select, clean and encode the columns named in *params*.

    Reads ``inputColumns``, ``columnToPredict``, ``nullValues``,
    ``nullValuesReplacers`` and ``encoding`` from *params*.

    Returns:
        ``(data, output_column)`` - the processed DataFrame and the name of
        the target column.
    """
    output_column = params["columnToPredict"]
    data = pd.DataFrame()
    for col in params["inputColumns"]:
        data[col] = dataset[col]
    data[output_column] = dataset[output_column]

    # Null handling
    null_value_options = params["nullValues"]
    null_values_replacers = params["nullValuesReplacers"]  # read but not applied yet
    if null_value_options == 'replace':
        print("replace null")  # TODO
    elif null_value_options == 'delete_rows':
        data = data.dropna()
    elif null_value_options == 'delete_columns':
        # Drop feature columns that contain nulls. The target column is kept
        # because everything downstream needs it.
        null_features = [col for col in data.columns
                         if col != output_column and data[col].isnull().any()]
        data = data.drop(columns=null_features)
    print(data.shape)

    # Remove columns that cannot influence the result: object columns whose
    # values are all distinct (one value per row).
    num_rows = data.shape[0]
    id_like = [col for col in _object_columns(data) if data[col].nunique() == num_rows]
    data = data.drop(columns=id_like)

    data = _encode(data, params["encoding"], output_column)
    return data, output_column


def _split_features_target(data, output_column):
    """Return ``(x, y)`` arrays: every column except the target, and the target."""
    x_columns = [col for col in data.columns if col != output_column]
    return data[x_columns].values, data[output_column].values


def _build_model(params, input_dim, output_units, output_activation=None):
    """Build and compile a dense Keras network described by *params*."""
    activations = params['hiddenLayerActivationFunctions']
    neurons = params['hiddenLayerNeurons']
    model = tf.keras.Sequential()
    # First hidden layer; it also defines the input dimension.
    model.add(tf.keras.layers.Dense(units=neurons, activation=activations[0], input_dim=input_dim))
    # Remaining hidden layers, when more than one is requested.
    for i in range(1, params['hiddenLayers']):
        model.add(tf.keras.layers.Dense(units=neurons, activation=activations[i]))
    # Output layer.
    model.add(tf.keras.layers.Dense(units=output_units, activation=output_activation))
    model.compile(loss=params["lossFunction"], optimizer=params['optimizer'], metrics=params['metrics'])
    return model


def _decode_predictions(problem_type, raw):
    """Turn raw network output into labels (classification) or values (regression)."""
    if problem_type == 'binarni-klasifikacioni':
        return (raw >= 0.5).astype('int').ravel()
    if problem_type == 'regresioni':
        return raw.ravel()
    return np.argmax(raw, axis=1)


def _roc_auc_score_multiclass(actual_class, pred_class, average="macro"):
    """Return a one-vs-rest ROC AUC for each class found in *actual_class*.

    Returns an empty dict when fewer than two classes are present, because
    ROC AUC is undefined in that case.
    """
    actual = np.asarray(actual_class)
    predicted = np.asarray(pred_class)
    unique_class = np.unique(actual)
    if len(unique_class) < 2:
        return {}
    roc_auc_dict = {}
    for per_class in unique_class:
        # Mark the current class as 1 and every other class as 0.
        new_actual_class = (actual == per_class).astype(int)
        new_pred_class = (predicted == per_class).astype(int)
        roc_auc_dict[per_class] = roc_auc_score(new_actual_class, new_pred_class, average=average)
    return roc_auc_dict


def _binary_metrics(y_test, y_pred, y_prob):
    """Compute binary-classification metrics from hard labels and probabilities."""
    tn, fp, fn, tp = sm.confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    negatives = tn + fp
    # Ranking and probabilistic metrics need scores, not thresholded labels.
    fpr, tpr, _ = sm.roc_curve(y_test, y_prob)
    return {
        "accuracy": float(sm.accuracy_score(y_test, y_pred)),
        "precision": float(sm.precision_score(y_test, y_pred)),
        "recall": float(sm.recall_score(y_test, y_pred)),
        "specificity": float(tn / negatives) if negatives else 0.0,
        "f1": float(sm.f1_score(y_test, y_pred)),
        "tn": float(tn),
        "fp": float(fp),
        "fn": float(fn),
        "tp": float(tp),
        "fpr": fpr.tolist(),
        "tpr": tpr.tolist(),
        "logloss": float(sm.log_loss(y_test, y_prob, labels=[0, 1])),
    }


def _regression_metrics(y_test, y_pred, num_features):
    """Compute regression metrics.

    ``rmsle`` is None when negative values make it undefined. ``adj_r2`` is
    None when there are too few test rows for the number of features.
    """
    # https://www.analyticsvidhya.com/blog/2021/05/know-the-best-evaluation-metrics-for-your-regression-model/
    mse = float(sm.mean_squared_error(y_test, y_pred))
    r2 = float(sm.r2_score(y_test, y_pred))

    rmsle = None
    if np.all(np.asarray(y_test) >= 0) and np.all(np.asarray(y_pred) >= 0):
        rmsle = float(np.sqrt(sm.mean_squared_log_error(y_test, y_pred)))

    n = len(y_test)      # number of observations
    k = num_features     # number of independent variables
    adj_r2 = float(1 - ((1 - r2) * (n - 1) / (n - k - 1))) if n - k - 1 > 0 else None

    return {
        "mse": mse,
        "mae": float(sm.mean_absolute_error(y_test, y_pred)),
        "mape": float(sm.mean_absolute_percentage_error(y_test, y_pred)),
        "rmse": float(np.sqrt(mse)),
        "rmsle": rmsle,
        "r2": r2,
        "adj_r2": adj_r2,
    }


def _multiclass_metrics(y_test, y_pred):
    """Compute multi-class classification metrics."""
    # https://www.kaggle.com/code/nkitgupta/evaluation-metrics-for-multi-class-classification/notebook
    roc_auc = _roc_auc_score_multiclass(y_test, y_pred)
    return {
        "accuracy": float(sm.accuracy_score(y_test, y_pred)),
        "macro_averaged_precision": float(sm.precision_score(y_test, y_pred, average='macro')),
        "micro_averaged_precision": float(sm.precision_score(y_test, y_pred, average='micro')),
        "macro_averaged_recall": float(sm.recall_score(y_test, y_pred, average='macro')),
        "micro_averaged_recall": float(sm.recall_score(y_test, y_pred, average='micro')),
        "macro_averaged_f1": float(sm.f1_score(y_test, y_pred, average='macro')),
        "micro_averaged_f1": float(sm.f1_score(y_test, y_pred, average='micro')),
        "roc_auc": {str(label): float(score) for label, score in roc_auc.items()},
    }


def train(dataset, params, callback):
    """Train a dense neural network on *dataset* as described by *params*.

    Args:
        dataset: pandas DataFrame holding the input and target columns.
        params: dict with the keys read here and in the helpers: ``type``,
            ``inputColumns``, ``columnToPredict``, ``nullValues``,
            ``nullValuesReplacers``, ``encoding``,
            ``randomTestSetDistribution``, ``randomOrder``, ``hiddenLayers``,
            ``hiddenLayerNeurons``, ``hiddenLayerActivationFunctions``,
            ``outputLayerActivationFunction``, ``lossFunction``,
            ``optimizer``, ``metrics``, ``epochs``, ``batchSize``, ``name``.
        callback: accepted for interface compatibility; currently unused.

    Returns:
        dict of evaluation metrics for the test split, or None when
        ``params["type"]`` is not a recognised problem type.
    """
    problem_type = params["type"]
    print(problem_type)
    data, output_column = _prepare_data(dataset, params)
    x, y = _split_features_target(data, output_column)

    # Split into training and test sets.
    # NOTE(review): both settings produce a seeded shuffle; "randomOrder"
    # presumably was meant to toggle shuffling - confirm with the front end.
    test = params["randomTestSetDistribution"]
    random = 123 if params["randomOrder"] else 0
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test, random_state=random)

    # Model training
    if problem_type == 'multi-klasifikacioni':
        # One output unit per class instead of a hard-coded 5.
        # Assumes labels are integers 0..k-1 (true for label-encoded targets).
        classifier = _build_model(params, x_train.shape[1], len(np.unique(y)),
                                  params['outputLayerActivationFunction'])
    elif problem_type == 'binarni-klasifikacioni':
        classifier = _build_model(params, x_train.shape[1], 1,
                                  params['outputLayerActivationFunction'])
    elif problem_type == 'regresioni':
        classifier = _build_model(params, x_train.shape[1], 1)
    else:
        return None

    classifier.fit(x_train, y_train, epochs=params['epochs'], batch_size=params['batchSize'])
    raw_pred = classifier.predict(x_test)
    y_pred = _decode_predictions(problem_type, raw_pred)

    if problem_type == 'regresioni':
        # NOTE(review): regression models are not saved to disk, unlike the
        # classification models below - confirm whether that is intended.
        print(classifier.evaluate(x_test, y_test))
        return _regression_metrics(y_test, y_pred, x_test.shape[1])

    scores = classifier.evaluate(x_test, y_test)
    print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1]*100))
    classifier.save("temp/"+params['name'], save_format='h5')

    if problem_type == 'binarni-klasifikacioni':
        # Clip so log-loss stays defined if the output is not a sigmoid.
        y_prob = np.clip(raw_pred.ravel(), 0.0, 1.0)
        return _binary_metrics(y_test, y_pred, y_prob)
    return _multiclass_metrics(y_test, y_pred)


def manageH5(dataset, params, h5model, callback=None):
    """Re-fit a model loaded from an .h5 file on *dataset* and predict with it.

    Args:
        dataset: pandas DataFrame holding the input and target columns.
        params: same dict as for ``train`` (column selection, null handling,
            encoding, ``lossFunction``, ``optimizer``, ``metrics``,
            ``epochs``, ``batchSize``, ``type``).
        h5model: a loaded Keras model.
        callback: optional and currently unused; accepted because the
            controller passes a fourth argument.

    Returns:
        numpy array of predictions for every row of the prepared data:
        class labels for classification, values for regression.
    """
    problem_type = params["type"]
    print(problem_type)
    data, output_column = _prepare_data(dataset, params)
    x2, y2 = _split_features_target(data, output_column)

    h5model.summary()
    ann_viz(h5model, title="My neural network")

    h5model.compile(loss=params['lossFunction'], optimizer=params['optimizer'], metrics=params['metrics'])
    h5model.fit(x2, y2, epochs=params['epochs'], batch_size=params['batchSize'])

    y_pred2 = _decode_predictions(problem_type, h5model.predict(x2))

    # Score against the true targets, not the model's own predictions.
    score = h5model.evaluate(x2, y2, verbose=0)
    print("%s: %.2f%%" % (h5model.metrics_names[1], score[1]*100))
    print(y_pred2)
    print('done')
    return y_pred2

# \ No newline at end of file -- cgit v1.2.3