From 5790ed5445648bcd596c28cd92717c666e6c7485 Mon Sep 17 00:00:00 2001 From: Nevena Bojovic Date: Wed, 9 Mar 2022 20:04:04 +0100 Subject: Metoda za optimizaciju neuronskih mreza - varijante gradijentnog spusta. --- backend/microservice/gradientDescent.ipynb | 355 +++++++++++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 backend/microservice/gradientDescent.ipynb (limited to 'backend/microservice/gradientDescent.ipynb') diff --git a/backend/microservice/gradientDescent.ipynb b/backend/microservice/gradientDescent.ipynb new file mode 100644 index 00000000..930f0784 --- /dev/null +++ b/backend/microservice/gradientDescent.ipynb @@ -0,0 +1,355 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "gradientDescent.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Gradijentni spust" + ], + "metadata": { + "id": "ktt1djHu0cNl" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "ZQDusvq4z52M" + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "source": [ + "def gradient_descent(f, gradient, x0, alpha, eps, iters):\n", + " result = {}\n", + " \n", + " x = x0\n", + " for i in range(iters):\n", + " x_new = x - alpha * gradient(x)\n", + "\n", + " if abs(f(x_new) - f(x)) < eps:\n", + " result['converged'] = True\n", + " break\n", + "\n", + " x = x_new\n", + " else:\n", + " result['converged'] = False\n", + " \n", + "\n", + " result['num_iters'] = i\n", + " result['x'] = x_new\n", + " \n", + " return result" + ], + "metadata": { + "id": "SRYg9QtE0Dcf" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "x[0] = x\n", + "\n", + "x[1] = y" + ], + "metadata": { + "id": "XewBwwG71oqK" + } + }, + { + "cell_type": "code", + "source": [ + "def f(x):\n", + " return 0.5 * (x[0]**2 + 10*x[1]**2)" + ], + "metadata": { + "id": "Mbtzh4hx0DZf" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def gradient(x):\n", + " return np.array([x[0], 10*x[1]])" + ], + "metadata": { + "id": "vwmWa-qQ0DXD" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "x0 = np.array([3,5])\n", + "eps = 0.00001\n", + "iters = 10\n", + "alpha = 0.1\n", + "\n", + "gradient_descent(f, gradient, x0, alpha, eps, iters)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "62OBHamq0DUI", + "outputId": "2fecd164-6feb-439c-8cc6-079085f20cba" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'converged': False, 'num_iters': 9, 'x': array([1.04603532, 0. ])}" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Sada se posmatraju gradijent i inercija.\n", + "\n", + "d - za cuvanje prethodnih gradijenata\n", + "\n", + "beta*d - koliko znacaja se daje inerciji\n", + "\n", + "Prvo racuna gradijent pa se tek onda pomera po inerciji." + ], + "metadata": { + "id": "ul28Spkz3bY7" + } + }, + { + "cell_type": "code", + "source": [ + "def momentum(f, gradient, x0, alpha, eps, iters, beta):\n", + " result = {}\n", + " \n", + " x = x0\n", + " d = 0\n", + " \n", + " for i in range(iters):\n", + " d = beta * d + alpha * gradient(x)\n", + " x_new = x - d\n", + " \n", + " if abs(f(x_new) - f(x)) < eps:\n", + " result['converged'] = True\n", + " break\n", + " x = x_new\n", + " else:\n", + " result['converged'] = False\n", + " result['num_iters'] = i\n", + " result['x'] = x_new\n", + " \n", + " return result " + ], + "metadata": { + "id": "SSzc08Rf0DRg" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "momentum(f, gradient, x0, alpha, eps, iters, beta=0.5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eEfVIiS86DjY", + "outputId": "312e9a41-ae2f-4182-85c5-3ea5bf855a05" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'converged': False, 'num_iters': 9, 'x': array([0.19952216, 0.16601562])}" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Racuna gradijent u tacki nakon inercije. Prvo se pomera po inerciji pa tek onda rcauna gradijent." + ], + "metadata": { + "id": "n1bl2N5l8Cf4" + } + }, + { + "cell_type": "code", + "source": [ + "def nesterov(f, gradient, x0, alpha, eps, iters, beta):\n", + " result = {}\n", + " \n", + " x = x0\n", + " d = 0\n", + " \n", + " for i in range(iters):\n", + " d = beta * d + alpha * gradient(x - beta*d)\n", + " x_new = x - d\n", + " \n", + " if abs(f(x_new) - f(x)) < eps:\n", + " result['converged'] = True\n", + " break\n", + " x = x_new\n", + " else:\n", + " result['converged'] = False\n", + " \n", + " result['num_iters'] = i\n", + " result['x'] = x_new\n", + " \n", + " return result " + ], + "metadata": { + "id": "EVAuWKXH6JNi" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "nesterov(f, gradient, x0, alpha, eps, iters, beta=0.5)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Gldkp1lH6M1P", + "outputId": "74bfb0ca-63cc-44ee-a7b9-65400eca3e33" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'converged': False, 'num_iters': 9, 'x': array([0.31974124, 0. ])}" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "ADAM (Adaptive Moment Estimation)\n", + "\n", + "Pokusava da aproksimira prvi i drugi momenat (statistika).\n", + "\n", + "E(X^k) - k-ti momenat\n", + "\n", + "m - ocena prvog momenta (akumulira gradijente, povecava korak), m je proporcionalno velicini koraka\n", + "\n", + "v - ocena drugog momenta (gleda promene gradijenta, kvadrat gradijenta), v je obrnuto proporcionalno velicini koraka\n" + ], + "metadata": { + "id": "uq3aN31j-pHD" + } + }, + { + "cell_type": "code", + "source": [ + "def adam(f, gradient, x0, alpha, eps, iters, beta1, beta2, delta):\n", + " result = {}\n", + " \n", + " x = x0\n", + " m = 0\n", + " v = 0\n", + " \n", + " for i in range(1, iters+1):\n", + " grad = gradient(x)\n", + " m = beta1 * m + (1 - beta1) * grad\n", + " v = beta2 * v + (1 - beta2) * grad**2\n", + " \n", + " m_hat = m / (1 - beta1**i)\n", + " v_hat = v / (1 - beta2**i)\n", + " \n", + " x_new = x - alpha * m_hat / (np.sqrt(v_hat) + delta)\n", + " \n", + " if abs(f(x_new) - f(x)) < eps:\n", + " result['converged'] = True\n", + " break\n", + " \n", + " x = x_new\n", + " else:\n", + " result['converged'] = False\n", + " \n", + " result['num_iters'] = i\n", + " result['x'] = x_new\n", + " \n", + " return result " + ], + "metadata": { + "id": "mEe_3317-Ko2" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "adam(f, gradient, x0, alpha, eps, iters, 0.9, 0.999, 1e-7)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hjCIJjRv-UkZ", + "outputId": "afdb0882-8934-44b7-c1a3-a4b8864a07ec" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'converged': False, 'num_iters': 10, 'x': array([2.01418844, 4.00760162])}" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + } + ] +} \ No newline at end of file -- cgit v1.2.3