diff --git a/tutorials/custom_differentiation.ipynb b/tutorials/custom_differentiation.ipynb index 2700372..e341e4f 100644 --- a/tutorials/custom_differentiation.ipynb +++ b/tutorials/custom_differentiation.ipynb @@ -27,25 +27,18 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 3, "id": "1cbe98d3-e4c9-4f89-9a27-3918f8877e51", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "import os\n", "import time\n", "from copy import deepcopy\n", "\n", + "# disabling hardware accelerators warnings\n", + "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"3\"\n", + "\n", "import numpy as np\n", "import tensorflow as tf\n", "import matplotlib.pyplot as plt\n", @@ -53,15 +46,12 @@ "from qibo import set_backend\n", "from qibo import Circuit, gates, hamiltonians\n", "\n", - "from qiboml.operations import differentiation, expectation\n", - "\n", - "from importlib import reload\n", - "reload(expectation)" + "from qiboml.operations import differentiation, expectation" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 4, "id": "bbbf2339-efe7-4170-9408-a7b66dff35ae", "metadata": {}, "outputs": [ @@ -69,7 +59,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[Qibo 0.2.7|INFO|2024-04-23 18:29:05]: Using tensorflow backend on /device:CPU:0\n" + "[Qibo 0.2.7|INFO|2024-04-26 17:24:00]: Using tensorflow backend on /device:CPU:0\n" ] } ], @@ -84,22 +74,47 @@ "source": [ "Now let's setup a simple problem. We build a quantum circuit $U$ composed of some rotations and we compute the gradients of\n", "$$ \\langle 0 | U^{\\dagger} O U | 0 \\rangle, $$\n", - "where $O$ is an observable." 
+ "where $O$ is an observable.\n", + "\n", + "Let's start with the circuit:" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 34, "id": "fd9264ff-150c-4be7-89ba-5b5fb37afa3b", "metadata": {}, + "outputs": [], + "source": [ + "def build_parametric_circuit(nqubits, nlayers):\n", + " \"\"\"Build a Parametric Quantum Circuit with Qibo.\"\"\"\n", + " \n", + " c = Circuit(nqubits)\n", + " for _ in range(nlayers):\n", + " for q in range(nqubits):\n", + " c.add(gates.RY(q=q, theta=0))\n", + " c.add(gates.RZ(q=q, theta=0))\n", + " for q in range(0, nqubits-1, 1):\n", + " c.add(gates.CNOT(q0=q, q1=q+1))\n", + " c.add(gates.CNOT(q0=nqubits-1, q1=0))\n", + " c.add(gates.M(*range(nqubits)))\n", + "\n", + " return c" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "37ba84ff-ecb5-46b1-a060-8a4df87d1c1c", + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "q0: ─RY─RZ─RY─RZ─RY─RZ─M─\n", - "q1: ─RY─RZ─RY─RZ─RY─RZ─M─\n", - "q2: ─RY─RZ─RY─RZ─RY─RZ─M─\n" + "q0: ─RY─RZ─o───X─RY─RZ─o───X─RY─RZ─o───X─M─\n", + "q1: ─RY─RZ─X─o─|─RY─RZ─X─o─|─RY─RZ─X─o─|─M─\n", + "q2: ─RY─RZ───X─o─RY─RZ───X─o─RY─RZ───X─o─M─\n" ] } ], @@ -108,19 +123,21 @@ "nqubits = 3\n", "nlayers = 3\n", "\n", - "c = Circuit(nqubits)\n", - "for _ in range(nlayers):\n", - " for q in range(nqubits):\n", - " c.add(gates.RY(q=q, theta=0))\n", - " c.add(gates.RZ(q=q, theta=0))\n", - "c.add(gates.M(*range(nqubits)))\n", - "\n", + "c = build_parametric_circuit(nqubits, nlayers)\n", "print(c.draw())" ] }, + { + "cell_type": "markdown", + "id": "398f2f1f-3c91-4437-9a23-8801753c0785", + "metadata": {}, + "source": [ + "We can fill the circuit with a set of random parameters" + ] + }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 36, "id": "6b4afa80-bf2d-45e9-ba46-c7996a463fba", "metadata": {}, "outputs": [], @@ -133,9 +150,17 @@ "c.set_parameters(params)" ] }, + { + "cell_type": "markdown", + "id": 
"26162d5b-45ca-45aa-af62-3367ef0d57a7", + "metadata": {}, + "source": [ + "We can now define a simple hamiltonian, which will be our target observable." + ] + }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 37, "id": "b73dad19-cadc-45d5-8d58-85ee5f4d4247", "metadata": {}, "outputs": [], @@ -144,9 +169,17 @@ "obs = hamiltonians.Z(nqubits=nqubits)" ] }, + { + "cell_type": "markdown", + "id": "2142eec0-4010-4140-b393-cccf8d32d1fa", + "metadata": {}, + "source": [ + "Once executed the circuit, we can use the final state to compute the expectation value of the target observable using the appropriate`Qibo` function. " + ] + }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 38, "id": "6ea9d1a0-e908-4940-9759-fa4c92053ffa", "metadata": {}, "outputs": [ @@ -154,7 +187,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "tf.Tensor(1.5517656244439153, shape=(), dtype=float64)\n" + "tf.Tensor(0.34862167327428123, shape=(), dtype=float64)\n" ] } ], @@ -165,9 +198,26 @@ "print(obs.expectation(final_state))" ] }, + { + "cell_type": "markdown", + "id": "793479b8-ee33-4ec4-9b31-5b539234e904", + "metadata": {}, + "source": [ + "On the other hand, we developed a customized version of the `expectation` function in `qiboml`, which allows the user to keep the name convention, while integrating the possibility of customize the automatic differentiation provided by the chosen machine learning framework. It can be called from the `qiboml.expectation` module.\n", + "\n", + "This function accepts some more argument than the `qibo`'s one. 
In particular:\n", + "\n", + "- `observable`: the target observable, whose expectation value we are interested in;\n", + "- `circuit`: the circuit which returns the final state used to compute the expectation value;\n", + "- `initial_state`: the state of the system before applying the circuit;\n", + "- `nshots`: the number of shots to compute the expectation value;\n", + "- `backend`: the `qibo` backend on which we want to execute the circuit. This backend can even be a real quantum computer, when setting the `qibolab` backend;\n", + "- `differentiation_rule`: the actual differentiation rule one wants to apply. " + ] + }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 39, "id": "0c279101-960f-4ce4-b11f-3297c24e5e72", "metadata": {}, "outputs": [ @@ -175,7 +225,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "tf.Tensor(1.522, shape=(), dtype=float64)\n" + "tf.Tensor(0.2, shape=(), dtype=float64)\n" ] } ], @@ -184,7 +234,8 @@ " observable=obs,\n", " circuit=c,\n", " backend=\"numpy\",\n", - " differentiation_rule=differentiation.parameter_shift\n", + " differentiation_rule=differentiation.parameter_shift,\n", + " nshots=100,\n", ")\n", "\n", "print(exp)" @@ -200,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 40, "id": "bec419f1-0fca-4d7a-beca-e09947951278", "metadata": {}, "outputs": [ @@ -234,7 +285,7 @@ " differentiation_rule=differentiation.parameter_shift\n", " )\n", " ft = time.time()\n", - " np_times.append((ft-it)+np_times[-1])\n", + " np_times.append((ft-it))\n", "\n", " # executing on tensorflow backend\n", " it = time.time()\n", @@ -245,18 +296,18 @@ " differentiation_rule=differentiation.parameter_shift\n", " )\n", " ft = time.time()\n", - " tf_times.append((ft-it)+tf_times[-1])" + " tf_times.append((ft-it))" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 41, "id": "72af880a-092d-4531-85d8-e1809359788d", "metadata": {}, "outputs": [ { "data": { - "image/png": 
"", + "image/png": "", "text/plain": [ "
" ] @@ -267,8 +318,8 @@ ], "source": [ "plt.figure(figsize=(6, 6*6/8))\n", - "plt.plot(np_times, color=\"red\", label=\"Numpy backend\")\n", - "plt.plot(tf_times, color=\"royalblue\", label=\"TensorFlow backend\")\n", + "plt.plot(np.add.accumulate(np_times), color=\"red\", label=\"Numpy backend\")\n", + "plt.plot(np.add.accumulate(tf_times), color=\"royalblue\", label=\"TensorFlow backend\")\n", "plt.legend()\n", "plt.xlabel(\"# executions\")\n", "plt.ylabel(\"Time [s]\")\n", @@ -291,152 +342,220 @@ "source": [ "### Optimization example\n", "\n", - "In the following, we optimize the parameters of the parametric circuit to minimize a target cost function, which is the expectation value of a target hamiltonian. We will repeat this twice, using the `parameter_shift_rule` as differentiation method and both `numpy` and `tensorflow` as backends." + "In the following, we optimize the parameters of the parametric circuit to minimize a target cost function, which is the expectation value of a target hamiltonian. We will repeat this three times, using different configurations of the `qiboml` custom differentiation. Let us describe each of these scenarios with the triad `(frontend, differentiation rule, execution backend)`:\n", + "\n", + "1. `(tf, psr, tf)`: we are working with the TensorFlow interface, we use the parameter shift rule as differentiation algorithm and we execute the circuits using Qibo's `tensorflow` backend;\n", + "2. `(tf, psr, np)`: we are working with the TensorFlow interface, we use the parameter shift rule as differentiation algorithm and we execute the circuits using Qibo's `numpy` backend;\n", + "3. `(tf, tf, tf)`: we are working with the TensorFlow interface, we use the tensorflow automatic differentiation algorithm and we execute the circuits using Qibo's `tensorflow` backend.\n", + "\n", + "Note that these are only three of the many possible combinations. One has to pay attention to the problem configuration, e.g.
considering the parameter shift algorithm when executing circuits on real hardware (`qibolab` backend), or activating the shot-noise. In fact, the automatic differentiation of tensorflow breaks when shot-noise is activated.\n", + "\n", + "We make use of the following function, through which we can customize the training. \n", + "We also repeat the same exercise changing the number of the qubits, to analyze the scaling of \n", + "the problem with this hyper-parameter." ] }, { "cell_type": "code", - "execution_count": 31, - "id": "e1cc3737-f67b-4fd2-b28d-c025b58d815c", + "execution_count": 61, + "id": "6c4b84d3-aa09-4536-988e-d3ca02a60dcd", "metadata": {}, "outputs": [], "source": [ - "def cost_function(parameters, circuit, hamiltonian, backend):\n", + "def train_circuit(circuit, hamiltonian, nepochs, exec_backend, differentiation_rule=None, nshots=None):\n", " \"\"\"\n", - " Compute expectation value of ``hamiltonian`` over the final state we \n", - " get executing ``circuit`` using ``parameters``.\n", - " \"\"\"" + " Perform a simple gradient descent training of ``circuit`` to minimize the expectation \n", + " value of ``hamiltonian``. 
Gradients are computed via the chosen ``differentiation_rule``\n", + " and expectation values calculated executing circuit on the selected ``exec_backend``.\n", + "\n", + " Returns: \n", + " float: total execution time.\n", + " \"\"\"\n", + " learning_rate = 0.05\n", + " random_seed = 42\n", + "\n", + " # random parameters\n", + " np.random.seed(random_seed)\n", + " nparams = len(circuit.get_parameters())\n", + " params = tf.Variable(np.random.uniform(0, 2*np.pi, nparams))\n", + "\n", + " it = time.time()\n", + "\n", + " for epoch in range(nepochs):\n", + " with tf.GradientTape() as tape:\n", + " circuit.set_parameters(params)\n", + " cost = expectation.expectation(\n", + " observable=hamiltonian,\n", + " circuit=circuit,\n", + " backend=exec_backend,\n", + " differentiation_rule=differentiation_rule,\n", + " nshots=nshots,\n", + " )\n", + " if (epoch % 10 == 0):\n", + " print(f\"Cost: {round(cost, 4)} \\t |\\t Epoch: {epoch}\")\n", + " gradients = tape.gradient(cost, params, )\n", + " init_params = params.assign_sub(learning_rate * gradients)\n", + " ft = time.time()\n", + " \n", + " return (ft - it) " + ] + }, + { + "cell_type": "markdown", + "id": "63950898-fd94-422b-a9cf-b9222700c331", + "metadata": {}, + "source": [ + "#### Training scenario 1: `(tf, psr, tf)`" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "08cd072f-bc6b-477b-814b-574186ae3d5b", + "execution_count": 57, + "id": "26bd1eeb-4479-45c6-859b-21b9dca97c2c", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost: -0.1995 \t |\t Epoch: 0\n", + "Cost: -1.242 \t |\t Epoch: 10\n", + "Cost: -1.6788 \t |\t Epoch: 20\n", + "Cost: -1.9628 \t |\t Epoch: 30\n", + "Cost: -2.1845 \t |\t Epoch: 40\n", + "Cost: -2.3791 \t |\t Epoch: 50\n", + "Cost: -2.5614 \t |\t Epoch: 60\n", + "Cost: -2.73 \t |\t Epoch: 70\n", + "Cost: -2.8574 \t |\t 
Epoch: 80\n", + "Cost: -2.9297 \t |\t Epoch: 90\n", + "Execution time with (tf, psr, tf): 168.0022475719452\n" + ] } ], "source": [ - "reload(expectation)" + "nqubits = 3\n", + "nlayers = 5\n", + "\n", + "# setup the problem\n", + "circuit = build_parametric_circuit(nqubits, nlayers)\n", + "hamiltonian = hamiltonians.Z(nqubits)\n", + "\n", + "tf_psr_tf_time = train_circuit(\n", + " circuit=circuit,\n", + " hamiltonian=hamiltonian,\n", + " nepochs=100,\n", + " exec_backend=\"tensorflow\",\n", + " differentiation_rule=differentiation.parameter_shift\n", + ")\n", + "\n", + "print(f\"Execution time with (tf, psr, tf): {tf_psr_tf_time}\")" + ] + }, + { + "cell_type": "markdown", + "id": "08aba037-9dac-4b06-beba-0e3177138091", + "metadata": {}, + "source": [ + "#### Training scenario 2: `(tf, psr, np)`" ] }, { "cell_type": "code", - "execution_count": 33, - "id": "26bd1eeb-4479-45c6-859b-21b9dca97c2c", + "execution_count": 62, + "id": "775d2bb6-2112-4a47-9c4e-e22938be0194", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Cost: 1.522 \t |\t Epoch: 0\n", - "Cost: 1.042 \t |\t Epoch: 10\n", - "Cost: 0.446 \t |\t Epoch: 20\n", - "Cost: -0.106 \t |\t Epoch: 30\n", - "Cost: -0.524 \t |\t Epoch: 40\n", - "Cost: -1.098 \t |\t Epoch: 50\n", - "Cost: -1.442 \t |\t Epoch: 60\n", - "Cost: -1.84 \t |\t Epoch: 70\n", - "Cost: -2.106 \t |\t Epoch: 80\n", - "Cost: -2.386 \t |\t Epoch: 90\n" + "Cost: -0.1995 \t |\t Epoch: 0\n", + "Cost: -1.242 \t |\t Epoch: 10\n", + "Cost: -1.6788 \t |\t Epoch: 20\n", + "Cost: -1.9628 \t |\t Epoch: 30\n", + "Cost: -2.1845 \t |\t Epoch: 40\n", + "Cost: -2.3791 \t |\t Epoch: 50\n", + "Cost: -2.5614 \t |\t Epoch: 60\n", + "Cost: -2.73 \t |\t Epoch: 70\n", + "Cost: -2.8574 \t |\t Epoch: 80\n", + "Cost: -2.9297 \t |\t Epoch: 90\n", + "Execution time with (tf, psr, np): 13.642082214355469\n" ] } ], "source": [ - "learning_rate = 0.01\n", - "nepochs = 100\n", + "nqubits = 3\n", + "nlayers = 5\n", "\n", - "# set random 
generator seed\n", - "np.random.seed(42)\n", - "params = tf.Variable(np.random.uniform(0, 2*np.pi, nparams))\n", - "\n", - "it = time.time()\n", - "for epoch in range(nepochs):\n", - " with tf.GradientTape() as tape:\n", - " c.set_parameters(params)\n", - " cost = expectation.expectation(\n", - " observable=obs,\n", - " circuit=c,\n", - " backend=\"numpy\",\n", - " differentiation_rule=differentiation.parameter_shift\n", - " )\n", - " if (epoch % 10 == 0):\n", - " print(f\"Cost: {round(cost, 4)} \\t |\\t Epoch: {epoch}\")\n", - " gradients = tape.gradient(cost, params)\n", - " init_params = params.assign_sub(learning_rate * gradients)\n", - "ft = time.time()\n", - "\n", - "numpy_time = ft - it" + "# setup the problem\n", + "circuit = build_parametric_circuit(nqubits, nlayers)\n", + "hamiltonian = hamiltonians.Z(nqubits)\n", + "\n", + "tf_psr_np_time = train_circuit(\n", + " circuit=circuit,\n", + " hamiltonian=hamiltonian,\n", + " nepochs=100,\n", + " exec_backend=\"numpy\",\n", + " differentiation_rule=differentiation.parameter_shift\n", + ")\n", + "\n", + "print(f\"Execution time with (tf, psr, np): {tf_psr_np_time}\")" ] }, { "cell_type": "markdown", - "id": "5805a0fe-ade3-4d83-a2ef-41a0a61c3fcb", + "id": "c24cfb3b-d9e2-4aaf-a590-63712e9d9afb", "metadata": {}, "source": [ - "Let's now repeat the same optimization but using `tensorflow` as backend." 
+ "#### Training scenario 3: `(tf, tf, tf)`" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "25d7682a-eb38-486a-ac7b-0cfd07db9b27", + "execution_count": 63, + "id": "20e27788-e721-4a66-a606-fd7d8f5eda34", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Cost: 1.536 \t |\t Epoch: 0\n", - "Cost: 1.018 \t |\t Epoch: 10\n", - "Cost: 0.564 \t |\t Epoch: 20\n", - "Cost: -0.05 \t |\t Epoch: 30\n", - "Cost: -0.6 \t |\t Epoch: 40\n", - "Cost: -0.956 \t |\t Epoch: 50\n", - "Cost: -1.422 \t |\t Epoch: 60\n", - "Cost: -1.934 \t |\t Epoch: 70\n", - "Cost: -2.19 \t |\t Epoch: 80\n", - "Cost: -2.416 \t |\t Epoch: 90\n" + "Cost: 0.216 \t |\t Epoch: 0\n", + "Cost: -0.2827 \t |\t Epoch: 10\n", + "Cost: -0.5996 \t |\t Epoch: 20\n", + "Cost: -0.7808 \t |\t Epoch: 30\n", + "Cost: -0.8834 \t |\t Epoch: 40\n", + "Cost: -0.938 \t |\t Epoch: 50\n", + "Cost: -0.966 \t |\t Epoch: 60\n", + "Cost: -0.9804 \t |\t Epoch: 70\n", + "Cost: -0.9882 \t |\t Epoch: 80\n", + "Cost: -0.9927 \t |\t Epoch: 90\n", + "Execution time with (tf, tf, tf): 3.7739181518554688\n" ] } ], "source": [ - "# set random generator seed\n", - "np.random.seed(42)\n", - "params = tf.Variable(np.random.uniform(0, 2*np.pi, nparams))\n", - "\n", - "it = time.time()\n", - "for epoch in range(nepochs):\n", - " with tf.GradientTape() as tape:\n", - " c.set_parameters(params)\n", - " cost = expectation.expectation(\n", - " observable=obs,\n", - " circuit=c,\n", - " backend=\"tensorflow\",\n", - " differentiation_rule=differentiation.parameter_shift\n", - " )\n", - " if (epoch % 10 == 0):\n", - " print(f\"Cost: {round(cost, 4)} \\t |\\t Epoch: {epoch}\")\n", - " gradients = tape.gradient(cost, params)\n", - " init_params = params.assign_sub(learning_rate * gradients)\n", - "ft = time.time()\n", - "\n", - "tensorflow_time = ft - it" + "nqubits = 3\n", + "nlayers = 2\n", + "\n", + "# setup the problem\n", + "circuit = build_parametric_circuit(nqubits, nlayers)\n", + "hamiltonian = 
hamiltonians.Z(nqubits)\n", + "\n", + "tf_tf_tf_time = train_circuit(\n", + " circuit=circuit,\n", + " hamiltonian=hamiltonian,\n", + " nepochs=100,\n", + " exec_backend=\"tensorflow\",\n", + " differentiation_rule=None\n", + ")\n", + "\n", + "print(f\"Execution time with (tf, tf, tf): {tf_tf_tf_time}\")" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 64, "id": "2a905fa8-7b67-444c-b953-5a7ac3703b75", "metadata": {}, "outputs": [ @@ -444,15 +563,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "Optimization time with tf: 67.71683645248413\n", - "Optimization time with np: 13.010458946228027\n" + "(tf, psr, tf): 168.0022475719452\n", + "(tf, psr, np): 13.642082214355469\n", + "(tf, tf, tf): 3.7739181518554688\n" ] } ], "source": [ - "print(f\"Optimization time with tf: {tensorflow_time}\")\n", - "print(f\"Optimization time with np: {numpy_time}\")" + "print(f\"(tf, psr, tf): {tf_psr_tf_time}\")\n", + "print(f\"(tf, psr, np): {tf_psr_np_time}\")\n", + "print(f\"(tf, tf, tf): {tf_tf_tf_time}\")" + ] + }, + { + "cell_type": "markdown", + "id": "79c62b66-ac75-4cad-97af-d45851cffe1b", + "metadata": {}, + "source": [ + "It is clear that in this setup the last configuration is the fastest. This can be explained considering TensorFlow's automatic differentiation routines, which are well optimized to compute the backpropagation algorithm. Note, however, that the third run used `nlayers = 2` while the first two used `nlayers = 5`, so the timings are not strictly comparable. But if we activate the shot-noise the discussion changes. In fact, the TensorFlow automatic differentiation is not usable in a shot-noise setup, and one of the other options has to be set. In that case, executing the circuits on the `numpy` backend is the faster of the two parameter-shift options shown above (about 13.6 s versus 168 s with the `tensorflow` backend)." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27233b0a-6a2e-4609-9a13-96b46eada437", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {