Source code for qrl.env.core.expressibility

'''
Implementation of ExpressibilityV0 environment

Author: Jay Shah (@Jayshah25)

Contact: jay.shah@qrlqai.com

License: Apache-2.0
'''

from typing import List, Tuple, Optional, Dict
import pennylane as qml
from pennylane import numpy as np
from gymnasium import spaces
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.animation import FuncAnimation
import shutil
from ._base import QuantumEnv


[docs] class ExpressibilityV0(QuantumEnv): """ Parameterized circuit expressibility optimization environment. ``ExpressibilityV0`` is a ``gymnasium.Env``-compatible environment that models the construction of parameterized quantum circuits with high expressibility. In the context of variational quantum algorithms, expressibility measures how well an ansatz can explore the Hilbert space of quantum states relative to the Haar-random distribution. The agent incrementally builds a circuit by adding or removing predefined rotation and entangling blocks, or by explicitly terminating construction. Rewards encourage circuits whose fidelity distribution closely matches the Haar distribution, while penalizing excessive circuit depth and two-qubit gate usage. Key properties -------------- - **Action space**: Discrete set of architectural edits (add/remove blocks or terminate construction). - **Observation space**: Vector of circuit statistics summarizing depth, parameter count, entanglement, and recent expressibility estimates (shape ``(7,)``). - **Reward**: Negative KL divergence to the Haar distribution with regularization penalties for depth and two-qubit gates. - **Termination**: Explicit termination by the agent or truncation at ``max_steps``. Rendering --------- The ``render()`` method visualizes expressibility optimization via a two-panel animation showing the circuit’s fidelity distribution compared to the Haar-random distribution alongside a block-level diagram of the evolving circuit architecture. Input Parameters ---------- n_qubits : int Number of qubits in the circuit. max_blocks : int Maximum number of blocks allowed in the circuit. max_steps : int Maximum number of construction steps per episode. n_pairs_eval : int Number of random state pairs used to estimate expressibility. bins : int Number of histogram bins for fidelity distributions. lambda_depth : float Penalty weight for circuit depth. lambda_2q : float Penalty weight for two-qubit gate usage. terminate_bonus : float Bonus reward for explicit termination. device_name : str PennyLane device backend used for simulation. seed : int or None Random seed for reproducibility. allow_all_to_all : bool Whether to allow all-to-all entangling blocks. ffmpeg : bool Whether to use FFmpeg when saving animations. See Also -------- :doc:`tutorials/expressibility` Tutorial on optimizing ansatz expressibility with block-based circuits. """ def __init__( self, n_qubits: int = 4, max_blocks: int = 12, max_steps: int = 20, n_pairs_eval: int = 120, bins: int = 50, lambda_depth: float = 0.002, lambda_2q: float = 0.002, terminate_bonus: float = 0.1, device_name: str = "default.qubit", seed: Optional[int] = None, allow_all_to_all: bool = False, ffmpeg: bool = False, ): super().__init__() self.n_qubits = n_qubits self.max_blocks = max_blocks self.max_steps = max_steps self.n_pairs_eval = n_pairs_eval self.bins = bins self.lambda_depth = lambda_depth self.lambda_2q = lambda_2q self.terminate_bonus = terminate_bonus self.allow_all_to_all = allow_all_to_all self.D = 2 ** n_qubits self._rng = np.random.default_rng(seed) self.blocks: List[str] = [] self.device = qml.device(device_name, wires=n_qubits) self.qnode = qml.QNode(self._circuit, self.device) #self._circuit is method defined below self.ACTION_NAMES = [ "RotX", "RotY", "RotZ", "RotXYZ", "EntRingCNOT", "EntLadderCZ", "RemoveLast", "Terminate", ] self.action_space = spaces.Discrete(len(self.ACTION_NAMES)) high = np.array([ 10_000, # depth max_blocks, # number of blocks 10_000, # number of two-qubit gates 10_000, # number of rotational trainable parameters 1_000, # entanglement density 10.0, # last value of expressibility max_steps # steps left ], dtype=np.float32) self.observation_space = spaces.Box(low=np.zeros_like(high), high=high, dtype=np.float32) self.current_step = 0 self.last_express = None self.last_reward = 0.0 self.info_last_hist = None self.history = [] self.writer = "ffmpeg" if ffmpeg else "pillow" self.render_extension = "mp4" if ffmpeg else "gif" if ffmpeg==True and shutil.which("ffmpeg") is None: raise ValueError("ffmpeg not found on system. Please install ffmpeg or set ffmpeg=False") # utils self.done = None self.terminated = None self.obs = None self.info = None
[docs] def get_reward(self, action): """ The selected action modifies the circuit architecture by adding, removing, or terminating block construction. Expressibility is evaluated after the update, and a reward is computed based on the circuit's deviation from the Haar distribution and architectural penalties. Parameters ---------- action : int Index of the selected architectural action. Returns ------- reward : float Reward value combining expressibility and architectural penalties. """ assert self.action_space.contains(action) self.done = False self.terminated = False # available actions if action == 0 and len(self.blocks) < self.max_blocks: self.blocks.append("RotX") elif action == 1 and len(self.blocks) < self.max_blocks: self.blocks.append("RotY") elif action == 2 and len(self.blocks) < self.max_blocks: self.blocks.append("RotZ") elif action == 3 and len(self.blocks) < self.max_blocks: self.blocks.append("RotXYZ") elif action == 4 and len(self.blocks) < self.max_blocks: self.blocks.append("EntRingCNOT") elif action == 5 and len(self.blocks) < self.max_blocks: self.blocks.append("EntLadderCZ") elif action == 6 and self.blocks: self.blocks.pop() elif action == 7: self.done = True self.terminated = True # update current step self.current_step += 1 if self.current_step >= self.max_steps: self.done = True # get expressibility for the current circuit express, kl, hist_c, hist_haar = self._expressibility() self.last_express = express self.info_last_hist = {"P_C": hist_c, "P_Haar": hist_haar} depth, n_blocks, n_twoq, n_params, ent_density = self._arch_stats() reward = -kl - self.lambda_depth * depth - self.lambda_2q * n_twoq if self.terminated: reward += self.terminate_bonus self.last_reward = reward # check if the action is supported self.obs = self._make_obs() self.info = { "kl": float(kl), "expressibility": float(express), "depth": int(depth), "n_twoq": int(n_twoq), "params": int(n_params), "blocks": list(self.blocks), "terminated": self.terminated } # store info for visualization in render() self.history.append({ "P_C": hist_c, "P_Haar": hist_haar, "blocks": self.blocks.copy(), "reward": self.last_reward, "express": self.last_express, }) return reward
[docs] def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None): """ Reset the environment to an empty circuit. Clears the current circuit architecture, resets internal counters, and initializes the observation vector corresponding to an empty ansatz. Parameters ---------- seed : int or None, optional Random seed for reproducibility. If provided, reinitializes the internal random number generator. options : dict or None, optional Additional reset options (currently unused, included for Gymnasium compatibility). Returns ------- observation : np.ndarray Initial observation vector describing an empty circuit, shape ``(7,)``. info : dict Empty dictionary provided for Gymnasium API compatibility. """ super().reset(seed=seed) if seed is not None: self._rng = np.random.default_rng(seed) self.blocks = [] self.current_step = 0 self.last_express = None self.last_reward = 0.0 self.info_last_hist = None obs = self._make_obs() self.history = [] return obs, {}
[docs] def step(self, action: int): """ Execute one architecture-modification step by calling the get_reward method. Parameters ---------- action : int Index of the selected architectural action. Returns ------- observation : np.ndarray Updated observation vector summarizing circuit statistics, shape ``(7,)``. reward : float Reward value combining expressibility and architectural penalties. done : bool True if the episode ended due to termination by agent or truncation, False otherwise. info : dict Diagnostic information including expressibility, KL divergence, depth, parameter count, current block sequence, and terminated (true if agent explicitly terminated, false if episode ended due to max steps). """ reward = self.get_reward(action) return self.obs, reward, self.done, self.info
[docs] def render(self, save_path_without_extension=None, interval=800): """ Render the expressibility optimization process as an animation. The animation shows: 1. A histogram of circuit fidelity distribution compared to the Haar-random distribution. 2. A block-diagram visualization of the evolving circuit architecture. Parameters ---------- save_path_without_extension : str or None, optional Path (without file extension) to save the animation. If provided, the animation is saved using the configured writer (MP4 for FFmpeg or GIF for Pillow). If None, the animation is displayed interactively. interval : int, optional Delay between animation frames in milliseconds. Default is 800. Returns ------- None This method produces a visualization but does not return a value. """ if not hasattr(self, "history") or len(self.history) == 0: print("No history available for animation.") return fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) # Init function def init(): ax1.clear() ax2.clear() # Update function for each frame def update(frame): ax1.clear() ax2.clear() info = self.history[frame] # Fidelity distribution vs Haar hist_c = info["P_C"] hist_haar = info["P_Haar"] bins = len(hist_c) xs = np.linspace(0, 1, bins) ax1.bar(xs, hist_c, width=1.0/bins, alpha=0.6, label="Circuit Fidelity Dist.") ax1.plot(xs, hist_haar, "r-", lw=2, label="Haar Distribution") ax1.set_title(f"Expressibility (step {frame})") ax1.set_xlabel("Fidelity") ax1.set_ylabel("Density") ax1.legend() # Block diagram blocks = info["blocks"] ax2.set_xlim(0, max(1, len(blocks))) ax2.set_ylim(0, 1) ax2.axis("off") for i, b in enumerate(blocks): rect = Rectangle((i, 0.4), 0.9, 0.2, facecolor="skyblue", edgecolor="k") ax2.add_patch(rect) ax2.text(i+0.45, 0.5, b, ha="center", va="center", fontsize=8) ax2.set_title("Circuit Architecture Blocks") reward = info["reward"] express = info["express"] plt.suptitle(f"Reward={reward:.3f}, Expressibility={express:.3f}") anim = FuncAnimation(fig, update, frames=len(self.history), init_func=init, interval=interval, repeat=False) if save_path_without_extension: anim.save(f"{save_path_without_extension}.{self.render_extension}", writer=self.writer) else: plt.show()
def _make_obs(self): """ Construct the current observation vector. The observation encodes summary statistics of the circuit architecture and training progress. Returns ------- np.ndarray Observation vector of shape ``(7,)`` containing: ``[depth, n_blocks, n_twoq, n_params, ent_density, last_express, steps_left]``. """ depth, n_blocks, n_twoq, n_params, ent_density = self._arch_stats() last_ex = self.last_express if self.last_express is not None else 0.0 steps_left = max(self.max_steps - self.current_step, 0) vec = np.array([ depth, n_blocks, n_twoq, n_params, int(1e3 * ent_density), last_ex, steps_left ], dtype=np.float32) return vec def _arch_stats(self) -> Tuple[int, int, int, int, float]: """ Compute architecture statistics for the current circuit. Returns ------- depth : int Total circuit depth. n_blocks : int Number of blocks in the circuit. n_twoq : int Total number of two-qubit gates. n_params : int Number of trainable rotation parameters. ent_density : float Normalized entangling density of the circuit. """ depth = 0 n_twoq = 0 # number of two-qubit gates n_params = 0 for b in self.blocks: if b == "RotX": depth += 1 n_params += 1 * self.n_qubits elif b == "RotY": depth += 1 n_params += 1 * self.n_qubits elif b == "RotZ": depth += 1 n_params += 1 * self.n_qubits elif b == "RotXYZ": depth += 1 n_params += 3 * self.n_qubits elif b == "EntRingCNOT": depth += 1 n_twoq += self.n_qubits elif b == "EntLadderCZ": depth += 1 n_twoq += self.n_qubits - 1 elif b == "EntAllToAllISWAP": depth += 1 n_twoq += self.n_qubits * (self.n_qubits - 1) // 2 n_blocks = len(self.blocks) # maximum number of possible two-qubit connections max_edges = self.n_qubits * (self.n_qubits - 1) / 2 # entangling density ent_density = (n_twoq / max(1, n_blocks)) / max(1.0, max_edges) return depth, n_blocks, n_twoq, n_params, float(ent_density) def _circuit(self, thetas=None): """ Construct and execute the quantum circuit for given parameters. The circuit structure is determined by the current block sequence. If parameters are not provided, they are sampled randomly. Parameters ---------- thetas : np.ndarray or None, optional Vector of rotation parameters. If None, parameters are sampled from a standard normal distribution. Returns ------- np.ndarray Statevector of the constructed quantum circuit. """ if thetas is None: thetas = self._rng.standard_normal(self._count_rot_params()) idx = 0 for b in self.blocks: if b == "RotX": for w in range(self.n_qubits): qml.RX(thetas[idx], wires=w); idx += 1 elif b=="RotY": for w in range(self.n_qubits): qml.RY(thetas[idx], wires=w); idx += 1 elif b == "RotZ": for w in range(self.n_qubits): qml.RZ(thetas[idx], wires=w); idx += 1 elif b == "EntRingCNOT": for w in range(self.n_qubits): qml.CNOT(wires=[w, (w + 1) % self.n_qubits]) elif b == "EntLadderCZ": for w in range(self.n_qubits - 1): qml.CZ(wires=[w, w + 1]) elif b == "EntAllToAllISWAP": if not self.allow_all_to_all: continue for i in range(self.n_qubits): for j in range(i + 1, self.n_qubits): qml.ISWAP(wires=[i, j]) return qml.state() def _count_rot_params(self) -> int: """ Count the number of trainable rotation parameters in the circuit. Returns ------- int Total number of variational parameters required by the current block sequence. """ count = 0 for b in self.blocks: if b=="RotX": count += self.n_qubits elif b=="RotY": count += self.n_qubits elif b=="RotZ": count += self.n_qubits elif b=="RotXYZ": count += (3 * self.n_qubits) return count def _expressibility(self) -> Tuple[float, float, np.ndarray, np.ndarray]: """ Estimate the expressibility of the current circuit architecture. Expressibility is evaluated by comparing the fidelity distribution of randomly sampled circuit states against the Haar-random distribution. Returns ------- expressibility : float Expressibility score defined as the negative KL divergence. kl : float Kullback–Leibler divergence between circuit and Haar distributions. hist_c : np.ndarray Histogram of circuit fidelity distribution. p_haar : np.ndarray Haar-random fidelity distribution. """ n_pairs = max(2, self.n_pairs_eval) fidelities = np.empty(n_pairs, dtype=np.float64) n_params = self._count_rot_params() for k in range(n_pairs): # generate two random param vectors theta1, theta2 theta1 = self._rng.uniform(0, 2*np.pi, n_params) if n_params else None theta2 = self._rng.uniform(0, 2*np.pi, n_params) if n_params else None # get output states psi = self.qnode(theta1) phi = self.qnode(theta2) # calculate fidelity of the output states fid = np.abs(np.vdot(psi, phi)) ** 2 fidelities[k] = fid.real # generate historgram of all the circuit fidelities calculated hist_c, edges = np.histogram(fidelities, bins=self.bins, range=(0.0, 1.0), density=True) centers = 0.5 * (edges[:-1] + edges[1:]) dx = edges[1] - edges[0] # calculate Haar random distribution of fidelities # This is the ideal distribution for a maximally expressive ansatz. D = self.D # D = 2**num_qubits p_haar = (D - 1) * np.power(1.0 - centers, D - 2) p_haar = p_haar / (p_haar.sum() * dx + 1e-12) # normalize # calculate KL divergence # for kl==0, ansatz distribution perfectly matches Haar distribution # for kl==1, ansatz distribution is way of the Haar distribution (This is super bad) eps = 1e-12 kl = float(np.sum(hist_c * np.log((hist_c + eps) / (p_haar + eps))) * dx) # We generally express expressibility as higher the better # for express==0, ansatz distribution perfectly matches Haar distribution # for express==-1, ansatz distribution is way off the Haar distribution (This is super bad) express = -kl return express, kl, hist_c, p_haar
[docs] def action_meanings(self): """ Return a mapping from action indices to action names. Returns ------- dict Dictionary mapping integer action indices to human-readable architectural action names. """ return {i: n for i, n in enumerate(self.ACTION_NAMES)}