Source code for qrl.env.core.expressibility

'''
Implementation of ExpressibilityV0 environment

Author: Jay Shah (@Jayshah25)

Contact: jay.shah@qrlqai.com

License: Apache-2.0
'''

from typing import List, Tuple, Optional, Dict
import pennylane as qml
from pennylane import numpy as np
from gymnasium import spaces
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.animation import FuncAnimation
import shutil
from ._base import QuantumEnv



[docs]
class ExpressibilityV0(QuantumEnv):
    """
    Parameterized circuit expressibility optimization environment.

    ``ExpressibilityV0`` is a ``gymnasium.Env``-compatible environment that models
    the construction of parameterized quantum circuits with high expressibility.
    In the context of variational quantum algorithms, expressibility measures how
    well an ansatz can explore the Hilbert space of quantum states relative to the
    Haar-random distribution.

    The agent incrementally builds a circuit by adding or removing predefined
    rotation and entangling blocks, or by explicitly terminating construction.
    Rewards encourage circuits whose fidelity distribution closely matches the
    Haar distribution, while penalizing excessive circuit depth and two-qubit gate
    usage.

    Key properties
    --------------
    - **Action space**: Discrete set of architectural edits (add/remove blocks or
    terminate construction).
    - **Observation space**: Vector of circuit statistics summarizing depth,
    parameter count, entanglement, and recent expressibility estimates
    (shape ``(7,)``).
    - **Reward**: Negative KL divergence to the Haar distribution with regularization
    penalties for depth and two-qubit gates.
    - **Termination**: Explicit termination by the agent or truncation at
    ``max_steps``.

    Rendering
    ---------
    The ``render()`` method visualizes expressibility optimization via a two-panel
    animation showing the circuit’s fidelity distribution compared to the
    Haar-random distribution alongside a block-level diagram of the evolving
    circuit architecture.

    Input Parameters
    ----------
    n_qubits : int
        Number of qubits in the circuit.
    max_blocks : int
        Maximum number of blocks allowed in the circuit.
    max_steps : int
        Maximum number of construction steps per episode.
    n_pairs_eval : int
        Number of random state pairs used to estimate expressibility.
    bins : int
        Number of histogram bins for fidelity distributions.
    lambda_depth : float
        Penalty weight for circuit depth.
    lambda_2q : float
        Penalty weight for two-qubit gate usage.
    terminate_bonus : float
        Bonus reward for explicit termination.
    device_name : str
        PennyLane device backend used for simulation.
    seed : int or None
        Random seed for reproducibility.
    allow_all_to_all : bool
        Whether to allow all-to-all entangling blocks.
    ffmpeg : bool
        Whether to use FFmpeg when saving animations.

    See Also
    --------
    :doc:`tutorials/expressibility`
        Tutorial on optimizing ansatz expressibility with block-based circuits.

    """

    def __init__(
        self,
        n_qubits: int = 4,
        max_blocks: int = 12,
        max_steps: int = 20,
        n_pairs_eval: int = 120,
        bins: int = 50,
        lambda_depth: float = 0.002,
        lambda_2q: float = 0.002,
        terminate_bonus: float = 0.1,
        device_name: str = "default.qubit",
        seed: Optional[int] = None,
        allow_all_to_all: bool = False,
        ffmpeg: bool = False,
    ):
        super().__init__()

        self.n_qubits = n_qubits
        self.max_blocks = max_blocks
        self.max_steps = max_steps
        self.n_pairs_eval = n_pairs_eval
        self.bins = bins
        self.lambda_depth = lambda_depth
        self.lambda_2q = lambda_2q
        self.terminate_bonus = terminate_bonus
        self.allow_all_to_all = allow_all_to_all

        self.D = 2 ** n_qubits
        self._rng = np.random.default_rng(seed)

        self.blocks: List[str] = []
        self.device = qml.device(device_name, wires=n_qubits)
        self.qnode = qml.QNode(self._circuit, self.device) #self._circuit is method defined below

        self.ACTION_NAMES = [
                        "RotX",
                        "RotY",
                        "RotZ",
                        "RotXYZ",
                        "EntRingCNOT",
                        "EntLadderCZ",
                        "RemoveLast",
                        "Terminate",
                        ]

        self.action_space = spaces.Discrete(len(self.ACTION_NAMES))

        high = np.array([
            10_000, # depth
            max_blocks, # number of blocks
            10_000, # number of two-qubit gates
            10_000, # number of rotational trainable parameters
            1_000, # entanglement density
            10.0, # last value of expressibility 
            max_steps # steps left
        ], dtype=np.float32)
        self.observation_space = spaces.Box(low=np.zeros_like(high), high=high, dtype=np.float32)

        self.current_step = 0
        self.last_express = None
        self.last_reward = 0.0
        self.info_last_hist = None
        self.history = []
        self.writer = "ffmpeg" if ffmpeg else "pillow"
        self.render_extension = "mp4" if ffmpeg else "gif"
        if ffmpeg==True and shutil.which("ffmpeg") is None:
            raise ValueError("ffmpeg not found on system. Please install ffmpeg or set ffmpeg=False")
        
        # utils
        self.done = None
        self.terminated = None
        self.obs = None
        self.info = None


[docs]
    def get_reward(self, action):
        """

        The selected action modifies the circuit architecture by adding,
        removing, or terminating block construction. Expressibility is
        evaluated after the update, and a reward is computed based on the
        circuit's deviation from the Haar distribution and architectural
        penalties.

        Parameters
        ----------
        action : int
            Index of the selected architectural action.

        Returns
        -------
        reward : float
            Reward value combining expressibility and architectural penalties.
        """

        assert self.action_space.contains(action)
        self.done = False
        self.terminated = False

        # available actions
        if action == 0 and len(self.blocks) < self.max_blocks:
            self.blocks.append("RotX")
        elif action == 1 and len(self.blocks) < self.max_blocks:
            self.blocks.append("RotY")
        elif action == 2 and len(self.blocks) < self.max_blocks:
            self.blocks.append("RotZ")
        elif action == 3 and len(self.blocks) < self.max_blocks:
            self.blocks.append("RotXYZ")
        elif action == 4 and len(self.blocks) < self.max_blocks:
            self.blocks.append("EntRingCNOT")
        elif action == 5 and len(self.blocks) < self.max_blocks:
            self.blocks.append("EntLadderCZ")
        elif action == 6 and self.blocks:
            self.blocks.pop()
        elif action == 7:
            self.done = True
            self.terminated = True

        # update current step
        self.current_step += 1
        if self.current_step >= self.max_steps:
            self.done = True

        # get expressibility for the current circuit
        express, kl, hist_c, hist_haar = self._expressibility()
        self.last_express = express
        self.info_last_hist = {"P_C": hist_c, "P_Haar": hist_haar}

        depth, n_blocks, n_twoq, n_params, ent_density = self._arch_stats()

        reward = -kl - self.lambda_depth * depth - self.lambda_2q * n_twoq
        if self.terminated:
            reward += self.terminate_bonus

        self.last_reward = reward

        # check if the action is supported
        self.obs = self._make_obs()
        self.info = {
            "kl": float(kl),
            "expressibility": float(express),
            "depth": int(depth),
            "n_twoq": int(n_twoq),
            "params": int(n_params),
            "blocks": list(self.blocks),
            "terminated": self.terminated
        }

        # store info for visualization in render()
        self.history.append({
            "P_C": hist_c,
            "P_Haar": hist_haar,
            "blocks": self.blocks.copy(),
            "reward": self.last_reward,
            "express": self.last_express,
            })

        return reward



[docs]
    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None):
        """
        Reset the environment to an empty circuit.

        Clears the current circuit architecture, resets internal counters, and
        initializes the observation vector corresponding to an empty ansatz.

        Parameters
        ----------
        seed : int or None, optional
            Random seed for reproducibility. If provided, reinitializes the
            internal random number generator.
        options : dict or None, optional
            Additional reset options (currently unused, included for
            Gymnasium compatibility).

        Returns
        -------
        observation : np.ndarray
            Initial observation vector describing an empty circuit, shape ``(7,)``.
        info : dict
            Empty dictionary provided for Gymnasium API compatibility.
        """
        super().reset(seed=seed)
        if seed is not None:
            self._rng = np.random.default_rng(seed)
        self.blocks = []
        self.current_step = 0
        self.last_express = None
        self.last_reward = 0.0
        self.info_last_hist = None
        obs = self._make_obs()
        self.history = []
        return obs, {}



[docs]
    def step(self, action: int):
        """
        Execute one architecture-modification step by calling the get_reward method.

        Parameters
        ----------
        action : int
            Index of the selected architectural action.

        Returns
        -------
        observation : np.ndarray
            Updated observation vector summarizing circuit statistics,
            shape ``(7,)``.
        reward : float
            Reward value combining expressibility and architectural penalties.
        done : bool
            True if the episode ended due to termination by agent or truncation, 
            False otherwise.
        info : dict
            Diagnostic information including expressibility, KL divergence,
            depth, parameter count, current block sequence, and 
            terminated (true if agent explicitly terminated, false if episode ended due to max steps).
        """

        reward = self.get_reward(action)
        return self.obs, reward, self.done, self.info




[docs]
    def render(self, save_path_without_extension=None, interval=800):
        """
        Render the expressibility optimization process as an animation.

        The animation shows:
        1. A histogram of circuit fidelity distribution compared to the
        Haar-random distribution.
        2. A block-diagram visualization of the evolving circuit architecture.

        Parameters
        ----------
        save_path_without_extension : str or None, optional
            Path (without file extension) to save the animation.
            If provided, the animation is saved using the configured writer
            (MP4 for FFmpeg or GIF for Pillow). If None, the animation is
            displayed interactively.
        interval : int, optional
            Delay between animation frames in milliseconds. Default is 800.

        Returns
        -------
        None
            This method produces a visualization but does not return a value.
        """

        if not hasattr(self, "history") or len(self.history) == 0:
            print("No history available for animation.")
            return

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

        # Init function
        def init():
            ax1.clear()
            ax2.clear()

        # Update function for each frame
        def update(frame):
            ax1.clear()
            ax2.clear()

            info = self.history[frame]

            # Fidelity distribution vs Haar
            hist_c = info["P_C"]
            hist_haar = info["P_Haar"]
            bins = len(hist_c)
            xs = np.linspace(0, 1, bins)

            ax1.bar(xs, hist_c, width=1.0/bins, alpha=0.6, label="Circuit Fidelity Dist.")
            ax1.plot(xs, hist_haar, "r-", lw=2, label="Haar Distribution")
            ax1.set_title(f"Expressibility (step {frame})")
            ax1.set_xlabel("Fidelity")
            ax1.set_ylabel("Density")
            ax1.legend()

            # Block diagram
            blocks = info["blocks"]
            ax2.set_xlim(0, max(1, len(blocks)))
            ax2.set_ylim(0, 1)
            ax2.axis("off")
            for i, b in enumerate(blocks):
                rect = Rectangle((i, 0.4), 0.9, 0.2, facecolor="skyblue", edgecolor="k")
                ax2.add_patch(rect)
                ax2.text(i+0.45, 0.5, b, ha="center", va="center", fontsize=8)
            ax2.set_title("Circuit Architecture Blocks")

            reward = info["reward"]
            express = info["express"]
            plt.suptitle(f"Reward={reward:.3f}, Expressibility={express:.3f}")

        anim = FuncAnimation(fig, update, frames=len(self.history),
                            init_func=init, interval=interval, repeat=False)

        if save_path_without_extension:
            anim.save(f"{save_path_without_extension}.{self.render_extension}", writer=self.writer)
        else:
            plt.show()


    def _make_obs(self):
        """
        Construct the current observation vector.

        The observation encodes summary statistics of the circuit architecture
        and training progress.

        Returns
        -------
        np.ndarray
            Observation vector of shape ``(7,)`` containing:
            ``[depth, n_blocks, n_twoq, n_params, ent_density, last_express, steps_left]``.
        """
        depth, n_blocks, n_twoq, n_params, ent_density = self._arch_stats()
        last_ex = self.last_express if self.last_express is not None else 0.0
        steps_left = max(self.max_steps - self.current_step, 0)
        vec = np.array([
            depth, n_blocks, n_twoq, n_params, int(1e3 * ent_density), last_ex, steps_left
        ], dtype=np.float32)
        return vec

    def _arch_stats(self) -> Tuple[int, int, int, int, float]:
        """
        Compute architecture statistics for the current circuit.

        Returns
        -------
        depth : int
            Total circuit depth.
        n_blocks : int
            Number of blocks in the circuit.
        n_twoq : int
            Total number of two-qubit gates.
        n_params : int
            Number of trainable rotation parameters.
        ent_density : float
            Normalized entangling density of the circuit.
        """
        depth = 0
        n_twoq = 0 # number of two-qubit gates
        n_params = 0
        for b in self.blocks:
            if b == "RotX":
                depth += 1
                n_params += 1 * self.n_qubits
            elif b == "RotY":
                depth += 1
                n_params += 1 * self.n_qubits
            elif b == "RotZ":
                depth += 1
                n_params += 1 * self.n_qubits
            elif b == "RotXYZ":
                depth += 1
                n_params += 3 * self.n_qubits
            elif b == "EntRingCNOT":
                depth += 1
                n_twoq += self.n_qubits
            elif b == "EntLadderCZ":
                depth += 1
                n_twoq += self.n_qubits - 1
            elif b == "EntAllToAllISWAP":
                depth += 1
                n_twoq += self.n_qubits * (self.n_qubits - 1) // 2
        n_blocks = len(self.blocks)

        # maximum number of possible two-qubit connections
        max_edges = self.n_qubits * (self.n_qubits - 1) / 2

        # entangling density
        ent_density = (n_twoq / max(1, n_blocks)) / max(1.0, max_edges)
        return depth, n_blocks, n_twoq, n_params, float(ent_density)

    def _circuit(self, thetas=None):
        """
        Construct and execute the quantum circuit for given parameters.

        The circuit structure is determined by the current block sequence.
        If parameters are not provided, they are sampled randomly.

        Parameters
        ----------
        thetas : np.ndarray or None, optional
            Vector of rotation parameters. If None, parameters are sampled
            from a standard normal distribution.

        Returns
        -------
        np.ndarray
            Statevector of the constructed quantum circuit.
        """
        if thetas is None:
            thetas = self._rng.standard_normal(self._count_rot_params())
        idx = 0
        for b in self.blocks:
            if b == "RotX":
                for w in range(self.n_qubits):
                    qml.RX(thetas[idx], wires=w); idx += 1
            elif b=="RotY":
                for w in range(self.n_qubits):
                    qml.RY(thetas[idx], wires=w); idx += 1
            elif b == "RotZ":
                for w in range(self.n_qubits):
                    qml.RZ(thetas[idx], wires=w); idx += 1
            elif b == "EntRingCNOT":
                for w in range(self.n_qubits):
                    qml.CNOT(wires=[w, (w + 1) % self.n_qubits])
            elif b == "EntLadderCZ":
                for w in range(self.n_qubits - 1):
                    qml.CZ(wires=[w, w + 1])
            elif b == "EntAllToAllISWAP":
                if not self.allow_all_to_all:
                    continue
                for i in range(self.n_qubits):
                    for j in range(i + 1, self.n_qubits):
                        qml.ISWAP(wires=[i, j])
        return qml.state()

    def _count_rot_params(self) -> int:
        """
        Count the number of trainable rotation parameters in the circuit.

        Returns
        -------
        int
            Total number of variational parameters required by the current
            block sequence.
        """
        count = 0
        for b in self.blocks:
            if b=="RotX":
                count += self.n_qubits
            elif b=="RotY":
                count += self.n_qubits
            elif b=="RotZ":
                count += self.n_qubits
            elif b=="RotXYZ":
                count += (3 * self.n_qubits)
        return count

    def _expressibility(self) -> Tuple[float, float, np.ndarray, np.ndarray]:
        """
        Estimate the expressibility of the current circuit architecture.

        Expressibility is evaluated by comparing the fidelity distribution
        of randomly sampled circuit states against the Haar-random
        distribution.

        Returns
        -------
        expressibility : float
            Expressibility score defined as the negative KL divergence.
        kl : float
            Kullback–Leibler divergence between circuit and Haar distributions.
        hist_c : np.ndarray
            Histogram of circuit fidelity distribution.
        p_haar : np.ndarray
            Haar-random fidelity distribution.
        """
        n_pairs = max(2, self.n_pairs_eval)
        fidelities = np.empty(n_pairs, dtype=np.float64)
        n_params = self._count_rot_params()

        for k in range(n_pairs):

            # generate two random param vectors theta1, theta2
            theta1 = self._rng.uniform(0, 2*np.pi, n_params) if n_params else None
            theta2 = self._rng.uniform(0, 2*np.pi, n_params) if n_params else None
            
            # get output states
            psi = self.qnode(theta1)
            phi = self.qnode(theta2)

            # calculate fidelity of the output states
            fid = np.abs(np.vdot(psi, phi)) ** 2
            fidelities[k] = fid.real

        # generate historgram of all the circuit fidelities calculated
        hist_c, edges = np.histogram(fidelities, bins=self.bins, range=(0.0, 1.0), density=True)
        centers = 0.5 * (edges[:-1] + edges[1:])
        dx = edges[1] - edges[0]

        # calculate Haar random distribution of fidelities
        # This is the ideal distribution for a maximally expressive ansatz.
        D = self.D # D = 2**num_qubits
        p_haar = (D - 1) * np.power(1.0 - centers, D - 2)
        p_haar = p_haar / (p_haar.sum() * dx + 1e-12) # normalize

        # calculate KL divergence
        # for kl==0, ansatz distribution perfectly matches Haar distribution
        # for kl==1, ansatz distribution is way of the Haar distribution (This is super bad)
        eps = 1e-12
        kl = float(np.sum(hist_c * np.log((hist_c + eps) / (p_haar + eps))) * dx)
        
        # We generally express expressibility as higher the better
        # for express==0, ansatz distribution perfectly matches Haar distribution
        # for express==-1, ansatz distribution is way off the Haar distribution (This is super bad)
        express = -kl
        return express, kl, hist_c, p_haar


[docs]
    def action_meanings(self):
        """
        Return a mapping from action indices to action names.

        Returns
        -------
        dict
            Dictionary mapping integer action indices to human-readable
            architectural action names.
        """
        return {i: n for i, n in enumerate(self.ACTION_NAMES)}