Source code for qrl.env.core.compiler

'''
Implementation of CompilerV0 environment

Author: Jay Shah (@Jayshah25)

Contact: jay.shah@qrlqai.com

License: Apache-2.0
'''


from gymnasium import spaces
from pennylane import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
import shutil
from .utils import GATES, RX, RY, RZ 
from ._base import QuantumEnv

class CompilerV0(QuantumEnv):
    """
    Single-qubit quantum gate compilation environment.

    ``CompilerV0`` is a ``gymnasium.Env``-style environment that models the
    problem of compiling a target single-qubit unitary using a fixed,
    discrete gate set. The agent incrementally applies quantum gates to build
    a circuit whose resulting unitary approximates a given target operation.

    At each step, the agent selects a gate action that left-multiplies the
    current circuit unitary. The reward is the trace-overlap fidelity
    ``0.5 * |Tr(U_target† · U)|`` between the current unitary and the target,
    which is insensitive to a global phase.

    Key properties
    --------------
    - **Action space**: Discrete set of single-qubit gates (named gates
      looked up in ``GATES`` plus fixed-angle ``RX``/``RY``/``RZ`` rotations).
    - **Observation space**: Flattened real and imaginary parts of the
      current ``2×2`` unitary (shape ``(8,)``).
    - **Reward**: ``0.5 * |Tr(U_target† · U)|``.
    - **Termination**: ``done`` is set when the reward strictly exceeds
      ``reward_tolerance`` or when ``max_steps`` gates have been applied.

    Rendering
    ---------
    The ``render()`` method visualizes the compilation process by displaying
    a heatmap of the magnitude of the difference matrix ``|U_target − U|``
    over time, annotated with the current step, last applied gate, and
    reward.

    Parameters
    ----------
    target_unitary : np.ndarray
        Complex-valued ``2×2`` unitary matrix to compile towards.
    max_steps : int
        Maximum number of gate applications per episode.
    reward_tolerance : float
        Fidelity threshold in ``[0, 1]`` for early termination.
    ffmpeg : bool
        Whether to use FFmpeg (MP4 output) instead of Pillow (GIF output)
        when saving animations.

    Raises
    ------
    AssertionError
        If ``target_unitary`` is not a complex-valued ``2×2`` unitary matrix.
    ValueError
        If ``reward_tolerance`` is outside ``[0, 1]``, or if ``ffmpeg`` is
        requested but no ``ffmpeg`` executable is found on the system.

    See Also
    --------
    :doc:`tutorials/compiler`
        Step-by-step tutorial on compiling SU(2) unitaries using
        ``CompilerV0``.
    """

    def __init__(self, target_unitary, max_steps=30, reward_tolerance=0.98, ffmpeg=False):
        super().__init__()
        self.max_steps = max_steps
        self.target_unitary = target_unitary

        # Target must be a complex-valued 2x2 unitary matrix.
        # NOTE: these checks are ``assert`` statements (kept so callers that
        # expect AssertionError keep working); they are skipped under ``-O``.
        assert self.target_unitary.shape == (2, 2), "Target unitary must be a 2x2 matrix."
        assert np.issubdtype(self.target_unitary.dtype, np.complexfloating), "Target matrix must be complex-valued."
        assert np.allclose(self.target_unitary.conj().T @ self.target_unitary, np.eye(2, dtype=complex)), "Target matrix must be unitary."

        # Observation: real + imaginary parts of the flattened 2x2 unitary = 8 floats.
        self.observation_space = spaces.Box(low=-1, high=1, shape=(8,), dtype=np.float32)

        # Action names; rotation actions follow the pattern "<axis>_pi_<n>" (angle pi/n).
        self.actions = [
            "H", "X", "Y", "Z", "S", "SDG", "T", "TDG",
            "RX_pi_2", "RX_pi_4", "RY_pi_2", "RY_pi_4", "RZ_pi_2", "RZ_pi_4",
        ]
        self.action_space = spaces.Discrete(len(self.actions))

        # Per-episode list of (unitary, gate name, rounded reward); filled by reset()/step().
        self.history = []

        if reward_tolerance < 0 or reward_tolerance > 1:
            raise ValueError("reward_tolerance must be between 0 and 1")
        self.reward_tolerance = reward_tolerance

        self.steps = 0
        self.U = np.eye(2, dtype=complex)

        self.writer = "ffmpeg" if ffmpeg else "pillow"
        self.render_extension = "mp4" if ffmpeg else "gif"
        # Same truthiness test as the writer selection above, so any value
        # that selects the ffmpeg writer is also checked for availability.
        if ffmpeg and shutil.which("ffmpeg") is None:
            raise ValueError("ffmpeg not found on system. Please install ffmpeg or set ffmpeg=False")

    def _unitary_to_obs(self, U):
        """
        Convert a 2×2 unitary matrix into a flat observation vector.

        Parameters
        ----------
        U : np.ndarray
            Complex ``2×2`` matrix representing the current circuit.

        Returns
        -------
        np.ndarray
            Vector of shape ``(8,)`` containing
            ``[Re(U).flatten(), Im(U).flatten()]`` with dtype ``float32``.
        """
        return np.concatenate([U.real.flatten(), U.imag.flatten()]).astype(np.float32)

    def _gate_unitary(self, gate):
        """
        Return the 2×2 matrix for the action named ``gate``.

        Named gates are looked up in ``GATES``. Rotation actions are parsed
        from the pattern ``"<axis>_pi_<n>"`` and built as a rotation about
        ``<axis>`` by the angle ``pi / n``.

        Raises
        ------
        ValueError
            If ``gate`` is neither a key of ``GATES`` nor a well-formed
            rotation action name.
        """
        if gate in GATES:
            return GATES[gate]

        rotations = {"RX": RX, "RY": RY, "RZ": RZ}
        pieces = gate.split("_")
        well_formed = (
            len(pieces) == 3
            and pieces[0] in rotations
            and pieces[1] == "pi"
            and pieces[2].isdigit()
            and int(pieces[2]) != 0
        )
        if not well_formed:
            raise ValueError(f"Unknown gate action: {gate!r}")

        # "RX_pi_2" -> RX(pi / 2). The denominator must be honoured:
        # evaluating only the "pi" token would make every rotation use pi.
        return rotations[pieces[0]](np.pi / int(pieces[2]))

    def reset(self, *, seed=None, options=None):
        """
        Reset the environment to the initial compilation state.

        The circuit unitary is reset to the identity matrix, the step counter
        is cleared, and the history buffer is reinitialized with the initial
        state. The target unitary is left unchanged.

        Parameters
        ----------
        seed : int or None, optional
            Accepted for compatibility with the Gymnasium ``reset`` signature.
            Unused: this method performs no random sampling.
        options : dict or None, optional
            Accepted for compatibility with the Gymnasium ``reset`` signature.
            Unused.

        Returns
        -------
        observation : np.ndarray
            Flattened observation corresponding to the identity unitary,
            shape ``(8,)``.
        info : dict
            Empty dictionary provided for compatibility with the Gymnasium API.
        """
        self.steps = 0
        self.U = np.eye(2, dtype=complex)
        # First history entry is the identity with placeholder action/reward labels.
        self.history = [(self.U, 'None', 'None')]
        return self._unitary_to_obs(self.U), {}

    def get_reward(self, action):
        """
        Apply a quantum gate action and compute the compilation reward.

        This method left-multiplies the current circuit unitary ``self.U`` by
        the unitary of the selected action (so it mutates ``self.U``) and
        evaluates the overlap with the target unitary. It does not update the
        step counter or the history; ``step()`` does that.

        Parameters
        ----------
        action : int
            Index of the selected action in ``self.actions``.

        Returns
        -------
        float
            ``0.5 * |Tr(U_target† · U)|`` for the updated unitary ``U``.

        Raises
        ------
        ValueError
            If the selected action name cannot be resolved to a gate.
        """
        gate = self.actions[action]

        # Apply gate (new gates act after the existing circuit, hence left-multiplication).
        self.U = self._gate_unitary(gate) @ self.U

        # Normalized trace overlap with the target; the absolute value removes global phase.
        reward = 0.5 * np.abs(np.trace(np.conj(self.target_unitary.T) @ self.U))
        return reward

    def step(self, action):
        """
        Execute one compilation step.

        Applies the selected gate, updates the internal circuit unitary and
        history, computes the reward, and checks termination conditions.

        Parameters
        ----------
        action : int
            Index of the selected action in ``self.actions``.

        Returns
        -------
        observation : np.ndarray
            Updated flattened unitary observation, shape ``(8,)``.
        reward : float
            Reward returned by ``get_reward`` after applying the action.
        done : bool
            True if the reward strictly exceeds ``reward_tolerance`` or the
            step count has reached ``max_steps``. Both conditions are merged
            into this single flag (a 4-tuple is returned, not Gymnasium's
            separate ``terminated``/``truncated`` pair).
        info : dict
            Empty dictionary provided for compatibility with the Gymnasium API.
        """
        gate = self.actions[action]
        reward = self.get_reward(action)
        self.steps += 1
        self.history.append((self.U, gate, round(reward, 3)))
        done = bool(reward > self.reward_tolerance or self.steps >= self.max_steps)
        return self._unitary_to_obs(self.U), reward, done, {}

    def render(self, save_path_without_extension=None, interval=800):
        """
        Render the compilation process as an animation of the difference matrix.

        The visualization shows the magnitude of the element-wise difference
        ``|U_target - U|`` as a heatmap that evolves over the recorded
        history, with a title giving the step, applied action, and reward.

        Parameters
        ----------
        save_path_without_extension : str or None, optional
            Path (without file extension) to save the animation. If provided,
            the animation is saved using the configured writer (MP4 for
            FFmpeg or GIF for Pillow) and the figure is closed afterwards.
            If None, the animation is displayed interactively.
        interval : int, optional
            Delay between animation frames in milliseconds. Default is 800.

        Returns
        -------
        None
            This method produces a visualization but does not return a value.

        Raises
        ------
        IndexError
            If there is no recorded history (``reset()`` has not been called).
        """
        # Check before creating a figure so a failed call does not leave one open.
        if not self.history:
            raise IndexError("No history to render; call reset() before render().")

        fig, ax = plt.subplots(figsize=(5, 5))

        # Initial difference
        diff = np.abs(self.target_unitary - self.history[0][0])
        im = ax.imshow(diff, cmap="magma", vmin=0, vmax=1)
        cbar = plt.colorbar(im, ax=ax)
        cbar.set_label("|Target - Prediction|")

        def update(step):
            # Recompute the difference matrix for this frame's recorded unitary.
            unitary, action_name, reward = self.history[step]
            im.set_array(np.abs(self.target_unitary - unitary))
            ax.set_title(f"Step {step} | Action: {action_name} | Reward={reward}")
            return [im]

        ani = animation.FuncAnimation(
            fig, update, frames=len(self.history), interval=interval, blit=False, repeat=False
        )

        if save_path_without_extension:
            ani.save(f"{save_path_without_extension}.{self.render_extension}", writer=self.writer)
            # Release the figure once written so repeated renders do not accumulate open figures.
            plt.close(fig)
        else:
            plt.show()