Source code for qrl.env.core.compiler

'''
Implementation of CompilerV0 environment

Author: Jay Shah (@Jayshah25)

Contact: jay.shah@qrlqai.com

License: Apache-2.0
'''


from gymnasium import spaces
from pennylane import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
import shutil
from .utils import GATES, RX, RY, RZ 
from ._base import QuantumEnv


[docs]
class CompilerV0(QuantumEnv):
    """
    Single-qubit quantum gate compilation environment.

    ``CompilerV0`` models the problem of compiling a target single-qubit
    unitary using a fixed, discrete gate set. The agent incrementally applies
    quantum gates to build a circuit whose resulting unitary approximates the
    given target operation.

    At each step, the agent selects a gate action that left-multiplies the
    current circuit unitary. The reward is the trace overlap
    ``0.5 * |Tr(U_target† · U)|`` between the current unitary and the target,
    encouraging the agent to discover short, high-overlap gate sequences.

    Key properties
    --------------
    - **Action space**: Discrete set of 14 single-qubit gates: ``H``, ``X``,
      ``Y``, ``Z``, ``S``, ``SDG``, ``T``, ``TDG`` and fixed-angle ``RX``,
      ``RY``, ``RZ`` rotations.
    - **Observation space**: Flattened real and imaginary parts of the current
      ``2×2`` unitary (shape ``(8,)``, dtype ``float32``, bounded in
      ``[-1, 1]``).
    - **Reward**: ``0.5 * |Tr(U_target† · U)|`` with respect to the target
      unitary.
    - **Termination**: A single ``done`` flag is raised when the reward exceeds
      ``reward_tolerance`` or when ``max_steps`` gates have been applied.

    Rendering
    ---------
    The ``render()`` method visualizes the compilation process by displaying a
    heatmap of the magnitude of the difference matrix ``|U_target − U|`` over
    time, annotated with the current step, last applied gate, and reward.

    Input Parameters
    ----------------
    target_unitary : np.ndarray
        Complex-valued ``2×2`` unitary matrix to compile towards. Its shape,
        dtype and unitarity are validated at construction time.
    max_steps : int, optional
        Maximum number of gate applications per episode. Default is 30.
    reward_tolerance : float, optional
        Reward threshold in ``[0, 1]`` for early termination. Default is 0.98.
    ffmpeg : bool, optional
        Whether to use FFmpeg (MP4 output) instead of Pillow (GIF output) when
        saving animations. Default is False.

    See Also
    --------
    :doc:`tutorials/compiler`
        Step-by-step tutorial on compiling SU(2) unitaries using ``CompilerV0``.

    """
    def __init__(self, target_unitary, max_steps=30, reward_tolerance=0.98, ffmpeg=False):
        """
        Set up the compilation environment for a given target unitary.

        Parameters
        ----------
        target_unitary : np.ndarray
            Complex-valued ``2×2`` unitary matrix to compile towards.
        max_steps : int, optional
            Maximum number of gate applications per episode. Default is 30.
        reward_tolerance : float, optional
            Reward threshold in ``[0, 1]`` above which an episode ends early.
            Default is 0.98.
        ffmpeg : bool, optional
            If truthy, animations are written as MP4 through FFmpeg; otherwise
            as GIF through Pillow. Default is False.

        Raises
        ------
        AssertionError
            If ``target_unitary`` is not a complex-valued ``2×2`` unitary.
        ValueError
            If ``reward_tolerance`` is not within ``[0, 1]`` (NaN included),
            or if ``ffmpeg`` is requested but no ``ffmpeg`` executable can be
            found on the system.
        """
        super().__init__()

        # Raised explicitly instead of via ``assert`` so the checks are not
        # stripped under ``python -O``; AssertionError is kept so existing
        # callers that catch it keep working.
        if target_unitary.shape != (2, 2):
            raise AssertionError("Target unitary must be a 2x2 matrix.")
        if not np.issubdtype(target_unitary.dtype, np.complexfloating):
            raise AssertionError("Target matrix must be complex-valued.")
        if not np.allclose(target_unitary.conj().T @ target_unitary, np.eye(2, dtype=complex)):
            raise AssertionError("Target matrix must be unitary.")

        # Chained comparison also rejects NaN, which the two-sided
        # ``< 0 or > 1`` test would silently accept.
        if not 0 <= reward_tolerance <= 1:
            raise ValueError("reward_tolerance must be between 0 and 1")

        # Use the same truthiness test as the writer selection below, so any
        # value that selects the ffmpeg writer is also checked for availability.
        if ffmpeg and shutil.which("ffmpeg") is None:
            raise ValueError("ffmpeg not found on system. Please install ffmpeg or set ffmpeg=False")

        self.max_steps = max_steps
        self.target_unitary = target_unitary  # target is a 2x2 unitary matrix
        self.reward_tolerance = reward_tolerance

        # Observation: real+imag flattened 2x2 unitary = 8 floats
        self.observation_space = spaces.Box(low=-1, high=1, shape=(8,), dtype=np.float32)

        # Action index -> gate name; names absent from GATES are fixed-angle rotations.
        self.actions = ["H", "X", "Y", "Z", "S", "SDG", "T", "TDG",
                        "RX_pi_2", "RX_pi_4", "RY_pi_2", "RY_pi_4", "RZ_pi_2", "RZ_pi_4"]
        self.action_space = spaces.Discrete(len(self.actions))

        # Episode state; reset() re-initialises all three.
        self.history = []
        self.steps = 0
        self.U = np.eye(2, dtype=complex)

        # Animation backend and the matching file extension used by render().
        self.writer = "ffmpeg" if ffmpeg else "pillow"
        self.render_extension = "mp4" if ffmpeg else "gif"


    def _unitary_to_obs(self, U):
        """
        Convert a 2×2 unitary matrix into a flat observation vector.

        The unitary is represented by concatenating the flattened real and
        imaginary parts of the matrix.

        Parameters
        ----------
        U : np.ndarray
            Complex ``2×2`` unitary matrix representing the current circuit.

        Returns
        -------
        np.ndarray
            Flattened observation vector of shape ``(8,)`` containing
            ``[Re(U).flatten(), Im(U).flatten()]`` with dtype ``float32``.
        """
        return np.concatenate([U.real.flatten(), U.imag.flatten()]).astype(np.float32)


[docs]
    def reset(self):
        """
        Reset the environment to the initial compilation state.

        The circuit unitary is reset to the identity matrix, the step counter
        is cleared, and the history buffer is reinitialized.

        Returns
        -------
        observation : np.ndarray
            Flattened observation corresponding to the identity unitary,
            shape ``(8,)``.
        info : dict
            Empty dictionary provided for compatibility with the Gymnasium API.
        """
        self.steps = 0
        self.U = np.eye(2, dtype=complex)
        
        # Random target unitary: sample U3(θ, φ, λ)
        # theta, phi, lam = np.random.uniform(0, 2*np.pi, 3)
        # self.target_unitary = (RZ(phi) @ RY(theta) @ RZ(lam))  # general SU(2)
        self.history = [(self.U, 'None', 'None')]
        return self._unitary_to_obs(self.U), {}

    

[docs]
    def get_reward(self, action):
        """
        Apply a quantum gate action and compute the compilation reward.

        This method left-multiplies the current circuit unitary ``self.U`` by
        the unitary of the selected action (mutating ``self.U``) and then
        evaluates the overlap with the target unitary.

        Gate names found in ``GATES`` are looked up directly. The remaining
        names follow the pattern ``"<AXIS>_pi_<N>"`` (e.g. ``"RX_pi_2"``) and
        denote a rotation about ``AXIS`` by ``pi / N``.

        Parameters
        ----------
        action : int
            Index of the selected action in ``self.actions``.

        Returns
        -------
        float
            ``0.5 * |Tr(U_target† · U)|`` for the updated circuit unitary.

        Raises
        ------
        ValueError
            If the selected gate name is neither in ``GATES`` nor a
            well-formed ``"<AXIS>_pi_<N>"`` rotation.

        Notes
        -----
        NOTE(review): the textbook average gate fidelity of a qubit is
        ``(|Tr(U_target† · U)|**2 + 2) / 6``; the value returned here is the
        normalised trace overlap. The formula is left unchanged because
        ``reward_tolerance`` is calibrated against it.
        """
        gate = self.actions[action]
        if gate in GATES:
            U_gate = GATES[gate]
        else:
            rotations = {"RX": RX, "RY": RY, "RZ": RZ}
            parts = gate.split("_")
            well_formed = (
                len(parts) == 3
                and parts[0] in rotations
                and parts[1] == "pi"
                and parts[2].isdigit()
                and int(parts[2]) != 0
            )
            if not well_formed:
                raise ValueError(f"Unknown gate action: {gate!r}")
            # The divisor is the third token: "RX_pi_2" -> pi / 2. Reading only
            # the second token would give every rotation the same angle, pi.
            U_gate = rotations[parts[0]](np.pi / int(parts[2]))

        # Apply gate
        self.U = U_gate @ self.U

        # Normalised trace overlap with the target (1.0 means equal up to a global phase)
        reward = 0.5 * np.abs(np.trace(np.conj(self.target_unitary.T) @ self.U))
        return reward



[docs]
    def step(self, action):
        """
        Advance the episode by applying one gate.

        The chosen gate is applied through ``get_reward`` (which updates the
        circuit unitary), the step counter is incremented, and a
        ``(unitary, gate name, reward rounded to 3 decimals)`` frame is
        appended to the history.

        Parameters
        ----------
        action : int
            Index of the selected action in ``self.actions``.

        Returns
        -------
        observation : np.ndarray
            Flattened observation of the updated unitary, shape ``(8,)``.
        reward : float
            Reward returned by ``get_reward`` for this action.
        done : bool
            True when the reward exceeds ``reward_tolerance`` or the step
            count has reached ``max_steps``.
        info : dict
            Empty dictionary.
        """
        gate_name = self.actions[action]

        # get_reward() also left-multiplies self.U by the selected gate.
        score = self.get_reward(action)
        self.steps += 1

        frame = (self.U, gate_name, round(score, 3))
        self.history.append(frame)

        target_reached = score > self.reward_tolerance
        out_of_steps = self.steps >= self.max_steps
        done = target_reached or out_of_steps

        observation = self._unitary_to_obs(self.U)
        return observation, score, done, {}



[docs]
    def render(self, save_path_without_extension=None, interval=800):
        """
        Render the compilation process as an animation of the difference matrix.

        The visualization shows the magnitude of the element-wise difference
        ``|U_target - U|`` as a heatmap that evolves over the recorded history,
        with a title giving the current step, applied action, and reward.
        If no history has been recorded yet (``render()`` called before
        ``reset()`` or ``step()``), a single frame of the current unitary is
        shown instead.

        Parameters
        ----------
        save_path_without_extension : str or None, optional
            Path (without file extension) to save the animation.
            If provided, the animation is saved using the configured writer
            (MP4 for FFmpeg or GIF for Pillow) and the figure is closed
            afterwards. If None (or empty), the animation is displayed
            interactively.
        interval : int, optional
            Delay between animation frames in milliseconds. Default is 800.

        Returns
        -------
        None
            This method produces a visualization but does not return a value.
        """
        # Fall back to the current state rather than failing on history[0]
        # when nothing has been recorded yet.
        frames = self.history or [(self.U, 'None', 'None')]

        fig, ax = plt.subplots(figsize=(5, 5))

        # Entries of a unitary have modulus <= 1, so each entry of
        # |U_target - U| lies in [0, 2]; vmax=2 keeps large differences
        # from saturating the colormap.
        initial_diff = np.abs(self.target_unitary - frames[0][0])
        im = ax.imshow(initial_diff, cmap="magma", vmin=0, vmax=2)
        cbar = plt.colorbar(im, ax=ax)
        cbar.set_label("|Target - Prediction|")

        def update(step):
            unitary, action, reward = frames[step]

            # Refresh the heatmap with this frame's difference matrix
            im.set_array(np.abs(self.target_unitary - unitary))

            # Update title with the step annotations
            ax.set_title(f"Step {step} | Action: {action} | Reward={reward}")
            return [im]

        # Keep a reference to the animation until it has been saved or shown.
        ani = animation.FuncAnimation(
            fig, update, frames=len(frames), interval=interval, blit=False, repeat=False
        )

        if save_path_without_extension:
            try:
                ani.save(f"{save_path_without_extension}.{self.render_extension}", writer=self.writer)
            finally:
                # Release the figure so repeated renders do not accumulate open figures.
                plt.close(fig)
        else:
            plt.show()