import random
from abc import ABC, abstractmethod
from statistics import mean
from typing import (
Dict,
Iterator,
Optional,
Tuple,
List,
TypeVar,
Generic,
Set,
NamedTuple,
Any,
)
from weakref import WeakKeyDictionary
from sortedcontainers import SortedDict
from classad import parse
from classad._base_expression import Expression
from classad._functions import quantize
from classad._primitives import HTCInt, Undefined
from classad._expression import ClassAd
from usim import Scope, interval, Resources
from lapis.drone import Drone
from lapis.cachingjob import CachingJob
from lapis.monitor.core import sampling_required
from lapis.monitor.duplicates import UserDemand
from usim import time
class JobQueue(list):
    """Plain FIFO list of jobs waiting to be matched by a scheduler."""
# Default factors used to quantize (coarse-grain) drone resources into
# HTCondor-style buckets: memory in 128 MiB steps, disk in 1 MiB steps,
# cores in single units.
quantization_defaults = {
    "memory": HTCInt(128 * 1024 * 1024),
    "disk": HTCInt(1024 * 1024),
    "cores": HTCInt(1),
}

# ClassAd attributes are not case sensitive
# Default requirements expression attached to every drone (machine) ClassAd.
machine_ad_defaults = """
requirements = target.requestcpus <= my.cpus
""".strip()

# Default requirements expression attached to every job ClassAd.
job_ad_defaults = """
requirements = my.requestcpus <= target.cpus && my.requestmemory <= target.memory
"""

T = TypeVar("T")
# Objects that may be wrapped in a WrappedClassAd: either a Drone or a CachingJob.
DJ = TypeVar("DJ", Drone, CachingJob)
class WrappedClassAd(ClassAd, Generic[DJ]):
    """
    Combines the original job/drone object and the associated ClassAd.

    Attribute lookups performed while evaluating ClassAd expressions are
    redirected to the wrapped object's live state via :meth:`__getitem__`.
    """

    __slots__ = "_wrapped", "_temp"

    def __init__(self, classad: ClassAd, wrapped: DJ):
        """
        Initialization for wrapped ClassAd

        :param classad: the wrapped objects ClassAd description
        :param wrapped: wrapped object, either job or drone
        """
        super(WrappedClassAd, self).__init__()
        self._wrapped = wrapped
        # share the parsed expression data of the template ClassAd;
        # NOTE(review): all wrappers built from the same template share _data
        self._data = classad._data
        # temporary resource overrides used while matchmaking computes
        # "remainder after match" estimates (see clear_temporary_resources)
        self._temp = {}

    def empty(self):
        """
        Only relevant for wrapped drones. A drone counts as "empty" here when
        less than one CPU core remains available, i.e. it has no capacity for
        further work. Temporary match estimates in ``_temp`` take precedence
        over the drone's actual state.

        :return: true if less than one CPU core is still available
        """
        try:
            return self._temp["cores"] < 1
        except KeyError:
            return self._wrapped.theoretical_available_resources["cores"] < 1

    def __getitem__(self, item):
        """
        This method is used when evaluating classad expressions.

        :param item: name of a quantity in the classad expression
        :return: current value of this item
        """

        def access_wrapped(name, requested=True):
            """
            Extracts the wrapped object's current quantity of a certain resource (
            cores, memory, disk)

            :param name: name of the resource that is to be accessed
            :param requested: false if name is a resource of the drone, true if name
                is a resource requested by a job
            :return: value of respective resource
            """
            if isinstance(self._wrapped, Drone):
                return self._wrapped.theoretical_available_resources[name]
            if requested:
                return self._wrapped.resources[name]
            return self._wrapped.used_resources[name]

        # "target"-prefixed attributes are resolved against the matched
        # counterpart by the classad package; everything else is served here
        if "target" not in item:
            if "requestcpus" == item:
                return access_wrapped("cores", requested=True)
            elif "requestmemory" == item:
                # NOTE(review): request uses binary factors (1024) while the
                # drone-side "memory" below uses decimal (1000) — confirm intended
                return (1 / 1024 / 1024) * access_wrapped("memory", requested=True)
            elif "requestdisk" == item:
                return (1 / 1024) * access_wrapped("disk", requested=True)
            elif "requestwalltime" == item:
                return self._wrapped.requested_walltime
            elif "cpus" == item:
                # _temp holds tentative "remainder after match" values and
                # shadows the wrapped object's real state while matchmaking
                try:
                    return self._temp["cores"]
                except KeyError:
                    return access_wrapped("cores", requested=False)
            elif "memory" == item:
                try:
                    return (1 / 1000 / 1000) * self._temp["memory"]
                except KeyError:
                    return (1 / 1000 / 1000) * access_wrapped("memory", requested=False)
            elif "disk" == item:
                try:
                    return (1 / 1024) * self._temp["disk"]
                except KeyError:
                    return (1 / 1024) * access_wrapped("disk", requested=False)
            elif "cache_demand" == item:
                # mean inverse throughput scale of the caches at this site;
                # TypeError (no caches / None) maps to 0
                caches = self._wrapped.connection.storages.get(
                    self._wrapped.sitename, None
                )
                try:
                    return mean(
                        [1.0 / cache.connection._throughput_scale for cache in caches]
                    )
                except TypeError:
                    return 0
            elif "cache_scale" == item:
                caches = self._wrapped.connection.storages.get(
                    self._wrapped.sitename, None
                )
                try:
                    return mean(
                        [cache.connection._throughput_scale for cache in caches]
                    )
                except TypeError:
                    return 0
            elif "cache_throughput_per_core" == item:
                # total cache throughput in GB/s divided by the pool's core count
                caches = self._wrapped.connection.storages.get(
                    self._wrapped.sitename, None
                )
                try:
                    return sum(
                        [
                            cache.connection.throughput / 1000.0 / 1000.0 / 1000.0
                            for cache in caches
                        ]
                    ) / float(self._wrapped.pool_resources["cores"])
                except TypeError:
                    return 0
            elif "cached_data" == item:
                return self._wrapped.cached_data / 1000.0 / 1000.0 / 1000.0
            elif "data_volume" == item:
                return self._wrapped._total_input_data / 1000.0 / 1000.0 / 1000.0
            elif "current_waiting_time" == item:
                return time.now - self._wrapped.queue_date
            elif "failed_matches" == item:
                return self._wrapped.failed_matches
            elif "jobs_with_cached_data" == item:
                return self._wrapped.jobs_with_cached_data
        # fall back to the static ClassAd data for anything not handled above
        return super(WrappedClassAd, self).__getitem__(item)

    def clear_temporary_resources(self):
        # discard tentative "remainder after match" estimates
        self._temp.clear()

    def __repr__(self):
        return f"<{self.__class__.__name__}>: {self._wrapped}"

    def __eq__(self, other):
        return super().__eq__(other) and self._wrapped == other._wrapped

    def __hash__(self):
        # identity of the wrapped object, so equal-content wrappers of
        # different drones/jobs hash differently
        return id(self._wrapped)
class Cluster(List[WrappedClassAd[DJ]], Generic[DJ]):
    """A list of wrapped drones/jobs considered equivalent for matchmaking."""
class Bucket(List[Cluster[DJ]], Generic[DJ]):
    """A list of :class:`Cluster` objects grouped together."""
class JobScheduler(ABC):
    """
    Interface for job schedulers in the simulation. Concrete schedulers
    must manage a pool of drones and assign queued jobs to them.
    """

    __slots__ = ()

    @property
    def drone_list(self) -> Iterator[Drone]:
        """Yields the registered drones"""
        raise NotImplementedError

    def register_drone(self, drone: Drone):
        """Register a drone at the scheduler"""
        raise NotImplementedError

    def unregister_drone(self, drone: Drone):
        """Unregister a drone at the scheduler"""
        raise NotImplementedError

    def update_drone(self, drone: Drone):
        """Update parameters of a drone"""
        raise NotImplementedError

    async def run(self):
        """Run method of the scheduler"""
        raise NotImplementedError

    async def job_finished(self, job):
        """
        Declare a job as finished by a drone. This might even mean, that the job
        has failed and that the scheduler needs to requeue the job for further
        processing.
        """
        raise NotImplementedError
class CondorJobScheduler(JobScheduler):
    """
    Goal of the htcondor job scheduler is to have a scheduler that somehow
    mimics how htcondor schedules jobs.
    Htcondor is scheduling based on a priority queue. The priorities itself
    are managed by operators of htcondor.
    So different instances can apparently behave very different.
    A priority queue that sorts job slots
    by increasing costs is built. The scheduler checks if a job either
    exactly fits a slot or if it fits several times. The cost for
    putting a job at a given slot is given by the amount of resources that
    might remain unallocated.
    """

    def __init__(self, job_queue):
        # incoming stream of jobs from the simulation
        self._stream_queue = job_queue
        # clusters of drones with similar available resources
        self.drone_cluster = []
        # scheduling cycle length in simulation time units
        self.interval = 60
        self.job_queue = JobQueue()
        # True while jobs are still being read from the stream queue
        self._collecting = True
        # counts jobs currently matched but not yet finished
        self._processing = Resources(jobs=0)

    @property
    def drone_list(self):
        """Yields every drone of every cluster."""
        for cluster in self.drone_cluster:
            for drone in cluster:
                yield drone

    def register_drone(self, drone: Drone):
        """Register a drone by sorting it into a matching cluster."""
        self._add_drone(drone)

    def unregister_drone(self, drone: Drone):
        """Remove a drone from its cluster, dropping the cluster if emptied."""
        # NOTE(review): removes from self.drone_cluster while iterating it —
        # harmless only if the drone appears in at most one cluster; verify
        for cluster in self.drone_cluster:
            try:
                cluster.remove(drone)
            except ValueError:
                pass
            else:
                if len(cluster) == 0:
                    self.drone_cluster.remove(cluster)

    def _add_drone(self, drone: Drone, drone_resources: Optional[Dict] = None):
        """
        Sort a drone into the cluster with the closest resource profile
        (L1 distance over available resources); a new cluster is created if
        no cluster is within distance 1.

        :param drone: drone to sort in
        :param drone_resources: optional resource override (e.g. estimated
            remainder after a match) used instead of the drone's own state
        """
        minimum_distance_cluster = None
        distance = float("Inf")
        if len(self.drone_cluster) > 0:
            for cluster in self.drone_cluster:
                current_distance = 0
                # compare against the cluster's representative (first drone)
                for key in {*cluster[0].pool_resources, *drone.pool_resources}:
                    if drone_resources:
                        current_distance += abs(
                            cluster[0].theoretical_available_resources.get(key, 0)
                            - drone_resources.get(key, 0)
                        )
                    else:
                        current_distance += abs(
                            cluster[0].theoretical_available_resources.get(key, 0)
                            - drone.theoretical_available_resources.get(key, 0)
                        )
                if current_distance < distance:
                    minimum_distance_cluster = cluster
                    distance = current_distance
            if distance < 1:
                minimum_distance_cluster.append(drone)
            else:
                self.drone_cluster.append([drone])
        else:
            self.drone_cluster.append([drone])

    def update_drone(self, drone: Drone):
        """Re-cluster a drone after its state changed."""
        self.unregister_drone(drone)
        self._add_drone(drone)

    async def run(self):
        """
        Main loop: start collecting jobs, then each interval try to match
        queued jobs to drones. Matched drones are re-clustered using their
        estimated leftover resources. Terminates when collection is done,
        the queue is empty and no matched job is still processing.
        """
        async with Scope() as scope:
            scope.do(self._collect_jobs())
            async for _ in interval(self.interval):
                # iterate a copy as matched jobs are removed from the queue
                for job in self.job_queue.copy():
                    best_match = self._schedule_job(job)
                    if best_match:
                        await best_match.schedule_job(job)
                        self.job_queue.remove(job)
                        await sampling_required.put(self.job_queue)
                        await sampling_required.put(UserDemand(len(self.job_queue)))
                        self.unregister_drone(best_match)
                        # re-cluster the drone with its estimated remainder
                        left_resources = best_match.theoretical_available_resources
                        left_resources = {
                            key: value - job.resources.get(key, 0)
                            for key, value in left_resources.items()
                        }
                        self._add_drone(best_match, left_resources)
                if (
                    not self._collecting
                    and not self.job_queue
                    and self._processing.levels.jobs == 0
                ):
                    break
                await sampling_required.put(self)

    async def _collect_jobs(self):
        """Append jobs from the stream queue to the job queue as they arrive."""
        async for job in self._stream_queue:
            self.job_queue.append(job)
            await self._processing.increase(jobs=1)
            # TODO: logging happens with each job
            await sampling_required.put(self.job_queue)
            await sampling_required.put(UserDemand(len(self.job_queue)))
        self._collecting = False

    async def job_finished(self, job):
        """Account a finished job; requeue it if it failed."""
        if job.successful:
            await self._processing.decrease(jobs=1)
        else:
            await self._stream_queue.put(job)

    def _schedule_job(self, job) -> Optional[Drone]:
        """
        Find the cheapest drone (cluster representative) for the job.

        Cost is infinite for drones lacking a required resource; otherwise
        it grows with how poorly the job tiles the drone plus any resources
        the job does not request at all (which would idle). A cost <= 1 is
        considered a direct fit and returned immediately.

        :param job: job to place
        :return: the best-matching drone or ``None`` if no drone fits
        """
        priorities = {}
        for cluster in self.drone_cluster:
            drone = cluster[0]
            cost = 0
            resources = drone.theoretical_available_resources
            for resource_type in job.resources:
                if resources.get(resource_type, 0) < job.resources[resource_type]:
                    # Inf for all job resources that a drone does not support
                    # and all resources that are too small to even be considered
                    cost = float("Inf")
                    break
                else:
                    try:
                        cost += 1 / (
                            resources[resource_type] // job.resources[resource_type]
                        )
                    except KeyError:
                        pass
            # penalize resources the job leaves completely unused
            for additional_resource_type in [
                key for key in drone.pool_resources if key not in job.resources
            ]:
                cost += resources[additional_resource_type]
            cost /= len((*job.resources, *drone.pool_resources))
            if cost <= 1:
                # directly start job
                return drone
            try:
                priorities[cost].append(drone)
            except KeyError:
                priorities[cost] = [drone]
        try:
            minimal_key = min(priorities)
            if minimal_key < float("Inf"):
                return priorities[minimal_key][0]
        except ValueError:
            # no cluster produced a finite cost entry
            pass
        return None
# HTCondor ClassAd Scheduler
class NoMatch(Exception):
    """A job could not be matched to any drone"""
class RankedClusterKey(NamedTuple):
    # negated PreJobRank value; negated so that SortedDict's ascending order
    # yields the best rank first (see RankedAutoClusters._clustering_key)
    rank: float
    # quantized availability tuple, ordered (cpus, memory, disk)
    key: Tuple[float, ...]


RC = TypeVar("RC", bound="RankedClusters")
class RankedClusters(Generic[DJ]):
    """Automatically cluster drones by rank"""

    @abstractmethod
    def __init__(self, quantization: Dict[str, HTCInt], ranking: Expression):
        raise NotImplementedError

    @abstractmethod
    def empty(self) -> bool:
        """Whether there are no resources available"""
        raise NotImplementedError

    @abstractmethod
    def copy(self: RC) -> RC:
        """Copy the entire ranked auto clusters"""
        raise NotImplementedError

    @abstractmethod
    def add(self, item: WrappedClassAd[DJ]) -> None:
        """Add a new item"""
        raise NotImplementedError

    @abstractmethod
    def remove(self, item: WrappedClassAd[DJ]) -> None:
        """Remove an existing item"""
        raise NotImplementedError

    def update(self, item) -> None:
        """Update an existing item with its current state"""
        self.remove(item)
        self.add(item)

    @abstractmethod
    def clusters(self) -> Iterator[Set[WrappedClassAd[DJ]]]:
        raise NotImplementedError

    @abstractmethod
    def items(self) -> Iterator[Tuple[Any, Set[WrappedClassAd[DJ]]]]:
        raise NotImplementedError

    @abstractmethod
    def cluster_groups(self) -> Iterator[List[Set[WrappedClassAd[Drone]]]]:
        """Group autoclusters by PreJobRank"""
        raise NotImplementedError

    @abstractmethod
    def lookup(self, job: CachingJob) -> None:
        """Update information about cached data for every drone"""
        raise NotImplementedError
class RankedAutoClusters(RankedClusters[DJ]):
    """Automatically cluster similar jobs or drones"""

    def __init__(self, quantization: Dict[str, HTCInt], ranking: Expression):
        """
        :param quantization: factors to convert resources into HTCondor scaling
        :param ranking: prejobrank expression
        """
        self._quantization = quantization
        self._ranking = ranking
        # clusters sorted by key; SortedDict keeps ascending key order
        self._clusters: Dict[RankedClusterKey, Set[WrappedClassAd[DJ]]] = SortedDict()
        # reverse index: item -> its current cluster key
        self._inverse: Dict[WrappedClassAd[DJ], RankedClusterKey] = {}

    def empty(self) -> bool:
        """
        Whether no drone in any cluster has capacity left. Only the first
        drone of each cluster is checked as cluster members share the same
        quantized availability.

        :return:
        """
        for drones in self._clusters.values():
            if not next(iter(drones)).empty():
                return False
        return True

    def copy(self) -> "RankedAutoClusters[DJ]":
        # shallow-ish copy: cluster sets are copied, the wrapped items shared
        clone = type(self)(quantization=self._quantization, ranking=self._ranking)
        clone._clusters = SortedDict(
            (key, value.copy()) for key, value in self._clusters.items()
        )
        clone._inverse = self._inverse.copy()
        return clone

    def add(self, item: WrappedClassAd[DJ]):
        """
        Add a new wrapped item, usually a drone, to the RankedAutoCluster.
        Unless the item is already contained, the item's key is generated and it is
        sorted in into the clusters accordingly. If there are already items with the
        same key, the new item is added to the existing cluster. If not,
        a new cluster is created.

        :param item:
        :return:
        """
        if item in self._inverse:
            raise ValueError(f"{item!r} already stored; use `.update(item)` instead")
        item_key = self._clustering_key(item)
        try:
            self._clusters[item_key].add(item)
        except KeyError:
            self._clusters[item_key] = {item}
        self._inverse[item] = item_key

    def remove(self, item: WrappedClassAd[DJ]):
        """
        Removes the item; an emptied cluster is dropped entirely.

        :param item:
        :return:
        """
        item_key = self._inverse.pop(item)
        cluster = self._clusters[item_key]
        cluster.remove(item)
        if not cluster:
            del self._clusters[item_key]

    def _clustering_key(self, item: WrappedClassAd[DJ]):
        """
        Calculates an item's clustering key based on the specified ranking (in my use
        case the prejobrank) and the item's available resource. The resulting key's
        structure is (prejobrank value, (available cpus, available memory, available
        disk space)). The clustering key is negative as the SortedDict sorts its entries
        from low keys to high keys.

        :param item: drone for which the clustering key is calculated.
        :return: (prejobrank value, (available cpus, available memory, available
            disk space))
        """
        # TODO: assert that order is consistent
        quantization = self._quantization
        return RankedClusterKey(
            rank=-1.0 * self._ranking.evaluate(my=item),
            key=tuple(
                int(quantize(item[key], quantization.get(key, 1)))
                for key in ("cpus", "memory", "disk")
            ),
        )

    def clusters(self) -> Iterator[Set[WrappedClassAd[DJ]]]:
        """
        :return: iterator of all clusters
        """
        return iter(self._clusters.values())

    def items(self) -> Iterator[Tuple[RankedClusterKey, Set[WrappedClassAd[DJ]]]]:
        """
        :return: iterator of all clusters and corresponding keys
        """
        return iter(self._clusters.items())

    def cluster_groups(self) -> Iterator[List[Set[WrappedClassAd[Drone]]]]:
        """
        Sort clusters by the ranking key and then by the amount of available
        resources into nested lists of sets. Clusters whose representative has
        no capacity left (``empty()``) are skipped.

        :return:
        """
        group = []
        current_rank = None
        for ranked_key, drones in self._clusters.items():
            if next(iter(drones)).empty():
                continue
            # a change in rank starts a new group
            if ranked_key.rank != current_rank:
                current_rank = ranked_key.rank
                if group:
                    yield group
                    group = []
            group.append(drones)
        if group:
            yield group

    def lookup(self, job: CachingJob):
        # refresh every drone's knowledge about the job's cached data
        for _, drones in self._clusters.items():
            for drone in drones:
                drone._wrapped.look_up_cached_data(job)
class RankedNonClusters(RankedClusters[DJ]):
    """Automatically cluster jobs or drones by rank only"""

    def __init__(self, quantization: Dict[str, HTCInt], ranking: Expression):
        """
        :param quantization: factors to convert resources into HTCondor scaling;
            not used for the key here, kept for interface compatibility with
            :class:`RankedAutoClusters`
        :param ranking: prejobrank expression
        """
        self._quantization = quantization
        self._ranking = ranking
        # clusters keyed by negated rank only; SortedDict keeps ascending order
        self._clusters: Dict[float, Set[WrappedClassAd[DJ]]] = SortedDict()
        # reverse index: item -> its current ranking key
        self._inverse: Dict[WrappedClassAd[DJ], float] = {}

    def empty(self) -> bool:
        """Whether no drone in any cluster has capacity left."""
        for drones in self._clusters.values():
            for drone in drones:
                if not drone.empty():
                    return False
        return True

    def copy(self) -> "RankedNonClusters[DJ]":
        """Copy the container; cluster sets are copied, wrapped items shared."""
        clone = type(self)(quantization=self._quantization, ranking=self._ranking)
        clone._clusters = SortedDict(
            (key, value.copy()) for key, value in self._clusters.items()
        )
        clone._inverse = self._inverse.copy()
        return clone

    def add(self, item: WrappedClassAd[DJ]):
        """Add a new item, creating a cluster for its rank if necessary."""
        if item in self._inverse:
            raise ValueError(f"{item!r} already stored; use `.update(item)` instead")
        item_key = self._clustering_key(item)
        try:
            self._clusters[item_key].add(item)
        except KeyError:
            self._clusters[item_key] = {item}
        self._inverse[item] = item_key

    def remove(self, item: WrappedClassAd[DJ]):
        """Remove an existing item; an emptied cluster is dropped."""
        item_key = self._inverse.pop(item)
        cluster = self._clusters[item_key]
        cluster.remove(item)
        if not cluster:
            del self._clusters[item_key]

    # NOTE: no ``update`` override here — the previous one duplicated the
    # inherited ``RankedClusters.update`` (remove followed by add) verbatim.

    def _clustering_key(self, item: WrappedClassAd[DJ]):
        """
        For RankNonClusters there is only one clustering key, the objects defined
        ranking. The clustering key is negative as the SortedDict sorts its entries
        from low keys to high keys.
        """
        return -1.0 * self._ranking.evaluate(my=item)

    def clusters(self) -> Iterator[Set[WrappedClassAd[DJ]]]:
        return iter(self._clusters.values())

    def items(self) -> Iterator[Tuple[float, Set[WrappedClassAd[DJ]]]]:
        return iter(self._clusters.items())

    def cluster_groups(self) -> Iterator[List[Set[WrappedClassAd[Drone]]]]:
        """
        Sorts cluster by the ranking key. As there is no autoclustering, every drone
        is in a dedicated set and drones of the same ranking are combined into a list.
        These lists are then sorted by increasing ranking.

        :return: iterator of the lists containing drones with identical key
        """
        for drones in self._clusters.values():
            yield [{item} for item in drones]

    def lookup(self, job: CachingJob):
        """Update information about cached data for every drone."""
        for drones in self._clusters.values():
            for drone in drones:
                drone._wrapped.look_up_cached_data(job)
class CondorClassadJobScheduler(JobScheduler):
    """
    Goal of the htcondor job scheduler is to have a scheduler that somehow
    mimics how htcondor does schedule jobs.
    Htcondor does scheduling based on a priority queue. The priorities itself
    are managed by operators of htcondor.
    So different instances can apparently behave very different.
    In this case a priority queue that sorts job slots
    by increasing cost is built. The scheduler checks if a job either
    exactly fits a slot or if it does fit into it several times. The cost for
    putting a job at a given slot is given by the amount of resources that
    might remain unallocated.
    """

    def __init__(
        self,
        job_queue,
        machine_ad: str = machine_ad_defaults,
        job_ad: str = job_ad_defaults,
        pre_job_rank: str = "0",
        interval: float = 60,
        autocluster: bool = False,
    ):
        """
        Initializes the CondorClassadJobScheduler

        :param job_queue: queue of jobs that are scheduled in the following simulation
        :param machine_ad: ClassAd that is used with every drone
        :param job_ad: ClassAd that is used with every job
        :param pre_job_rank: ClassAd attribute that all drones are sorted by
        :param interval: time between scheduling cycles
        :param autocluster: could be used to decide whether to use autoclusters
        """
        self._stream_queue = job_queue
        # NOTE(review): the ``autocluster`` flag is currently ignored —
        # RankedNonClusters is always used
        self._drones: RankedClusters[Drone] = RankedNonClusters(
            quantization=quantization_defaults, ranking=parse(pre_job_rank)
        )
        self.interval = interval
        self.job_queue = JobQueue()
        # True while jobs are still being read from the stream queue
        self._collecting = True
        # counts jobs currently matched but not yet finished
        self._processing = Resources(jobs=0)
        # temporary solution
        # maps drones/jobs to their WrappedClassAd without keeping them alive
        self._wrapped_classads = WeakKeyDictionary()
        self._machine_classad = parse(machine_ad)
        self._job_classad = parse(job_ad)

    @property
    def drone_list(self) -> Iterator[Drone]:
        """
        Takes an iterator over the WrappedClassAd objects of drones known to the
        scheduler, extracts the drones and returns an iterator over the drone objects.

        :return:
        """
        for cluster in self._drones.clusters():
            for drone in cluster:
                yield drone._wrapped

    def register_drone(self, drone: Drone):
        """
        Provides the drones with the drone ClassAd, combines both into one object and
        adds the resulting WrappedClassAd object to the drones known to the scheduler as
        well as the dictionary containing all WrappedClassAd objects the scheduler
        works with.

        :param drone:
        """
        wrapped_drone = WrappedClassAd(classad=self._machine_classad, wrapped=drone)
        self._drones.add(wrapped_drone)
        self._wrapped_classads[drone] = wrapped_drone

    def unregister_drone(self, drone: Drone):
        """
        Remove a drone's representation from the scheduler's scope.

        :param drone:
        :return:
        """
        drone_wrapper = self._wrapped_classads[drone]
        self._drones.remove(drone_wrapper)

    def update_drone(self, drone: Drone):
        """
        Update a drone's representation in the scheduler scope.

        :param drone:
        :return:
        """
        drone_wrapper = self._wrapped_classads[drone]
        self._drones.update(drone_wrapper)

    async def run(self):
        """
        Runs the scheduler's functionality. Once executed, the scheduler starts up,
        begins collecting jobs and performs a scheduling pass every interval.
        Terminates when collection is done, the job queue is empty and no matched
        job is still processing.

        :return:
        """
        async with Scope() as scope:
            scope.do(self._collect_jobs())
            async for _ in interval(self.interval):
                await self._schedule_jobs()
                if (
                    not self._collecting
                    and not self.job_queue
                    and self._processing.levels.jobs == 0
                ):
                    break

    @staticmethod
    def _match_job(
        job: ClassAd, pre_job_clusters: Iterator[List[Set[WrappedClassAd[Drone]]]]
    ):
        """
        Tries to find a match for the transferred job among the available drones.

        :param job: job to match
        :param pre_job_clusters: list of clusters of wrapped drones that are
            presorted by a clustering mechanism of RankedAutoClusters/RankedNonClusters
            that mimics the HTCondor NEGOTIATOR_PRE_JOB_RANK, short prejobrank. The
            clusters contain drones that are considered to be equivalent with respect
            to all Requirements and Ranks
            that are used during the matchmaking process. This mimics the
            Autoclustering functionality of HTCondor.
            [[highest prejobrank {autocluster}, {autocluster}], ..., [lowest prejobrank
            {autocluster}, {autocluster}]
        :return: drone that is the best match for the job

        The matching is performed in several steps:
        1. The job's requirements are evaluted and only drones that meet them are
        considered further. A drone of every autocluster is extracted from
        pre_job_clusters and if it meets the job's requirements it is not removed
        from pre_job_clusters.
        2. The autoclusters that are equivalent with respect to the prejobrank are
        then sorted by the job's rank expression. The resulting format of
        pre_job_clusters is
        [[(highest prejobrank, highest jobrank) {autocluster} {autocluster},
        ..., (highest prejobrank, lowest jobrank) {autocluster}], ...]
        3. The resulting pre_job_clusters are then iterated and the drone with the
        highest (prejobrank, jobrank) whose requirements are also compatible with the
        job is returned as best match.
        """

        def debug_evaluate(expr, my, target=None):
            """
            Reimplementation of the classad packages evaluate function. Having it
            here enables developers to inspect the ClassAd evaluation process more
            closely and to add debug output if necessary.

            :param expr: expression or attribute name to evaluate
            :param my: ClassAd the expression belongs to
            :param target: counterpart ClassAd for "target." references
            :return: evaluation result
            """
            # a string is an attribute name; resolve it to its expression first
            if type(expr) is str:
                expr = my[expr]
            result = expr.evaluate(my=my, target=target)
            return result

        # step 1: filter autoclusters by the job's Requirements, using one
        # representative drone per cluster
        if job["Requirements"] != Undefined():
            pre_job_clusters_tmp = []
            for cluster_group in pre_job_clusters:
                cluster_group_tmp = []
                for cluster in cluster_group:
                    if debug_evaluate(
                        "Requirements", my=job, target=next(iter(cluster))
                    ):
                        cluster_group_tmp.append(cluster)
                pre_job_clusters_tmp.append(cluster_group_tmp)
            pre_job_clusters = pre_job_clusters_tmp
        # step 2: within each prejobrank group, sort clusters by the job's Rank
        # (random tie-break), best rank first
        if job["Rank"] != Undefined():
            pre_job_clusters_tmp = []
            for cluster_group in pre_job_clusters:
                pre_job_clusters_tmp.append(
                    sorted(
                        cluster_group,
                        key=lambda cluster: (
                            debug_evaluate("Rank", my=job, target=next(iter(cluster))),
                            random.random(),
                        ),
                        reverse=True,
                    )
                )
            pre_job_clusters = pre_job_clusters_tmp
        # step 3: return the first drone whose own Requirements accept the job
        for cluster_group in pre_job_clusters:
            # TODO: if we have POST_JOB_RANK, collect *all* matches of a group
            for cluster in cluster_group:
                for drone in cluster:
                    if drone["Requirements"] == Undefined() or drone.evaluate(
                        "Requirements", my=drone, target=job
                    ):
                        return drone
        raise NoMatch()

    async def _schedule_jobs(self):
        """
        Handles the scheduling of jobs. Tried to match the jobs in the job queue to
        available resources. This occurs in several steps.
        1. The list of drones known to the scheduler is copied. The copy can then be
        used to keep track of the drones' available resources while matching jobs as
        the jobs allocate resources on the original drones before being processed but
        not during scheduling.
        2. The job in the job queue are matched to (the copied)resources iteratively.
        The actual matching is performed by the `_match_job` method that returns the
        most suitable drone unless no drone is compatible with the job's requirements.
        If a match was found, the resources requested by the job are allocated on the
        matched drone. If no resources remain unallocated after the last job's
        allocation, the matching process is ended for this scheduler interval.
        3. After the job matching is finished, the matched jobs are removed from the
        job queue as the index of a job in the job queue changes once a job with a
        lower index is removed from the queue.
        4. The matched jobs' execution is triggered.
        """
        # Pre CachingJob Rank is the same for all jobs
        # Use a copy to allow temporary "remainder after match" estimates
        if self._drones.empty():
            return
        pre_job_drones = self._drones.copy()
        matches: List[
            Tuple[int, WrappedClassAd[CachingJob], WrappedClassAd[Drone]]
        ] = []
        for queue_index, candidate_job in enumerate(self.job_queue):
            try:
                # refresh cache information before matching
                pre_job_drones.lookup(candidate_job._wrapped)
                matched_drone = self._match_job(
                    candidate_job, pre_job_drones.cluster_groups()
                )
            except NoMatch:
                candidate_job._wrapped.failed_matches += 1
                continue
            else:
                matches.append((queue_index, candidate_job, matched_drone))
                # tentatively allocate the job's resources on the drone via
                # its _temp overrides (real allocation happens at execution)
                for key, value in candidate_job._wrapped.resources.items():
                    matched_drone._temp[key] = (
                        matched_drone._temp.get(
                            key,
                            matched_drone._wrapped.theoretical_available_resources[key],
                        )
                        - value
                    )
                pre_job_drones.update(matched_drone)
                # monitoring/coordination stuff
                if (
                    candidate_job._wrapped._total_input_data
                    and matched_drone._wrapped.cached_data
                ):
                    candidate_job._wrapped._cached_data = (
                        matched_drone._wrapped.cached_data
                    )
            if pre_job_drones.empty():
                break
        if not matches:
            return
        # TODO: optimize for few matches, many matches, all matches
        # remove in reverse so earlier indices stay valid
        for queue_index, _, _ in reversed(matches):
            del self.job_queue[queue_index]
        for _, job, drone in matches:
            drone.clear_temporary_resources()
            await self._execute_job(job=job, drone=drone)
        await sampling_required.put(self)
        # NOTE: Is this correct? Triggers once instead of for each job
        await sampling_required.put(self.job_queue)
        await sampling_required.put(UserDemand(len(self.job_queue)))

    async def _execute_job(self, job: WrappedClassAd, drone: WrappedClassAd):
        """
        Schedules a job on a drone by extracting both objects from the
        respective WrappedClassAd and using the drone's scheduling functionality

        :param job:
        :param drone:
        """
        wrapped_job = job._wrapped
        wrapped_drone = drone._wrapped
        await wrapped_drone.schedule_job(wrapped_job)

    async def _collect_jobs(self):
        """
        Combines jobs that are imported from the simulation's job config with a job
        ClassAd and adds the resulting WrappedClassAd objects to the scheduler's job
        queue.
        """
        async for job in self._stream_queue:
            wrapped_job = WrappedClassAd(classad=self._job_classad, wrapped=job)
            self._wrapped_classads[job] = wrapped_job
            self.job_queue.append(wrapped_job)
            await self._processing.increase(jobs=1)
            # TODO: logging happens with each job
            # TODO: job queue to the outside now contains wrapped classads...
            await sampling_required.put(self.job_queue)
            await sampling_required.put(UserDemand(len(self.job_queue)))
        self._collecting = False

    async def job_finished(self, job):
        """
        Handles the impact of finishing jobs on the scheduler. If the job is completed
        successfully, the amount of running jobs matched by the current scheduler
        instance is reduced. If the job is not finished successfully,
        it is resubmitted to the scheduler's job queue.

        :param job:
        """
        if job.successful:
            await self._processing.decrease(jobs=1)
        else:
            # NOTE(review): failed jobs re-enter the local queue directly here,
            # unlike CondorJobScheduler which puts them back on the stream queue
            self.job_queue.append(self._wrapped_classads[job])