Re: [Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5]

From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
To: Keith Packard <keithp@keithp.com>
Cc: mesa-dev <mesa-dev@lists.freedesktop.org>,
	ML dri-devel <dri-devel@lists.freedesktop.org>
Subject: Re: [Mesa-dev] [PATCH] vulkan: Add VK_EXT_calibrated_timestamps extension (radv and anv) [v5]
Date: Thu, 18 Oct 2018 00:49:21 +0200	[thread overview]
Message-ID: <CAP+8YyHj0dTxRhrwrgOkfqB=BrP5NSm8QHHUv2-Wy_F+0RVTrQ@mail.gmail.com> (raw)
In-Reply-To: <20181017164912.1704-1-keithp@keithp.com>

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
On Wed, Oct 17, 2018 at 6:49 PM Keith Packard <keithp@keithp.com> wrote:
>
> Offers three clocks, device, clock monotonic and clock monotonic
> raw. Could use some kernel support to reduce the deviation between
> clock values.
>
> v2:
>         Ensure deviation is at least as big as the GPU time interval.
>
> v3:
>         Set device->lost when returning DEVICE_LOST.
>         Use MAX2 and DIV_ROUND_UP instead of open coding these.
>         Delete spurious TIMESTAMP in radv version.
>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>         Suggested-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> v4:
>         Add anv_gem_reg_read to anv_gem_stubs.c
>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>
> v5:
>         Adjust maxDeviation computation to max(sampled_clock_period) +
>         sample_interval.
>
>         Suggested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>         Suggested-by: Jason Ekstrand <jason@jlekstrand.net>
>
> Signed-off-by: Keith Packard <keithp@keithp.com>
> ---
>  src/amd/vulkan/radv_device.c       | 119 +++++++++++++++++++++++++++
>  src/amd/vulkan/radv_extensions.py  |   1 +
>  src/intel/vulkan/anv_device.c      | 127 +++++++++++++++++++++++++++++
>  src/intel/vulkan/anv_extensions.py |   1 +
>  src/intel/vulkan/anv_gem.c         |  13 +++
>  src/intel/vulkan/anv_gem_stubs.c   |   7 ++
>  src/intel/vulkan/anv_private.h     |   2 +
>  7 files changed, 270 insertions(+)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 174922780fc..4a705a724ef 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -4955,3 +4955,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
>                                VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
>                                VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
>  }
> +
> +static const VkTimeDomainEXT radv_time_domains[] = {
> +       VK_TIME_DOMAIN_DEVICE_EXT,
> +       VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +       VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +       VkPhysicalDevice                             physicalDevice,
> +       uint32_t                                     *pTimeDomainCount,
> +       VkTimeDomainEXT                              *pTimeDomains)
> +{
> +       int d;
> +       VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +       for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
> +               vk_outarray_append(&out, i) {
> +                       *i = radv_time_domains[d];
> +               }
> +       }
> +
> +       return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +radv_clock_gettime(clockid_t clock_id)
> +{
> +       struct timespec current;
> +       int ret;
> +
> +       ret = clock_gettime(clock_id, &current);
> +       if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +               ret = clock_gettime(CLOCK_MONOTONIC, &current);
> +       if (ret < 0)
> +               return 0;
> +
> +       return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +VkResult radv_GetCalibratedTimestampsEXT(
> +       VkDevice                                     _device,
> +       uint32_t                                     timestampCount,
> +       const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
> +       uint64_t                                     *pTimestamps,
> +       uint64_t                                     *pMaxDeviation)
> +{
> +       RADV_FROM_HANDLE(radv_device, device, _device);
> +       uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
> +       int d;
> +       uint64_t begin, end;
> +        uint64_t max_clock_period = 0;
> +
> +       begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +       for (d = 0; d < timestampCount; d++) {
> +               switch (pTimestampInfos[d].timeDomain) {
> +               case VK_TIME_DOMAIN_DEVICE_EXT:
> +                       pTimestamps[d] = device->ws->query_value(device->ws,
> +                                                                RADEON_TIMESTAMP);
> +                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
> +                        max_clock_period = MAX2(max_clock_period, device_period);
> +                       break;
> +               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +                       pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
> +                        max_clock_period = MAX2(max_clock_period, 1);
> +                       break;
> +
> +               case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +                       pTimestamps[d] = begin;
> +                       break;
> +               default:
> +                       pTimestamps[d] = 0;
> +                       break;
> +               }
> +       }
> +
> +       end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +        /*
> +         * The maximum deviation is the sum of the interval over which we
> +         * perform the sampling and the maximum period of any sampled
> +         * clock. That's because the maximum skew between any two sampled
> +         * clock edges is when the sampled clock with the largest period is
> +         * sampled at the end of that period but right at the beginning of the
> +         * sampling interval and some other clock is sampled right at the
> +         * begining of its sampling period and right at the end of the
> +         * sampling interval. Let's assume the GPU has the longest clock
> +         * period and that the application is sampling GPU and monotonic:
> +         *
> +         *                               s                 e
> +         *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> +         *     Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +         *
> +         *                               g
> +         *               0         1         2         3
> +         *     GPU       -----_____-----_____-----_____-----_____
> +         *
> +         *                                                m
> +         *                                         x y z 0 1 2 3 4 5 6 7 8 9 a b c
> +         *     Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +         *
> +         *     Interval                     <----------------->
> +         *     Deviation           <-------------------------->
> +         *
> +         *             s  = read(raw)       2
> +         *             g  = read(GPU)       1
> +         *             m  = read(monotonic) 2
> +         *             e  = read(raw)       b
> +         *
> +         * We round the sample interval up by one tick to cover sampling error
> +         * in the interval clock
> +         */
> +
> +        uint64_t sample_interval = end - begin + 1;
> +
> +        *pMaxDeviation = sample_interval + max_clock_period;
> +
> +       return VK_SUCCESS;
> +}
> diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
> index 5dcedae1c63..4c81d3f0068 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -92,6 +92,7 @@ EXTENSIONS = [
>      Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
>      Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
>      Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
> +    Extension('VK_EXT_calibrated_timestamps',             1, True),
>      Extension('VK_EXT_conditional_rendering',             1, True),
>      Extension('VK_EXT_conservative_rasterization',        1, 'device->rad_info.chip_class >= GFX9'),
>      Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index a2551452eb1..076ff3a57f6 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -3021,6 +3021,133 @@ void anv_DestroyFramebuffer(
>     vk_free2(&device->alloc, pAllocator, fb);
>  }
>
> +static const VkTimeDomainEXT anv_time_domains[] = {
> +   VK_TIME_DOMAIN_DEVICE_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
> +   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
> +};
> +
> +VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
> +   VkPhysicalDevice                             physicalDevice,
> +   uint32_t                                     *pTimeDomainCount,
> +   VkTimeDomainEXT                              *pTimeDomains)
> +{
> +   int d;
> +   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
> +
> +   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
> +      vk_outarray_append(&out, i) {
> +         *i = anv_time_domains[d];
> +      }
> +   }
> +
> +   return vk_outarray_status(&out);
> +}
> +
> +static uint64_t
> +anv_clock_gettime(clockid_t clock_id)
> +{
> +   struct timespec current;
> +   int ret;
> +
> +   ret = clock_gettime(clock_id, &current);
> +   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
> +      ret = clock_gettime(CLOCK_MONOTONIC, &current);
> +   if (ret < 0)
> +      return 0;
> +
> +   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
> +}
> +
> +#define TIMESTAMP 0x2358
> +
> +VkResult anv_GetCalibratedTimestampsEXT(
> +   VkDevice                                     _device,
> +   uint32_t                                     timestampCount,
> +   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
> +   uint64_t                                     *pTimestamps,
> +   uint64_t                                     *pMaxDeviation)
> +{
> +   ANV_FROM_HANDLE(anv_device, device, _device);
> +   uint64_t timestamp_frequency = device->info.timestamp_frequency;
> +   int  ret;
> +   int d;
> +   uint64_t begin, end;
> +   uint64_t max_clock_period = 0;
> +
> +   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +   for (d = 0; d < timestampCount; d++) {
> +      switch (pTimestampInfos[d].timeDomain) {
> +      case VK_TIME_DOMAIN_DEVICE_EXT:
> +         ret = anv_gem_reg_read(device, TIMESTAMP | 1,
> +                                &pTimestamps[d]);
> +
> +         if (ret != 0) {
> +            device->lost = TRUE;
> +            return VK_ERROR_DEVICE_LOST;
> +         }
> +         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
> +         max_clock_period = MAX2(max_clock_period, device_period);
> +         break;
> +      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
> +         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
> +         max_clock_period = MAX2(max_clock_period, 1);
> +         break;
> +
> +      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
> +         pTimestamps[d] = begin;
> +         break;
> +      default:
> +         pTimestamps[d] = 0;
> +         break;
> +      }
> +   }
> +
> +   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
> +
> +    /*
> +     * The maximum deviation is the sum of the interval over which we
> +     * perform the sampling and the maximum period of any sampled
> +     * clock. That's because the maximum skew between any two sampled
> +     * clock edges is when the sampled clock with the largest period is
> +     * sampled at the end of that period but right at the beginning of the
> +     * sampling interval and some other clock is sampled right at the
> +     * begining of its sampling period and right at the end of the
> +     * sampling interval. Let's assume the GPU has the longest clock
> +     * period and that the application is sampling GPU and monotonic:
> +     *
> +     *                               s                 e
> +     *                  w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
> +     * Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +     *
> +     *                               g
> +     *           0         1         2         3
> +     * GPU       -----_____-----_____-----_____-----_____
> +     *
> +     *                                                m
> +     *                                     x y z 0 1 2 3 4 5 6 7 8 9 a b c
> +     * Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
> +     *
> +     * Interval                     <----------------->
> +     * Deviation           <-------------------------->
> +     *
> +     *         s  = read(raw)       2
> +     *         g  = read(GPU)       1
> +     *         m  = read(monotonic) 2
> +     *         e  = read(raw)       b
> +     *
> +     * We round the sample interval up by one tick to cover sampling error
> +     * in the interval clock
> +     */
> +
> +   uint64_t sample_interval = end - begin + 1;
> +
> +   *pMaxDeviation = sample_interval + max_clock_period;
> +
> +   return VK_SUCCESS;
> +}
> +
>  /* vk_icd.h does not declare this function, so we declare it here to
>   * suppress Wmissing-prototypes.
>   */
> diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
> index d4915c95013..a8535964da7 100644
> --- a/src/intel/vulkan/anv_extensions.py
> +++ b/src/intel/vulkan/anv_extensions.py
> @@ -126,6 +126,7 @@ EXTENSIONS = [
>      Extension('VK_EXT_vertex_attribute_divisor',          3, True),
>      Extension('VK_EXT_post_depth_coverage',               1, 'device->info.gen >= 9'),
>      Extension('VK_EXT_sampler_filter_minmax',             1, 'device->info.gen >= 9'),
> +    Extension('VK_EXT_calibrated_timestamps',             1, True),
>  ]
>
>  class VkVersion:
> diff --git a/src/intel/vulkan/anv_gem.c b/src/intel/vulkan/anv_gem.c
> index c43b5ef9e06..1bdf040c1a3 100644
> --- a/src/intel/vulkan/anv_gem.c
> +++ b/src/intel/vulkan/anv_gem.c
> @@ -423,6 +423,19 @@ anv_gem_fd_to_handle(struct anv_device *device, int fd)
>     return args.handle;
>  }
>
> +int
> +anv_gem_reg_read(struct anv_device *device, uint32_t offset, uint64_t *result)
> +{
> +   struct drm_i915_reg_read args = {
> +      .offset = offset
> +   };
> +
> +   int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_REG_READ, &args);
> +
> +   *result = args.val;
> +   return ret;
> +}
> +
>  #ifndef SYNC_IOC_MAGIC
>  /* duplicated from linux/sync_file.h to avoid build-time dependency
>   * on new (v4.7) kernel headers.  Once distro's are mostly using
> diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c
> index 5093bd5db1a..8cc3ad1f22e 100644
> --- a/src/intel/vulkan/anv_gem_stubs.c
> +++ b/src/intel/vulkan/anv_gem_stubs.c
> @@ -251,3 +251,10 @@ anv_gem_syncobj_wait(struct anv_device *device,
>  {
>     unreachable("Unused");
>  }
> +
> +int
> +anv_gem_reg_read(struct anv_device *device,
> +                 uint32_t offset, uint64_t *result)
> +{
> +   unreachable("Unused");
> +}
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 599b903f25c..08376b00c8e 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1103,6 +1103,8 @@ int anv_gem_get_aperture(int fd, uint64_t *size);
>  int anv_gem_gpu_get_reset_stats(struct anv_device *device,
>                                  uint32_t *active, uint32_t *pending);
>  int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
> +int anv_gem_reg_read(struct anv_device *device,
> +                     uint32_t offset, uint64_t *result);
>  uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
>  int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
>  int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel