linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Geva, Erez" <erez.geva.ext@siemens.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	"David S . Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Jiri Pirko <jiri@resnulli.us>, Andrei Vagin <avagin@gmail.com>,
	Dmitry Safonov <0x7f454c46@gmail.com>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Ingo Molnar <mingo@kernel.org>,
	John Stultz <john.stultz@linaro.org>,
	Michal Kubecek <mkubecek@suse.cz>,
	Oleg Nesterov <oleg@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Richard Cochran <richardcochran@gmail.com>,
	Stephen Boyd <sboyd@kernel.org>,
	Vladis Dronov <vdronov@redhat.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Frederic Weisbecker <frederic@kernel.org>,
	Eric Dumazet <edumazet@google.com>
Cc: Vinicius Costa Gomes <vinicius.gomes@intel.com>,
	Vedang Patel <vedang.patel@intel.com>,
	"Sudler, Simon" <simon.sudler@siemens.com>,
	"Meisinger, Andreas" <andreas.meisinger@siemens.com>,
	"Bucher, Andreas" <andreas.bucher@siemens.com>,
	"henning.schild@siemens.com" <henning.schild@siemens.com>,
	"jan.kiszka@siemens.com" <jan.kiszka@siemens.com>,
	"Zirkler, Andreas" <andreas.zirkler@siemens.com>,
	"Sakic, Ermin" <ermin.sakic@siemens.com>,
	"anninh.nguyen@siemens.com" <anninh.nguyen@siemens.com>,
	"Saenger, Michael" <michael.saenger@siemens.com>,
	"Maehringer, Bernd" <bernd.maehringer@siemens.com>,
	"gisela.greinert@siemens.com" <gisela.greinert@siemens.com>,
	Erez Geva <ErezGeva2@gmail.com>
Subject: Re: [PATCH 7/7] TC-ETF support PTP clocks
Date: Fri, 2 Oct 2020 11:05:01 +0000	[thread overview]
Message-ID: <VI1PR10MB244671875D9A041C2F9018AFAB310@VI1PR10MB2446.EURPRD10.PROD.OUTLOOK.COM> (raw)
In-Reply-To: <87a6x5eabi.fsf@nanos.tec.linutronix.de>



On 02/10/2020 02:33, Thomas Gleixner wrote:
> On Thu, Oct 01 2020 at 22:51, Erez Geva wrote:
>
>>    - Add support for using a POSIX dynamic clock with
>>      Traffic control Earliest TxTime First (ETF) Qdisc.
>
> ....
>
>> --- a/include/uapi/linux/net_tstamp.h
>> +++ b/include/uapi/linux/net_tstamp.h
>> @@ -167,6 +167,11 @@ enum txtime_flags {
>>      SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
>>                               SOF_TXTIME_FLAGS_LAST
>>   };
>> +/*
>> + * Clock ID to use with POSIX clocks
>> + * The ID must be u8 to fit in (struct sock)->sk_clockid
>> + */
>> +#define SOF_TXTIME_POSIX_CLOCK_ID (0x77)
>
> Random number with a random name.
>
>>   struct sock_txtime {
>>      __kernel_clockid_t      clockid;/* reference clockid */
>> diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
>> index c0de4c6f9299..8e3e0a61fa58 100644
>> --- a/net/sched/sch_etf.c
>> +++ b/net/sched/sch_etf.c
>> @@ -15,6 +15,7 @@
>>   #include <linux/rbtree.h>
>>   #include <linux/skbuff.h>
>>   #include <linux/posix-timers.h>
>> +#include <linux/posix-clock.h>
>>   #include <net/netlink.h>
>>   #include <net/sch_generic.h>
>>   #include <net/pkt_sched.h>
>> @@ -40,19 +41,40 @@ struct etf_sched_data {
>>      struct rb_root_cached head;
>>      struct qdisc_watchdog watchdog;
>>      ktime_t (*get_time)(void);
>> +#ifdef CONFIG_POSIX_TIMERS
>> +    struct posix_clock *pclock; /* pointer to a posix clock */
>
> Tail comments suck because they disturb the reading flow and this
> comment has absolute zero value.
>
> Comments are required to explain things which are not obvious...
>
>> +#endif /* CONFIG_POSIX_TIMERS */
>
> Also this #ifdeffery is bonkers. How is TSN supposed to work without
> POSIX_TIMERS in the first place?
>
>>   static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
>>      [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
>>   };
>>
>> +static inline ktime_t get_now(struct Qdisc *sch, struct etf_sched_data *q)
>> +{
>> +#ifdef CONFIG_POSIX_TIMERS
>> +    if (IS_ERR_OR_NULL(q->get_time)) {
>> +            struct timespec64 ts;
>> +            int err = posix_clock_gettime(q->pclock, &ts);
>> +
>> +            if (err) {
>> +                    pr_warn("Clock is disabled (%d) for queue %d\n",
>> +                            err, q->queue);
>> +                    return 0;
>
> That's really useful error handling.
>
>> +            }
>> +            return timespec64_to_ktime(ts);
>> +    }
>> +#endif /* CONFIG_POSIX_TIMERS */
>> +    return q->get_time();
>> +}
>> +
>>   static inline int validate_input_params(struct tc_etf_qopt *qopt,
>>                                      struct netlink_ext_ack *extack)
>>   {
>>      /* Check if params comply to the following rules:
>>       *      * Clockid and delta must be valid.
>>       *
>> -     *      * Dynamic clockids are not supported.
>> +     *      * Dynamic CPU clockids are not supported.
>>       *
>>       *      * Delta must be a positive or zero integer.
>>       *
>> @@ -60,11 +82,22 @@ static inline int validate_input_params(struct tc_etf_qopt *qopt,
>>       * expect that system clocks have been synchronized to PHC.
>>       */
>>      if (qopt->clockid < 0) {
>> +#ifdef CONFIG_POSIX_TIMERS
>> +            /**
>> +             * Use of PTP clock through a posix clock.
>> +             * The TC application must open the posix clock device file
>> +             * and use the dynamic clockid from the file description.
>
> What? How is the code which calls into this guaranteed to have a valid
> file descriptor open for a particular dynamic posix clock?
>
>> +             */
>> +            if (!is_clockid_fd_clock(qopt->clockid)) {
>> +                    NL_SET_ERR_MSG(extack,
>> +                                   "Dynamic CPU clockids are not supported");
>> +                    return -EOPNOTSUPP;
>> +            }
>> +#else /* CONFIG_POSIX_TIMERS */
>>              NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
>>              return -ENOTSUPP;
>> -    }
>> -
>> -    if (qopt->clockid != CLOCK_TAI) {
>> +#endif /* CONFIG_POSIX_TIMERS */
>> +    } else if (qopt->clockid != CLOCK_TAI) {
>>              NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
>>              return -EINVAL;
>>      }
>> @@ -103,7 +136,7 @@ static bool is_packet_valid(struct Qdisc *sch, struct etf_sched_data *q,
>>              return false;
>>
>>   skip:
>> -    now = q->get_time();
>> +    now = get_now(sch, q);
>
> Yuck.
>
> is_packet_valid() is invoked via:
>
>      __dev_queue_xmit()
>        __dev_xmit_skb()
>           etf_enqueue_timesortedlist()
>             is_packet_valid()
>
> __dev_queue_xmit() does
>
>         rcu_read_lock_bh();
>
> and your get_now() does
>
>      posix_clock_gettime()
>               down_read(&clk->rwsem);
>
>   ----> FAIL
>
> down_read() might sleep and cannot be called from a BH disabled
> region. This clearly has never been tested with any mandatory debug
> option enabled. Why am I not surprised?
>
> Aside of accessing PCH clock being slow at hell this cannot ever work
> and there is no way to make it work in any consistent form.
>
> If you have several NICs on several PCH domains then all of these
> domains should have one thing in common: CLOCK_TAI and the frequency.
>
> If that's not the case then the overall system design is just broken,
> but yes I'm aware of the fact that some industries decided to have their
> own definition of time and frequency just because they can.
>
> Handling different starting points of the domains interpretation of
> "TAI" is doable because that's just an offset, but having different
> frequencies is a nightmare.
>
> So if such a trainwreck is a valid use case which needs to be supported
> then just duct taping it into the code is not going to fly.
>
> The only way to make this work is to sit down and actually design a
> mechanism which allows to correlate the various notions of PCH time with
> the systems CLOCK_TAI, i.e. providing offset and frequency correction.
>
> Also you want to explain how user space applications should deal with
> these different time domains in a sane way.
>
> Thanks,
>
>          tglx
>

Thank you for your quick and depth feedback.

Erez

  reply	other threads:[~2020-10-02 11:05 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-01 20:51 [PATCH 0/7] TC-ETF support PTP clocks series Erez Geva
2020-10-01 20:51 ` [PATCH 1/7] POSIX clock ID check function Erez Geva
2020-10-01 21:56   ` Thomas Gleixner
2020-10-01 20:51 ` [PATCH 2/7] Function to retrieve main clock state Erez Geva
2020-10-01 22:05   ` Thomas Gleixner
2020-10-02  0:19     ` Thomas Gleixner
2020-10-01 20:51 ` [PATCH 3/7] Functions to fetch POSIX dynamic clock object Erez Geva
2020-10-01 23:35   ` Thomas Gleixner
2020-10-01 20:51 ` [PATCH 4/7] Fix qdisc_watchdog_schedule_range_ns range check Erez Geva
2020-10-01 22:44   ` Thomas Gleixner
2020-10-01 20:51 ` [PATCH 5/7] Traffic control using high-resolution timer issue Erez Geva
2020-10-01 23:07   ` Thomas Gleixner
2020-10-01 20:51 ` [PATCH 6/7] TC-ETF code improvements Erez Geva
2020-10-01 20:51 ` [PATCH 7/7] TC-ETF support PTP clocks Erez Geva
2020-10-02  0:33   ` Thomas Gleixner
2020-10-02 11:05     ` Geva, Erez [this message]
2020-10-02 19:01 ` [PATCH 0/7] TC-ETF support PTP clocks series Vinicius Costa Gomes
2020-10-02 19:56   ` Geva, Erez
2020-10-03  0:10   ` Thomas Gleixner
2020-10-09 11:17     ` AW: " Meisinger, Andreas
2020-10-09 15:39       ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=VI1PR10MB244671875D9A041C2F9018AFAB310@VI1PR10MB2446.EURPRD10.PROD.OUTLOOK.COM \
    --to=erez.geva.ext@siemens.com \
    --cc=0x7f454c46@gmail.com \
    --cc=ErezGeva2@gmail.com \
    --cc=andreas.bucher@siemens.com \
    --cc=andreas.meisinger@siemens.com \
    --cc=andreas.zirkler@siemens.com \
    --cc=anninh.nguyen@siemens.com \
    --cc=avagin@gmail.com \
    --cc=bernd.maehringer@siemens.com \
    --cc=bigeasy@linutronix.de \
    --cc=davem@davemloft.net \
    --cc=ebiederm@xmission.com \
    --cc=edumazet@google.com \
    --cc=ermin.sakic@siemens.com \
    --cc=frederic@kernel.org \
    --cc=gisela.greinert@siemens.com \
    --cc=henning.schild@siemens.com \
    --cc=jan.kiszka@siemens.com \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=john.stultz@linaro.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael.saenger@siemens.com \
    --cc=mingo@kernel.org \
    --cc=mkubecek@suse.cz \
    --cc=netdev@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=richardcochran@gmail.com \
    --cc=sboyd@kernel.org \
    --cc=simon.sudler@siemens.com \
    --cc=tglx@linutronix.de \
    --cc=vdronov@redhat.com \
    --cc=vedang.patel@intel.com \
    --cc=vinicius.gomes@intel.com \
    --cc=xiyou.wangcong@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).