This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Failure in syscall.open probe


This patch follows the API and fixes a bug - increment src/dst pointers

index c5022b5..082a4fd 100644
--- a/runtime/linux/copy.c
+++ b/runtime/linux/copy.c
@@ -24,6 +24,27 @@
  * @{
  */

+static long _stp_copy_from_user_inatomic(char *dst, const char __user
*src, long count)
+{
+       int res;
+       long bytes = 0;
+       for (;count;dst++, src++, count--, bytes++)
+       {
+               res = __copy_from_user_inatomic(dst, src, 1);
+               if (unlikely(res != 0))
+               {
+                       *dst = 0;
+                       break;
+               }
+               if (unlikely(*dst == 0))
+               {
+                       break;
+               }
+       }
+
+       return bytes;
+}
+
 /** Copy a NULL-terminated string from userspace.
  * On success, returns the length of the string (not including the trailing
  * NULL).
@@ -34,8 +55,8 @@
  *         least <i>count</i> bytes long.
  * @param src Source address, in user space.
  * @param count Maximum number of bytes to copy, including the trailing NULL.
- *
- * If <i>count</i> is smaller than the length of the string, copies
+ *
+ * If <i>count</i> is smaller than the length of the string, copies
  * <i>count</i> bytes and returns <i>count</i>.
  */

@@ -46,14 +67,8 @@ static long _stp_strncpy_from_user(char *dst, const
char __user *src, long count
         mm_segment_t _oldfs = get_fs();
         set_fs(USER_DS);
         pagefault_disable();
-        /* XXX: The following preempt() manipulations should be
-           redundant with probe entry/exit code, but for unknown
-           reasons on RHEL5/6 conversions.exp intermittently fails
-           without this.  */
-        preempt_disable();
        if (!lookup_bad_addr(VERIFY_READ, (const unsigned long)src, count))
-               res = strncpy_from_user(dst, src, count);
-        preempt_enable_no_resched();
+               res = _stp_copy_from_user_inatomic(dst, src, count);
         pagefault_enable();
         set_fs(_oldfs);
        return res;

On Mon, Aug 28, 2017 at 8:49 AM, Arkady <arkady.miasnikov@gmail.com> wrote:
> The following patch solves the problem, or, at least, makes it
> significantly less probable
>
> diff --git a/runtime/linux/copy.c b/runtime/linux/copy.c
> index c5022b5..2da01f1 100644
> --- a/runtime/linux/copy.c
> +++ b/runtime/linux/copy.c
> @@ -39,6 +39,28 @@
>   * <i>count</i> bytes and returns <i>count</i>.
>   */
>
> +static long _stp_copy_from_user_inatomic(char *dst, const char __user
> *src, long count)
> +{
> +       int res;
> +       long bytes = 0;
> +       while (count)
> +       {
> +               res = __copy_from_user_inatomic(dst, src, 1);
> +               if (unlikely(res != 1))
> +               {
> +                       return res;
> +               }
> +               if (unlikely((*dst == 0)))
> +               {
> +                       break;
> +               }
> +               bytes++;
> +               count--;
> +       }
> +
> +       return bytes;
> +}
> +
>  /* XXX: see also kread/uread in loc2c-runtime.h */
>  static long _stp_strncpy_from_user(char *dst, const char __user *src,
> long count)
>  {
> @@ -46,14 +68,8 @@ static long _stp_strncpy_from_user(char *dst, const
> char __user *src, long count
>          mm_segment_t _oldfs = get_fs();
>          set_fs(USER_DS);
>          pagefault_disable();
> -        /* XXX: The following preempt() manipulations should be
> -           redundant with probe entry/exit code, but for unknown
> -           reasons on RHEL5/6 conversions.exp intermittently fails
> -           without this.  */
> -        preempt_disable();
>         if (!lookup_bad_addr(VERIFY_READ, (const unsigned long)src, count))
> -               res = strncpy_from_user(dst, src, count);
> -        preempt_enable_no_resched();
> +               res = _stp_copy_from_user_inatomic(dst, src, count);
>          pagefault_enable();
>          set_fs(_oldfs);
>         return res;
>
> On Sun, Aug 27, 2017 at 3:38 PM, Arkady <arkady.miasnikov@gmail.com> wrote:
>> The following comment is probably relevant:
>> /*
>>  * On some kernels (e.g. 2.6.39), even with preemption disabled, the
>> strncpy_from_user,
>>  * instead of returning -1 after a page fault, schedules the process,
>> so we drop events
>>  * because of the preemption. This function reads the user buffer in
>> atomic chunks, and
>>  * returns when there's an error or the terminator is found
>>  */
>>
>> https://github.com/draios/sysdig/blob/dev/driver/ppm_events.c#L108
>>
>>> On Fri, Aug 25, 2017 at 10:52 PM, David Smith <dsmith@redhat.com> wrote:
>>>> Arkady,
>>>>
>>>> The "good" news is that I've duplicated your problem. I went ahead and
>>>> filed PR22012 (<https://sourceware.org/bugzilla/show_bug.cgi?id=22012>)
>>>> on this issue so I won't forget it.
>>>>
>>>> I'm looking into it.
>>>>
>>>>
>>>> On Fri, Aug 25, 2017 at 12:25 PM, Arkady <arkady.miasnikov@gmail.com> wrote:
>>>>> Reproduce the problem
>>>>>
>>>>> Run the following one liner on CentOS 6.9
>>>>>
>>>>> stap -e "global AF%;probe syscall.open {tid = tid();AF[tid] =
>>>>> filename;}probe syscall.open.return{tid = tid();delete AF[tid];}"
>>>>>
>>>>> Run a bash loop
>>>>> touch test.txt;while [ 1 ];do cat test.txt;done;
>>>>>
>>>>> Monitor the kernel log
>>>>> tail -f /var/log/messages
>>>>>
>>>>> After a couple of minutes you shall see
>>>>>
>>>>> BUG: scheduling while atomic: cat/87764/0x10000001
>>>>> Modules linked in: test_open(U) fuse rfcomm sco bridge bnep l2cap
>>>>> autofs4 bnx2fc cnic uio fcoe libfcoe libfc 8021q scsi_transport_fc
>>>>> garp stp scsi_tgt llc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4
>>>>> iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6
>>>>> xt_state nf_conntrack ip6table_filter ip6_tables ib_ipoib rdma_ucm
>>>>> ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core
>>>>> ib_addr ipv6 uinput microcode vmware_balloon uvcvideo videodev
>>>>> v4l2_compat_ioctl32 btusb bluetooth rfkill snd_seq_midi e1000
>>>>> snd_seq_midi_event snd_ens1371 snd_rawmidi snd_ac97_codec ac97_bus
>>>>> snd_seq snd_seq_device snd_pcm snd_timer snd soundcore snd_page_alloc
>>>>> sg i2c_piix4 shpchp ext4 jbd2 mbcache sd_mod crc_t10dif sr_mod cdrom
>>>>> mptspi mptscsih mptbase scsi_transport_spi pata_acpi ata_generic
>>>>> ata_piix vmwgfx ttm drm_kms_helper drm i2c_core dm_mirror
>>>>> dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib]
>>>>> Pid: 87764, comm: cat Not tainted 2.6.32-696.10.1.el6.x86_64 #1
>>>>> Call Trace:
>>>>> <#DB>  [<ffffffff81068244>] ? __schedule_bug+0x44/0x50
>>>>> [<ffffffff8154ae0c>] ? schedule+0xa4c/0xb70
>>>>> [<ffffffff812a6216>] ? vsnprintf+0x336/0x5e0
>>>>> [<ffffffff810740aa>] ? __cond_resched+0x2a/0x40
>>>>> [<ffffffff8154b200>] ? _cond_resched+0x30/0x40
>>>>> [<ffffffff812a8b9a>] ? strncpy_from_user+0x4a/0x90
>>>>> [<ffffffffa0705ff9>] ? probe_3602+0x5f9/0x1220 [test_open]
>>>>> [<ffffffff81196c31>] ? sys_open+0x1/0x30
>>>>> [<ffffffffa0707fed>] ? enter_kprobe_probe+0x1ed/0x3a0 [test_open]
>>>>> [<ffffffff815512bb>] ? aggr_pre_handler+0x5b/0xb0
>>>>> [<ffffffff81196c30>] ? sys_open+0x0/0x30
>>>>> [<ffffffff81196c31>] ? sys_open+0x1/0x30
>>>>> [<ffffffff81550cd5>] ? kprobe_exceptions_notify+0x3d5/0x430
>>>>> [<ffffffff81550f45>] ? notifier_call_chain+0x55/0x80
>>>>> [<ffffffff81550faa>] ? atomic_notifier_call_chain+0x1a/0x20
>>>>> [<ffffffff810acd0e>] ? notify_die+0x2e/0x30
>>>>> [<ffffffff8154e815>] ? do_int3+0x35/0xb0
>>>>> [<ffffffff8154e083>] ? int3+0x33/0x40
>>>>> [<ffffffff81196c30>] ? sys_open+0x0/0x30
>>>>
>>>>
>>>>
>>>> --
>>>> David Smith
>>>> Principal Software Engineer
>>>> Red Hat


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]