[Midnightbsd-cvs] src [8808] trunk/sys/cam/scsi: Fix a device depature bug for pass(4), enc(4), sg(4) and ch(4)

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Sun Sep 25 23:32:45 EDT 2016


Revision: 8808
          http://svnweb.midnightbsd.org/src/?rev=8808
Author:   laffer1
Date:     2016-09-25 23:32:45 -0400 (Sun, 25 Sep 2016)
Log Message:
-----------
Fix a device depature bug for pass(4), enc(4), sg(4) and ch(4)

Modified Paths:
--------------
    trunk/sys/cam/scsi/scsi_ch.c
    trunk/sys/cam/scsi/scsi_enc.c
    trunk/sys/cam/scsi/scsi_enc_internal.h
    trunk/sys/cam/scsi/scsi_pass.c
    trunk/sys/cam/scsi/scsi_sg.c

Modified: trunk/sys/cam/scsi/scsi_ch.c
===================================================================
--- trunk/sys/cam/scsi/scsi_ch.c	2016-09-26 03:32:16 UTC (rev 8807)
+++ trunk/sys/cam/scsi/scsi_ch.c	2016-09-26 03:32:45 UTC (rev 8808)
@@ -144,7 +144,8 @@
 	ch_quirks	quirks;
 	union ccb	saved_ccb;
 	struct devstat	*device_stats;
-	struct cdev *dev;
+	struct cdev     *dev;
+	int		open_count;
 
 	int		sc_picker;	/* current picker */
 
@@ -237,6 +238,48 @@
 }
 
 static void
+chdevgonecb(void *arg)
+{
+	struct cam_sim	  *sim;
+	struct ch_softc   *softc;
+	struct cam_periph *periph;
+	int i;
+
+	periph = (struct cam_periph *)arg;
+	sim = periph->sim;
+	softc = (struct ch_softc *)periph->softc;
+
+	KASSERT(softc->open_count >= 0, ("Negative open count %d",
+		softc->open_count));
+
+	mtx_lock(sim->mtx);
+
+	/*
+	 * When we get this callback, we will get no more close calls from
+	 * devfs.  So if we have any dangling opens, we need to release the
+	 * reference held for that particular context.
+	 */
+	for (i = 0; i < softc->open_count; i++)
+		cam_periph_release_locked(periph);
+
+	softc->open_count = 0;
+
+	/*
+	 * Release the reference held for the device node, it is gone now.
+	 */
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the final call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 */
+	mtx_unlock(sim->mtx);
+}
+
+static void
 choninvalidate(struct cam_periph *periph)
 {
 	struct ch_softc *softc;
@@ -250,6 +293,12 @@
 
 	softc->flags |= CH_FLAG_INVALID;
 
+	/*
+	 * Tell devfs this device has gone away, and ask for a callback
+	 * when it has cleaned up its state.
+	 */
+	destroy_dev_sched_cb(softc->dev, chdevgonecb, periph);
+
 	xpt_print(periph->path, "lost device\n");
 
 }
@@ -262,10 +311,9 @@
 	softc = (struct ch_softc *)periph->softc;
 
 	xpt_print(periph->path, "removing device entry\n");
+
 	devstat_remove_entry(softc->device_stats);
-	cam_periph_unlock(periph);
-	destroy_dev(softc->dev);
-	cam_periph_lock(periph);
+
 	free(softc, M_DEVBUF);
 }
 
@@ -359,6 +407,19 @@
 			  XPORT_DEVSTAT_TYPE(cpi.transport),
 			  DEVSTAT_PRIORITY_OTHER);
 
+	/*
+	 * Acquire a reference to the periph before we create the devfs
+	 * instance for it.  We'll release this reference once the devfs
+	 * instance has been freed.
+	 */
+	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+		xpt_print(periph->path, "%s: lost periph during "
+			  "registration!\n", __func__);
+		cam_periph_lock(periph);
+		return (CAM_REQ_CMP_ERR);
+	}
+
+
 	/* Register the device */
 	softc->dev = make_dev(&ch_cdevsw, periph->unit_number, UID_ROOT,
 			      GID_OPERATOR, 0600, "%s%d", periph->periph_name,
@@ -419,6 +480,9 @@
 	}
 
 	cam_periph_unhold(periph);
+
+	softc->open_count++;
+
 	cam_periph_unlock(periph);
 
 	return(error);
@@ -427,14 +491,37 @@
 static int
 chclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
+	struct	cam_sim *sim;
 	struct	cam_periph *periph;
+	struct  ch_softc *softc;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	if (periph == NULL)
 		return(ENXIO);
 
-	cam_periph_release(periph);
+	sim = periph->sim;
+	softc = (struct ch_softc *)periph->softc;
 
+	mtx_lock(sim->mtx);
+
+	softc->open_count--;
+
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 *
+	 * cam_periph_release() avoids this problem using the same method,
+	 * but we're manually acquiring and dropping the lock here to
+	 * protect the open count and avoid another lock acquisition and
+	 * release.
+	 */
+	mtx_unlock(sim->mtx);
+
 	return(0);
 }
 

Modified: trunk/sys/cam/scsi/scsi_enc.c
===================================================================
--- trunk/sys/cam/scsi/scsi_enc.c	2016-09-26 03:32:16 UTC (rev 8807)
+++ trunk/sys/cam/scsi/scsi_enc.c	2016-09-26 03:32:45 UTC (rev 8808)
@@ -111,11 +111,40 @@
 static void
 enc_devgonecb(void *arg)
 {
+	struct cam_sim    *sim;
 	struct cam_periph *periph;
+	struct enc_softc  *enc;
+	int i;
 
 	periph = (struct cam_periph *)arg;
+	sim = periph->sim;
+	enc = (struct enc_softc *)periph->softc;
 
-	cam_periph_release(periph);
+	mtx_lock(sim->mtx);
+
+	/*
+	 * When we get this callback, we will get no more close calls from
+	 * devfs.  So if we have any dangling opens, we need to release the
+	 * reference held for that particular context.
+	 */
+	for (i = 0; i < enc->open_count; i++)
+		cam_periph_release_locked(periph);
+
+	enc->open_count = 0;
+
+	/*
+	 * Release the reference held for the device node, it is gone now.
+	 */
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the final call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 */
+	mtx_unlock(sim->mtx);
 }
 
 static void
@@ -262,6 +291,8 @@
 out:
 	if (error != 0)
 		cam_periph_release_locked(periph);
+	else
+		softc->open_count++;
 
 	cam_periph_unlock(periph);
 
@@ -271,14 +302,37 @@
 static int
 enc_close(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
+	struct cam_sim    *sim;
 	struct cam_periph *periph;
+	struct enc_softc  *enc;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	if (periph == NULL)
 		return (ENXIO);
 
-	cam_periph_release(periph);
+	sim = periph->sim;
+	enc = periph->softc;
 
+	mtx_lock(sim->mtx);
+
+	enc->open_count--;
+
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 *
+	 * cam_periph_release() avoids this problem using the same method,
+	 * but we're manually acquiring and dropping the lock here to
+	 * protect the open count and avoid another lock acquisition and
+	 * release.
+	 */
+	mtx_unlock(sim->mtx);
+
 	return (0);
 }
 
@@ -946,6 +1000,11 @@
 		}
 	}
 
+	/*
+	 * Acquire a reference to the periph before we create the devfs
+	 * instance for it.  We'll release this reference once the devfs
+	 * instance has been freed.
+	 */
 	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
 		xpt_print(periph->path, "%s: lost periph during "
 			  "registration!\n", __func__);

Modified: trunk/sys/cam/scsi/scsi_enc_internal.h
===================================================================
--- trunk/sys/cam/scsi/scsi_enc_internal.h	2016-09-26 03:32:16 UTC (rev 8807)
+++ trunk/sys/cam/scsi/scsi_enc_internal.h	2016-09-26 03:32:45 UTC (rev 8808)
@@ -148,6 +148,7 @@
 	union ccb		 saved_ccb;
 	struct cdev		*enc_dev;
 	struct cam_periph	*periph;
+	int			 open_count;
 
 	/* Bitmap of pending operations. */
 	uint32_t		 pending_actions;

Modified: trunk/sys/cam/scsi/scsi_pass.c
===================================================================
--- trunk/sys/cam/scsi/scsi_pass.c	2016-09-26 03:32:16 UTC (rev 8807)
+++ trunk/sys/cam/scsi/scsi_pass.c	2016-09-26 03:32:45 UTC (rev 8808)
@@ -76,6 +76,7 @@
 	pass_flags	 flags;
 	u_int8_t	 pd_type;
 	union ccb	 saved_ccb;
+	int		 open_count;
 	struct devstat	*device_stats;
 	struct cdev	*dev;
 	struct cdev	*alias_dev;
@@ -140,12 +141,43 @@
 static void
 passdevgonecb(void *arg)
 {
+	struct cam_sim    *sim;
 	struct cam_periph *periph;
+	struct pass_softc *softc;
+	int i;
 
 	periph = (struct cam_periph *)arg;
+	sim = periph->sim;
+	softc = (struct pass_softc *)periph->softc;
 
-	xpt_print(periph->path, "%s: devfs entry is gone\n", __func__);
-	cam_periph_release(periph);
+	KASSERT(softc->open_count >= 0, ("Negative open count %d",
+		softc->open_count));
+
+	mtx_lock(sim->mtx);
+
+	/*
+	 * When we get this callback, we will get no more close calls from
+	 * devfs.  So if we have any dangling opens, we need to release the
+	 * reference held for that particular context.
+	 */
+	for (i = 0; i < softc->open_count; i++)
+		cam_periph_release_locked(periph);
+
+	softc->open_count = 0;
+
+	/*
+	 * Release the reference held for the device node, it is gone now.
+	 */
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the final call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 */
+	mtx_unlock(sim->mtx);
 }
 
 static void
@@ -368,7 +400,7 @@
 	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
 		xpt_print(periph->path, "%s: lost periph during "
 			  "registration!\n", __func__);
-		mtx_lock(periph->sim->mtx);
+		cam_periph_lock(periph);
 		return (CAM_REQ_CMP_ERR);
 	}
 
@@ -461,6 +493,8 @@
 		return(EINVAL);
 	}
 
+	softc->open_count++;
+
 	cam_periph_unlock(periph);
 
 	return (error);
@@ -469,14 +503,37 @@
 static int
 passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
+	struct  cam_sim    *sim;
 	struct 	cam_periph *periph;
+	struct  pass_softc *softc;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	if (periph == NULL)
 		return (ENXIO);	
 
-	cam_periph_release(periph);
+	sim = periph->sim;
+	softc = periph->softc;
 
+	mtx_lock(sim->mtx);
+
+	softc->open_count--;
+
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 *
+	 * cam_periph_release() avoids this problem using the same method,
+	 * but we're manually acquiring and dropping the lock here to
+	 * protect the open count and avoid another lock acquisition and
+	 * release.
+	 */
+	mtx_unlock(sim->mtx);
+
 	return (0);
 }
 

Modified: trunk/sys/cam/scsi/scsi_sg.c
===================================================================
--- trunk/sys/cam/scsi/scsi_sg.c	2016-09-26 03:32:16 UTC (rev 8807)
+++ trunk/sys/cam/scsi/scsi_sg.c	2016-09-26 03:32:45 UTC (rev 8808)
@@ -99,6 +99,7 @@
 struct sg_softc {
 	sg_state		state;
 	sg_flags		flags;
+	int			open_count;
 	struct devstat		*device_stats;
 	TAILQ_HEAD(, sg_rdwr)	rdwr_done;
 	struct cdev		*dev;
@@ -169,6 +170,49 @@
 }
 
 static void
+sgdevgonecb(void *arg)
+{
+	struct cam_sim    *sim;
+	struct cam_periph *periph;
+	struct sg_softc *softc;
+	int i;
+
+	periph = (struct cam_periph *)arg;
+	sim = periph->sim;
+	softc = (struct sg_softc *)periph->softc;
+
+	KASSERT(softc->open_count >= 0, ("Negative open count %d",
+		softc->open_count));
+
+	mtx_lock(sim->mtx);
+
+	/*
+	 * When we get this callback, we will get no more close calls from
+	 * devfs.  So if we have any dangling opens, we need to release the
+	 * reference held for that particular context.
+	 */
+	for (i = 0; i < softc->open_count; i++)
+		cam_periph_release_locked(periph);
+
+	softc->open_count = 0;
+
+	/*
+	 * Release the reference held for the device node, it is gone now.
+	 */
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the final call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 */
+	mtx_unlock(sim->mtx);
+}
+
+
+static void
 sgoninvalidate(struct cam_periph *periph)
 {
 	struct sg_softc *softc;
@@ -183,6 +227,12 @@
 	softc->flags |= SG_FLAG_INVALID;
 
 	/*
+	 * Tell devfs this device has gone away, and ask for a callback
+	 * when it has cleaned up its state.
+	 */
+	destroy_dev_sched_cb(softc->dev, sgdevgonecb, periph);
+
+	/*
 	 * XXX Return all queued I/O with ENXIO.
 	 * XXX Handle any transactions queued to the card
 	 *     with XPT_ABORT_CCB.
@@ -201,10 +251,9 @@
 	softc = (struct sg_softc *)periph->softc;
 	if (bootverbose)
 		xpt_print(periph->path, "removing device entry\n");
+
 	devstat_remove_entry(softc->device_stats);
-	cam_periph_unlock(periph);
-	destroy_dev(softc->dev);
-	cam_periph_lock(periph);
+
 	free(softc, M_DEVBUF);
 }
 
@@ -299,6 +348,18 @@
 			DEVSTAT_TYPE_PASS,
 			DEVSTAT_PRIORITY_PASS);
 
+	/*
+	 * Acquire a reference to the periph before we create the devfs
+	 * instance for it.  We'll release this reference once the devfs
+	 * instance has been freed.
+	 */
+	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+		xpt_print(periph->path, "%s: lost periph during "
+			  "registration!\n", __func__);
+		cam_periph_lock(periph);
+		return (CAM_REQ_CMP_ERR);
+	}
+
 	/* Register the device */
 	softc->dev = make_dev(&sg_cdevsw, periph->unit_number,
 			      UID_ROOT, GID_OPERATOR, 0600, "%s%d",
@@ -414,6 +475,8 @@
 		return (ENXIO);
 	}
 
+	softc->open_count++;
+
 	cam_periph_unlock(periph);
 
 	return (error);
@@ -422,14 +485,37 @@
 static int
 sgclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
+	struct cam_sim    *sim;
 	struct cam_periph *periph;
+	struct sg_softc   *softc;
 
 	periph = (struct cam_periph *)dev->si_drv1;
 	if (periph == NULL)
 		return (ENXIO);
 
-	cam_periph_release(periph);
+	sim = periph->sim;
+	softc = periph->softc;
 
+	mtx_lock(sim->mtx);
+
+	softc->open_count--;
+
+	cam_periph_release_locked(periph);
+
+	/*
+	 * We reference the SIM lock directly here, instead of using
+	 * cam_periph_unlock().  The reason is that the call to
+	 * cam_periph_release_locked() above could result in the periph
+	 * getting freed.  If that is the case, dereferencing the periph
+	 * with a cam_periph_unlock() call would cause a page fault.
+	 *
+	 * cam_periph_release() avoids this problem using the same method,
+	 * but we're manually acquiring and dropping the lock here to
+	 * protect the open count and avoid another lock acquisition and
+	 * release.
+	 */
+	mtx_unlock(sim->mtx);
+
 	return (0);
 }
 



More information about the Midnightbsd-cvs mailing list