From c1a15d08f497150a91ba4e61bab54b8f5c8b49b9 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 17 Apr 2013 20:18:55 +0200
Subject: [PATCH 001/320] xen-blkback: print stats about persistent grants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: xen-devel@lists.xen.org
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index dd5b2fed97e9..f7526dbb204d 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -382,10 +382,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 static void print_stats(struct xen_blkif *blkif)
 {
 	pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
-		 "  |  ds %4llu\n",
+		 "  |  ds %4llu | pg: %4u/%4u\n",
 		 current->comm, blkif->st_oo_req,
 		 blkif->st_rd_req, blkif->st_wr_req,
-		 blkif->st_f_req, blkif->st_ds_req);
+		 blkif->st_f_req, blkif->st_ds_req,
+		 blkif->persistent_gnt_c,
+		 max_mapped_grant_pages(blkif->blk_protocol));
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;

From c6cc142dac52e62e1e8a2aff5de1300202b96c66 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 17 Apr 2013 20:18:56 +0200
Subject: [PATCH 002/320] xen-blkback: use balloon pages for all mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using balloon pages for all granted pages allows us to simplify the
logic in blkback, especially in the xen_blkbk_map function, since now
we can decide if we want to map a grant persistently or not after we
have actually mapped it. This could not be done before because
persistent grants used ballooned pages, whereas non-persistent grants
used pages from the kernel.

This patch also introduces several changes, the first one is that the
list of free pages is no longer global, now each blkback instance has
it's own list of free pages that can be used to map grants. Also, a
run time parameter (max_buffer_pages) has been added in order to tune
the maximum number of free pages each blkback instance will keep in
it's buffer.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: xen-devel@lists.xen.org
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 .../ABI/stable/sysfs-bus-xen-backend          |   8 +
 drivers/block/xen-blkback/blkback.c           | 290 ++++++++++--------
 drivers/block/xen-blkback/common.h            |   5 +
 drivers/block/xen-blkback/xenbus.c            |   3 +
 4 files changed, 183 insertions(+), 123 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
index 3d5951c8bf5f..e04afe0cca99 100644
--- a/Documentation/ABI/stable/sysfs-bus-xen-backend
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -73,3 +73,11 @@ KernelVersion:	3.0
 Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Description:
                 Number of sectors written by the frontend.
+
+What:           /sys/module/xen_blkback/parameters/max_buffer_pages
+Date:           March 2013
+KernelVersion:  3.10
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of free pages to keep in each block
+                backend buffer.
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index f7526dbb204d..8245c6bb9539 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -63,6 +63,21 @@ static int xen_blkif_reqs = 64;
 module_param_named(reqs, xen_blkif_reqs, int, 0);
 MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
 
+/*
+ * Maximum number of unused free pages to keep in the internal buffer.
+ * Setting this to a value too low will reduce memory used in each backend,
+ * but can have a performance penalty.
+ *
+ * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can
+ * be set to a lower value that might degrade performance on some intensive
+ * IO workloads.
+ */
+
+static int xen_blkif_max_buffer_pages = 704;
+module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+MODULE_PARM_DESC(max_buffer_pages,
+"Maximum number of free pages to keep in each block backend buffer");
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
@@ -82,10 +97,14 @@ struct pending_req {
 	int			status;
 	struct list_head	free_list;
 	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
 
+/* Number of free pages to remove on each call to free_xenballooned_pages */
+#define NUM_BATCH_FREE_PAGES 10
+
 struct xen_blkbk {
 	struct pending_req	*pending_reqs;
 	/* List of all 'pending_req' available */
@@ -93,8 +112,6 @@ struct xen_blkbk {
 	/* And its spinlock. */
 	spinlock_t		pending_free_lock;
 	wait_queue_head_t	pending_free_wq;
-	/* The list of all pages that are available. */
-	struct page		**pending_pages;
 	/* And the grant handles that are available. */
 	grant_handle_t		*pending_grant_handles;
 };
@@ -143,14 +160,66 @@ static inline int vaddr_pagenr(struct pending_req *req, int seg)
 		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
 }
 
-#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
-
-static inline unsigned long vaddr(struct pending_req *req, int seg)
+static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
-	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
-	return (unsigned long)pfn_to_kaddr(pfn);
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	if (list_empty(&blkif->free_pages)) {
+		BUG_ON(blkif->free_pages_num != 0);
+		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+		return alloc_xenballooned_pages(1, page, false);
+	}
+	BUG_ON(blkif->free_pages_num == 0);
+	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+	list_del(&page[0]->lru);
+	blkif->free_pages_num--;
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+
+	return 0;
 }
 
+static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+                                  int num)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	for (i = 0; i < num; i++)
+		list_add(&page[i]->lru, &blkif->free_pages);
+	blkif->free_pages_num += num;
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+}
+
+static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+{
+	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
+	struct page *page[NUM_BATCH_FREE_PAGES];
+	unsigned int num_pages = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	while (blkif->free_pages_num > num) {
+		BUG_ON(list_empty(&blkif->free_pages));
+		page[num_pages] = list_first_entry(&blkif->free_pages,
+		                                   struct page, lru);
+		list_del(&page[num_pages]->lru);
+		blkif->free_pages_num--;
+		if (++num_pages == NUM_BATCH_FREE_PAGES) {
+			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+			free_xenballooned_pages(num_pages, page);
+			spin_lock_irqsave(&blkif->free_pages_lock, flags);
+			num_pages = 0;
+		}
+	}
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+	if (num_pages != 0)
+		free_xenballooned_pages(num_pages, page);
+}
+
+#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
+
 #define pending_handle(_req, _seg) \
 	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
 
@@ -170,7 +239,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
-static void add_persistent_gnt(struct rb_root *root,
+static int add_persistent_gnt(struct rb_root *root,
 			       struct persistent_gnt *persistent_gnt)
 {
 	struct rb_node **new = &(root->rb_node), *parent = NULL;
@@ -186,14 +255,15 @@ static void add_persistent_gnt(struct rb_root *root,
 		else if (persistent_gnt->gnt > this->gnt)
 			new = &((*new)->rb_right);
 		else {
-			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
-			BUG();
+			pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
+			return -EINVAL;
 		}
 	}
 
 	/* Add new node and rebalance tree. */
 	rb_link_node(&(persistent_gnt->node), parent, new);
 	rb_insert_color(&(persistent_gnt->node), root);
+	return 0;
 }
 
 static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
@@ -215,7 +285,8 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
 	return NULL;
 }
 
-static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+                                 unsigned int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -240,7 +311,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num)
 			ret = gnttab_unmap_refs(unmap, NULL, pages,
 				segs_to_unmap);
 			BUG_ON(ret);
-			free_xenballooned_pages(segs_to_unmap, pages);
+			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
 		}
 
@@ -422,13 +493,19 @@ int xen_blkif_schedule(void *arg)
 		if (do_block_io_op(blkif))
 			blkif->waiting_reqs = 1;
 
+		/* Shrink if we have more than xen_blkif_max_buffer_pages */
+		shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
 	}
 
+	/* Since we are shutting down remove all pages from the buffer */
+	shrink_free_pagepool(blkif, 0 /* All */);
+
 	/* Free all persistent grant pages */
 	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-		free_persistent_gnts(&blkif->persistent_gnts,
+		free_persistent_gnts(blkif, &blkif->persistent_gnts,
 			blkif->persistent_gnt_c);
 
 	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
@@ -457,23 +534,25 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
 	grant_handle_t handle;
+	struct xen_blkif *blkif = req->blkif;
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
 		if (!test_bit(i, req->unmap_seg))
 			continue;
 		handle = pending_handle(req, i);
+		pages[invcount] = req->pages[i];
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[invcount]),
 				    GNTMAP_host_map, handle);
 		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-		pages[invcount] = virt_to_page(vaddr(req, i));
 		invcount++;
 	}
 
 	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
 	BUG_ON(ret);
+	put_free_pages(blkif, pages, invcount);
 }
 
 static int xen_blkbk_map(struct blkif_request *req,
@@ -487,8 +566,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 	struct persistent_gnt *persistent_gnt = NULL;
 	struct xen_blkif *blkif = pending_req->blkif;
 	phys_addr_t addr = 0;
-	int i, j;
-	bool new_map;
+	int i, seg_idx, new_map_idx;
 	int nseg = req->u.rw.nr_segments;
 	int segs_to_map = 0;
 	int ret = 0;
@@ -517,68 +595,16 @@ static int xen_blkbk_map(struct blkif_request *req,
 			 * We are using persistent grants and
 			 * the grant is already mapped
 			 */
-			new_map = false;
-		} else if (use_persistent_gnts &&
-			   blkif->persistent_gnt_c <
-			   max_mapped_grant_pages(blkif->blk_protocol)) {
-			/*
-			 * We are using persistent grants, the grant is
-			 * not mapped but we have room for it
-			 */
-			new_map = true;
-			persistent_gnt = kmalloc(
-				sizeof(struct persistent_gnt),
-				GFP_KERNEL);
-			if (!persistent_gnt)
-				return -ENOMEM;
-			if (alloc_xenballooned_pages(1, &persistent_gnt->page,
-			    false)) {
-				kfree(persistent_gnt);
-				return -ENOMEM;
-			}
-			persistent_gnt->gnt = req->u.rw.seg[i].gref;
-			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
-
-			pages_to_gnt[segs_to_map] =
-				persistent_gnt->page;
-			addr = (unsigned long) pfn_to_kaddr(
-				page_to_pfn(persistent_gnt->page));
-
-			add_persistent_gnt(&blkif->persistent_gnts,
-				persistent_gnt);
-			blkif->persistent_gnt_c++;
-			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
-				 persistent_gnt->gnt, blkif->persistent_gnt_c,
-				 max_mapped_grant_pages(blkif->blk_protocol));
-		} else {
-			/*
-			 * We are either using persistent grants and
-			 * hit the maximum limit of grants mapped,
-			 * or we are not using persistent grants.
-			 */
-			if (use_persistent_gnts &&
-				!blkif->vbd.overflow_max_grants) {
-				blkif->vbd.overflow_max_grants = 1;
-				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
-					 blkif->domid, blkif->vbd.handle);
-			}
-			new_map = true;
-			pages[i] = blkbk->pending_page(pending_req, i);
-			addr = vaddr(pending_req, i);
-			pages_to_gnt[segs_to_map] =
-				blkbk->pending_page(pending_req, i);
-		}
-
-		if (persistent_gnt) {
 			pages[i] = persistent_gnt->page;
 			persistent_gnts[i] = persistent_gnt;
 		} else {
+			if (get_free_page(blkif, &pages[i]))
+				goto out_of_memory;
+			addr = vaddr(pages[i]);
+			pages_to_gnt[segs_to_map] = pages[i];
 			persistent_gnts[i] = NULL;
-		}
-
-		if (new_map) {
 			flags = GNTMAP_host_map;
-			if (!persistent_gnt &&
+			if (!use_persistent_gnts &&
 			    (pending_req->operation != BLKIF_OP_READ))
 				flags |= GNTMAP_readonly;
 			gnttab_set_map_op(&map[segs_to_map++], addr,
@@ -598,48 +624,81 @@ static int xen_blkbk_map(struct blkif_request *req,
 	 * the page from the other domain.
 	 */
 	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	for (i = 0, j = 0; i < nseg; i++) {
-		if (!persistent_gnts[i] ||
-		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+	for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) {
+		if (!persistent_gnts[seg_idx]) {
 			/* This is a newly mapped grant */
-			BUG_ON(j >= segs_to_map);
-			if (unlikely(map[j].status != 0)) {
+			BUG_ON(new_map_idx >= segs_to_map);
+			if (unlikely(map[new_map_idx].status != 0)) {
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-				map[j].handle = BLKBACK_INVALID_HANDLE;
+				pending_handle(pending_req, seg_idx) = BLKBACK_INVALID_HANDLE;
 				ret |= 1;
-				if (persistent_gnts[i]) {
-					rb_erase(&persistent_gnts[i]->node,
-						 &blkif->persistent_gnts);
-					blkif->persistent_gnt_c--;
-					kfree(persistent_gnts[i]);
-					persistent_gnts[i] = NULL;
-				}
-			}
-		}
-		if (persistent_gnts[i]) {
-			if (persistent_gnts[i]->handle ==
-			    BLKBACK_INVALID_HANDLE) {
+				new_map_idx++;
 				/*
-				 * If this is a new persistent grant
-				 * save the handler
+				 * No need to set unmap_seg bit, since
+				 * we can not unmap this grant because
+				 * the handle is invalid.
 				 */
-				persistent_gnts[i]->handle = map[j++].handle;
+				continue;
 			}
-			pending_handle(pending_req, i) =
-				persistent_gnts[i]->handle;
-
-			if (ret)
-				continue;
+			pending_handle(pending_req, seg_idx) = map[new_map_idx].handle;
 		} else {
-			pending_handle(pending_req, i) = map[j++].handle;
-			bitmap_set(pending_req->unmap_seg, i, 1);
-
-			if (ret)
-				continue;
+			/* This grant is persistent and already mapped */
+			goto next;
 		}
-		seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+		if (use_persistent_gnts &&
+		    blkif->persistent_gnt_c <
+		    max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
+				                 GFP_KERNEL);
+			if (!persistent_gnt) {
+				/*
+				 * If we don't have enough memory to
+				 * allocate the persistent_gnt struct
+				 * map this grant non-persistenly
+				 */
+				goto next_unmap;
+			}
+			persistent_gnt->gnt = map[new_map_idx].ref;
+			persistent_gnt->handle = map[new_map_idx].handle;
+			persistent_gnt->page = pages[seg_idx];
+			if (add_persistent_gnt(&blkif->persistent_gnts,
+			                       persistent_gnt)) {
+				kfree(persistent_gnt);
+				persistent_gnt = NULL;
+				goto next_unmap;
+			}
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+			new_map_idx++;
+			goto next;
+		}
+		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
+			blkif->vbd.overflow_max_grants = 1;
+			pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+			         blkif->domid, blkif->vbd.handle);
+		}
+next_unmap:
+		/*
+		 * We could not map this grant persistently, so use it as
+		 * a non-persistent grant.
+		 */
+		bitmap_set(pending_req->unmap_seg, seg_idx, 1);
+		new_map_idx++;
+next:
+		seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9);
 	}
 	return ret;
+
+out_of_memory:
+	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+	put_free_pages(blkif, pages_to_gnt, segs_to_map);
+	return -ENOMEM;
 }
 
 static int dispatch_discard_io(struct xen_blkif *blkif,
@@ -863,7 +922,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
-	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page **pages = pending_req->pages;
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -1090,22 +1149,14 @@ static int __init xen_blkif_init(void)
 					xen_blkif_reqs, GFP_KERNEL);
 	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
 					mmap_pages, GFP_KERNEL);
-	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
-					mmap_pages, GFP_KERNEL);
 
-	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
-	    !blkbk->pending_pages) {
+	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles) {
 		rc = -ENOMEM;
 		goto out_of_memory;
 	}
 
 	for (i = 0; i < mmap_pages; i++) {
 		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
-		if (blkbk->pending_pages[i] == NULL) {
-			rc = -ENOMEM;
-			goto out_of_memory;
-		}
 	}
 	rc = xen_blkif_interface_init();
 	if (rc)
@@ -1130,13 +1181,6 @@ static int __init xen_blkif_init(void)
  failed_init:
 	kfree(blkbk->pending_reqs);
 	kfree(blkbk->pending_grant_handles);
-	if (blkbk->pending_pages) {
-		for (i = 0; i < mmap_pages; i++) {
-			if (blkbk->pending_pages[i])
-				__free_page(blkbk->pending_pages[i]);
-		}
-		kfree(blkbk->pending_pages);
-	}
 	kfree(blkbk);
 	blkbk = NULL;
 	return rc;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 60103e2517ba..6c73c3855e65 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -220,6 +220,11 @@ struct xen_blkif {
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
 
+	/* buffer of free pages to map grant refs */
+	spinlock_t		free_pages_lock;
+	int			free_pages_num;
+	struct list_head	free_pages;
+
 	/* statistics */
 	unsigned long		st_print;
 	unsigned long long			st_rd_req;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8bfd1bcf95ec..24f7f6d87717 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -118,6 +118,9 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
+	spin_lock_init(&blkif->free_pages_lock);
+	INIT_LIST_HEAD(&blkif->free_pages);
+	blkif->free_pages_num = 0;
 
 	return blkif;
 }

From 3f3aad5e6686ed49242bbf86de378b39f119ec9d Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 17 Apr 2013 20:18:57 +0200
Subject: [PATCH 003/320] xen-blkback: implement LRU mechanism for persistent
 grants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This mechanism allows blkback to change the number of grants
persistently mapped at run time.

The algorithm uses a simple LRU mechanism that removes (if needed) the
persistent grants that have not been used since the last LRU run, or
if all grants have been used it removes the first grants in the list
(that are not in use).

The algorithm allows the user to change the maximum number of
persistent grants, by changing max_persistent_grants in sysfs.

Since we are storing the persistent grants used inside the request
struct (to be able to mark them as "unused" when unmapping), we no
longer need the bitmap (unmap_seg).

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: xen-devel@lists.xen.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 .../ABI/stable/sysfs-bus-xen-backend          |  10 +
 drivers/block/xen-blkback/blkback.c           | 287 ++++++++++++++----
 drivers/block/xen-blkback/common.h            |  18 ++
 drivers/block/xen-blkback/xenbus.c            |   2 +
 4 files changed, 260 insertions(+), 57 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
index e04afe0cca99..947db11350bc 100644
--- a/Documentation/ABI/stable/sysfs-bus-xen-backend
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -81,3 +81,13 @@ Contact:        Roger Pau Monné <roger.pau@citrix.com>
 Description:
                 Maximum number of free pages to keep in each block
                 backend buffer.
+
+What:           /sys/module/xen_blkback/parameters/max_persistent_grants
+Date:           March 2013
+KernelVersion:  3.10
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of grants to map persistently in
+                blkback. If the frontend tries to use more than
+                max_persistent_grants, the LRU kicks in and starts
+                removing 5% of max_persistent_grants every 100ms.
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 8245c6bb9539..17052f74ebe5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -78,6 +78,36 @@ module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
 MODULE_PARM_DESC(max_buffer_pages,
 "Maximum number of free pages to keep in each block backend buffer");
 
+/*
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total numbers of grants that can be used
+ * to fill the ring, but since this might become too high, specially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it up using a LRU
+ * algorithm.
+ */
+
+static int xen_blkif_max_pgrants = 352;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
+
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in ms.
+ */
+#define LRU_INTERVAL 100
+
+/*
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. The percent number of grants to be removed at each LRU
+ * execution.
+ */
+#define LRU_PERCENT_CLEAN 5
+
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
@@ -96,8 +126,8 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
-	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt	*persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -118,36 +148,6 @@ struct xen_blkbk {
 
 static struct xen_blkbk *blkbk;
 
-/*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
- */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
-{
-	switch (protocol) {
-	case BLKIF_PROTOCOL_NATIVE:
-		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_32:
-		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_64:
-		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-
 /*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have have up to
@@ -239,13 +239,29 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
-static int add_persistent_gnt(struct rb_root *root,
+/*
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single-thread for each backed, so we
+ * can be sure that this functions will never be called recursively.
+ *
+ * The only exception to that is put_persistent_grant, that can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
+ */
+static int add_persistent_gnt(struct xen_blkif *blkif,
 			       struct persistent_gnt *persistent_gnt)
 {
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct rb_node **new = NULL, *parent = NULL;
 	struct persistent_gnt *this;
 
+	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+		if (!blkif->vbd.overflow_max_grants)
+			blkif->vbd.overflow_max_grants = 1;
+		return -EBUSY;
+	}
 	/* Figure out where to put new node */
+	new = &blkif->persistent_gnts.rb_node;
 	while (*new) {
 		this = container_of(*new, struct persistent_gnt, node);
 
@@ -260,18 +276,23 @@ static int add_persistent_gnt(struct rb_root *root,
 		}
 	}
 
+	bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
+	set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
 	/* Add new node and rebalance tree. */
 	rb_link_node(&(persistent_gnt->node), parent, new);
-	rb_insert_color(&(persistent_gnt->node), root);
+	rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
+	blkif->persistent_gnt_c++;
+	atomic_inc(&blkif->persistent_gnt_in_use);
 	return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
 						 grant_ref_t gref)
 {
 	struct persistent_gnt *data;
-	struct rb_node *node = root->rb_node;
+	struct rb_node *node = NULL;
 
+	node = blkif->persistent_gnts.rb_node;
 	while (node) {
 		data = container_of(node, struct persistent_gnt, node);
 
@@ -279,12 +300,29 @@ static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
 			node = node->rb_left;
 		else if (gref > data->gnt)
 			node = node->rb_right;
-		else
+		else {
+			if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+				pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+				return NULL;
+			}
+			set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+			atomic_inc(&blkif->persistent_gnt_in_use);
 			return data;
+		}
 	}
 	return NULL;
 }
 
+static void put_persistent_gnt(struct xen_blkif *blkif,
+                               struct persistent_gnt *persistent_gnt)
+{
+	if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+	          pr_alert_ratelimited(DRV_PFX " freeing a grant already unused");
+	set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+	clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+	atomic_dec(&blkif->persistent_gnt_in_use);
+}
+
 static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
                                  unsigned int num)
 {
@@ -322,6 +360,129 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
 	BUG_ON(num != 0);
 }
 
+static void unmap_purged_grants(struct work_struct *work)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret, segs_to_unmap = 0;
+	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+
+	while(!list_empty(&blkif->persistent_purge_list)) {
+		persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+		                                  struct persistent_gnt,
+		                                  remove_node);
+		list_del(&persistent_gnt->remove_node);
+
+		gnttab_set_unmap_op(&unmap[segs_to_unmap],
+			vaddr(persistent_gnt->page),
+			GNTMAP_host_map,
+			persistent_gnt->handle);
+
+		pages[segs_to_unmap] = persistent_gnt->page;
+
+		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, pages,
+				segs_to_unmap);
+			BUG_ON(ret);
+			put_free_pages(blkif, pages, segs_to_unmap);
+			segs_to_unmap = 0;
+		}
+		kfree(persistent_gnt);
+	}
+	if (segs_to_unmap > 0) {
+		ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+		BUG_ON(ret);
+		put_free_pages(blkif, pages, segs_to_unmap);
+	}
+}
+
+static void purge_persistent_gnt(struct xen_blkif *blkif)
+{
+	struct persistent_gnt *persistent_gnt;
+	struct rb_node *n;
+	unsigned int num_clean, total;
+	bool scan_used = false;
+	struct rb_root *root;
+
+	if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
+	    (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+	    !blkif->vbd.overflow_max_grants)) {
+		return;
+	}
+
+	if (work_pending(&blkif->persistent_purge_work)) {
+		pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+		return;
+	}
+
+	num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+	num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+	num_clean = min(blkif->persistent_gnt_c, num_clean);
+	if (num_clean >
+	    (blkif->persistent_gnt_c -
+	    atomic_read(&blkif->persistent_gnt_in_use)))
+		return;
+
+	/*
+	 * At this point, we can assure that there will be no calls
+         * to get_persistent_grant (because we are executing this code from
+         * xen_blkif_schedule), there can only be calls to put_persistent_gnt,
+         * which means that the number of currently used grants will go down,
+         * but never up, so we will always be able to remove the requested
+         * number of grants.
+	 */
+
+	total = num_clean;
+
+	pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+
+	INIT_LIST_HEAD(&blkif->persistent_purge_list);
+	root = &blkif->persistent_gnts;
+purge_list:
+	foreach_grant_safe(persistent_gnt, n, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+
+		if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+			continue;
+		if (!scan_used &&
+		    (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+			continue;
+
+		rb_erase(&persistent_gnt->node, root);
+		list_add(&persistent_gnt->remove_node,
+		         &blkif->persistent_purge_list);
+		if (--num_clean == 0)
+			goto finished;
+	}
+	/*
+	 * If we get here it means we also need to start cleaning
+	 * grants that were used since last purge in order to cope
+	 * with the requested num
+	 */
+	if (!scan_used) {
+		pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+		scan_used = true;
+		goto purge_list;
+	}
+finished:
+	/* Remove the "used" flag from all the persistent grants */
+	foreach_grant_safe(persistent_gnt, n, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+		clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+	}
+	blkif->persistent_gnt_c -= (total - num_clean);
+	blkif->vbd.overflow_max_grants = 0;
+
+	/* We can defer this work */
+	INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
+	schedule_work(&blkif->persistent_purge_work);
+	pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+	return;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -453,12 +614,12 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 static void print_stats(struct xen_blkif *blkif)
 {
 	pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
-		 "  |  ds %4llu | pg: %4u/%4u\n",
+		 "  |  ds %4llu | pg: %4u/%4d\n",
 		 current->comm, blkif->st_oo_req,
 		 blkif->st_rd_req, blkif->st_wr_req,
 		 blkif->st_f_req, blkif->st_ds_req,
 		 blkif->persistent_gnt_c,
-		 max_mapped_grant_pages(blkif->blk_protocol));
+		 xen_blkif_max_pgrants);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
@@ -470,6 +631,7 @@ int xen_blkif_schedule(void *arg)
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	unsigned long timeout;
 
 	xen_blkif_get(blkif);
 
@@ -479,13 +641,21 @@ int xen_blkif_schedule(void *arg)
 		if (unlikely(vbd->size != vbd_sz(vbd)))
 			xen_vbd_resize(blkif);
 
-		wait_event_interruptible(
+		timeout = msecs_to_jiffies(LRU_INTERVAL);
+
+		timeout = wait_event_interruptible_timeout(
 			blkif->wq,
-			blkif->waiting_reqs || kthread_should_stop());
-		wait_event_interruptible(
+			blkif->waiting_reqs || kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
+		timeout = wait_event_interruptible_timeout(
 			blkbk->pending_free_wq,
 			!list_empty(&blkbk->pending_free) ||
-			kthread_should_stop());
+			kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
 
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
@@ -493,6 +663,13 @@ int xen_blkif_schedule(void *arg)
 		if (do_block_io_op(blkif))
 			blkif->waiting_reqs = 1;
 
+purge_gnt_list:
+		if (blkif->vbd.feature_gnt_persistent &&
+		    time_after(jiffies, blkif->next_lru)) {
+			purge_persistent_gnt(blkif);
+			blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+		}
+
 		/* Shrink if we have more than xen_blkif_max_buffer_pages */
 		shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
 
@@ -538,8 +715,10 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
-		if (!test_bit(i, req->unmap_seg))
+		if (req->persistent_gnts[i] != NULL) {
+			put_persistent_gnt(blkif, req->persistent_gnts[i]);
 			continue;
+		}
 		handle = pending_handle(req, i);
 		pages[invcount] = req->pages[i];
 		if (handle == BLKBACK_INVALID_HANDLE)
@@ -561,8 +740,8 @@ static int xen_blkbk_map(struct blkif_request *req,
 			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt **persistent_gnts = pending_req->persistent_gnts;
 	struct persistent_gnt *persistent_gnt = NULL;
 	struct xen_blkif *blkif = pending_req->blkif;
 	phys_addr_t addr = 0;
@@ -574,9 +753,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 
 	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
-	BUG_ON(blkif->persistent_gnt_c >
-		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
 	 * assign map[..] with the PFN of the page in our domain with the
@@ -587,7 +763,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 
 		if (use_persistent_gnts)
 			persistent_gnt = get_persistent_gnt(
-				&blkif->persistent_gnts,
+				blkif,
 				req->u.rw.seg[i].gref);
 
 		if (persistent_gnt) {
@@ -623,7 +799,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 	for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) {
 		if (!persistent_gnts[seg_idx]) {
 			/* This is a newly mapped grant */
@@ -646,11 +821,10 @@ static int xen_blkbk_map(struct blkif_request *req,
 			goto next;
 		}
 		if (use_persistent_gnts &&
-		    blkif->persistent_gnt_c <
-		    max_mapped_grant_pages(blkif->blk_protocol)) {
+		    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
 			/*
 			 * We are using persistent grants, the grant is
-			 * not mapped but we have room for it
+			 * not mapped but we might have room for it.
 			 */
 			persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
 				                 GFP_KERNEL);
@@ -665,16 +839,16 @@ static int xen_blkbk_map(struct blkif_request *req,
 			persistent_gnt->gnt = map[new_map_idx].ref;
 			persistent_gnt->handle = map[new_map_idx].handle;
 			persistent_gnt->page = pages[seg_idx];
-			if (add_persistent_gnt(&blkif->persistent_gnts,
+			if (add_persistent_gnt(blkif,
 			                       persistent_gnt)) {
 				kfree(persistent_gnt);
 				persistent_gnt = NULL;
 				goto next_unmap;
 			}
-			blkif->persistent_gnt_c++;
+			persistent_gnts[seg_idx] = persistent_gnt;
 			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
 				 persistent_gnt->gnt, blkif->persistent_gnt_c,
-				 max_mapped_grant_pages(blkif->blk_protocol));
+				 xen_blkif_max_pgrants);
 			new_map_idx++;
 			goto next;
 		}
@@ -688,7 +862,6 @@ static int xen_blkbk_map(struct blkif_request *req,
 		 * We could not map this grant persistently, so use it as
 		 * a non-persistent grant.
 		 */
-		bitmap_set(pending_req->unmap_seg, seg_idx, 1);
 		new_map_idx++;
 next:
 		seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9);
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 6c73c3855e65..af9bed48f773 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -182,12 +182,23 @@ struct xen_vbd {
 
 struct backend_info;
 
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE	2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE		0
+/*
+ * This persistent grant has been used, this flag is set when we remove the
+ * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE	1
 
 struct persistent_gnt {
 	struct page *page;
 	grant_ref_t gnt;
 	grant_handle_t handle;
+	DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
 	struct rb_node node;
+	struct list_head remove_node;
 };
 
 struct xen_blkif {
@@ -219,6 +230,12 @@ struct xen_blkif {
 	/* tree to store persistent grants */
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
+	atomic_t		persistent_gnt_in_use;
+	unsigned long           next_lru;
+
+	/* used by the kworker that offload work from the persistent purge */
+	struct list_head	persistent_purge_list;
+	struct work_struct	persistent_purge_work;
 
 	/* buffer of free pages to map grant refs */
 	spinlock_t		free_pages_lock;
@@ -262,6 +279,7 @@ int xen_blkif_xenbus_init(void);
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 24f7f6d87717..e0fd92a2a4cd 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -98,6 +98,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 		err = PTR_ERR(blkif->xenblkd);
 		blkif->xenblkd = NULL;
 		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+		return;
 	}
 }
 
@@ -121,6 +122,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	spin_lock_init(&blkif->free_pages_lock);
 	INIT_LIST_HEAD(&blkif->free_pages);
 	blkif->free_pages_num = 0;
+	atomic_set(&blkif->persistent_gnt_in_use, 0);
 
 	return blkif;
 }

From bb6acb289fbaac0e99eb552abdefc80a2186ef3f Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 17 Apr 2013 20:18:58 +0200
Subject: [PATCH 004/320] xen-blkback: move pending handles list from blkbk to
 pending_req
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moving grant ref handles from blkbk to pending_req will allow us to
get rid of the shared blkbk structure.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: xen-devel@lists.xen.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 17052f74ebe5..ae7dc92ad3cf 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -128,6 +128,7 @@ struct pending_req {
 	struct list_head	free_list;
 	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct persistent_gnt	*persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	grant_handle_t		grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -142,8 +143,6 @@ struct xen_blkbk {
 	/* And its spinlock. */
 	spinlock_t		pending_free_lock;
 	wait_queue_head_t	pending_free_wq;
-	/* And the grant handles that are available. */
-	grant_handle_t		*pending_grant_handles;
 };
 
 static struct xen_blkbk *blkbk;
@@ -221,7 +220,7 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
 #define pending_handle(_req, _seg) \
-	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
+	(_req->grant_handles[_seg])
 
 
 static int do_block_io_op(struct xen_blkif *blkif);
@@ -1304,7 +1303,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 static int __init xen_blkif_init(void)
 {
-	int i, mmap_pages;
+	int i;
 	int rc = 0;
 
 	if (!xen_domain())
@@ -1316,21 +1315,15 @@ static int __init xen_blkif_init(void)
 		return -ENOMEM;
 	}
 
-	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
 
 	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
 					xen_blkif_reqs, GFP_KERNEL);
-	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
-					mmap_pages, GFP_KERNEL);
 
-	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles) {
+	if (!blkbk->pending_reqs) {
 		rc = -ENOMEM;
 		goto out_of_memory;
 	}
 
-	for (i = 0; i < mmap_pages; i++) {
-		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-	}
 	rc = xen_blkif_interface_init();
 	if (rc)
 		goto failed_init;
@@ -1353,7 +1346,6 @@ static int __init xen_blkif_init(void)
 	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
  failed_init:
 	kfree(blkbk->pending_reqs);
-	kfree(blkbk->pending_grant_handles);
 	kfree(blkbk);
 	blkbk = NULL;
 	return rc;

From bf0720c48c7cefd127ed2329e6d0e40b39fa4d0e Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 17 Apr 2013 20:18:59 +0200
Subject: [PATCH 005/320] xen-blkback: make the queue of free requests per
 backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the last dependency from blkbk by moving the list of free
requests to blkif. This change reduces the contention on the list of
available requests.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: xen-devel@lists.xen.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 123 ++++------------------------
 drivers/block/xen-blkback/common.h  |  30 +++++++
 drivers/block/xen-blkback/xenbus.c  |  26 ++++++
 3 files changed, 74 insertions(+), 105 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index ae7dc92ad3cf..90a57552f4b7 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -49,20 +49,6 @@
 #include <xen/balloon.h>
 #include "common.h"
 
-/*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
- *
- * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
- *
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
- */
-static int xen_blkif_reqs = 64;
-module_param_named(reqs, xen_blkif_reqs, int, 0);
-MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-
 /*
  * Maximum number of unused free pages to keep in the internal buffer.
  * Setting this to a value too low will reduce memory used in each backend,
@@ -112,53 +98,11 @@ MODULE_PARM_DESC(max_persistent_grants,
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
 
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
- */
-struct pending_req {
-	struct xen_blkif	*blkif;
-	u64			id;
-	int			nr_pages;
-	atomic_t		pendcnt;
-	unsigned short		operation;
-	int			status;
-	struct list_head	free_list;
-	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt	*persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	grant_handle_t		grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
-
 #define BLKBACK_INVALID_HANDLE (~0)
 
 /* Number of free pages to remove on each call to free_xenballooned_pages */
 #define NUM_BATCH_FREE_PAGES 10
 
-struct xen_blkbk {
-	struct pending_req	*pending_reqs;
-	/* List of all 'pending_req' available */
-	struct list_head	pending_free;
-	/* And its spinlock. */
-	spinlock_t		pending_free_lock;
-	wait_queue_head_t	pending_free_wq;
-};
-
-static struct xen_blkbk *blkbk;
-
-/*
- * Little helpful macro to figure out the index and virtual address of the
- * pending_pages[..]. For each 'pending_req' we have have up to
- * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
- * 10 and would index in the pending_pages[..].
- */
-static inline int vaddr_pagenr(struct pending_req *req, int seg)
-{
-	return (req - blkbk->pending_reqs) *
-		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
-}
-
 static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
 	unsigned long flags;
@@ -485,18 +429,18 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(void)
+static struct pending_req *alloc_req(struct xen_blkif *blkif)
 {
 	struct pending_req *req = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-	if (!list_empty(&blkbk->pending_free)) {
-		req = list_entry(blkbk->pending_free.next, struct pending_req,
+	spin_lock_irqsave(&blkif->pending_free_lock, flags);
+	if (!list_empty(&blkif->pending_free)) {
+		req = list_entry(blkif->pending_free.next, struct pending_req,
 				 free_list);
 		list_del(&req->free_list);
 	}
-	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
 	return req;
 }
 
@@ -504,17 +448,17 @@ static struct pending_req *alloc_req(void)
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct pending_req *req)
+static void free_req(struct xen_blkif *blkif, struct pending_req *req)
 {
 	unsigned long flags;
 	int was_empty;
 
-	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-	was_empty = list_empty(&blkbk->pending_free);
-	list_add(&req->free_list, &blkbk->pending_free);
-	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	spin_lock_irqsave(&blkif->pending_free_lock, flags);
+	was_empty = list_empty(&blkif->pending_free);
+	list_add(&req->free_list, &blkif->pending_free);
+	spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
 	if (was_empty)
-		wake_up(&blkbk->pending_free_wq);
+		wake_up(&blkif->pending_free_wq);
 }
 
 /*
@@ -649,8 +593,8 @@ int xen_blkif_schedule(void *arg)
 		if (timeout == 0)
 			goto purge_gnt_list;
 		timeout = wait_event_interruptible_timeout(
-			blkbk->pending_free_wq,
-			!list_empty(&blkbk->pending_free) ||
+			blkif->pending_free_wq,
+			!list_empty(&blkif->pending_free) ||
 			kthread_should_stop(),
 			timeout);
 		if (timeout == 0)
@@ -907,7 +851,7 @@ static int dispatch_other_io(struct xen_blkif *blkif,
 			     struct blkif_request *req,
 			     struct pending_req *pending_req)
 {
-	free_req(pending_req);
+	free_req(blkif, pending_req);
 	make_response(blkif, req->u.other.id, req->operation,
 		      BLKIF_RSP_EOPNOTSUPP);
 	return -EIO;
@@ -967,7 +911,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 			if (atomic_read(&pending_req->blkif->drain))
 				complete(&pending_req->blkif->drain_complete);
 		}
-		free_req(pending_req);
+		free_req(pending_req->blkif, pending_req);
 	}
 }
 
@@ -1010,7 +954,7 @@ __do_block_io_op(struct xen_blkif *blkif)
 			break;
 		}
 
-		pending_req = alloc_req();
+		pending_req = alloc_req(blkif);
 		if (NULL == pending_req) {
 			blkif->st_oo_req++;
 			more_to_do = 1;
@@ -1044,7 +988,7 @@ __do_block_io_op(struct xen_blkif *blkif)
 				goto done;
 			break;
 		case BLKIF_OP_DISCARD:
-			free_req(pending_req);
+			free_req(blkif, pending_req);
 			if (dispatch_discard_io(blkif, &req))
 				goto done;
 			break;
@@ -1246,7 +1190,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
  fail_response:
 	/* Haven't submitted any bio's yet. */
 	make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
-	free_req(pending_req);
+	free_req(blkif, pending_req);
 	msleep(1); /* back off a bit */
 	return -EIO;
 
@@ -1303,51 +1247,20 @@ static void make_response(struct xen_blkif *blkif, u64 id,
 
 static int __init xen_blkif_init(void)
 {
-	int i;
 	int rc = 0;
 
 	if (!xen_domain())
 		return -ENODEV;
 
-	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
-	if (!blkbk) {
-		pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
-		return -ENOMEM;
-	}
-
-
-	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
-					xen_blkif_reqs, GFP_KERNEL);
-
-	if (!blkbk->pending_reqs) {
-		rc = -ENOMEM;
-		goto out_of_memory;
-	}
-
 	rc = xen_blkif_interface_init();
 	if (rc)
 		goto failed_init;
 
-	INIT_LIST_HEAD(&blkbk->pending_free);
-	spin_lock_init(&blkbk->pending_free_lock);
-	init_waitqueue_head(&blkbk->pending_free_wq);
-
-	for (i = 0; i < xen_blkif_reqs; i++)
-		list_add_tail(&blkbk->pending_reqs[i].free_list,
-			      &blkbk->pending_free);
-
 	rc = xen_blkif_xenbus_init();
 	if (rc)
 		goto failed_init;
 
-	return 0;
-
- out_of_memory:
-	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
  failed_init:
-	kfree(blkbk->pending_reqs);
-	kfree(blkbk);
-	blkbk = NULL;
 	return rc;
 }
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index af9bed48f773..e33fafa0facd 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -192,6 +192,9 @@ struct backend_info;
  */
 #define PERSISTENT_GNT_WAS_ACTIVE	1
 
+/* Number of requests that we can fit in a ring */
+#define XEN_BLKIF_REQS			32
+
 struct persistent_gnt {
 	struct page *page;
 	grant_ref_t gnt;
@@ -242,6 +245,14 @@ struct xen_blkif {
 	int			free_pages_num;
 	struct list_head	free_pages;
 
+	/* Allocation of pending_reqs */
+	struct pending_req	*pending_reqs;
+	/* List of all 'pending_req' available */
+	struct list_head	pending_free;
+	/* And its spinlock. */
+	spinlock_t		pending_free_lock;
+	wait_queue_head_t	pending_free_wq;
+
 	/* statistics */
 	unsigned long		st_print;
 	unsigned long long			st_rd_req;
@@ -255,6 +266,25 @@ struct xen_blkif {
 	wait_queue_head_t	waiting_to_free;
 };
 
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+	struct xen_blkif	*blkif;
+	u64			id;
+	int			nr_pages;
+	atomic_t		pendcnt;
+	unsigned short		operation;
+	int			status;
+	struct list_head	free_list;
+	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt	*persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	grant_handle_t		grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
 
 #define vbd_sz(_v)	((_v)->bdev->bd_part ? \
 			 (_v)->bdev->bd_part->nr_sects : \
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e0fd92a2a4cd..1f1ade6d6e09 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -105,6 +105,7 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
 	struct xen_blkif *blkif;
+	int i;
 
 	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
 	if (!blkif)
@@ -124,6 +125,21 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
 
+	blkif->pending_reqs = kcalloc(XEN_BLKIF_REQS,
+	                              sizeof(blkif->pending_reqs[0]),
+	                              GFP_KERNEL);
+	if (!blkif->pending_reqs) {
+		kmem_cache_free(xen_blkif_cachep, blkif);
+		return ERR_PTR(-ENOMEM);
+	}
+	INIT_LIST_HEAD(&blkif->pending_free);
+	spin_lock_init(&blkif->pending_free_lock);
+	init_waitqueue_head(&blkif->pending_free_wq);
+
+	for (i = 0; i < XEN_BLKIF_REQS; i++)
+		list_add_tail(&blkif->pending_reqs[i].free_list,
+			      &blkif->pending_free);
+
 	return blkif;
 }
 
@@ -203,8 +219,18 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
+	struct pending_req *req;
+	int i = 0;
+
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
+
+	/* Check that there is no request in use */
+	list_for_each_entry(req, &blkif->pending_free, free_list)
+		i++;
+	BUG_ON(i != XEN_BLKIF_REQS);
+
+	kfree(blkif->pending_reqs);
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
 

From 31552ee32df89f97a61766cee51b8dabb1ae3f4f Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 17 Apr 2013 20:19:00 +0200
Subject: [PATCH 006/320] xen-blkback: expand map/unmap functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Preparatory change for implementing indirect descriptors. Change
xen_blkbk_{map/unmap} in order to be able to map/unmap a random amount
of grants (previously it was limited to
BLKIF_MAX_SEGMENTS_PER_REQUEST). Also, remove the usage of pending_req
in the map/unmap functions, so we can map/unmap grants without needing
to pass a pending_req.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: xen-devel@lists.xen.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 141 +++++++++++++++++-----------
 1 file changed, 86 insertions(+), 55 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 90a57552f4b7..356722f65f88 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -163,10 +163,6 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 
 #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
-#define pending_handle(_req, _seg) \
-	(_req->grant_handles[_seg])
-
-
 static int do_block_io_op(struct xen_blkif *blkif);
 static int dispatch_rw_block_io(struct xen_blkif *blkif,
 				struct blkif_request *req,
@@ -648,50 +644,57 @@ struct seg_buf {
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
  */
-static void xen_blkbk_unmap(struct pending_req *req)
+static void xen_blkbk_unmap(struct xen_blkif *blkif,
+                            grant_handle_t handles[],
+                            struct page *pages[],
+                            struct persistent_gnt *persistent_gnts[],
+                            int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
-	grant_handle_t handle;
-	struct xen_blkif *blkif = req->blkif;
 	int ret;
 
-	for (i = 0; i < req->nr_pages; i++) {
-		if (req->persistent_gnts[i] != NULL) {
-			put_persistent_gnt(blkif, req->persistent_gnts[i]);
+	for (i = 0; i < num; i++) {
+		if (persistent_gnts[i] != NULL) {
+			put_persistent_gnt(blkif, persistent_gnts[i]);
 			continue;
 		}
-		handle = pending_handle(req, i);
-		pages[invcount] = req->pages[i];
-		if (handle == BLKBACK_INVALID_HANDLE)
+		if (handles[i] == BLKBACK_INVALID_HANDLE)
 			continue;
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[invcount]),
-				    GNTMAP_host_map, handle);
-		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-		invcount++;
+		unmap_pages[invcount] = pages[i];
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]),
+				    GNTMAP_host_map, handles[i]);
+		handles[i] = BLKBACK_INVALID_HANDLE;
+		if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
+			                        invcount);
+			BUG_ON(ret);
+			put_free_pages(blkif, unmap_pages, invcount);
+			invcount = 0;
+		}
+	}
+	if (invcount) {
+		ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
+		BUG_ON(ret);
+		put_free_pages(blkif, unmap_pages, invcount);
 	}
-
-	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
-	BUG_ON(ret);
-	put_free_pages(blkif, pages, invcount);
 }
 
-static int xen_blkbk_map(struct blkif_request *req,
-			 struct pending_req *pending_req,
-			 struct seg_buf seg[],
-			 struct page *pages[])
+static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
+			 struct persistent_gnt *persistent_gnts[],
+			 grant_handle_t handles[],
+			 struct page *pages[],
+			 int num, bool ro)
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt **persistent_gnts = pending_req->persistent_gnts;
 	struct persistent_gnt *persistent_gnt = NULL;
-	struct xen_blkif *blkif = pending_req->blkif;
 	phys_addr_t addr = 0;
 	int i, seg_idx, new_map_idx;
-	int nseg = req->u.rw.nr_segments;
 	int segs_to_map = 0;
 	int ret = 0;
+	int last_map = 0, map_until = 0;
 	int use_persistent_gnts;
 
 	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
@@ -701,13 +704,14 @@ static int xen_blkbk_map(struct blkif_request *req,
 	 * assign map[..] with the PFN of the page in our domain with the
 	 * corresponding grant reference for each page.
 	 */
-	for (i = 0; i < nseg; i++) {
+again:
+	for (i = map_until; i < num; i++) {
 		uint32_t flags;
 
 		if (use_persistent_gnts)
 			persistent_gnt = get_persistent_gnt(
 				blkif,
-				req->u.rw.seg[i].gref);
+				grefs[i]);
 
 		if (persistent_gnt) {
 			/*
@@ -723,13 +727,15 @@ static int xen_blkbk_map(struct blkif_request *req,
 			pages_to_gnt[segs_to_map] = pages[i];
 			persistent_gnts[i] = NULL;
 			flags = GNTMAP_host_map;
-			if (!use_persistent_gnts &&
-			    (pending_req->operation != BLKIF_OP_READ))
+			if (!use_persistent_gnts && ro)
 				flags |= GNTMAP_readonly;
 			gnttab_set_map_op(&map[segs_to_map++], addr,
-					  flags, req->u.rw.seg[i].gref,
+					  flags, grefs[i],
 					  blkif->domid);
 		}
+		map_until = i + 1;
+		if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST)
+			break;
 	}
 
 	if (segs_to_map) {
@@ -742,26 +748,19 @@ static int xen_blkbk_map(struct blkif_request *req,
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (seg_idx = 0, new_map_idx = 0; seg_idx < nseg; seg_idx++) {
+	for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
 		if (!persistent_gnts[seg_idx]) {
 			/* This is a newly mapped grant */
 			BUG_ON(new_map_idx >= segs_to_map);
 			if (unlikely(map[new_map_idx].status != 0)) {
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-				pending_handle(pending_req, seg_idx) = BLKBACK_INVALID_HANDLE;
+				handles[seg_idx] = BLKBACK_INVALID_HANDLE;
 				ret |= 1;
-				new_map_idx++;
-				/*
-				 * No need to set unmap_seg bit, since
-				 * we can not unmap this grant because
-				 * the handle is invalid.
-				 */
-				continue;
+				goto next;
 			}
-			pending_handle(pending_req, seg_idx) = map[new_map_idx].handle;
+			handles[seg_idx] = map[new_map_idx].handle;
 		} else {
-			/* This grant is persistent and already mapped */
-			goto next;
+			continue;
 		}
 		if (use_persistent_gnts &&
 		    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
@@ -777,7 +776,7 @@ static int xen_blkbk_map(struct blkif_request *req,
 				 * allocate the persistent_gnt struct
 				 * map this grant non-persistenly
 				 */
-				goto next_unmap;
+				goto next;
 			}
 			persistent_gnt->gnt = map[new_map_idx].ref;
 			persistent_gnt->handle = map[new_map_idx].handle;
@@ -786,13 +785,12 @@ static int xen_blkbk_map(struct blkif_request *req,
 			                       persistent_gnt)) {
 				kfree(persistent_gnt);
 				persistent_gnt = NULL;
-				goto next_unmap;
+				goto next;
 			}
 			persistent_gnts[seg_idx] = persistent_gnt;
 			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
 				 persistent_gnt->gnt, blkif->persistent_gnt_c,
 				 xen_blkif_max_pgrants);
-			new_map_idx++;
 			goto next;
 		}
 		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -800,15 +798,18 @@ static int xen_blkbk_map(struct blkif_request *req,
 			pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
 			         blkif->domid, blkif->vbd.handle);
 		}
-next_unmap:
 		/*
 		 * We could not map this grant persistently, so use it as
 		 * a non-persistent grant.
 		 */
-		new_map_idx++;
 next:
-		seg[seg_idx].offset = (req->u.rw.seg[seg_idx].first_sect << 9);
+		new_map_idx++;
 	}
+	segs_to_map = 0;
+	last_map = map_until;
+	if (map_until != num)
+		goto again;
+
 	return ret;
 
 out_of_memory:
@@ -817,6 +818,31 @@ static int xen_blkbk_map(struct blkif_request *req,
 	return -ENOMEM;
 }
 
+static int xen_blkbk_map_seg(struct blkif_request *req,
+			     struct pending_req *pending_req,
+			     struct seg_buf seg[],
+			     struct page *pages[])
+{
+	int i, rc;
+	grant_ref_t grefs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+
+	for (i = 0; i < req->u.rw.nr_segments; i++)
+		grefs[i] = req->u.rw.seg[i].gref;
+
+	rc = xen_blkbk_map(pending_req->blkif, grefs,
+	                   pending_req->persistent_gnts,
+	                   pending_req->grant_handles, pending_req->pages,
+	                   req->u.rw.nr_segments,
+	                   (pending_req->operation != BLKIF_OP_READ));
+	if (rc)
+		return rc;
+
+	for (i = 0; i < req->u.rw.nr_segments; i++)
+		seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+
+	return 0;
+}
+
 static int dispatch_discard_io(struct xen_blkif *blkif,
 				struct blkif_request *req)
 {
@@ -903,7 +929,10 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req);
+		xen_blkbk_unmap(pending_req->blkif, pending_req->grant_handles,
+		                pending_req->pages,
+		                pending_req->persistent_gnts,
+		                pending_req->nr_pages);
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		xen_blkif_put(pending_req->blkif);
@@ -1125,7 +1154,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg, pages))
+	if (xen_blkbk_map_seg(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -1186,7 +1215,9 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	return 0;
 
  fail_flush:
-	xen_blkbk_unmap(pending_req);
+	xen_blkbk_unmap(blkif, pending_req->grant_handles,
+	                pending_req->pages, pending_req->persistent_gnts,
+	                pending_req->nr_pages);
  fail_response:
 	/* Haven't submitted any bio's yet. */
 	make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);

From 402b27f9f2c22309d5bb285628765bc27b82fcf5 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Thu, 18 Apr 2013 16:06:54 +0200
Subject: [PATCH 007/320] xen-block: implement indirect descriptors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Indirect descriptors introduce a new block operation
(BLKIF_OP_INDIRECT) that passes grant references instead of segments
in the request. This grant references are filled with arrays of
blkif_request_segment_aligned, this way we can send more segments in a
request.

The proposed implementation sets the maximum number of indirect grefs
(frames filled with blkif_request_segment_aligned) to 256 in the
backend and 32 in the frontend. The value in the frontend has been
chosen experimentally, and the backend value has been set to a sane
value that allows expanding the maximum number of indirect descriptors
in the frontend if needed.

The migration code has changed from the previous implementation, in
which we simply remapped the segments on the shared ring. Now the
maximum number of segments allowed in a request can change depending
on the backend, so we have to requeue all the requests in the ring and
in the queue and split the bios in them if they are bigger than the
new maximum number of segments.

[v2: Fixed minor comments by Konrad.
[v1: Added padding to make the indirect request 64bit aligned.
 Added some BUGs, comments; fixed number of indirect pages in
 blkif_get_x86_{32/64}_req. Added description about the indirect operation
 in blkif.h]
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
[v3: Fixed spaces and tabs mix ups]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 132 ++++++--
 drivers/block/xen-blkback/common.h  |  98 +++++-
 drivers/block/xen-blkback/xenbus.c  |   7 +
 drivers/block/xen-blkfront.c        | 490 +++++++++++++++++++++++-----
 include/xen/interface/io/blkif.h    |  53 +++
 5 files changed, 656 insertions(+), 124 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 356722f65f88..1ebc0aa0f0e4 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -59,7 +59,7 @@
  * IO workloads.
  */
 
-static int xen_blkif_max_buffer_pages = 704;
+static int xen_blkif_max_buffer_pages = 1024;
 module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
 MODULE_PARM_DESC(max_buffer_pages,
 "Maximum number of free pages to keep in each block backend buffer");
@@ -75,7 +75,7 @@ MODULE_PARM_DESC(max_buffer_pages,
  * algorithm.
  */
 
-static int xen_blkif_max_pgrants = 352;
+static int xen_blkif_max_pgrants = 1056;
 module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
                  "Maximum number of grants to map persistently");
@@ -636,10 +636,6 @@ int xen_blkif_schedule(void *arg)
 	return 0;
 }
 
-struct seg_buf {
-	unsigned int offset;
-	unsigned int nsec;
-};
 /*
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
@@ -818,29 +814,69 @@ static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
 	return -ENOMEM;
 }
 
-static int xen_blkbk_map_seg(struct blkif_request *req,
-			     struct pending_req *pending_req,
+static int xen_blkbk_map_seg(struct pending_req *pending_req,
 			     struct seg_buf seg[],
 			     struct page *pages[])
 {
-	int i, rc;
-	grant_ref_t grefs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	int rc;
 
-	for (i = 0; i < req->u.rw.nr_segments; i++)
-		grefs[i] = req->u.rw.seg[i].gref;
-
-	rc = xen_blkbk_map(pending_req->blkif, grefs,
+	rc = xen_blkbk_map(pending_req->blkif, pending_req->grefs,
 	                   pending_req->persistent_gnts,
 	                   pending_req->grant_handles, pending_req->pages,
-	                   req->u.rw.nr_segments,
+			   pending_req->nr_pages,
 	                   (pending_req->operation != BLKIF_OP_READ));
+
+	return rc;
+}
+
+static int xen_blkbk_parse_indirect(struct blkif_request *req,
+				    struct pending_req *pending_req,
+				    struct seg_buf seg[],
+				    struct phys_req *preq)
+{
+	struct persistent_gnt **persistent =
+		pending_req->indirect_persistent_gnts;
+	struct page **pages = pending_req->indirect_pages;
+	struct xen_blkif *blkif = pending_req->blkif;
+	int indirect_grefs, rc, n, nseg, i;
+	struct blkif_request_segment_aligned *segments = NULL;
+
+	nseg = pending_req->nr_pages;
+	indirect_grefs = INDIRECT_PAGES(nseg);
+	BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
+
+	rc = xen_blkbk_map(blkif, req->u.indirect.indirect_grefs,
+			   persistent, pending_req->indirect_handles,
+			   pages, indirect_grefs, true);
 	if (rc)
-		return rc;
+		goto unmap;
 
-	for (i = 0; i < req->u.rw.nr_segments; i++)
-		seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+	for (n = 0, i = 0; n < nseg; n++) {
+		if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
+			/* Map indirect segments */
+			if (segments)
+				kunmap_atomic(segments);
+			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]);
+		}
+		i = n % SEGS_PER_INDIRECT_FRAME;
+		pending_req->grefs[n] = segments[i].gref;
+		seg[n].nsec = segments[i].last_sect -
+			segments[i].first_sect + 1;
+		seg[n].offset = (segments[i].first_sect << 9);
+		if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+		    (segments[i].last_sect < segments[i].first_sect)) {
+			rc = -EINVAL;
+			goto unmap;
+		}
+		preq->nr_sects += seg[n].nsec;
+	}
 
-	return 0;
+unmap:
+	if (segments)
+		kunmap_atomic(segments);
+	xen_blkbk_unmap(blkif, pending_req->indirect_handles,
+			pages, persistent, indirect_grefs);
+	return rc;
 }
 
 static int dispatch_discard_io(struct xen_blkif *blkif,
@@ -1013,6 +1049,7 @@ __do_block_io_op(struct xen_blkif *blkif)
 		case BLKIF_OP_WRITE:
 		case BLKIF_OP_WRITE_BARRIER:
 		case BLKIF_OP_FLUSH_DISKCACHE:
+		case BLKIF_OP_INDIRECT:
 			if (dispatch_rw_block_io(blkif, &req, pending_req))
 				goto done;
 			break;
@@ -1059,17 +1096,28 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 				struct pending_req *pending_req)
 {
 	struct phys_req preq;
-	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct seg_buf *seg = pending_req->seg;
 	unsigned int nseg;
 	struct bio *bio = NULL;
-	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct bio **biolist = pending_req->biolist;
 	int i, nbio = 0;
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
 	struct page **pages = pending_req->pages;
+	unsigned short req_operation;
 
-	switch (req->operation) {
+	req_operation = req->operation == BLKIF_OP_INDIRECT ?
+			req->u.indirect.indirect_op : req->operation;
+	if ((req->operation == BLKIF_OP_INDIRECT) &&
+	    (req_operation != BLKIF_OP_READ) &&
+	    (req_operation != BLKIF_OP_WRITE)) {
+		pr_debug(DRV_PFX "Invalid indirect operation (%u)\n",
+			 req_operation);
+		goto fail_response;
+	}
+
+	switch (req_operation) {
 	case BLKIF_OP_READ:
 		blkif->st_rd_req++;
 		operation = READ;
@@ -1091,33 +1139,47 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	}
 
 	/* Check that the number of segments is sane. */
-	nseg = req->u.rw.nr_segments;
+	nseg = req->operation == BLKIF_OP_INDIRECT ?
+	       req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
 	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
-	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+	    unlikely((req->operation != BLKIF_OP_INDIRECT) &&
+		     (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
+	    unlikely((req->operation == BLKIF_OP_INDIRECT) &&
+		     (nseg > MAX_INDIRECT_SEGMENTS))) {
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 			 nseg);
 		/* Haven't submitted any bio's yet. */
 		goto fail_response;
 	}
 
-	preq.sector_number = req->u.rw.sector_number;
 	preq.nr_sects      = 0;
 
 	pending_req->blkif     = blkif;
 	pending_req->id        = req->u.rw.id;
-	pending_req->operation = req->operation;
+	pending_req->operation = req_operation;
 	pending_req->status    = BLKIF_RSP_OKAY;
 	pending_req->nr_pages  = nseg;
 
-	for (i = 0; i < nseg; i++) {
-		seg[i].nsec = req->u.rw.seg[i].last_sect -
-			req->u.rw.seg[i].first_sect + 1;
-		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
-		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+	if (req->operation != BLKIF_OP_INDIRECT) {
+		preq.dev               = req->u.rw.handle;
+		preq.sector_number     = req->u.rw.sector_number;
+		for (i = 0; i < nseg; i++) {
+			pending_req->grefs[i] = req->u.rw.seg[i].gref;
+			seg[i].nsec = req->u.rw.seg[i].last_sect -
+				req->u.rw.seg[i].first_sect + 1;
+			seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+			if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+			    (req->u.rw.seg[i].last_sect <
+			     req->u.rw.seg[i].first_sect))
+				goto fail_response;
+			preq.nr_sects += seg[i].nsec;
+		}
+	} else {
+		preq.dev               = req->u.indirect.handle;
+		preq.sector_number     = req->u.indirect.sector_number;
+		if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq))
 			goto fail_response;
-		preq.nr_sects += seg[i].nsec;
-
 	}
 
 	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
@@ -1154,7 +1216,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map_seg(req, pending_req, seg, pages))
+	if (xen_blkbk_map_seg(pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -1220,7 +1282,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	                pending_req->nr_pages);
  fail_response:
 	/* Haven't submitted any bio's yet. */
-	make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
+	make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
 	free_req(blkif, pending_req);
 	msleep(1); /* back off a bit */
 	return -EIO;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index e33fafa0facd..1ac53da8410f 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -50,6 +50,19 @@
 		 __func__, __LINE__, ##args)
 
 
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define MAX_INDIRECT_PAGES \
+	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
 /* Not a real protocol.  Used to generate ring structs which contain
  * the elements common to all protocols only.  This way we get a
  * compiler-checkable way to use common struct elements, so we can
@@ -83,12 +96,31 @@ struct blkif_x86_32_request_other {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_32_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad1;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint64_t       _pad2;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_32_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_32_request_rw rw;
 		struct blkif_x86_32_request_discard discard;
 		struct blkif_x86_32_request_other other;
+		struct blkif_x86_32_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
@@ -127,12 +159,32 @@ struct blkif_x86_64_request_other {
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_64_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint32_t       _pad3;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_64_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_64_request_rw rw;
 		struct blkif_x86_64_request_discard discard;
 		struct blkif_x86_64_request_other other;
+		struct blkif_x86_64_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
@@ -266,6 +318,11 @@ struct xen_blkif {
 	wait_queue_head_t	waiting_to_free;
 };
 
+struct seg_buf {
+	unsigned long offset;
+	unsigned int nsec;
+};
+
 /*
  * Each outstanding request that we've passed to the lower device layers has a
  * 'pending_req' allocated to it. Each buffer_head that completes decrements
@@ -280,9 +337,16 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
-	struct page		*pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt	*persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	grant_handle_t		grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page		*pages[MAX_INDIRECT_SEGMENTS];
+	struct persistent_gnt	*persistent_gnts[MAX_INDIRECT_SEGMENTS];
+	grant_handle_t		grant_handles[MAX_INDIRECT_SEGMENTS];
+	grant_ref_t		grefs[MAX_INDIRECT_SEGMENTS];
+	/* Indirect descriptors */
+	struct persistent_gnt	*indirect_persistent_gnts[MAX_INDIRECT_PAGES];
+	struct page		*indirect_pages[MAX_INDIRECT_PAGES];
+	grant_handle_t		indirect_handles[MAX_INDIRECT_PAGES];
+	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
+	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
 };
 
 
@@ -321,7 +385,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 					struct blkif_x86_32_request *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	case BLKIF_OP_READ:
@@ -344,6 +408,18 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 		/*
 		 * Don't know how to translate this op. Only get the
@@ -357,7 +433,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 					struct blkif_x86_64_request *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	case BLKIF_OP_READ:
@@ -380,6 +456,18 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 		/*
 		 * Don't know how to translate this op. Only get the
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 1f1ade6d6e09..afab208c54e3 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -107,6 +107,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	struct xen_blkif *blkif;
 	int i;
 
+	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
+
 	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
 	if (!blkif)
 		return ERR_PTR(-ENOMEM);
@@ -709,6 +711,11 @@ static void connect(struct backend_info *be)
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
+			    MAX_INDIRECT_SEGMENTS);
+	if (err)
+		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
+			 dev->nodename, err);
 
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a894f88762d8..82d63d5b1750 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -74,12 +74,27 @@ struct grant {
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
-	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant **grants_used;
+	struct grant **indirect_grants;
+};
+
+struct split_bio {
+	struct bio *bio;
+	atomic_t pending;
+	int err;
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
+/*
+ * Maximum number of segments in indirect requests, the actual value used by
+ * the frontend driver is the minimum of this value and the value provided
+ * by the backend driver.
+ */
+
+static unsigned int xen_blkif_max_segments = 32;
+
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 
 /*
@@ -98,7 +113,7 @@ struct blkfront_info
 	enum blkif_state connected;
 	int ring_ref;
 	struct blkif_front_ring ring;
-	struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct scatterlist *sg;
 	unsigned int evtchn, irq;
 	struct request_queue *rq;
 	struct work_struct work;
@@ -114,6 +129,7 @@ struct blkfront_info
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	unsigned int feature_persistent:1;
+	unsigned int max_indirect_segments;
 	int is_ready;
 };
 
@@ -142,6 +158,13 @@ static DEFINE_SPINLOCK(minor_lock);
 
 #define DEV_NAME	"xvd"	/* name in /dev */
 
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define INDIRECT_GREFS(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
+static int blkfront_setup_indirect(struct blkfront_info *info);
+
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
@@ -358,7 +381,8 @@ static int blkif_queue_request(struct request *req)
 	struct blkif_request *ring_req;
 	unsigned long id;
 	unsigned int fsect, lsect;
-	int i, ref;
+	int i, ref, n;
+	struct blkif_request_segment_aligned *segments = NULL;
 
 	/*
 	 * Used to store if we are able to queue the request by just using
@@ -369,21 +393,27 @@ static int blkif_queue_request(struct request *req)
 	grant_ref_t gref_head;
 	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
+	int nseg, max_grefs;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	/* Check if we have enought grants to allocate a requests */
-	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+	max_grefs = info->max_indirect_segments ?
+		    info->max_indirect_segments +
+		    INDIRECT_GREFS(info->max_indirect_segments) :
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	/* Check if we have enough grants to allocate a requests */
+	if (info->persistent_gnts_c < max_grefs) {
 		new_persistent_gnts = 1;
 		if (gnttab_alloc_grant_references(
-		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    max_grefs - info->persistent_gnts_c,
 		    &gref_head) < 0) {
 			gnttab_request_free_callback(
 				&info->callback,
 				blkif_restart_queue_callback,
 				info,
-				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+				max_grefs);
 			return 1;
 		}
 	} else
@@ -394,42 +424,67 @@ static int blkif_queue_request(struct request *req)
 	id = get_id_from_freelist(info);
 	info->shadow[id].request = req;
 
-	ring_req->u.rw.id = id;
-	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
-	ring_req->u.rw.handle = info->handle;
-
-	ring_req->operation = rq_data_dir(req) ?
-		BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
-		/*
-		 * Ideally we can do an unordered flush-to-disk. In case the
-		 * backend onlysupports barriers, use that. A barrier request
-		 * a superset of FUA, so we can implement it the same
-		 * way.  (It's also a FLUSH+FUA, since it is
-		 * guaranteed ordered WRT previous writes.)
-		 */
-		ring_req->operation = info->flush_op;
-	}
-
 	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
-		/* id, sector_number and handle are set above. */
 		ring_req->operation = BLKIF_OP_DISCARD;
 		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+		ring_req->u.discard.id = id;
+		ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
 		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
 			ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
 		else
 			ring_req->u.discard.flag = 0;
 	} else {
-		ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
-							   info->sg);
-		BUG_ON(ring_req->u.rw.nr_segments >
-		       BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
+		BUG_ON(info->max_indirect_segments == 0 &&
+		       req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		BUG_ON(info->max_indirect_segments &&
+		       req->nr_phys_segments > info->max_indirect_segments);
+		nseg = blk_rq_map_sg(req->q, req, info->sg);
+		ring_req->u.rw.id = id;
+		if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			/*
+			 * The indirect operation can only be a BLKIF_OP_READ or
+			 * BLKIF_OP_WRITE
+			 */
+			BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
+			ring_req->operation = BLKIF_OP_INDIRECT;
+			ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.indirect.handle = info->handle;
+			ring_req->u.indirect.nr_segments = nseg;
+		} else {
+			ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.rw.handle = info->handle;
+			ring_req->operation = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+				/*
+				 * Ideally we can do an unordered flush-to-disk. In case the
+				 * backend onlysupports barriers, use that. A barrier request
+				 * a superset of FUA, so we can implement it the same
+				 * way.  (It's also a FLUSH+FUA, since it is
+				 * guaranteed ordered WRT previous writes.)
+				 */
+				ring_req->operation = info->flush_op;
+			}
+			ring_req->u.rw.nr_segments = nseg;
+		}
+		for_each_sg(info->sg, sg, nseg, i) {
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
 
+			if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
+			    (i % SEGS_PER_INDIRECT_FRAME == 0)) {
+				if (segments)
+					kunmap_atomic(segments);
+
+				n = i / SEGS_PER_INDIRECT_FRAME;
+				gnt_list_entry = get_grant(&gref_head, info);
+				info->shadow[id].indirect_grants[n] = gnt_list_entry;
+				segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
+				ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
+			}
+
 			gnt_list_entry = get_grant(&gref_head, info);
 			ref = gnt_list_entry->gref;
 
@@ -441,8 +496,7 @@ static int blkif_queue_request(struct request *req)
 
 				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
-				shared_data = kmap_atomic(
-					pfn_to_page(gnt_list_entry->pfn));
+				shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
 				bvec_data = kmap_atomic(sg_page(sg));
 
 				/*
@@ -461,13 +515,23 @@ static int blkif_queue_request(struct request *req)
 				kunmap_atomic(bvec_data);
 				kunmap_atomic(shared_data);
 			}
-
-			ring_req->u.rw.seg[i] =
-					(struct blkif_request_segment) {
-						.gref       = ref,
-						.first_sect = fsect,
-						.last_sect  = lsect };
+			if (ring_req->operation != BLKIF_OP_INDIRECT) {
+				ring_req->u.rw.seg[i] =
+						(struct blkif_request_segment) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			} else {
+				n = i % SEGS_PER_INDIRECT_FRAME;
+				segments[n] =
+					(struct blkif_request_segment_aligned) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			}
 		}
+		if (segments)
+			kunmap_atomic(segments);
 	}
 
 	info->ring.req_prod_pvt++;
@@ -542,7 +606,8 @@ static void do_blkif_request(struct request_queue *rq)
 		flush_requests(info);
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+				unsigned int segments)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -571,7 +636,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 	blk_queue_max_segment_size(rq, PAGE_SIZE);
 
 	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	blk_queue_max_segments(rq, segments);
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
@@ -588,13 +653,16 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled",
-	       info->feature_persistent ? "using persistent grants" : "");
+	       info->feature_flush ? "enabled;" : "disabled;",
+	       "persistent grants:",
+	       info->feature_persistent ? "enabled;" : "disabled;",
+	       "indirect descriptors:",
+	       info->max_indirect_segments ? "enabled;" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -734,7 +802,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size)) {
+	if (xlvbd_init_blk_queue(gd, sector_size,
+				 info->max_indirect_segments ? :
+				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		del_gendisk(gd);
 		goto release;
 	}
@@ -818,6 +888,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 {
 	struct grant *persistent_gnt;
 	struct grant *n;
+	int i, j, segs;
 
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
@@ -843,6 +914,47 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	}
 	BUG_ON(info->persistent_gnts_c != 0);
 
+	kfree(info->sg);
+	info->sg = NULL;
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		/*
+		 * Clear persistent grants present in requests already
+		 * on the shared ring
+		 */
+		if (!info->shadow[i].request)
+			goto free_shadow;
+
+		segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+		       info->shadow[i].req.u.indirect.nr_segments :
+		       info->shadow[i].req.u.rw.nr_segments;
+		for (j = 0; j < segs; j++) {
+			persistent_gnt = info->shadow[i].grants_used[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+		if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+			/*
+			 * If this is not an indirect operation don't try to
+			 * free indirect segments
+			 */
+			goto free_shadow;
+
+		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
+			persistent_gnt = info->shadow[i].indirect_grants[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+free_shadow:
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -873,6 +985,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 	char *bvec_data;
 	void *shared_data;
 	unsigned int offset = 0;
+	int nseg;
+
+	nseg = s->req.operation == BLKIF_OP_INDIRECT ?
+		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
 
 	if (bret->operation == BLKIF_OP_READ) {
 		/*
@@ -885,7 +1001,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
 			if (bvec->bv_offset < offset)
 				i++;
-			BUG_ON(i >= s->req.u.rw.nr_segments);
+			BUG_ON(i >= nseg);
 			shared_data = kmap_atomic(
 				pfn_to_page(s->grants_used[i]->pfn));
 			bvec_data = bvec_kmap_irq(bvec, &flags);
@@ -897,10 +1013,16 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 		}
 	}
 	/* Add the persistent grant into the list of free grants */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+	for (i = 0; i < nseg; i++) {
 		list_add(&s->grants_used[i]->node, &info->persistent_gnts);
 		info->persistent_gnts_c++;
 	}
+	if (s->req.operation == BLKIF_OP_INDIRECT) {
+		for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
+			list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
+			info->persistent_gnts_c++;
+		}
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1034,14 +1156,6 @@ static int setup_blkring(struct xenbus_device *dev,
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-	/* Allocate memory for grants */
-	err = fill_grant_buffer(info, BLK_RING_SIZE *
-	                              BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	if (err)
-		goto fail;
-
 	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
 	if (err < 0) {
 		free_page((unsigned long)sring);
@@ -1223,13 +1337,84 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
+/*
+ * This is a clone of md_trim_bio, used to split a bio into smaller ones
+ */
+static void trim_bio(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	bio->bi_sector += offset;
+	bio->bi_size = size;
+	offset <<= 9;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	while (bio->bi_idx < bio->bi_vcnt &&
+	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+		/* remove this whole bio_vec */
+		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+		bio->bi_idx++;
+	}
+	if (bio->bi_idx < bio->bi_vcnt) {
+		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+	}
+	/* avoid any complications with bi_idx being non-zero*/
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+
+static void split_bio_end(struct bio *bio, int error)
+{
+	struct split_bio *split_bio = bio->bi_private;
+
+	if (error)
+		split_bio->err = error;
+
+	if (atomic_dec_and_test(&split_bio->pending)) {
+		split_bio->bio->bi_phys_segments = 0;
+		bio_endio(split_bio->bio, split_bio->err);
+		kfree(split_bio);
+	}
+	bio_put(bio);
+}
 
 static int blkif_recover(struct blkfront_info *info)
 {
 	int i;
-	struct blkif_request *req;
+	struct request *req, *n;
 	struct blk_shadow *copy;
-	int j;
+	int rc;
+	struct bio *bio, *cloned_bio;
+	struct bio_list bio_list, merge_bio;
+	unsigned int segs, offset;
+	int pending, size;
+	struct split_bio *split_bio;
+	struct list_head requests;
 
 	/* Stage 1: Make a safe copy of the shadow state. */
 	copy = kmemdup(info->shadow, sizeof(info->shadow),
@@ -1244,36 +1429,64 @@ static int blkif_recover(struct blkfront_info *info)
 	info->shadow_free = info->ring.req_prod_pvt;
 	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
 
-	/* Stage 3: Find pending requests and requeue them. */
+	rc = blkfront_setup_indirect(info);
+	if (rc) {
+		kfree(copy);
+		return rc;
+	}
+
+	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	blk_queue_max_segments(info->rq, segs);
+	bio_list_init(&bio_list);
+	INIT_LIST_HEAD(&requests);
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
 		if (!copy[i].request)
 			continue;
 
-		/* Grab a request slot and copy shadow state into it. */
-		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-		*req = copy[i].req;
-
-		/* We get a new request id, and must reset the shadow state. */
-		req->u.rw.id = get_id_from_freelist(info);
-		memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
-
-		if (req->operation != BLKIF_OP_DISCARD) {
-		/* Rewrite any grant references invalidated by susp/resume. */
-			for (j = 0; j < req->u.rw.nr_segments; j++)
-				gnttab_grant_foreign_access_ref(
-					req->u.rw.seg[j].gref,
-					info->xbdev->otherend_id,
-					pfn_to_mfn(copy[i].grants_used[j]->pfn),
-					0);
+		/*
+		 * Get the bios in the request so we can re-queue them.
+		 */
+		if (copy[i].request->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			/*
+			 * Flush operations don't contain bios, so
+			 * we need to requeue the whole request
+			 */
+			list_add(&copy[i].request->queuelist, &requests);
+			continue;
 		}
-		info->shadow[req->u.rw.id].req = *req;
-
-		info->ring.req_prod_pvt++;
+		merge_bio.head = copy[i].request->bio;
+		merge_bio.tail = copy[i].request->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		copy[i].request->bio = NULL;
+		blk_put_request(copy[i].request);
 	}
 
 	kfree(copy);
 
+	/*
+	 * Empty the queue, this is important because we might have
+	 * requests in the queue with more segments than what we
+	 * can handle now.
+	 */
+	spin_lock_irq(&info->io_lock);
+	while ((req = blk_fetch_request(info->rq)) != NULL) {
+		if (req->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			list_add(&req->queuelist, &requests);
+			continue;
+		}
+		merge_bio.head = req->bio;
+		merge_bio.tail = req->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		req->bio = NULL;
+		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+			pr_alert("diskcache flush request found!\n");
+		__blk_put_request(info->rq, req);
+	}
+	spin_unlock_irq(&info->io_lock);
+
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	spin_lock_irq(&info->io_lock);
@@ -1281,14 +1494,50 @@ static int blkif_recover(struct blkfront_info *info)
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
-	/* Send off requeued requests */
-	flush_requests(info);
-
 	/* Kick any other new requests queued since we resumed */
 	kick_pending_request_queues(info);
 
+	list_for_each_entry_safe(req, n, &requests, queuelist) {
+		/* Requeue pending requests (flush or discard) */
+		list_del_init(&req->queuelist);
+		BUG_ON(req->nr_phys_segments > segs);
+		blk_requeue_request(info->rq, req);
+	}
 	spin_unlock_irq(&info->io_lock);
 
+	while ((bio = bio_list_pop(&bio_list)) != NULL) {
+		/* Traverse the list of pending bios and re-queue them */
+		if (bio_segments(bio) > segs) {
+			/*
+			 * This bio has more segments than what we can
+			 * handle, we have to split it.
+			 */
+			pending = (bio_segments(bio) + segs - 1) / segs;
+			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
+			BUG_ON(split_bio == NULL);
+			atomic_set(&split_bio->pending, pending);
+			split_bio->bio = bio;
+			for (i = 0; i < pending; i++) {
+				offset = (i * segs * PAGE_SIZE) >> 9;
+				size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
+					   (unsigned int)(bio->bi_size >> 9) - offset);
+				cloned_bio = bio_clone(bio, GFP_NOIO);
+				BUG_ON(cloned_bio == NULL);
+				trim_bio(cloned_bio, offset, size);
+				cloned_bio->bi_private = split_bio;
+				cloned_bio->bi_end_io = split_bio_end;
+				submit_bio(cloned_bio->bi_rw, cloned_bio);
+			}
+			/*
+			 * Now we have to wait for all those smaller bios to
+			 * end, so we can also end the "parent" bio.
+			 */
+			continue;
+		}
+		/* We don't need to split this bio */
+		submit_bio(bio->bi_rw, bio);
+	}
+
 	return 0;
 }
 
@@ -1308,8 +1557,12 @@ static int blkfront_resume(struct xenbus_device *dev)
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
 	err = talk_to_blkback(dev, info);
-	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
-		err = blkif_recover(info);
+
+	/*
+	 * We have to wait for the backend to switch to
+	 * connected state, since we want to read which
+	 * features it supports.
+	 */
 
 	return err;
 }
@@ -1387,6 +1640,61 @@ static void blkfront_setup_discard(struct blkfront_info *info)
 	kfree(type);
 }
 
+static int blkfront_setup_indirect(struct blkfront_info *info)
+{
+	unsigned int indirect_segments, segs;
+	int err, i;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &indirect_segments,
+			    NULL);
+	if (err) {
+		info->max_indirect_segments = 0;
+		segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	} else {
+		info->max_indirect_segments = min(indirect_segments,
+						  xen_blkif_max_segments);
+		segs = info->max_indirect_segments;
+	}
+	info->sg = kzalloc(sizeof(info->sg[0]) * segs, GFP_KERNEL);
+	if (info->sg == NULL)
+		goto out_of_memory;
+	sg_init_table(info->sg, segs);
+
+	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+	if (err)
+		goto out_of_memory;
+
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		info->shadow[i].grants_used = kzalloc(
+			sizeof(info->shadow[i].grants_used[0]) * segs,
+			GFP_NOIO);
+		if (info->max_indirect_segments)
+			info->shadow[i].indirect_grants = kzalloc(
+				sizeof(info->shadow[i].indirect_grants[0]) *
+				INDIRECT_GREFS(segs),
+				GFP_NOIO);
+		if ((info->shadow[i].grants_used == NULL) ||
+		     (info->max_indirect_segments &&
+		     (info->shadow[i].indirect_grants == NULL)))
+			goto out_of_memory;
+	}
+
+
+	return 0;
+
+out_of_memory:
+	kfree(info->sg);
+	info->sg = NULL;
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+	}
+	return -ENOMEM;
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1414,8 +1722,15 @@ static void blkfront_connect(struct blkfront_info *info)
 		set_capacity(info->gd, sectors);
 		revalidate_disk(info->gd);
 
-		/* fall through */
+		return;
 	case BLKIF_STATE_SUSPENDED:
+		/*
+		 * If we are recovering from suspension, we need to wait
+		 * for the backend to announce it's features before
+		 * reconnecting, at least we need to know if the backend
+		 * supports indirect descriptors, and how many.
+		 */
+		blkif_recover(info);
 		return;
 
 	default:
@@ -1483,6 +1798,13 @@ static void blkfront_connect(struct blkfront_info *info)
 	else
 		info->feature_persistent = persistent;
 
+	err = blkfront_setup_indirect(info);
+	if (err) {
+		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+				 info->xbdev->otherend);
+		return;
+	}
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ffd4652de91c..65e12099ef89 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -102,6 +102,30 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_OP_DISCARD           5
 
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * This pages are filled with an array of blkif_request_segment_aligned
+ * that hold the information about the segments. The number of indirect
+ * pages to use is determined by the maximum number of segments
+ * a indirect request contains. Every indirect page can contain a maximum
+ * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
+ * so to calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments/512).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
 /*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
@@ -109,6 +133,16 @@ typedef uint64_t blkif_sector_t;
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+struct blkif_request_segment_aligned {
+	grant_ref_t gref;        /* reference to I/O buffer frame        */
+	/* @first_sect: first sector in frame to transfer (inclusive).   */
+	/* @last_sect: last sector in frame to transfer (inclusive).     */
+	uint8_t     first_sect, last_sect;
+	uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
+} __attribute__((__packed__));
+
 struct blkif_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
@@ -147,12 +181,31 @@ struct blkif_request_other {
 	uint64_t     id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+#ifdef CONFIG_X86_64
+	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
+#endif
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef CONFIG_X86_64
+	uint32_t      _pad3;         /* make it 64 byte aligned */
+#else
+	uint64_t      _pad3;         /* make it 64 byte aligned */
+#endif
+} __attribute__((__packed__));
+
 struct blkif_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_request_rw rw;
 		struct blkif_request_discard discard;
 		struct blkif_request_other other;
+		struct blkif_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 

From bb642e8315fd573795e8b6fa9b9629064d73add1 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Thu, 2 May 2013 10:21:17 +0200
Subject: [PATCH 008/320] xen-blkback: allocate list of pending reqs in small
 chunks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allocate pending requests in smaller chunks instead of allocating them
all at the same time.

This change also removes the global array of pending_reqs, it is no
longer necessay.

Variables related to the grant mapping have been grouped into a struct
called "grant_page", this allows to allocate them in smaller chunks,
and also improves memory locality.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 92 +++++++++++++----------------
 drivers/block/xen-blkback/common.h  | 18 +++---
 drivers/block/xen-blkback/xenbus.c  | 76 ++++++++++++++++++------
 3 files changed, 107 insertions(+), 79 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 1ebc0aa0f0e4..e79ab4559233 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -641,9 +641,7 @@ int xen_blkif_schedule(void *arg)
  * used in the 'pending_req'.
  */
 static void xen_blkbk_unmap(struct xen_blkif *blkif,
-                            grant_handle_t handles[],
-                            struct page *pages[],
-                            struct persistent_gnt *persistent_gnts[],
+                            struct grant_page *pages[],
                             int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -652,16 +650,16 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
 	int ret;
 
 	for (i = 0; i < num; i++) {
-		if (persistent_gnts[i] != NULL) {
-			put_persistent_gnt(blkif, persistent_gnts[i]);
+		if (pages[i]->persistent_gnt != NULL) {
+			put_persistent_gnt(blkif, pages[i]->persistent_gnt);
 			continue;
 		}
-		if (handles[i] == BLKBACK_INVALID_HANDLE)
+		if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		unmap_pages[invcount] = pages[i];
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]),
-				    GNTMAP_host_map, handles[i]);
-		handles[i] = BLKBACK_INVALID_HANDLE;
+		unmap_pages[invcount] = pages[i]->page;
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+				    GNTMAP_host_map, pages[i]->handle);
+		pages[i]->handle = BLKBACK_INVALID_HANDLE;
 		if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
 			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
 			                        invcount);
@@ -677,10 +675,8 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
 	}
 }
 
-static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
-			 struct persistent_gnt *persistent_gnts[],
-			 grant_handle_t handles[],
-			 struct page *pages[],
+static int xen_blkbk_map(struct xen_blkif *blkif,
+			 struct grant_page *pages[],
 			 int num, bool ro)
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -707,26 +703,26 @@ static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
 		if (use_persistent_gnts)
 			persistent_gnt = get_persistent_gnt(
 				blkif,
-				grefs[i]);
+				pages[i]->gref);
 
 		if (persistent_gnt) {
 			/*
 			 * We are using persistent grants and
 			 * the grant is already mapped
 			 */
-			pages[i] = persistent_gnt->page;
-			persistent_gnts[i] = persistent_gnt;
+			pages[i]->page = persistent_gnt->page;
+			pages[i]->persistent_gnt = persistent_gnt;
 		} else {
-			if (get_free_page(blkif, &pages[i]))
+			if (get_free_page(blkif, &pages[i]->page))
 				goto out_of_memory;
-			addr = vaddr(pages[i]);
-			pages_to_gnt[segs_to_map] = pages[i];
-			persistent_gnts[i] = NULL;
+			addr = vaddr(pages[i]->page);
+			pages_to_gnt[segs_to_map] = pages[i]->page;
+			pages[i]->persistent_gnt = NULL;
 			flags = GNTMAP_host_map;
 			if (!use_persistent_gnts && ro)
 				flags |= GNTMAP_readonly;
 			gnttab_set_map_op(&map[segs_to_map++], addr,
-					  flags, grefs[i],
+					  flags, pages[i]->gref,
 					  blkif->domid);
 		}
 		map_until = i + 1;
@@ -745,16 +741,16 @@ static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
 	 * the page from the other domain.
 	 */
 	for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
-		if (!persistent_gnts[seg_idx]) {
+		if (!pages[seg_idx]->persistent_gnt) {
 			/* This is a newly mapped grant */
 			BUG_ON(new_map_idx >= segs_to_map);
 			if (unlikely(map[new_map_idx].status != 0)) {
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-				handles[seg_idx] = BLKBACK_INVALID_HANDLE;
+				pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
 				ret |= 1;
 				goto next;
 			}
-			handles[seg_idx] = map[new_map_idx].handle;
+			pages[seg_idx]->handle = map[new_map_idx].handle;
 		} else {
 			continue;
 		}
@@ -776,14 +772,14 @@ static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
 			}
 			persistent_gnt->gnt = map[new_map_idx].ref;
 			persistent_gnt->handle = map[new_map_idx].handle;
-			persistent_gnt->page = pages[seg_idx];
+			persistent_gnt->page = pages[seg_idx]->page;
 			if (add_persistent_gnt(blkif,
 			                       persistent_gnt)) {
 				kfree(persistent_gnt);
 				persistent_gnt = NULL;
 				goto next;
 			}
-			persistent_gnts[seg_idx] = persistent_gnt;
+			pages[seg_idx]->persistent_gnt = persistent_gnt;
 			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
 				 persistent_gnt->gnt, blkif->persistent_gnt_c,
 				 xen_blkif_max_pgrants);
@@ -814,15 +810,11 @@ static int xen_blkbk_map(struct xen_blkif *blkif, grant_ref_t grefs[],
 	return -ENOMEM;
 }
 
-static int xen_blkbk_map_seg(struct pending_req *pending_req,
-			     struct seg_buf seg[],
-			     struct page *pages[])
+static int xen_blkbk_map_seg(struct pending_req *pending_req)
 {
 	int rc;
 
-	rc = xen_blkbk_map(pending_req->blkif, pending_req->grefs,
-	                   pending_req->persistent_gnts,
-	                   pending_req->grant_handles, pending_req->pages,
+	rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
 			   pending_req->nr_pages,
 	                   (pending_req->operation != BLKIF_OP_READ));
 
@@ -834,9 +826,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 				    struct seg_buf seg[],
 				    struct phys_req *preq)
 {
-	struct persistent_gnt **persistent =
-		pending_req->indirect_persistent_gnts;
-	struct page **pages = pending_req->indirect_pages;
+	struct grant_page **pages = pending_req->indirect_pages;
 	struct xen_blkif *blkif = pending_req->blkif;
 	int indirect_grefs, rc, n, nseg, i;
 	struct blkif_request_segment_aligned *segments = NULL;
@@ -845,9 +835,10 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 	indirect_grefs = INDIRECT_PAGES(nseg);
 	BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
-	rc = xen_blkbk_map(blkif, req->u.indirect.indirect_grefs,
-			   persistent, pending_req->indirect_handles,
-			   pages, indirect_grefs, true);
+	for (i = 0; i < indirect_grefs; i++)
+		pages[i]->gref = req->u.indirect.indirect_grefs[i];
+
+	rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
 	if (rc)
 		goto unmap;
 
@@ -856,10 +847,10 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 			/* Map indirect segments */
 			if (segments)
 				kunmap_atomic(segments);
-			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]);
+			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
 		}
 		i = n % SEGS_PER_INDIRECT_FRAME;
-		pending_req->grefs[n] = segments[i].gref;
+		pending_req->segments[n]->gref = segments[i].gref;
 		seg[n].nsec = segments[i].last_sect -
 			segments[i].first_sect + 1;
 		seg[n].offset = (segments[i].first_sect << 9);
@@ -874,8 +865,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
 unmap:
 	if (segments)
 		kunmap_atomic(segments);
-	xen_blkbk_unmap(blkif, pending_req->indirect_handles,
-			pages, persistent, indirect_grefs);
+	xen_blkbk_unmap(blkif, pages, indirect_grefs);
 	return rc;
 }
 
@@ -965,9 +955,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req->blkif, pending_req->grant_handles,
-		                pending_req->pages,
-		                pending_req->persistent_gnts,
+		xen_blkbk_unmap(pending_req->blkif,
+		                pending_req->segments,
 		                pending_req->nr_pages);
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
@@ -1104,7 +1093,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
-	struct page **pages = pending_req->pages;
+	struct grant_page **pages = pending_req->segments;
 	unsigned short req_operation;
 
 	req_operation = req->operation == BLKIF_OP_INDIRECT ?
@@ -1165,7 +1154,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 		preq.dev               = req->u.rw.handle;
 		preq.sector_number     = req->u.rw.sector_number;
 		for (i = 0; i < nseg; i++) {
-			pending_req->grefs[i] = req->u.rw.seg[i].gref;
+			pages[i]->gref = req->u.rw.seg[i].gref;
 			seg[i].nsec = req->u.rw.seg[i].last_sect -
 				req->u.rw.seg[i].first_sect + 1;
 			seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
@@ -1216,7 +1205,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map_seg(pending_req, seg, pages))
+	if (xen_blkbk_map_seg(pending_req))
 		goto fail_flush;
 
 	/*
@@ -1228,7 +1217,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     pages[i],
+				     pages[i]->page,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
 
@@ -1277,8 +1266,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	return 0;
 
  fail_flush:
-	xen_blkbk_unmap(blkif, pending_req->grant_handles,
-	                pending_req->pages, pending_req->persistent_gnts,
+	xen_blkbk_unmap(blkif, pending_req->segments,
 	                pending_req->nr_pages);
  fail_response:
 	/* Haven't submitted any bio's yet. */
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 1ac53da8410f..c6b4cb9af6c2 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -297,8 +297,6 @@ struct xen_blkif {
 	int			free_pages_num;
 	struct list_head	free_pages;
 
-	/* Allocation of pending_reqs */
-	struct pending_req	*pending_reqs;
 	/* List of all 'pending_req' available */
 	struct list_head	pending_free;
 	/* And its spinlock. */
@@ -323,6 +321,13 @@ struct seg_buf {
 	unsigned int nsec;
 };
 
+struct grant_page {
+	struct page 		*page;
+	struct persistent_gnt	*persistent_gnt;
+	grant_handle_t		handle;
+	grant_ref_t		gref;
+};
+
 /*
  * Each outstanding request that we've passed to the lower device layers has a
  * 'pending_req' allocated to it. Each buffer_head that completes decrements
@@ -337,14 +342,9 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
-	struct page		*pages[MAX_INDIRECT_SEGMENTS];
-	struct persistent_gnt	*persistent_gnts[MAX_INDIRECT_SEGMENTS];
-	grant_handle_t		grant_handles[MAX_INDIRECT_SEGMENTS];
-	grant_ref_t		grefs[MAX_INDIRECT_SEGMENTS];
+	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
 	/* Indirect descriptors */
-	struct persistent_gnt	*indirect_persistent_gnts[MAX_INDIRECT_PAGES];
-	struct page		*indirect_pages[MAX_INDIRECT_PAGES];
-	grant_handle_t		indirect_handles[MAX_INDIRECT_PAGES];
+	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
 	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
 	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
 };
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index afab208c54e3..4a4749c78942 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -105,7 +105,8 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
 	struct xen_blkif *blkif;
-	int i;
+	struct pending_req *req, *n;
+	int i, j;
 
 	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
@@ -127,22 +128,51 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
 
-	blkif->pending_reqs = kcalloc(XEN_BLKIF_REQS,
-	                              sizeof(blkif->pending_reqs[0]),
-	                              GFP_KERNEL);
-	if (!blkif->pending_reqs) {
-		kmem_cache_free(xen_blkif_cachep, blkif);
-		return ERR_PTR(-ENOMEM);
-	}
 	INIT_LIST_HEAD(&blkif->pending_free);
+
+	for (i = 0; i < XEN_BLKIF_REQS; i++) {
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			goto fail;
+		list_add_tail(&req->free_list,
+		              &blkif->pending_free);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
+			                           GFP_KERNEL);
+			if (!req->segments[j])
+				goto fail;
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+			                                 GFP_KERNEL);
+			if (!req->indirect_pages[j])
+				goto fail;
+		}
+	}
 	spin_lock_init(&blkif->pending_free_lock);
 	init_waitqueue_head(&blkif->pending_free_wq);
 
-	for (i = 0; i < XEN_BLKIF_REQS; i++)
-		list_add_tail(&blkif->pending_reqs[i].free_list,
-			      &blkif->pending_free);
-
 	return blkif;
+
+fail:
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			if (!req->segments[j])
+				break;
+			kfree(req->segments[j]);
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			if (!req->indirect_pages[j])
+				break;
+			kfree(req->indirect_pages[j]);
+		}
+		kfree(req);
+	}
+
+	kmem_cache_free(xen_blkif_cachep, blkif);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
@@ -221,18 +251,28 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
-	struct pending_req *req;
-	int i = 0;
+	struct pending_req *req, *n;
+	int i = 0, j;
 
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
 
 	/* Check that there is no request in use */
-	list_for_each_entry(req, &blkif->pending_free, free_list)
-		i++;
-	BUG_ON(i != XEN_BLKIF_REQS);
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+			kfree(req->segments[j]);
+
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+			kfree(req->indirect_pages[j]);
+
+		kfree(req);
+		i++;
+	}
+
+	WARN_ON(i != XEN_BLKIF_REQS);
 
-	kfree(blkif->pending_reqs);
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
 

From b7649158a0d241f8d53d13ff7441858539e16656 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Thu, 2 May 2013 10:58:50 +0200
Subject: [PATCH 009/320] xen-blkfront: use a different scatterlist for each
 request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In blkif_queue_request blkfront iterates over the scatterlist in order
to set the segments of the request, and in blkif_completion blkfront
iterates over the raw request, which makes it hard to know the exact
position of the source and destination memory positions.

This can be solved by allocating a scatterlist for each request, that
will be keep until the request is finished, allowing us to copy the
data back to the original memory without having to iterate over the
raw request.

Oracle-Bug: 16660413 - LARGE ASYNCHRONOUS READS APPEAR BROKEN ON 2.6.39-400
CC: stable@vger.kernel.org
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reported-and-Tested-by: Anne Milicia <anne.milicia@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c | 43 +++++++++++++++---------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 82d63d5b1750..bac8cf31319b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -76,6 +76,7 @@ struct blk_shadow {
 	struct request *request;
 	struct grant **grants_used;
 	struct grant **indirect_grants;
+	struct scatterlist *sg;
 };
 
 struct split_bio {
@@ -113,7 +114,6 @@ struct blkfront_info
 	enum blkif_state connected;
 	int ring_ref;
 	struct blkif_front_ring ring;
-	struct scatterlist *sg;
 	unsigned int evtchn, irq;
 	struct request_queue *rq;
 	struct work_struct work;
@@ -438,7 +438,7 @@ static int blkif_queue_request(struct request *req)
 		       req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
 		BUG_ON(info->max_indirect_segments &&
 		       req->nr_phys_segments > info->max_indirect_segments);
-		nseg = blk_rq_map_sg(req->q, req, info->sg);
+		nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
 		ring_req->u.rw.id = id;
 		if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
 			/*
@@ -469,7 +469,7 @@ static int blkif_queue_request(struct request *req)
 			}
 			ring_req->u.rw.nr_segments = nseg;
 		}
-		for_each_sg(info->sg, sg, nseg, i) {
+		for_each_sg(info->shadow[id].sg, sg, nseg, i) {
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
 
@@ -914,8 +914,6 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	}
 	BUG_ON(info->persistent_gnts_c != 0);
 
-	kfree(info->sg);
-	info->sg = NULL;
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/*
 		 * Clear persistent grants present in requests already
@@ -953,6 +951,8 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 		info->shadow[i].grants_used = NULL;
 		kfree(info->shadow[i].indirect_grants);
 		info->shadow[i].indirect_grants = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
 	}
 
 	/* No more gnttab callback work. */
@@ -979,12 +979,9 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 			     struct blkif_response *bret)
 {
 	int i = 0;
-	struct bio_vec *bvec;
-	struct req_iterator iter;
-	unsigned long flags;
+	struct scatterlist *sg;
 	char *bvec_data;
 	void *shared_data;
-	unsigned int offset = 0;
 	int nseg;
 
 	nseg = s->req.operation == BLKIF_OP_INDIRECT ?
@@ -997,19 +994,16 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
 		 * than PAGE_SIZE, we have to keep track of the current offset,
 		 * to be sure we are copying the data from the right shared page.
 		 */
-		rq_for_each_segment(bvec, s->request, iter) {
-			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
-			if (bvec->bv_offset < offset)
-				i++;
-			BUG_ON(i >= nseg);
+		for_each_sg(s->sg, sg, nseg, i) {
+			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 			shared_data = kmap_atomic(
 				pfn_to_page(s->grants_used[i]->pfn));
-			bvec_data = bvec_kmap_irq(bvec, &flags);
-			memcpy(bvec_data, shared_data + bvec->bv_offset,
-				bvec->bv_len);
-			bvec_kunmap_irq(bvec_data, &flags);
+			bvec_data = kmap_atomic(sg_page(sg));
+			memcpy(bvec_data   + sg->offset,
+			       shared_data + sg->offset,
+			       sg->length);
+			kunmap_atomic(bvec_data);
 			kunmap_atomic(shared_data);
-			offset = bvec->bv_offset + bvec->bv_len;
 		}
 	}
 	/* Add the persistent grant into the list of free grants */
@@ -1656,10 +1650,6 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 						  xen_blkif_max_segments);
 		segs = info->max_indirect_segments;
 	}
-	info->sg = kzalloc(sizeof(info->sg[0]) * segs, GFP_KERNEL);
-	if (info->sg == NULL)
-		goto out_of_memory;
-	sg_init_table(info->sg, segs);
 
 	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
 	if (err)
@@ -1669,26 +1659,29 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
 		info->shadow[i].grants_used = kzalloc(
 			sizeof(info->shadow[i].grants_used[0]) * segs,
 			GFP_NOIO);
+		info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO);
 		if (info->max_indirect_segments)
 			info->shadow[i].indirect_grants = kzalloc(
 				sizeof(info->shadow[i].indirect_grants[0]) *
 				INDIRECT_GREFS(segs),
 				GFP_NOIO);
 		if ((info->shadow[i].grants_used == NULL) ||
+			(info->shadow[i].sg == NULL) ||
 		     (info->max_indirect_segments &&
 		     (info->shadow[i].indirect_grants == NULL)))
 			goto out_of_memory;
+		sg_init_table(info->shadow[i].sg, segs);
 	}
 
 
 	return 0;
 
 out_of_memory:
-	kfree(info->sg);
-	info->sg = NULL;
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		kfree(info->shadow[i].grants_used);
 		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
 		kfree(info->shadow[i].indirect_grants);
 		info->shadow[i].indirect_grants = NULL;
 	}

From 2d5dc3ba853344f39a41ae5bdb0a337b2ecaafa6 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 15 May 2013 10:39:34 -0400
Subject: [PATCH 010/320] xen-blkfront: Introduce a 'max' module parameter to
 alter the amount of indirect segments.

The max module parameter (by default 32) is the maximum number of
segments that the frontend will negotiate with the backend for indirect
descriptors.  Higher value means more potential throughput but more
memory usage. The backend picks the minimum of the frontend and its
default backend value.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 Documentation/ABI/testing/sysfs-driver-xen-blkfront | 10 ++++++++++
 drivers/block/xen-blkfront.c                        |  2 ++
 2 files changed, 12 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-xen-blkfront

diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
new file mode 100644
index 000000000000..c0a6cb7eb314
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
@@ -0,0 +1,10 @@
+What:           /sys/module/xen_blkfront/parameters/max
+Date:           June 2013
+KernelVersion:  3.11
+Contact:        Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Maximum number of segments that the frontend will negotiate
+                with the backend for indirect descriptors. The default value
+                is 32 - higher value means more potential throughput but more
+                memory usage. The backend picks the minimum of the frontend
+                and its default backend value.
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index bac8cf31319b..08bdfc3b2de2 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -95,6 +95,8 @@ static const struct block_device_operations xlvbd_block_fops;
  */
 
 static unsigned int xen_blkif_max_segments = 32;
+module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
+MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
 
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 

From 1d1996509cd3f90551f4460a68aaf3dc940a05e3 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 4 Jun 2013 09:32:41 -0400
Subject: [PATCH 011/320] xen-blkback/sysfs: Move the parameters for the
 persistent grant features

to a testing subdirectory (as this value should not be baked
for the life-time) and also in an appropiate file.

Also modified the introduction Linux version from 3.10 to 3.11.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 Documentation/ABI/stable/sysfs-bus-xen-backend | 18 ------------------
 .../ABI/testing/sysfs-driver-xen-blkback       | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 18 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-xen-blkback

diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
index 947db11350bc..3d5951c8bf5f 100644
--- a/Documentation/ABI/stable/sysfs-bus-xen-backend
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -73,21 +73,3 @@ KernelVersion:	3.0
 Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Description:
                 Number of sectors written by the frontend.
-
-What:           /sys/module/xen_blkback/parameters/max_buffer_pages
-Date:           March 2013
-KernelVersion:  3.10
-Contact:        Roger Pau Monné <roger.pau@citrix.com>
-Description:
-                Maximum number of free pages to keep in each block
-                backend buffer.
-
-What:           /sys/module/xen_blkback/parameters/max_persistent_grants
-Date:           March 2013
-KernelVersion:  3.10
-Contact:        Roger Pau Monné <roger.pau@citrix.com>
-Description:
-                Maximum number of grants to map persistently in
-                blkback. If the frontend tries to use more than
-                max_persistent_grants, the LRU kicks in and starts
-                removing 5% of max_persistent_grants every 100ms.
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
new file mode 100644
index 000000000000..8bb43b66eb55
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@@ -0,0 +1,17 @@
+What:           /sys/module/xen_blkback/parameters/max_buffer_pages
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of free pages to keep in each block
+                backend buffer.
+
+What:           /sys/module/xen_blkback/parameters/max_persistent_grants
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of grants to map persistently in
+                blkback. If the frontend tries to use more than
+                max_persistent_grants, the LRU kicks in and starts
+                removing 5% of max_persistent_grants every 100ms.

From 7c4d7d710f7eb499ec483f25acc28b53adaa3260 Mon Sep 17 00:00:00 2001
From: Stefan Bader <stefan.bader@canonical.com>
Date: Mon, 13 May 2013 16:28:15 +0200
Subject: [PATCH 012/320] xen/blkback: Use physical sector size for setup

Currently xen-blkback passes the logical sector size over xenbus and
xen-blkfront sets up the paravirt disk with that logical block size.
But newer drives usually have the logical sector size set to 512 for
compatibility reasons and would show the actual sector size only in
physical sector size.
This results in the device being partitioned and accessed in dom0 with
the correct sector size, but the guest thinks 512 bytes is the correct
block size. And that results in poor performance.

To fix this, blkback gets modified to pass also physical-sector-size
over xenbus and blkfront to use both values to set up the paravirt
disk. I did not just change the passed in sector-size because I am
not sure having a bigger logical sector size than the physical one
is valid (and that would happen if a newer dom0 kernel hits an older
domU kernel). Also this way a domU set up before should still be
accessible (just some tools might detect the unaligned setup).

[v2: Make xenbus write failure non-fatal]
[v3: Use xenbus_scanf instead of xenbus_gather]
[v4: Rebased against segment changes]

Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/xenbus.c |  5 +++++
 drivers/block/xen-blkfront.c       | 21 ++++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4a4749c78942..7b06f943371c 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -782,6 +782,11 @@ static void connect(struct backend_info *be)
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
+			    bdev_physical_block_size(be->blkif->vbd.bdev));
+	if (err)
+		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
+				 dev->nodename);
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 08bdfc3b2de2..1a0f67c10ec7 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -609,6 +609,7 @@ static void do_blkif_request(struct request_queue *rq)
 }
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+				unsigned int physical_sector_size,
 				unsigned int segments)
 {
 	struct request_queue *rq;
@@ -631,6 +632,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
+	blk_queue_physical_block_size(rq, physical_sector_size);
 	blk_queue_max_hw_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
@@ -737,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)
 
 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 			       struct blkfront_info *info,
-			       u16 vdisk_info, u16 sector_size)
+			       u16 vdisk_info, u16 sector_size,
+			       unsigned int physical_sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
@@ -804,7 +807,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size,
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
 				 info->max_indirect_segments ? :
 				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		del_gendisk(gd);
@@ -1698,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info)
 {
 	unsigned long long sectors;
 	unsigned long sector_size;
+	unsigned int physical_sector_size;
 	unsigned int binfo;
 	int err;
 	int barrier, flush, discard, persistent;
@@ -1747,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
+	/*
+	 * physcial-sector-size is a newer field, so old backends may not
+	 * provide this. Assume physical sector size to be the same as
+	 * sector_size in that case.
+	 */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "physical-sector-size", "%u", &physical_sector_size);
+	if (err != 1)
+		physical_sector_size = sector_size;
+
 	info->feature_flush = 0;
 	info->flush_op = 0;
 
@@ -1800,7 +1814,8 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}
 
-	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+				  physical_sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);

From 604c499cbbcc3d5fe5fb8d53306aa0fae1990109 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 16 Jan 2013 11:33:52 -0500
Subject: [PATCH 013/320] xen/blkback: Check device permissions before allowing
 OP_DISCARD

We need to make sure that the device is not RO or that
the request is not past the number of sectors we want to
issue the DISCARD operation for.

This fixes CVE-2013-2140.

Cc: stable@vger.kernel.org
Acked-by: Jan Beulich <JBeulich@suse.com>
Acked-by: Ian Campbell <Ian.Campbell@citrix.com>
[v1: Made it pr_warn instead of pr_debug]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index e79ab4559233..4119bcdefd1a 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -876,7 +876,18 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
 	int status = BLKIF_RSP_OKAY;
 	struct block_device *bdev = blkif->vbd.bdev;
 	unsigned long secure;
+	struct phys_req preq;
 
+	preq.sector_number = req->u.discard.sector_number;
+	preq.nr_sects      = req->u.discard.nr_sectors;
+
+	err = xen_vbd_translate(&preq, blkif, WRITE);
+	if (err) {
+		pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n",
+			preq.sector_number,
+			preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
+		goto fail_response;
+	}
 	blkif->st_ds_req++;
 
 	xen_blkif_get(blkif);
@@ -887,7 +898,7 @@ static int dispatch_discard_io(struct xen_blkif *blkif,
 	err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
 				   req->u.discard.nr_sectors,
 				   GFP_KERNEL, secure);
-
+fail_response:
 	if (err == -EOPNOTSUPP) {
 		pr_debug(DRV_PFX "discard op failed, not supported\n");
 		status = BLKIF_RSP_EOPNOTSUPP;

From 8d9256906a97c24e97e016482b9be06ea2532b05 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 17 Jun 2013 15:16:33 -0400
Subject: [PATCH 014/320] xen/io/ring.h: new macro to detect whether there are
 too many requests on the ring

Backends may need to protect themselves against an insane number of
produced requests stored by a frontend, in case they iterate over
requests until reaching the req_prod value. There can't be more
requests on the ring than the difference between produced requests
and produced (but possibly not yet published) responses.

This is a more strict alternative to a patch previously posted by
Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/interface/io/ring.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 75271b9a8f61..7d28aff605c7 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -188,6 +188,11 @@ struct __name##_back_ring {						\
 #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)				\
     (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
 
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)               \
+    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+
 #define RING_PUSH_REQUESTS(_r) do {					\
     wmb(); /* back sees requests /before/ updated producer index */	\
     (_r)->sring->req_prod = (_r)->req_prod_pvt;				\

From 8e3f8755545cc4a7f4da8e9ef76d6d32e0dca576 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 23 Jan 2013 16:54:32 -0500
Subject: [PATCH 015/320] xen/blkback: Check for insane amounts of request on
 the ring (v6).

Check that the ring does not have an insane amount of requests
(more than there could fit on the ring).

If we detect this case we will stop processing the requests
and wait until the XenBus disconnects the ring.

The existing check RING_REQUEST_CONS_OVERFLOW which checks for how
many responses we have created in the past (rsp_prod_pvt) vs
requests consumed (req_cons) and whether said difference is greater or
equal to the size of the ring, does not catch this case.

Wha the condition does check if there is a need to process more
as we still have a backlog of responses to finish. Note that both
of those values (rsp_prod_pvt and req_cons) are not exposed on the
shared ring.

To understand this problem a mini crash course in ring protocol
response/request updates is in place.

There are four entries: req_prod and rsp_prod; req_event and rsp_event
to track the ring entries. We are only concerned about the first two -
which set the tone of this bug.

The req_prod is a value incremented by frontend for each request put
on the ring. Conversely the rsp_prod is a value incremented by the backend
for each response put on the ring (rsp_prod gets set by rsp_prod_pvt when
pushing the responses on the ring).  Both values can
wrap and are modulo the size of the ring (in block case that is 32).
Please see RING_GET_REQUEST and RING_GET_RESPONSE for the more details.

The culprit here is that if the difference between the
req_prod and req_cons is greater than the ring size we have a problem.
Fortunately for us, the '__do_block_io_op' loop:

	rc = blk_rings->common.req_cons;
	rp = blk_rings->common.sring->req_prod;

	while (rc != rp) {

		..
		blk_rings->common.req_cons = ++rc; /* before make_response() */

	}

will loop up to the point when rc == rp. The macros inside of the
loop (RING_GET_REQUEST) is smart and is indexing based on the modulo
of the ring size. If the frontend has provided a bogus req_prod value
we will loop until the 'rc == rp' - which means we could be processing
already processed requests (or responses) often.

The reason the RING_REQUEST_CONS_OVERFLOW is not helping here is
b/c it only tracks how many responses we have internally produced
and whether we would should process more. The astute reader will
notice that the macro RING_REQUEST_CONS_OVERFLOW provides two
arguments - more on this later.

For example, if we were to enter this function with these values:

       	blk_rings->common.sring->req_prod =  X+31415 (X is the value from
		the last time __do_block_io_op was called).
        blk_rings->common.req_cons = X
        blk_rings->common.rsp_prod_pvt = X

The RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, blk_rings->common.req_cons)
is doing:

	req_cons - rsp_prod_pvt >= 32

Which is,
	X - X >= 32 or 0 >= 32

And that is false, so we continue on looping (this bug).

If we re-use said macro RING_REQUEST_CONS_OVERFLOW and pass in the rp
instead (sring->req_prod) of rc, the this macro can do the check:

     req_prod - rsp_prov_pvt >= 32

Which is,
       X + 31415 - X >= 32 , or 31415 >= 32

which is true, so we can error out and break out of the function.

Unfortunatly the difference between rsp_prov_pvt and req_prod can be
at 32 (which would error out in the macro). This condition exists when
the backend is lagging behind with the responses and still has not finished
responding to all of them (so make_response has not been called), and
the rsp_prov_pvt + 32 == req_cons. This ends up with us not being able
to use said macro.

Hence introducing a new macro called RING_REQUEST_PROD_OVERFLOW which does
a simple check of:

    req_prod - rsp_prod_pvt > RING_SIZE

And with the X values from above:

   X + 31415 - X > 32

Returns true. Also not that if the ring is full (which is where
the RING_REQUEST_CONS_OVERFLOW triggered), we would not hit the
same condition:

   X + 32 - X > 32

Which is false.

Lets use that macro.
Note that in v5 of this patchset the macro was different - we used an
earlier version.

Cc: stable@vger.kernel.org
[v1: Move the check outside the loop]
[v2: Add a pr_warn as suggested by David]
[v3: Use RING_REQUEST_CONS_OVERFLOW as suggested by Jan]
[v4: Move wake_up after kthread_stop as suggested by Jan]
[v5: Use RING_REQUEST_PROD_OVERFLOW instead]
[v6: Use RING_REQUEST_PROD_OVERFLOW - Jan's version]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>

gadsa
---
 drivers/block/xen-blkback/blkback.c | 13 ++++++++++++-
 drivers/block/xen-blkback/common.h  |  2 ++
 drivers/block/xen-blkback/xenbus.c  |  2 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4119bcdefd1a..ea158fe0c9a4 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -571,6 +571,7 @@ int xen_blkif_schedule(void *arg)
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
 	unsigned long timeout;
+	int ret;
 
 	xen_blkif_get(blkif);
 
@@ -599,8 +600,12 @@ int xen_blkif_schedule(void *arg)
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret > 0)
 			blkif->waiting_reqs = 1;
+		if (ret == -EACCES)
+			wait_event_interruptible(blkif->shutdown_wq,
+						 kthread_should_stop());
 
 purge_gnt_list:
 		if (blkif->vbd.feature_gnt_persistent &&
@@ -1009,6 +1014,12 @@ __do_block_io_op(struct xen_blkif *blkif)
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
 
+	if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
+		rc = blk_rings->common.rsp_prod_pvt;
+		pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+			rp, rc, rp - rc, blkif->vbd.pdevice);
+		return -EACCES;
+	}
 	while (rc != rp) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index c6b4cb9af6c2..8d8807563d99 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -314,6 +314,8 @@ struct xen_blkif {
 	unsigned long long			st_wr_sect;
 
 	wait_queue_head_t	waiting_to_free;
+	/* Thread shutdown wait queue. */
+	wait_queue_head_t	shutdown_wq;
 };
 
 struct seg_buf {
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 7b06f943371c..2e5b69d612ac 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -151,6 +151,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	}
 	spin_lock_init(&blkif->pending_free_lock);
 	init_waitqueue_head(&blkif->pending_free_wq);
+	init_waitqueue_head(&blkif->shutdown_wq);
 
 	return blkif;
 
@@ -231,6 +232,7 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
 {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
+		wake_up(&blkif->shutdown_wq);
 		blkif->xenblkd = NULL;
 	}
 

From a3299ab18591d36ad5622f5064619123c439b779 Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:34:54 -0500
Subject: [PATCH 016/320] rsxx: Individual workqueues for interruptible events.

Giving all interrupt based events their own workqueue to complete
tasks on. This fixes a bug that would cause creg commands to timeout
if too many are issued at once.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/core.c      | 23 ++++++++++++++++++++---
 drivers/block/rsxx/cregs.c     |  5 +++++
 drivers/block/rsxx/rsxx_priv.h |  2 ++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 5af21f2db29c..774f810c6a9c 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -163,12 +163,13 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
 		}
 
 		if (isr & CR_INTR_CREG) {
-			schedule_work(&card->creg_ctrl.done_work);
+			queue_work(card->creg_ctrl.creg_wq,
+				   &card->creg_ctrl.done_work);
 			handled++;
 		}
 
 		if (isr & CR_INTR_EVENT) {
-			schedule_work(&card->event_work);
+			queue_work(card->event_wq, &card->event_work);
 			rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
 			handled++;
 		}
@@ -610,7 +611,11 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	}
 
 	/************* Setup Processor Command Interface *************/
-	rsxx_creg_setup(card);
+	st = rsxx_creg_setup(card);
+	if (st) {
+		dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
+		goto failed_creg_setup;
+	}
 
 	spin_lock_irq(&card->irq_lock);
 	rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
@@ -650,6 +655,12 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	}
 
 	/************* Setup Card Event Handler *************/
+	card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
+	if (!card->event_wq) {
+		dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+		goto failed_event_handler;
+	}
+
 	INIT_WORK(&card->event_work, card_event_handler);
 
 	st = rsxx_setup_dev(card);
@@ -688,9 +699,15 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 	return 0;
 
 failed_create_dev:
+	destroy_workqueue(card->event_wq);
+	card->event_wq = NULL;
+failed_event_handler:
 	rsxx_dma_destroy(card);
 failed_dma_setup:
 failed_compatiblity_check:
+	destroy_workqueue(card->creg_ctrl.creg_wq);
+	card->creg_ctrl.creg_wq = NULL;
+failed_creg_setup:
 	spin_lock_irq(&card->irq_lock);
 	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
 	spin_unlock_irq(&card->irq_lock);
diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
index 4b5c020a0a65..4914464c19fc 100644
--- a/drivers/block/rsxx/cregs.c
+++ b/drivers/block/rsxx/cregs.c
@@ -727,6 +727,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card)
 {
 	card->creg_ctrl.active_cmd = NULL;
 
+	card->creg_ctrl.creg_wq =
+			create_singlethread_workqueue(DRIVER_NAME"_creg");
+	if (!card->creg_ctrl.creg_wq)
+		return -ENOMEM;
+
 	INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
 	mutex_init(&card->creg_ctrl.reset_lock);
 	INIT_LIST_HEAD(&card->creg_ctrl.queue);
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 382e8bf5c03b..0dd62d966772 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -134,6 +134,7 @@ struct rsxx_cardinfo {
 		spinlock_t		lock;
 		bool			active;
 		struct creg_cmd		*active_cmd;
+		struct workqueue_struct	*creg_wq;
 		struct work_struct	done_work;
 		struct list_head	queue;
 		unsigned int		q_depth;
@@ -154,6 +155,7 @@ struct rsxx_cardinfo {
 		int buf_len;
 	} log;
 
+	struct workqueue_struct	*event_wq;
 	struct work_struct	event_work;
 	unsigned int		state;
 	u64			size8;

From 0ab4743ebc18c23bddf3e288cfc6221ec71533ac Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:36:26 -0500
Subject: [PATCH 017/320] rsxx: Restructured DMA cancel scheme.

Before, DMAs would never be cancelled if there was a data stall
or an EEH Permenant failure which would cause an unrecoverable
I/O hang.

The DMA cancellation mechanism has been modified to fix
these issues and allows DMAs to be cancelled during the
above mentioned events.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/core.c      |  17 +++-
 drivers/block/rsxx/dev.c       |   6 +-
 drivers/block/rsxx/dma.c       | 161 +++++++++++++++------------------
 drivers/block/rsxx/rsxx_priv.h |   4 +-
 4 files changed, 95 insertions(+), 93 deletions(-)

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 774f810c6a9c..aca3f198e5cd 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -368,15 +368,26 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
 {
 	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
 	int i;
+	int cnt = 0;
 
 	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
 
 	card->eeh_state = 1;
+	card->halt = 1;
 
-	for (i = 0; i < card->n_targets; i++)
-		del_timer_sync(&card->ctrl[i].activity_timer);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
+					     &card->ctrl[i].queue);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 
-	rsxx_eeh_cancel_dmas(card);
+		cnt += rsxx_dma_cancel(&card->ctrl[i]);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, card->ctrl[i].id);
+	}
 }
 
 static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 4346d17d2949..604ad2dafa43 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -155,7 +155,8 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
 		atomic_set(&meta->error, 1);
 
 	if (atomic_dec_and_test(&meta->pending_dmas)) {
-		disk_stats_complete(card, meta->bio, meta->start_time);
+		if (!card->eeh_state && card->gendisk)
+			disk_stats_complete(card, meta->bio, meta->start_time);
 
 		bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
 		kmem_cache_free(bio_meta_pool, meta);
@@ -196,7 +197,8 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 	atomic_set(&bio_meta->pending_dmas, 0);
 	bio_meta->start_time = jiffies;
 
-	disk_stats_start(card, bio);
+	if (!unlikely(card->halt))
+		disk_stats_start(card, bio);
 
 	dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
 		 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 0607513cfb41..213e40e4bd92 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -245,6 +245,22 @@ static void rsxx_complete_dma(struct rsxx_dma_ctrl *ctrl,
 	kmem_cache_free(rsxx_dma_pool, dma);
 }
 
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
+			   struct list_head *q)
+{
+	struct rsxx_dma *dma;
+	struct rsxx_dma *tmp;
+	int cnt = 0;
+
+	list_for_each_entry_safe(dma, tmp, q, list) {
+		list_del(&dma->list);
+		rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+		cnt++;
+	}
+
+	return cnt;
+}
+
 static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 				 struct rsxx_dma *dma)
 {
@@ -252,9 +268,9 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 	 * Requeued DMAs go to the front of the queue so they are issued
 	 * first.
 	 */
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
 	list_add(&dma->list, &ctrl->queue);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }
 
 static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
@@ -329,6 +345,7 @@ static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
 static void dma_engine_stalled(unsigned long data)
 {
 	struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+	int cnt;
 
 	if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
 	    unlikely(ctrl->card->eeh_state))
@@ -349,6 +366,18 @@ static void dma_engine_stalled(unsigned long data)
 			"DMA channel %d has stalled, faulting interface.\n",
 			ctrl->id);
 		ctrl->card->dma_fault = 1;
+
+		/* Clean up the DMA queue */
+		spin_lock(&ctrl->queue_lock);
+		cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock(&ctrl->queue_lock);
+
+		cnt += rsxx_dma_cancel(ctrl);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(ctrl->card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, ctrl->id);
 	}
 }
 
@@ -368,22 +397,22 @@ static void rsxx_issue_dmas(struct work_struct *work)
 		return;
 
 	while (1) {
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		if (list_empty(&ctrl->queue)) {
-			spin_unlock(&ctrl->queue_lock);
+			spin_unlock_bh(&ctrl->queue_lock);
 			break;
 		}
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);
 
 		tag = pop_tracker(ctrl->trackers);
 		if (tag == -1)
 			break;
 
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
 		list_del(&dma->list);
 		ctrl->stats.sw_q_depth--;
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);
 
 		/*
 		 * This will catch any DMAs that slipped in right before the
@@ -520,33 +549,10 @@ static void rsxx_dma_done(struct work_struct *work)
 	rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
 	spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
 
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
 	if (ctrl->stats.sw_q_depth)
 		queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
-	spin_unlock(&ctrl->queue_lock);
-}
-
-static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
-				      struct list_head *q)
-{
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int cnt = 0;
-
-	list_for_each_entry_safe(dma, tmp, q, list) {
-		list_del(&dma->list);
-
-		if (dma->dma_addr)
-			pci_unmap_page(card->dev, dma->dma_addr,
-				       get_dma_size(dma),
-				       (dma->cmd == HW_CMD_BLK_WRITE) ?
-				       PCI_DMA_TODEVICE :
-				       PCI_DMA_FROMDEVICE);
-		kmem_cache_free(rsxx_dma_pool, dma);
-		cnt++;
-	}
-
-	return cnt;
+	spin_unlock_bh(&ctrl->queue_lock);
 }
 
 static int rsxx_queue_discard(struct rsxx_cardinfo *card,
@@ -698,10 +704,10 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 
 	for (i = 0; i < card->n_targets; i++) {
 		if (!list_empty(&dma_list[i])) {
-			spin_lock(&card->ctrl[i].queue_lock);
+			spin_lock_bh(&card->ctrl[i].queue_lock);
 			card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
 			list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
-			spin_unlock(&card->ctrl[i].queue_lock);
+			spin_unlock_bh(&card->ctrl[i].queue_lock);
 
 			queue_work(card->ctrl[i].issue_wq,
 				   &card->ctrl[i].issue_dma_work);
@@ -711,8 +717,11 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 	return 0;
 
 bvec_err:
-	for (i = 0; i < card->n_targets; i++)
-		rsxx_cleanup_dma_queue(card, &dma_list[i]);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
+	}
 
 	return st;
 }
@@ -918,13 +927,30 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card)
 	return st;
 }
 
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
+{
+	struct rsxx_dma *dma;
+	int i;
+	int cnt = 0;
+
+	/* Clean up issued DMAs */
+	for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+		dma = get_tracker_dma(ctrl->trackers, i);
+		if (dma) {
+			atomic_dec(&ctrl->stats.hw_q_depth);
+			rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+			push_tracker(ctrl->trackers, i);
+			cnt++;
+		}
+	}
+
+	return cnt;
+}
 
 void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma_ctrl *ctrl;
-	struct rsxx_dma *dma;
-	int i, j;
-	int cnt = 0;
+	int i;
 
 	for (i = 0; i < card->n_targets; i++) {
 		ctrl = &card->ctrl[i];
@@ -943,33 +969,11 @@ void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 			del_timer_sync(&ctrl->activity_timer);
 
 		/* Clean up the DMA queue */
-		spin_lock(&ctrl->queue_lock);
-		cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
-		spin_unlock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
+		rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock_bh(&ctrl->queue_lock);
 
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d queued DMAs on channel %d\n",
-				cnt, i);
-
-		/* Clean up issued DMAs */
-		for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
-			dma = get_tracker_dma(ctrl->trackers, j);
-			if (dma) {
-				pci_unmap_page(card->dev, dma->dma_addr,
-					       get_dma_size(dma),
-					       (dma->cmd == HW_CMD_BLK_WRITE) ?
-					       PCI_DMA_TODEVICE :
-					       PCI_DMA_FROMDEVICE);
-				kmem_cache_free(rsxx_dma_pool, dma);
-				cnt++;
-			}
-		}
-
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d pending DMAs on channel %d\n",
-				cnt, i);
+		rsxx_dma_cancel(ctrl);
 
 		vfree(ctrl->trackers);
 
@@ -1013,7 +1017,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 			cnt++;
 		}
 
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_splice(&issued_dmas[i], &card->ctrl[i].queue);
 
 		atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
@@ -1028,7 +1032,7 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 					       PCI_DMA_TODEVICE :
 					       PCI_DMA_FROMDEVICE);
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}
 
 	kfree(issued_dmas);
@@ -1036,30 +1040,13 @@ int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card)
 	return 0;
 }
 
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
-{
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int i;
-
-	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
-		list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
-			list_del(&dma->list);
-
-			rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
-		}
-		spin_unlock(&card->ctrl[i].queue_lock);
-	}
-}
-
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma *dma;
 	int i;
 
 	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_for_each_entry(dma, &card->ctrl[i].queue, list) {
 			dma->dma_addr = pci_map_page(card->dev, dma->page,
 					dma->pg_off, get_dma_size(dma),
@@ -1067,12 +1054,12 @@ int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 					PCI_DMA_TODEVICE :
 					PCI_DMA_FROMDEVICE);
 			if (!dma->dma_addr) {
-				spin_unlock(&card->ctrl[i].queue_lock);
+				spin_unlock_bh(&card->ctrl[i].queue_lock);
 				kmem_cache_free(rsxx_dma_pool, dma);
 				return -ENOMEM;
 			}
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}
 
 	return 0;
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 0dd62d966772..60b6ed6779ac 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -39,6 +39,7 @@
 #include <linux/vmalloc.h>
 #include <linux/timer.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>
 
 #include "rsxx.h"
 #include "rsxx_cfg.h"
@@ -374,6 +375,8 @@ typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card,
 int rsxx_dma_setup(struct rsxx_cardinfo *card);
 void rsxx_dma_destroy(struct rsxx_cardinfo *card);
 int rsxx_dma_init(void);
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
@@ -384,7 +387,6 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
 			   void *cb_data);
 int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
 int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
 
 /***** cregs.c *****/

From 31a70bb4440c963e69ce210389d8119c70b5c39d Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:38:26 -0500
Subject: [PATCH 018/320] rsxx: Fixes soft-lockup issues during DMAs.

The workqueue mechanism has been reworked to prevent soft
lockup issues from occuring by adding in mutex sychronization.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dma.c       | 35 ++++++++++++++++++++++++++--------
 drivers/block/rsxx/rsxx_priv.h |  1 +
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 213e40e4bd92..b485a65b8de1 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -381,15 +381,13 @@ static void dma_engine_stalled(unsigned long data)
 	}
 }
 
-static void rsxx_issue_dmas(struct work_struct *work)
+static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	int tag;
 	int cmds_pending = 0;
 	struct hw_cmd *hw_cmd_buf;
 
-	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 	hw_cmd_buf = ctrl->cmd.buf;
 
 	if (unlikely(ctrl->card->halt) ||
@@ -469,9 +467,8 @@ static void rsxx_issue_dmas(struct work_struct *work)
 	}
 }
 
-static void rsxx_dma_done(struct work_struct *work)
+static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	unsigned long flags;
 	u16 count;
@@ -479,7 +476,6 @@ static void rsxx_dma_done(struct work_struct *work)
 	u8 tag;
 	struct hw_status *hw_st_buf;
 
-	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
 	hw_st_buf = ctrl->status.buf;
 
 	if (unlikely(ctrl->card->halt) ||
@@ -555,6 +551,28 @@ static void rsxx_dma_done(struct work_struct *work)
 	spin_unlock_bh(&ctrl->queue_lock);
 }
 
+static void rsxx_schedule_issue(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
+
+	mutex_lock(&ctrl->work_lock);
+	rsxx_issue_dmas(ctrl);
+	mutex_unlock(&ctrl->work_lock);
+}
+
+static void rsxx_schedule_done(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+
+	mutex_lock(&ctrl->work_lock);
+	rsxx_dma_done(ctrl);
+	mutex_unlock(&ctrl->work_lock);
+}
+
 static int rsxx_queue_discard(struct rsxx_cardinfo *card,
 				  struct list_head *q,
 				  unsigned int laddr,
@@ -789,6 +807,7 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 	spin_lock_init(&ctrl->trackers->lock);
 
 	spin_lock_init(&ctrl->queue_lock);
+	mutex_init(&ctrl->work_lock);
 	INIT_LIST_HEAD(&ctrl->queue);
 
 	setup_timer(&ctrl->activity_timer, dma_engine_stalled,
@@ -802,8 +821,8 @@ static int rsxx_dma_ctrl_init(struct pci_dev *dev,
 	if (!ctrl->done_wq)
 		return -ENOMEM;
 
-	INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
-	INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+	INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
+	INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);
 
 	st = rsxx_hw_buffers_init(dev, ctrl);
 	if (st)
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 60b6ed6779ac..c968a6918b6e 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -115,6 +115,7 @@ struct rsxx_dma_ctrl {
 	struct timer_list		activity_timer;
 	struct dma_tracker_list		*trackers;
 	struct rsxx_dma_stats		stats;
+	struct mutex			work_lock;
 };
 
 struct rsxx_cardinfo {

From 7b379cc3785bfa827249a265548a055e934eaaea Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:39:44 -0500
Subject: [PATCH 019/320] rsxx: Allow block size to be determined by
 configuration.

Previously, the block size was determined by whether or not
our Hardware could handle 512 byte accesses. Now, all of our
Hardware can handle 512 and 4096 block sizes.

This fix allows it to be user configurable.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dev.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 604ad2dafa43..1fa7cccd6866 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -227,24 +227,6 @@ static bool rsxx_discard_supported(struct rsxx_cardinfo *card)
 	return (pci_rev >= RSXX_DISCARD_SUPPORT);
 }
 
-static unsigned short rsxx_get_logical_block_size(
-					struct rsxx_cardinfo *card)
-{
-	u32 capabilities = 0;
-	int st;
-
-	st = rsxx_get_card_capabilities(card, &capabilities);
-	if (st)
-		dev_warn(CARD_TO_DEV(card),
-			"Failed reading card capabilities register\n");
-
-	/* Earlier firmware did not have support for 512 byte accesses */
-	if (capabilities & CARD_CAP_SUBPAGE_WRITES)
-		return 512;
-	else
-		return RSXX_HW_BLK_SIZE;
-}
-
 int rsxx_attach_dev(struct rsxx_cardinfo *card)
 {
 	mutex_lock(&card->dev_lock);
@@ -307,7 +289,7 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card)
 		return -ENOMEM;
 	}
 
-	blk_size = rsxx_get_logical_block_size(card);
+	blk_size = card->config.data.block_size;
 
 	blk_queue_make_request(card->queue, rsxx_make_request);
 	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);

From fb065cd9e0058551b08d6d32ff0494848c9e213d Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:42:36 -0500
Subject: [PATCH 020/320] rsxx: Adding in sync_start module paramenter.

Before, the partition table would have to be reread because our
card was attached before it transistioned out of it's 'starting'
state.

This change will cause the driver to wait to attach the device
until the adapter is ready.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/core.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index aca3f198e5cd..0f1be41ccfa8 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -39,6 +39,7 @@
 #include "rsxx_cfg.h"
 
 #define NO_LEGACY 0
+#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
 
 MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
 MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
@@ -49,6 +50,11 @@ static unsigned int force_legacy = NO_LEGACY;
 module_param(force_legacy, uint, 0444);
 MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
 
+static unsigned int sync_start = 1;
+module_param(sync_start, uint, 0444);
+MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
+			     "until the card startup has completed.");
+
 static DEFINE_IDA(rsxx_disk_ida);
 static DEFINE_SPINLOCK(rsxx_ida_lock);
 
@@ -540,6 +546,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 {
 	struct rsxx_cardinfo *card;
 	int st;
+	unsigned int sync_timeout;
 
 	dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
 
@@ -698,6 +705,33 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 		if (st)
 			dev_crit(CARD_TO_DEV(card),
 				"Failed issuing card startup\n");
+		if (sync_start) {
+			sync_timeout = SYNC_START_TIMEOUT;
+
+			dev_info(CARD_TO_DEV(card),
+				 "Waiting for card to startup\n");
+
+			do {
+				ssleep(1);
+				sync_timeout--;
+
+				rsxx_get_card_state(card, &card->state);
+			} while (sync_timeout &&
+				(card->state == CARD_STATE_STARTING));
+
+			if (card->state == CARD_STATE_STARTING) {
+				dev_warn(CARD_TO_DEV(card),
+					 "Card startup timed out\n");
+				card->size8 = 0;
+			} else {
+				dev_info(CARD_TO_DEV(card),
+					"card state: %s\n",
+					rsxx_card_state_to_str(card->state));
+				st = rsxx_get_card_size8(card, &card->size8);
+				if (st)
+					card->size8 = 0;
+			}
+		}
 	} else if (card->state == CARD_STATE_GOOD ||
 		   card->state == CARD_STATE_RD_ONLY_FAULT) {
 		st = rsxx_get_card_size8(card, &card->size8);

From f730e3dc6dc4698d55fd9bf6de33a5436900e9bd Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:43:58 -0500
Subject: [PATCH 021/320] rsxx: Changing the adapter name to the official name.

Changing the adapter name from FlashSystem-80 to the official
name: Flash Adapter 900GB Full Height.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 MAINTAINERS               |  2 +-
 drivers/block/Kconfig     |  4 ++--
 drivers/block/rsxx/core.c | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3d7782b9f90d..75e52e341a67 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3297,7 +3297,7 @@ F:	Documentation/firmware_class/
 F:	drivers/base/firmware*.c
 F:	include/linux/firmware.h
 
-FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card)
+FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
 M:	Joshua Morris <josh.h.morris@us.ibm.com>
 M:	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
 S:	Maintained
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index b81ddfea1da0..e07a5fd58ad7 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -532,11 +532,11 @@ config BLK_DEV_RBD
 	  If unsure, say N.
 
 config BLK_DEV_RSXX
-	tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
+	tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
 	depends on PCI
 	help
 	  Device driver for IBM's high speed PCIe SSD
-	  storage devices: FlashSystem-70 and FlashSystem-80.
+	  storage device: Flash Adapter 900GB Full Height.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rsxx.
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 0f1be41ccfa8..bd763f426774 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -41,7 +41,7 @@
 #define NO_LEGACY 0
 #define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
 
-MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
+MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
 MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRIVER_VERSION);
@@ -336,7 +336,7 @@ static int rsxx_eeh_frozen(struct pci_dev *dev)
 	int i;
 	int st;
 
-	dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
+	dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");
 
 	card->eeh_state = 1;
 	rsxx_mask_interrupts(card);
@@ -376,7 +376,7 @@ static void rsxx_eeh_failure(struct pci_dev *dev)
 	int i;
 	int cnt = 0;
 
-	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
+	dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");
 
 	card->eeh_state = 1;
 	card->halt = 1;
@@ -450,7 +450,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
 	int st;
 
 	dev_warn(&dev->dev,
-		"IBM FlashSystem PCI: recovering from slot reset.\n");
+		"IBM Flash Adapter PCI: recovering from slot reset.\n");
 
 	st = pci_enable_device(dev);
 	if (st)
@@ -503,7 +503,7 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
 				&card->ctrl[i].issue_dma_work);
 	}
 
-	dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
+	dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");
 
 	return PCI_ERS_RESULT_RECOVERED;
 

From 66bc600363acd0acae84e878e5a06e7b7a38c014 Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:46:04 -0500
Subject: [PATCH 022/320] rsxx: Fixes DLPAR add kernel panic if partition still
 mounted.

A kernel panic would occur on a DLPAR add if there was a partition
still mounted during the DLPAR remove. This bug fix will allow the
user to unmount the partition and bring the driver back into a
good state after the DLPAR add.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 1fa7cccd6866..a092f58db212 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -171,6 +171,9 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 
 	might_sleep();
 
+	if (!card)
+		goto req_err;
+
 	if (unlikely(card->halt)) {
 		st = -EFAULT;
 		goto req_err;
@@ -331,6 +334,7 @@ void rsxx_destroy_dev(struct rsxx_cardinfo *card)
 	card->gendisk = NULL;
 
 	blk_cleanup_queue(card->queue);
+	card->queue->queuedata = NULL;
 	unregister_blkdev(card->major, DRIVER_NAME);
 }
 

From 3eb8dcafb5a73041e2f3b4a39c057a58e4354d83 Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:48:38 -0500
Subject: [PATCH 023/320] rsxx: Adapter address space sanity check.

Adding a sanity check to guarentee that DMAs outside of the device's
address space will be errored out right away.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index a092f58db212..d7af441880be 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -174,6 +174,9 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio)
 	if (!card)
 		goto req_err;
 
+	if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+		goto req_err;
+
 	if (unlikely(card->halt)) {
 		st = -EFAULT;
 		goto req_err;

From b8b225da139f5770d7689b189fd5debc58f4b35d Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:49:48 -0500
Subject: [PATCH 024/320] rsxx: Adding EEH check inside cregs timeout.

Unfortunaly, our CPU register path does not do any kind of
EEH error checking. So to fix this issue, an ioread32 was
added to the CPU register timeout code. This way, the
driver can check to see if the timeout was caused by an EEH
error or not. This is a dummy read.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/cregs.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
index 4914464c19fc..926dce9c452f 100644
--- a/drivers/block/rsxx/cregs.c
+++ b/drivers/block/rsxx/cregs.c
@@ -431,6 +431,15 @@ static int __issue_creg_rw(struct rsxx_cardinfo *card,
 	*hw_stat = completion.creg_status;
 
 	if (completion.st) {
+		/*
+		* This read is needed to verify that there has not been any
+		* extreme errors that might have occurred, i.e. EEH. The
+		* function iowrite32 will not detect EEH errors, so it is
+		* necessary that we recover if such an error is the reason
+		* for the timeout. This is a dummy read.
+		*/
+		ioread32(card->regmap + SCRATCH);
+
 		dev_warn(CARD_TO_DEV(card),
 			"creg command failed(%d x%08x)\n",
 			completion.st, addr);

From 62302508f2986720ad73494dd8037dff1c4f77d1 Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:50:48 -0500
Subject: [PATCH 025/320] rsxx: Fixes incorrect stats calculation.

Fixing incorrect stats calculation during read retries.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/dma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index b485a65b8de1..bed32f16b084 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -269,6 +269,7 @@ static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 	 * first.
 	 */
 	spin_lock_bh(&ctrl->queue_lock);
+	ctrl->stats.sw_q_depth++;
 	list_add(&dma->list, &ctrl->queue);
 	spin_unlock_bh(&ctrl->queue_lock);
 }

From 36f988e978f81ffa415df4d77bbcd8887917f25c Mon Sep 17 00:00:00 2001
From: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Date: Tue, 18 Jun 2013 14:52:21 -0500
Subject: [PATCH 026/320] rsxx: Adding in debugfs entries.

Adding debugfs entries to help with debugging and testing and
testing code.

pci_regs:
       	This entry will spit out all of the data stored on the BAR.

stats:
       	This entry will display all of the driver stats for each
       	DMA channel.

cram:
	This will allow read/write ability to the CRAM address space
	on our adapter's CPU.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rsxx/core.c      | 275 +++++++++++++++++++++++++++++++++
 drivers/block/rsxx/rsxx_priv.h |   3 +
 2 files changed, 278 insertions(+)

diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index bd763f426774..6e85e21445eb 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -31,6 +31,8 @@
 #include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <linux/genhd.h>
 #include <linux/idr.h>
@@ -58,6 +60,274 @@ MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
 static DEFINE_IDA(rsxx_disk_ida);
 static DEFINE_SPINLOCK(rsxx_ida_lock);
 
+/* --------------------Debugfs Setup ------------------- */
+
+struct rsxx_cram {
+	u32 f_pos;
+	u32 offset;
+	void *i_private;
+};
+
+static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+
+	seq_printf(m, "HWID		0x%08x\n",
+					ioread32(card->regmap + HWID));
+	seq_printf(m, "SCRATCH		0x%08x\n",
+					ioread32(card->regmap + SCRATCH));
+	seq_printf(m, "IER		0x%08x\n",
+					ioread32(card->regmap + IER));
+	seq_printf(m, "IPR		0x%08x\n",
+					ioread32(card->regmap + IPR));
+	seq_printf(m, "CREG_CMD		0x%08x\n",
+					ioread32(card->regmap + CREG_CMD));
+	seq_printf(m, "CREG_ADD		0x%08x\n",
+					ioread32(card->regmap + CREG_ADD));
+	seq_printf(m, "CREG_CNT		0x%08x\n",
+					ioread32(card->regmap + CREG_CNT));
+	seq_printf(m, "CREG_STAT	0x%08x\n",
+					ioread32(card->regmap + CREG_STAT));
+	seq_printf(m, "CREG_DATA0	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA0));
+	seq_printf(m, "CREG_DATA1	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA1));
+	seq_printf(m, "CREG_DATA2	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA2));
+	seq_printf(m, "CREG_DATA3	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA3));
+	seq_printf(m, "CREG_DATA4	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA4));
+	seq_printf(m, "CREG_DATA5	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA5));
+	seq_printf(m, "CREG_DATA6	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA6));
+	seq_printf(m, "CREG_DATA7	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA7));
+	seq_printf(m, "INTR_COAL	0x%08x\n",
+					ioread32(card->regmap + INTR_COAL));
+	seq_printf(m, "HW_ERROR		0x%08x\n",
+					ioread32(card->regmap + HW_ERROR));
+	seq_printf(m, "DEBUG0		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG0));
+	seq_printf(m, "DEBUG1		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG1));
+	seq_printf(m, "DEBUG2		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG2));
+	seq_printf(m, "DEBUG3		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG3));
+	seq_printf(m, "DEBUG4		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG4));
+	seq_printf(m, "DEBUG5		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG5));
+	seq_printf(m, "DEBUG6		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG6));
+	seq_printf(m, "DEBUG7		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG7));
+	seq_printf(m, "RECONFIG		0x%08x\n",
+					ioread32(card->regmap + PCI_RECONFIG));
+
+	return 0;
+}
+
+static int rsxx_attr_stats_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+	int i;
+
+	for (i = 0; i < card->n_targets; i++) {
+		seq_printf(m, "Ctrl %d CRC Errors	= %d\n",
+				i, card->ctrl[i].stats.crc_errors);
+		seq_printf(m, "Ctrl %d Hard Errors	= %d\n",
+				i, card->ctrl[i].stats.hard_errors);
+		seq_printf(m, "Ctrl %d Soft Errors	= %d\n",
+				i, card->ctrl[i].stats.soft_errors);
+		seq_printf(m, "Ctrl %d Writes Issued	= %d\n",
+				i, card->ctrl[i].stats.writes_issued);
+		seq_printf(m, "Ctrl %d Writes Failed	= %d\n",
+				i, card->ctrl[i].stats.writes_failed);
+		seq_printf(m, "Ctrl %d Reads Issued	= %d\n",
+				i, card->ctrl[i].stats.reads_issued);
+		seq_printf(m, "Ctrl %d Reads Failed	= %d\n",
+				i, card->ctrl[i].stats.reads_failed);
+		seq_printf(m, "Ctrl %d Reads Retried	= %d\n",
+				i, card->ctrl[i].stats.reads_retried);
+		seq_printf(m, "Ctrl %d Discards Issued	= %d\n",
+				i, card->ctrl[i].stats.discards_issued);
+		seq_printf(m, "Ctrl %d Discards Failed	= %d\n",
+				i, card->ctrl[i].stats.discards_failed);
+		seq_printf(m, "Ctrl %d DMA SW Errors	= %d\n",
+				i, card->ctrl[i].stats.dma_sw_err);
+		seq_printf(m, "Ctrl %d DMA HW Faults	= %d\n",
+				i, card->ctrl[i].stats.dma_hw_fault);
+		seq_printf(m, "Ctrl %d DMAs Cancelled	= %d\n",
+				i, card->ctrl[i].stats.dma_cancelled);
+		seq_printf(m, "Ctrl %d SW Queue Depth	= %d\n",
+				i, card->ctrl[i].stats.sw_q_depth);
+		seq_printf(m, "Ctrl %d HW Queue Depth	= %d\n",
+			i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
+	}
+
+	return 0;
+}
+
+static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_stats_show, inode->i_private);
+}
+
+static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
+}
+
+static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
+			      size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st)
+		return st;
+
+	st = copy_to_user(ubuf, buf, cnt);
+	if (st)
+		return st;
+
+	info->offset += cnt;
+
+	kfree(buf);
+
+	return cnt;
+}
+
+static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	st = copy_from_user(buf, ubuf, cnt);
+	if (st)
+		return st;
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st)
+		return st;
+
+	info->offset += cnt;
+
+	kfree(buf);
+
+	return cnt;
+}
+
+static int rsxx_cram_open(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->i_private = inode->i_private;
+	info->f_pos = file->f_pos;
+	file->private_data = info;
+
+	return 0;
+}
+
+static int rsxx_cram_release(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = file->private_data;
+
+	if (!info)
+		return 0;
+
+	kfree(info);
+	file->private_data = NULL;
+
+	return 0;
+}
+
+static const struct file_operations debugfs_cram_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_cram_open,
+	.read		= rsxx_cram_read,
+	.write		= rsxx_cram_write,
+	.release	= rsxx_cram_release,
+};
+
+static const struct file_operations debugfs_stats_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations debugfs_pci_regs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_pci_regs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
+{
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_pci_regs;
+	struct dentry *debugfs_cram;
+
+	card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
+	if (IS_ERR_OR_NULL(card->debugfs_dir))
+		goto failed_debugfs_dir;
+
+	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+					    card->debugfs_dir, card,
+					    &debugfs_stats_fops);
+	if (IS_ERR_OR_NULL(debugfs_stats))
+		goto failed_debugfs_stats;
+
+	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+					       card->debugfs_dir, card,
+					       &debugfs_pci_regs_fops);
+	if (IS_ERR_OR_NULL(debugfs_pci_regs))
+		goto failed_debugfs_pci_regs;
+
+	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+					   card->debugfs_dir, card,
+					   &debugfs_cram_fops);
+	if (IS_ERR_OR_NULL(debugfs_cram))
+		goto failed_debugfs_cram;
+
+	return;
+failed_debugfs_cram:
+	debugfs_remove(debugfs_pci_regs);
+failed_debugfs_pci_regs:
+	debugfs_remove(debugfs_stats);
+failed_debugfs_stats:
+	debugfs_remove(card->debugfs_dir);
+failed_debugfs_dir:
+	card->debugfs_dir = NULL;
+}
+
 /*----------------- Interrupt Control & Handling -------------------*/
 
 static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
@@ -741,6 +1011,9 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 
 	rsxx_attach_dev(card);
 
+	/************* Setup Debugfs *************/
+	rsxx_debugfs_dev_new(card);
+
 	return 0;
 
 failed_create_dev:
@@ -818,6 +1091,8 @@ static void rsxx_pci_remove(struct pci_dev *dev)
 	/* Prevent work_structs from re-queuing themselves. */
 	card->halt = 1;
 
+	debugfs_remove_recursive(card->debugfs_dir);
+
 	free_irq(dev->irq, card);
 
 	if (!force_legacy)
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index c968a6918b6e..5ad5055a4104 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -185,6 +185,8 @@ struct rsxx_cardinfo {
 
 	int			n_targets;
 	struct rsxx_dma_ctrl	*ctrl;
+
+	struct dentry		*debugfs_dir;
 };
 
 enum rsxx_pci_regmap {
@@ -287,6 +289,7 @@ enum rsxx_creg_addr {
 	CREG_ADD_CAPABILITIES		= 0x80001050,
 	CREG_ADD_LOG			= 0x80002000,
 	CREG_ADD_NUM_TARGETS		= 0x80003000,
+	CREG_ADD_CRAM			= 0xA0000000,
 	CREG_ADD_CONFIG			= 0xB0000000,
 };
 

From 2d9105433ff471d2c688817e98804029c074a623 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Fri, 21 Jun 2013 12:56:53 +0200
Subject: [PATCH 027/320] xen-blkback: workaround compiler bug in gcc 4.1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code generat with gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-54)
creates an unbound loop for the second foreach_grant_safe loop in
purge_persistent_gnt.

The workaround is to avoid having this second loop and instead
perform all the work inside the first loop by adding a new variable,
clean_used, that will be set when all the desired persistent grants
have been removed and we need to iterate over the remaining ones to
remove the WAS_ACTIVE flag.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reported-by: Tom O'Neill <toneill@vmem.com>
Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index ea158fe0c9a4..4662217c61be 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -341,7 +341,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 	struct persistent_gnt *persistent_gnt;
 	struct rb_node *n;
 	unsigned int num_clean, total;
-	bool scan_used = false;
+	bool scan_used = false, clean_used = false;
 	struct rb_root *root;
 
 	if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
@@ -358,9 +358,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 	num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
 	num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
 	num_clean = min(blkif->persistent_gnt_c, num_clean);
-	if (num_clean >
-	    (blkif->persistent_gnt_c -
-	    atomic_read(&blkif->persistent_gnt_in_use)))
+	if ((num_clean == 0) ||
+	    (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
 		return;
 
 	/*
@@ -383,6 +382,11 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 		BUG_ON(persistent_gnt->handle ==
 			BLKBACK_INVALID_HANDLE);
 
+		if (clean_used) {
+			clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+			continue;
+		}
+
 		if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
 			continue;
 		if (!scan_used &&
@@ -400,18 +404,18 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 	 * grants that were used since last purge in order to cope
 	 * with the requested num
 	 */
-	if (!scan_used) {
+	if (!scan_used && !clean_used) {
 		pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
 		scan_used = true;
 		goto purge_list;
 	}
 finished:
-	/* Remove the "used" flag from all the persistent grants */
-	foreach_grant_safe(persistent_gnt, n, root, node) {
-		BUG_ON(persistent_gnt->handle ==
-			BLKBACK_INVALID_HANDLE);
-		clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+	if (!clean_used) {
+		pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n");
+		clean_used = true;
+		goto purge_list;
 	}
+
 	blkif->persistent_gnt_c -= (total - num_clean);
 	blkif->vbd.overflow_max_grants = 0;
 

From 294caaf29c26cfed3b446fb46393b8b39ea1c0d3 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Fri, 21 Jun 2013 12:56:54 +0200
Subject: [PATCH 028/320] xen-blkfront: set blk_queue_max_hw_sectors correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that indirect segments are enabled blk_queue_max_hw_sectors must
be set to match the maximum number of sectors we can handle in a
request.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reported-by: Felipe Franciosi <felipe.franciosi@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 1a0f67c10ec7..2e1ee348ffe1 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -633,7 +633,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_physical_block_size(rq, physical_sector_size);
-	blk_queue_max_hw_sectors(rq, 512);
+	blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);

From 1e0f7a21b2fffc70f27cc4a454c60321501045b1 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Sat, 22 Jun 2013 09:59:17 +0200
Subject: [PATCH 029/320] xen-blkback: check the number of iovecs before
 allocating a bios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the introduction of indirect segments we can receive requests
with a number of segments bigger than the maximum number of allowed
iovecs in a bios, so make sure that blkback doesn't try to allocate a
bios with more iovecs than BIO_MAX_PAGES

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkback/blkback.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4662217c61be..bf4b9d282c04 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1247,7 +1247,8 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
 
-			bio = bio_alloc(GFP_KERNEL, nseg-i);
+			int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
+			bio = bio_alloc(GFP_KERNEL, nr_iovecs);
 			if (unlikely(bio == NULL))
 				goto fail_put_bio;
 

From 5c694129c8db6d89c9be109049a16510b2f70f6d Mon Sep 17 00:00:00 2001
From: Kumar Amit Mehta <gmate.amit@gmail.com>
Date: Tue, 28 May 2013 00:31:15 -0700
Subject: [PATCH 030/320] md: bcache: io.c: fix a potential NULL pointer
 dereference

bio_alloc_bioset returns NULL on failure. This fix adds a missing check
for potential NULL pointer dereferencing.

Signed-off-by: Kumar Amit Mehta <gmate.amit@gmail.com>
Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/io.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 48efd4dea645..d285cd49104c 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -97,6 +97,8 @@ struct bio *bch_bio_split(struct bio *bio, int sectors,
 
 	if (bio->bi_rw & REQ_DISCARD) {
 		ret = bio_alloc_bioset(gfp, 1, bs);
+		if (!ret)
+			return NULL;
 		idx = 0;
 		goto out;
 	}

From bbc77aa7fb72e616edcb1b165c293e47c6d4d0cf Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Tue, 28 May 2013 21:53:19 -0700
Subject: [PATCH 031/320] bcache: fix a spurious gcc complaint, use scnprintf

An old version of gcc was complaining about using a const int as the
size of a stack allocated array. Which should be fine - but using
ARRAY_SIZE() is better, anyways.

Also, refactor the code to use scnprintf().

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/sysfs.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4d9cca47e4c6..29228b8a6ffe 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -665,12 +665,10 @@ SHOW(__bch_cache)
 		int cmp(const void *l, const void *r)
 		{	return *((uint16_t *) r) - *((uint16_t *) l); }
 
-		/* Number of quantiles we compute */
-		const unsigned nq = 31;
-
 		size_t n = ca->sb.nbuckets, i, unused, btree;
 		uint64_t sum = 0;
-		uint16_t q[nq], *p, *cached;
+		/* Compute 31 quantiles */
+		uint16_t q[31], *p, *cached;
 		ssize_t ret;
 
 		cached = p = vmalloc(ca->sb.nbuckets * sizeof(uint16_t));
@@ -703,26 +701,29 @@ SHOW(__bch_cache)
 		if (n)
 			do_div(sum, n);
 
-		for (i = 0; i < nq; i++)
-			q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)];
+		for (i = 0; i < ARRAY_SIZE(q); i++)
+			q[i] = INITIAL_PRIO - cached[n * (i + 1) /
+				(ARRAY_SIZE(q) + 1)];
 
 		vfree(p);
 
-		ret = snprintf(buf, PAGE_SIZE,
-			       "Unused:		%zu%%\n"
-			       "Metadata:	%zu%%\n"
-			       "Average:	%llu\n"
-			       "Sectors per Q:	%zu\n"
-			       "Quantiles:	[",
-			       unused * 100 / (size_t) ca->sb.nbuckets,
-			       btree * 100 / (size_t) ca->sb.nbuckets, sum,
-			       n * ca->sb.bucket_size / (nq + 1));
+		ret = scnprintf(buf, PAGE_SIZE,
+				"Unused:		%zu%%\n"
+				"Metadata:	%zu%%\n"
+				"Average:	%llu\n"
+				"Sectors per Q:	%zu\n"
+				"Quantiles:	[",
+				unused * 100 / (size_t) ca->sb.nbuckets,
+				btree * 100 / (size_t) ca->sb.nbuckets, sum,
+				n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
 
-		for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++)
-			ret += snprintf(buf + ret, PAGE_SIZE - ret,
-					i < nq - 1 ? "%u " : "%u]\n", q[i]);
+		for (i = 0; i < ARRAY_SIZE(q); i++)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "%u ", q[i]);
+		ret--;
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");
 
-		buf[PAGE_SIZE - 1] = '\0';
 		return ret;
 	}
 

From a9dd53adbb84c12f769a862ba2c80404873c2c99 Mon Sep 17 00:00:00 2001
From: Gabriel de Perthuis <g2p.code@gmail.com>
Date: Sat, 4 May 2013 12:19:41 +0200
Subject: [PATCH 032/320] bcache: Warn when a device is already registered.

Signed-off-by: Gabriel de Perthuis <g2p.code+bcache@gmail.com>
Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/super.c | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f88e2b653a3f..3de5626919ef 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1786,6 +1786,36 @@ static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
 kobj_attribute_write(register,		register_bcache);
 kobj_attribute_write(register_quiet,	register_bcache);
 
+static bool bch_is_open_backing(struct block_device *bdev) {
+	struct cache_set *c, *tc;
+	struct cached_dev *dc, *t;
+
+	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
+			if (dc->bdev == bdev)
+				return true;
+	list_for_each_entry_safe(dc, t, &uncached_devices, list)
+		if (dc->bdev == bdev)
+			return true;
+	return false;
+}
+
+static bool bch_is_open_cache(struct block_device *bdev) {
+	struct cache_set *c, *tc;
+	struct cache *ca;
+	unsigned i;
+
+	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+		for_each_cache(ca, c, i)
+			if (ca->bdev == bdev)
+				return true;
+	return false;
+}
+
+static bool bch_is_open(struct block_device *bdev) {
+	return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
+}
+
 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 			       const char *buffer, size_t size)
 {
@@ -1810,8 +1840,13 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 				  FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 				  sb);
 	if (IS_ERR(bdev)) {
-		if (bdev == ERR_PTR(-EBUSY))
-			err = "device busy";
+		if (bdev == ERR_PTR(-EBUSY)) {
+			bdev = lookup_bdev(strim(path));
+			if (!IS_ERR(bdev) && bch_is_open(bdev))
+				err = "device already registered";
+			else
+				err = "device busy";
+		}
 		goto err;
 	}
 

From 119ba0f82839cd80eaef3e6991988f1403965d5b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Wed, 24 Apr 2013 19:01:12 -0700
Subject: [PATCH 033/320] bcache: Convert allocator thread to kthread

Using a workqueue when we just want a single thread is a bit silly.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/alloc.c  | 34 ++++++++++++++++++++++------------
 drivers/md/bcache/bcache.h | 17 +++++++++++------
 drivers/md/bcache/btree.c  |  6 +++---
 drivers/md/bcache/super.c  | 19 +++++++------------
 4 files changed, 43 insertions(+), 33 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 048f2947e08b..38428f46ea74 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -63,6 +63,7 @@
 #include "bcache.h"
 #include "btree.h"
 
+#include <linux/kthread.h>
 #include <linux/random.h>
 
 #define MAX_IN_FLIGHT_DISCARDS		8U
@@ -151,7 +152,7 @@ static void discard_finish(struct work_struct *w)
 	mutex_unlock(&ca->set->bucket_lock);
 
 	closure_wake_up(&ca->set->bucket_wait);
-	wake_up(&ca->set->alloc_wait);
+	wake_up_process(ca->alloc_thread);
 
 	closure_put(&ca->set->cl);
 }
@@ -358,30 +359,26 @@ static void invalidate_buckets(struct cache *ca)
 
 #define allocator_wait(ca, cond)					\
 do {									\
-	DEFINE_WAIT(__wait);						\
-									\
 	while (1) {							\
-		prepare_to_wait(&ca->set->alloc_wait,			\
-				&__wait, TASK_INTERRUPTIBLE);		\
+		set_current_state(TASK_INTERRUPTIBLE);			\
 		if (cond)						\
 			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
 		if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) {	\
-			finish_wait(&ca->set->alloc_wait, &__wait);	\
-			closure_return(cl);				\
+			closure_put(&ca->set->cl);			\
+			return 0;					\
 		}							\
 									\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
 	}								\
-									\
-	finish_wait(&ca->set->alloc_wait, &__wait);			\
+	__set_current_state(TASK_RUNNING);				\
 } while (0)
 
-void bch_allocator_thread(struct closure *cl)
+static int bch_allocator_thread(void *arg)
 {
-	struct cache *ca = container_of(cl, struct cache, alloc);
+	struct cache *ca = arg;
 
 	mutex_lock(&ca->set->bucket_lock);
 
@@ -442,7 +439,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
 {
 	long r = -1;
 again:
-	wake_up(&ca->set->alloc_wait);
+	wake_up_process(ca->alloc_thread);
 
 	if (fifo_used(&ca->free) > ca->watermark[watermark] &&
 	    fifo_pop(&ca->free, r)) {
@@ -552,6 +549,19 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
 
 /* Init */
 
+int bch_cache_allocator_start(struct cache *ca)
+{
+	ca->alloc_thread = kthread_create(bch_allocator_thread,
+					  ca, "bcache_allocator");
+	if (IS_ERR(ca->alloc_thread))
+		return PTR_ERR(ca->alloc_thread);
+
+	closure_get(&ca->set->cl);
+	wake_up_process(ca->alloc_thread);
+
+	return 0;
+}
+
 void bch_cache_allocator_exit(struct cache *ca)
 {
 	struct discard *d;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d3e15b42a4ab..166c8ddc0be4 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -565,8 +565,7 @@ struct cache {
 
 	unsigned		watermark[WATERMARK_MAX];
 
-	struct closure		alloc;
-	struct workqueue_struct	*alloc_workqueue;
+	struct task_struct	*alloc_thread;
 
 	struct closure		prio;
 	struct prio_set		*disk_buckets;
@@ -703,9 +702,6 @@ struct cache_set {
 	/* For the btree cache */
 	struct shrinker		shrink;
 
-	/* For the allocator itself */
-	wait_queue_head_t	alloc_wait;
-
 	/* For the btree cache and anything allocation related */
 	struct mutex		bucket_lock;
 
@@ -1173,6 +1169,15 @@ static inline uint8_t bucket_disk_gen(struct bucket *b)
 	static struct kobj_attribute ksysfs_##n =			\
 		__ATTR(n, S_IWUSR|S_IRUSR, show, store)
 
+static inline void wake_up_allocators(struct cache_set *c)
+{
+	struct cache *ca;
+	unsigned i;
+
+	for_each_cache(ca, c, i)
+		wake_up_process(ca->alloc_thread);
+}
+
 /* Forward declarations */
 
 void bch_writeback_queue(struct cached_dev *);
@@ -1193,7 +1198,6 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
 bool bch_bucket_add_unused(struct cache *, struct bucket *);
-void bch_allocator_thread(struct closure *);
 
 long bch_bucket_alloc(struct cache *, unsigned, struct closure *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
@@ -1244,6 +1248,7 @@ int bch_btree_cache_alloc(struct cache_set *);
 void bch_cached_dev_writeback_init(struct cached_dev *);
 void bch_moving_init_cache_set(struct cache_set *);
 
+int bch_cache_allocator_start(struct cache *ca);
 void bch_cache_allocator_exit(struct cache *ca);
 int bch_cache_allocator_init(struct cache *ca);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7a5658f04e62..45b88fbffbe0 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -273,7 +273,7 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
 {
 	if (w->prio_blocked &&
 	    !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked))
-		wake_up(&b->c->alloc_wait);
+		wake_up_allocators(b->c);
 
 	if (w->journal) {
 		atomic_dec_bug(w->journal);
@@ -984,7 +984,7 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 
 	if (b->prio_blocked &&
 	    !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
-		wake_up(&b->c->alloc_wait);
+		wake_up_allocators(b->c);
 
 	b->prio_blocked = 0;
 
@@ -1547,7 +1547,7 @@ static void bch_btree_gc(struct closure *cl)
 	blktrace_msg_all(c, "Finished gc");
 
 	trace_bcache_gc_end(c->sb.set_uuid);
-	wake_up(&c->alloc_wait);
+	wake_up_allocators(c);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3de5626919ef..aaeda235fc75 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1282,7 +1282,7 @@ static void cache_set_flush(struct closure *cl)
 
 	/* Shut down allocator threads */
 	set_bit(CACHE_SET_STOPPING_2, &c->flags);
-	wake_up(&c->alloc_wait);
+	wake_up_allocators(c);
 
 	bch_cache_accounting_destroy(&c->accounting);
 
@@ -1373,7 +1373,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 		c->btree_pages = max_t(int, c->btree_pages / 4,
 				       BTREE_MAX_PAGES);
 
-	init_waitqueue_head(&c->alloc_wait);
 	mutex_init(&c->bucket_lock);
 	mutex_init(&c->fill_lock);
 	mutex_init(&c->sort_lock);
@@ -1496,9 +1495,10 @@ static void run_cache_set(struct cache_set *c)
 		 */
 		bch_journal_next(&c->journal);
 
+		err = "error starting allocator thread";
 		for_each_cache(ca, c, i)
-			closure_call(&ca->alloc, bch_allocator_thread,
-				     system_wq, &c->cl);
+			if (bch_cache_allocator_start(ca))
+				goto err;
 
 		/*
 		 * First place it's safe to allocate: btree_check() and
@@ -1531,17 +1531,16 @@ static void run_cache_set(struct cache_set *c)
 
 		bch_btree_gc_finish(c);
 
+		err = "error starting allocator thread";
 		for_each_cache(ca, c, i)
-			closure_call(&ca->alloc, bch_allocator_thread,
-				     ca->alloc_workqueue, &c->cl);
+			if (bch_cache_allocator_start(ca))
+				goto err;
 
 		mutex_lock(&c->bucket_lock);
 		for_each_cache(ca, c, i)
 			bch_prio_write(ca);
 		mutex_unlock(&c->bucket_lock);
 
-		wake_up(&c->alloc_wait);
-
 		err = "cannot allocate new UUID bucket";
 		if (__uuid_write(c))
 			goto err_unlock_gc;
@@ -1673,9 +1672,6 @@ void bch_cache_release(struct kobject *kobj)
 
 	bio_split_pool_free(&ca->bio_split_hook);
 
-	if (ca->alloc_workqueue)
-		destroy_workqueue(ca->alloc_workqueue);
-
 	free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
 	kfree(ca->prio_buckets);
 	vfree(ca->buckets);
@@ -1723,7 +1719,6 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 	    !(ca->prio_buckets	= kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
 					  2, GFP_KERNEL)) ||
 	    !(ca->disk_buckets	= alloc_bucket_pages(GFP_KERNEL, ca)) ||
-	    !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
 	    bio_split_pool_init(&ca->bio_split_hook))
 		return -ENOMEM;
 

From 5794351146199b9ac67a5ab1beab82be8bfd7b5d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Thu, 25 Apr 2013 13:58:35 -0700
Subject: [PATCH 034/320] bcache: Refactor btree io

The most significant change is that btree reads are now done
synchronously, instead of asynchronously and doing the post read stuff
from a workqueue.

This was originally done because we can't block on IO under
generic_make_request(). But - we already have a mechanism to punt cache
lookups to workqueue if needed, so if we just use that we don't have to
deal with the complexity of doing things asynchronously.

The main benefit is this makes the locking situation saner; we can hold
our write lock on the btree node until we're finished reading it, and we
don't need that btree_node_read_done() flag anymore.

Also, for writes, btree_write() was broken out into btree_node_write()
and btree_leaf_dirty() - the old code with the boolean argument was dumb
and confusing.

The prio_blocked mechanism was improved a bit too, now the only counter
is in struct btree_write, we don't mess with transfering a count from
struct btree anymore.

This required changing garbage collection to block prios at the start
and unblock when it finishes, which is cleaner than what it was doing
anyways (the old code had mostly the same effect, but was doing it in a
convoluted way)

And the btree iter btree_node_read_done() uses was converted to a real
mempool.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bcache.h  |   5 +-
 drivers/md/bcache/btree.c   | 278 ++++++++++++++++--------------------
 drivers/md/bcache/btree.h   |  19 +--
 drivers/md/bcache/debug.c   |   4 +-
 drivers/md/bcache/journal.c |   2 +-
 drivers/md/bcache/super.c   |  12 +-
 6 files changed, 142 insertions(+), 178 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 166c8ddc0be4..ad4957b52f10 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -819,10 +819,9 @@ struct cache_set {
 
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
-	 * on the stack - this is a single element mempool for btree_read_work()
+	 * on the stack - have to dynamically allocate them
 	 */
-	struct mutex		fill_lock;
-	struct btree_iter	*fill_iter;
+	mempool_t		*fill_iter;
 
 	/*
 	 * btree_sort() is a merge sort and requires temporary space - single
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 45b88fbffbe0..aaec186f7ba6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -134,44 +134,17 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i)
 	return crc ^ 0xffffffffffffffffULL;
 }
 
-static void btree_bio_endio(struct bio *bio, int error)
+void bch_btree_node_read_done(struct btree *b)
 {
-	struct closure *cl = bio->bi_private;
-	struct btree *b = container_of(cl, struct btree, io.cl);
-
-	if (error)
-		set_btree_node_io_error(b);
-
-	bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE)
-				 ? "writing btree" : "reading btree");
-	closure_put(cl);
-}
-
-static void btree_bio_init(struct btree *b)
-{
-	BUG_ON(b->bio);
-	b->bio = bch_bbio_alloc(b->c);
-
-	b->bio->bi_end_io	= btree_bio_endio;
-	b->bio->bi_private	= &b->io.cl;
-}
-
-void bch_btree_read_done(struct closure *cl)
-{
-	struct btree *b = container_of(cl, struct btree, io.cl);
-	struct bset *i = b->sets[0].data;
-	struct btree_iter *iter = b->c->fill_iter;
 	const char *err = "bad btree header";
-	BUG_ON(b->nsets || b->written);
+	struct bset *i = b->sets[0].data;
+	struct btree_iter *iter;
 
-	bch_bbio_free(b->bio, b->c);
-	b->bio = NULL;
-
-	mutex_lock(&b->c->fill_lock);
+	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
 	iter->used = 0;
 
-	if (btree_node_io_error(b) ||
-	    !i->seq)
+	if (!i->seq)
 		goto err;
 
 	for (;
@@ -228,17 +201,8 @@ void bch_btree_read_done(struct closure *cl)
 	if (b->written < btree_blocks(b))
 		bch_bset_init_next(b);
 out:
-
-	mutex_unlock(&b->c->fill_lock);
-
-	spin_lock(&b->c->btree_read_time_lock);
-	bch_time_stats_update(&b->c->btree_read_time, b->io_start_time);
-	spin_unlock(&b->c->btree_read_time_lock);
-
-	smp_wmb(); /* read_done is our write lock */
-	set_btree_node_read_done(b);
-
-	closure_return(cl);
+	mempool_free(iter, b->c->fill_iter);
+	return;
 err:
 	set_btree_node_io_error(b);
 	bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys",
@@ -247,26 +211,51 @@ void bch_btree_read_done(struct closure *cl)
 	goto out;
 }
 
-void bch_btree_read(struct btree *b)
+static void btree_node_read_endio(struct bio *bio, int error)
 {
-	BUG_ON(b->nsets || b->written);
+	struct closure *cl = bio->bi_private;
+	closure_put(cl);
+}
 
-	if (!closure_trylock(&b->io.cl, &b->c->cl))
-		BUG();
-
-	b->io_start_time = local_clock();
-
-	btree_bio_init(b);
-	b->bio->bi_rw	= REQ_META|READ_SYNC;
-	b->bio->bi_size	= KEY_SIZE(&b->key) << 9;
-
-	bch_bio_map(b->bio, b->sets[0].data);
+void bch_btree_node_read(struct btree *b)
+{
+	uint64_t start_time = local_clock();
+	struct closure cl;
+	struct bio *bio;
 
+	closure_init_stack(&cl);
 	pr_debug("%s", pbtree(b));
-	trace_bcache_btree_read(b->bio);
-	bch_submit_bbio(b->bio, b->c, &b->key, 0);
 
-	continue_at(&b->io.cl, bch_btree_read_done, system_wq);
+	bio = bch_bbio_alloc(b->c);
+	bio->bi_rw	= REQ_META|READ_SYNC;
+	bio->bi_size	= KEY_SIZE(&b->key) << 9;
+	bio->bi_end_io	= btree_node_read_endio;
+	bio->bi_private	= &cl;
+
+	bch_bio_map(bio, b->sets[0].data);
+
+	trace_bcache_btree_read(bio);
+	bch_submit_bbio(bio, b->c, &b->key, 0);
+	closure_sync(&cl);
+
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		set_btree_node_io_error(b);
+
+	bch_bbio_free(bio, b->c);
+
+	if (btree_node_io_error(b))
+		goto err;
+
+	bch_btree_node_read_done(b);
+
+	spin_lock(&b->c->btree_read_time_lock);
+	bch_time_stats_update(&b->c->btree_read_time, start_time);
+	spin_unlock(&b->c->btree_read_time_lock);
+
+	return;
+err:
+	bch_cache_set_error(b->c, "io error reading bucket %lu",
+			    PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
 static void btree_complete_write(struct btree *b, struct btree_write *w)
@@ -280,15 +269,11 @@ static void btree_complete_write(struct btree *b, struct btree_write *w)
 		__closure_wake_up(&b->c->journal.wait);
 	}
 
-	if (w->owner)
-		closure_put(w->owner);
-
 	w->prio_blocked	= 0;
 	w->journal	= NULL;
-	w->owner	= NULL;
 }
 
-static void __btree_write_done(struct closure *cl)
+static void __btree_node_write_done(struct closure *cl)
 {
 	struct btree *b = container_of(cl, struct btree, io.cl);
 	struct btree_write *w = btree_prev_write(b);
@@ -304,7 +289,7 @@ static void __btree_write_done(struct closure *cl)
 	closure_return(cl);
 }
 
-static void btree_write_done(struct closure *cl)
+static void btree_node_write_done(struct closure *cl)
 {
 	struct btree *b = container_of(cl, struct btree, io.cl);
 	struct bio_vec *bv;
@@ -313,10 +298,22 @@ static void btree_write_done(struct closure *cl)
 	__bio_for_each_segment(bv, b->bio, n, 0)
 		__free_page(bv->bv_page);
 
-	__btree_write_done(cl);
+	__btree_node_write_done(cl);
 }
 
-static void do_btree_write(struct btree *b)
+static void btree_node_write_endio(struct bio *bio, int error)
+{
+	struct closure *cl = bio->bi_private;
+	struct btree *b = container_of(cl, struct btree, io.cl);
+
+	if (error)
+		set_btree_node_io_error(b);
+
+	bch_bbio_count_io_errors(b->c, bio, error, "writing btree");
+	closure_put(cl);
+}
+
+static void do_btree_node_write(struct btree *b)
 {
 	struct closure *cl = &b->io.cl;
 	struct bset *i = b->sets[b->nsets].data;
@@ -325,7 +322,11 @@ static void do_btree_write(struct btree *b)
 	i->version	= BCACHE_BSET_VERSION;
 	i->csum		= btree_csum_set(b, i);
 
-	btree_bio_init(b);
+	BUG_ON(b->bio);
+	b->bio = bch_bbio_alloc(b->c);
+
+	b->bio->bi_end_io	= btree_node_write_endio;
+	b->bio->bi_private	= &b->io.cl;
 	b->bio->bi_rw	= REQ_META|WRITE_SYNC;
 	b->bio->bi_size	= set_blocks(i, b->c) * block_bytes(b->c);
 	bch_bio_map(b->bio, i);
@@ -345,7 +346,7 @@ static void do_btree_write(struct btree *b)
 		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
-		continue_at(cl, btree_write_done, NULL);
+		continue_at(cl, btree_node_write_done, NULL);
 	} else {
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
@@ -354,26 +355,30 @@ static void do_btree_write(struct btree *b)
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
-		__btree_write_done(cl);
+		__btree_node_write_done(cl);
 	}
 }
 
-static void __btree_write(struct btree *b)
+void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
 
 	BUG_ON(current->bio_list);
+	BUG_ON(b->written >= btree_blocks(b));
+	BUG_ON(b->written && !i->keys);
+	BUG_ON(b->sets->data->seq != i->seq);
 
-	closure_lock(&b->io, &b->c->cl);
 	cancel_delayed_work(&b->work);
 
+	/* If caller isn't waiting for write, parent refcount is cache set */
+	closure_lock(&b->io, parent ?: &b->c->cl);
+
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
 	bch_check_key_order(b, i);
-	BUG_ON(b->written && !i->keys);
 
-	do_btree_write(b);
+	do_btree_node_write(b);
 
 	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
 
@@ -387,37 +392,31 @@ static void __btree_write(struct btree *b)
 		bch_bset_init_next(b);
 }
 
-static void btree_write_work(struct work_struct *w)
+static void btree_node_write_work(struct work_struct *w)
 {
 	struct btree *b = container_of(to_delayed_work(w), struct btree, work);
 
-	down_write(&b->lock);
+	rw_lock(true, b, b->level);
 
 	if (btree_node_dirty(b))
-		__btree_write(b);
-	up_write(&b->lock);
+		bch_btree_node_write(b, NULL);
+	rw_unlock(true, b);
 }
 
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
+static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op)
 {
 	struct bset *i = b->sets[b->nsets].data;
 	struct btree_write *w = btree_current_write(b);
 
-	BUG_ON(b->written &&
-	       (b->written >= btree_blocks(b) ||
-		i->seq != b->sets[0].data->seq ||
-		!i->keys));
+	BUG_ON(!b->written);
+	BUG_ON(!i->keys);
 
-	if (!btree_node_dirty(b)) {
-		set_btree_node_dirty(b);
-		queue_delayed_work(btree_io_wq, &b->work,
-				   msecs_to_jiffies(30000));
-	}
+	if (!btree_node_dirty(b))
+		queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
 
-	w->prio_blocked += b->prio_blocked;
-	b->prio_blocked = 0;
+	set_btree_node_dirty(b);
 
-	if (op && op->journal && !b->level) {
+	if (op && op->journal) {
 		if (w->journal &&
 		    journal_pin_cmp(b->c, w, op)) {
 			atomic_dec_bug(w->journal);
@@ -430,23 +429,10 @@ void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
 		}
 	}
 
-	if (current->bio_list)
-		return;
-
 	/* Force write if set is too big */
-	if (now ||
-	    b->level ||
-	    set_bytes(i) > PAGE_SIZE - 48) {
-		if (op && now) {
-			/* Must wait on multiple writes */
-			BUG_ON(w->owner);
-			w->owner = &op->cl;
-			closure_get(&op->cl);
-		}
-
-		__btree_write(b);
-	}
-	BUG_ON(!b->written);
+	if (set_bytes(i) > PAGE_SIZE - 48 &&
+	    !current->bio_list)
+		bch_btree_node_write(b, NULL);
 }
 
 /*
@@ -559,7 +545,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 	init_rwsem(&b->lock);
 	lockdep_set_novalidate_class(&b->lock);
 	INIT_LIST_HEAD(&b->list);
-	INIT_DELAYED_WORK(&b->work, btree_write_work);
+	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
 	b->c = c;
 	closure_init_unlocked(&b->io);
 
@@ -582,7 +568,7 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
 	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
 
 	if (cl && btree_node_dirty(b))
-		bch_btree_write(b, true, NULL);
+		bch_btree_node_write(b, NULL);
 
 	if (cl)
 		closure_wait_event_async(&b->io.wait, cl,
@@ -905,6 +891,9 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
 	b = mca_find(c, k);
 
 	if (!b) {
+		if (current->bio_list)
+			return ERR_PTR(-EAGAIN);
+
 		mutex_lock(&c->bucket_lock);
 		b = mca_alloc(c, k, level, &op->cl);
 		mutex_unlock(&c->bucket_lock);
@@ -914,7 +903,7 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
 		if (IS_ERR(b))
 			return b;
 
-		bch_btree_read(b);
+		bch_btree_node_read(b);
 
 		if (!write)
 			downgrade_write(&b->lock);
@@ -937,15 +926,12 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct bkey *k,
 	for (; i <= b->nsets; i++)
 		prefetch(b->sets[i].data);
 
-	if (!closure_wait_event(&b->io.wait, &op->cl,
-				btree_node_read_done(b))) {
+	if (btree_node_io_error(b)) {
 		rw_unlock(write, b);
-		b = ERR_PTR(-EAGAIN);
-	} else if (btree_node_io_error(b)) {
-		rw_unlock(write, b);
-		b = ERR_PTR(-EIO);
-	} else
-		BUG_ON(!b->written);
+		return ERR_PTR(-EIO);
+	}
+
+	BUG_ON(!b->written);
 
 	return b;
 }
@@ -959,7 +945,7 @@ static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
 	mutex_unlock(&c->bucket_lock);
 
 	if (!IS_ERR_OR_NULL(b)) {
-		bch_btree_read(b);
+		bch_btree_node_read(b);
 		rw_unlock(true, b);
 	}
 }
@@ -982,12 +968,6 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 		btree_complete_write(b, btree_current_write(b));
 	clear_bit(BTREE_NODE_dirty, &b->flags);
 
-	if (b->prio_blocked &&
-	    !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
-		wake_up_allocators(b->c);
-
-	b->prio_blocked = 0;
-
 	cancel_delayed_work(&b->work);
 
 	mutex_lock(&b->c->bucket_lock);
@@ -1028,7 +1008,6 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
 		goto retry;
 	}
 
-	set_btree_node_read_done(b);
 	b->accessed = 1;
 	bch_bset_init_next(b);
 
@@ -1166,14 +1145,11 @@ static struct btree *btree_gc_alloc(struct btree *b, struct bkey *k,
 
 	if (!IS_ERR_OR_NULL(n)) {
 		swap(b, n);
+		__bkey_put(b->c, &b->key);
 
 		memcpy(k->ptr, b->key.ptr,
 		       sizeof(uint64_t) * KEY_PTRS(&b->key));
 
-		__bkey_put(b->c, &b->key);
-		atomic_inc(&b->c->prio_blocked);
-		b->prio_blocked++;
-
 		btree_node_free(n, op);
 		up_write(&n->lock);
 	}
@@ -1293,14 +1269,9 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 	void write(struct btree *r)
 	{
 		if (!r->written)
-			bch_btree_write(r, true, op);
-		else if (btree_node_dirty(r)) {
-			BUG_ON(btree_current_write(r)->owner);
-			btree_current_write(r)->owner = writes;
-			closure_get(writes);
-
-			bch_btree_write(r, true, NULL);
-		}
+			bch_btree_node_write(r, &op->cl);
+		else if (btree_node_dirty(r))
+			bch_btree_node_write(r, writes);
 
 		up_write(&r->lock);
 	}
@@ -1386,9 +1357,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op,
 		ret = btree_gc_recurse(b, op, writes, gc);
 
 	if (!b->written || btree_node_dirty(b)) {
-		atomic_inc(&b->c->prio_blocked);
-		b->prio_blocked++;
-		bch_btree_write(b, true, n ? op : NULL);
+		bch_btree_node_write(b, n ? &op->cl : NULL);
 	}
 
 	if (!IS_ERR_OR_NULL(n)) {
@@ -1508,8 +1477,8 @@ static void bch_btree_gc(struct closure *cl)
 	struct gc_stat stats;
 	struct closure writes;
 	struct btree_op op;
-
 	uint64_t start_time = local_clock();
+
 	trace_bcache_gc_start(c->sb.set_uuid);
 	blktrace_msg_all(c, "Starting gc");
 
@@ -1520,6 +1489,8 @@ static void bch_btree_gc(struct closure *cl)
 
 	btree_gc_start(c);
 
+	atomic_inc(&c->prio_blocked);
+
 	ret = btree_root(gc_root, c, &op, &writes, &stats);
 	closure_sync(&op.cl);
 	closure_sync(&writes);
@@ -1537,6 +1508,9 @@ static void bch_btree_gc(struct closure *cl)
 
 	available = bch_btree_gc_finish(c);
 
+	atomic_dec(&c->prio_blocked);
+	wake_up_allocators(c);
+
 	bch_time_stats_update(&c->btree_gc_time, start_time);
 
 	stats.key_bytes *= sizeof(uint64_t);
@@ -1544,10 +1518,9 @@ static void bch_btree_gc(struct closure *cl)
 	stats.data	<<= 9;
 	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
-	blktrace_msg_all(c, "Finished gc");
 
+	blktrace_msg_all(c, "Finished gc");
 	trace_bcache_gc_end(c->sb.set_uuid);
-	wake_up_allocators(c);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1857,7 +1830,7 @@ copy:	bkey_copy(m, k);
 				op_type(op), pbtree(b), pkey(k));
 
 	if (b->level && !KEY_OFFSET(k))
-		b->prio_blocked++;
+		btree_current_write(b)->prio_blocked++;
 
 	pr_debug("%s for %s at %s: %s", status,
 		 op_type(op), pbtree(b), pkey(k));
@@ -1907,7 +1880,6 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
 
 	BUG_ON(op->type != BTREE_INSERT);
 	BUG_ON(!btree_insert_key(b, op, &tmp.k));
-	bch_btree_write(b, false, NULL);
 	ret = true;
 out:
 	downgrade_write(&b->lock);
@@ -1967,18 +1939,18 @@ static int btree_split(struct btree *b, struct btree_op *op)
 		bkey_copy_key(&n2->key, &b->key);
 
 		bch_keylist_add(&op->keys, &n2->key);
-		bch_btree_write(n2, true, op);
+		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
 	} else
 		bch_btree_insert_keys(n1, op);
 
 	bch_keylist_add(&op->keys, &n1->key);
-	bch_btree_write(n1, true, op);
+	bch_btree_node_write(n1, &op->cl);
 
 	if (n3) {
 		bkey_copy_key(&n3->key, &MAX_KEY);
 		bch_btree_insert_keys(n3, op);
-		bch_btree_write(n3, true, op);
+		bch_btree_node_write(n3, &op->cl);
 
 		closure_sync(&op->cl);
 		bch_btree_set_root(n3);
@@ -2082,8 +2054,12 @@ static int bch_btree_insert_recurse(struct btree *b, struct btree_op *op,
 
 		BUG_ON(write_block(b) != b->sets[b->nsets].data);
 
-		if (bch_btree_insert_keys(b, op))
-			bch_btree_write(b, false, op);
+		if (bch_btree_insert_keys(b, op)) {
+			if (!b->level)
+				bch_btree_leaf_dirty(b, op);
+			else
+				bch_btree_node_write(b, &op->cl);
+		}
 	}
 
 	return 0;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index af4a7092a28c..809bd77847a2 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -102,7 +102,6 @@
 #include "debug.h"
 
 struct btree_write {
-	struct closure		*owner;
 	atomic_t		*journal;
 
 	/* If btree_split() frees a btree node, it writes a new pointer to that
@@ -142,16 +141,12 @@ struct btree {
 	 */
 	struct bset_tree	sets[MAX_BSETS];
 
-	/* Used to refcount bio splits, also protects b->bio */
+	/* For outstanding btree writes, used as a lock - protects write_idx */
 	struct closure_with_waitlist	io;
 
-	/* Gets transferred to w->prio_blocked - see the comment there */
-	int			prio_blocked;
-
 	struct list_head	list;
 	struct delayed_work	work;
 
-	uint64_t		io_start_time;
 	struct btree_write	writes[2];
 	struct bio		*bio;
 };
@@ -164,13 +159,11 @@ static inline void set_btree_node_ ## flag(struct btree *b)		\
 {	set_bit(BTREE_NODE_ ## flag, &b->flags); }			\
 
 enum btree_flags {
-	BTREE_NODE_read_done,
 	BTREE_NODE_io_error,
 	BTREE_NODE_dirty,
 	BTREE_NODE_write_idx,
 };
 
-BTREE_FLAG(read_done);
 BTREE_FLAG(io_error);
 BTREE_FLAG(dirty);
 BTREE_FLAG(write_idx);
@@ -293,9 +286,7 @@ static inline void rw_unlock(bool w, struct btree *b)
 #ifdef CONFIG_BCACHE_EDEBUG
 	unsigned i;
 
-	if (w &&
-	    b->key.ptr[0] &&
-	    btree_node_read_done(b))
+	if (w && b->key.ptr[0])
 		for (i = 0; i <= b->nsets; i++)
 			bch_check_key_order(b, b->sets[i].data);
 #endif
@@ -370,9 +361,9 @@ static inline bool should_split(struct btree *b)
 		 > btree_blocks(b));
 }
 
-void bch_btree_read_done(struct closure *);
-void bch_btree_read(struct btree *);
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op);
+void bch_btree_node_read(struct btree *);
+void bch_btree_node_read_done(struct btree *);
+void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_cannibalize_unlock(struct cache_set *, struct closure *);
 void bch_btree_set_root(struct btree *);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 89fd5204924e..ae6096c6845d 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -144,7 +144,7 @@ void bch_btree_verify(struct btree *b, struct bset *new)
 	v->written = 0;
 	v->level = b->level;
 
-	bch_btree_read(v);
+	bch_btree_node_read(v);
 	closure_wait_event(&v->io.wait, &cl,
 			   atomic_read(&b->io.cl.remaining) == -1);
 
@@ -512,7 +512,7 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
 
 		bch_btree_sort(b);
 		fill->written = 0;
-		bch_btree_read_done(&fill->io.cl);
+		bch_btree_node_read_done(fill);
 
 		if (b->sets[0].data->keys != fill->sets[0].data->keys ||
 		    memcmp(b->sets[0].data->start,
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 8c8dfdcd9d4c..970d819d4350 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -384,7 +384,7 @@ static void btree_flush_write(struct cache_set *c)
 		return;
 found:
 	if (btree_node_dirty(best))
-		bch_btree_write(best, true, NULL);
+		bch_btree_node_write(best, NULL);
 	rw_unlock(true, best);
 }
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index aaeda235fc75..e53f89988b08 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1255,9 +1255,10 @@ static void cache_set_free(struct closure *cl)
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
 	free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
 
-	kfree(c->fill_iter);
 	if (c->bio_split)
 		bioset_free(c->bio_split);
+	if (c->fill_iter)
+		mempool_destroy(c->fill_iter);
 	if (c->bio_meta)
 		mempool_destroy(c->bio_meta);
 	if (c->search)
@@ -1295,7 +1296,7 @@ static void cache_set_flush(struct closure *cl)
 	/* Should skip this if we're unregistering because of an error */
 	list_for_each_entry(b, &c->btree_cache, list)
 		if (btree_node_dirty(b))
-			bch_btree_write(b, true, NULL);
+			bch_btree_node_write(b, NULL);
 
 	closure_return(cl);
 }
@@ -1374,7 +1375,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 				       BTREE_MAX_PAGES);
 
 	mutex_init(&c->bucket_lock);
-	mutex_init(&c->fill_lock);
 	mutex_init(&c->sort_lock);
 	spin_lock_init(&c->sort_time_lock);
 	closure_init_unlocked(&c->sb_write);
@@ -1400,8 +1400,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	    !(c->bio_meta = mempool_create_kmalloc_pool(2,
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
+	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
 	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-	    !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
 	    !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    bch_journal_alloc(c) ||
@@ -1409,8 +1409,6 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 	    bch_open_buckets_alloc(c))
 		goto err;
 
-	c->fill_iter->size = sb->bucket_size / sb->block_size;
-
 	c->congested_read_threshold_us	= 2000;
 	c->congested_write_threshold_us	= 20000;
 	c->error_limit	= 8 << IO_ERROR_SHIFT;
@@ -1551,7 +1549,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err_unlock_gc;
 
 		bkey_copy_key(&c->root->key, &MAX_KEY);
-		bch_btree_write(c->root, true, &op);
+		bch_btree_node_write(c->root, &op.cl);
 
 		bch_btree_set_root(c->root);
 		rw_unlock(true, c->root);

From c37511b863f36c1cc6e18440717fd4cc0e881b8a Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Fri, 26 Apr 2013 15:39:55 -0700
Subject: [PATCH 035/320] bcache: Fix/revamp tracepoints

The tracepoints were reworked to be more sensible, and fixed a null
pointer deref in one of the tracepoints.

Converted some of the pr_debug()s to tracepoints - this is partly a
performance optimization; it used to be that with DEBUG or
CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro; but at some point it
was changed to an empty inline function.

Some of the pr_debug() statements had rather expensive function calls as
part of the arguments, so this code was getting run unnecessarily even
on non debug kernels - in some fast paths, too.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/alloc.c     |  10 +-
 drivers/md/bcache/bcache.h    |  20 --
 drivers/md/bcache/bset.h      |   4 +
 drivers/md/bcache/btree.c     |  47 ++--
 drivers/md/bcache/io.c        |   2 +
 drivers/md/bcache/journal.c   |  14 +-
 drivers/md/bcache/movinggc.c  |  12 +-
 drivers/md/bcache/request.c   |  65 ++---
 drivers/md/bcache/request.h   |   2 +-
 drivers/md/bcache/super.c     |   2 +-
 drivers/md/bcache/sysfs.c     |   1 +
 drivers/md/bcache/trace.c     |  45 +++-
 drivers/md/bcache/util.h      |   2 -
 drivers/md/bcache/writeback.c |  10 +-
 include/trace/events/bcache.h | 444 ++++++++++++++++++++++------------
 15 files changed, 413 insertions(+), 267 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 38428f46ea74..b54b73b9b2b7 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -65,6 +65,7 @@
 
 #include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 
 #define MAX_IN_FLIGHT_DISCARDS		8U
 
@@ -351,10 +352,7 @@ static void invalidate_buckets(struct cache *ca)
 		break;
 	}
 
-	pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-		 fifo_used(&ca->free), ca->free.size,
-		 fifo_used(&ca->free_inc), ca->free_inc.size,
-		 fifo_used(&ca->unused), ca->unused.size);
+	trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)					\
@@ -473,9 +471,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned watermark, struct closure *cl)
 		return r;
 	}
 
-	pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-		 atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	trace_bcache_alloc_fail(ca);
 
 	if (cl) {
 		closure_wait(&ca->set->bucket_wait, cl);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index ad4957b52f10..59c15e09e4dd 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -178,7 +178,6 @@
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void)
 	return local_clock() >> 10;
 }
 
-#define MAX_BSETS		4U
-
 #define BTREE_PRIO		USHRT_MAX
 #define INITIAL_PRIO		32768
 
@@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k)
 		atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
 
-/* Blktrace macros */
-
-#define blktrace_msg(c, fmt, ...)					\
-do {									\
-	struct request_queue *q = bdev_get_queue(c->bdev);		\
-	if (q)								\
-		blk_add_trace_msg(q, fmt, ##__VA_ARGS__);		\
-} while (0)
-
-#define blktrace_msg_all(s, fmt, ...)					\
-do {									\
-	struct cache *_c;						\
-	unsigned i;							\
-	for_each_cache(_c, (s), i)					\
-		blktrace_msg(_c, fmt, ##__VA_ARGS__);			\
-} while (0)
-
 static inline void cached_dev_put(struct cached_dev *dc)
 {
 	if (atomic_dec_and_test(&dc->count))
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 57a9cff41546..ae115a253d73 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,6 +1,8 @@
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
+#include <linux/slab.h>
+
 /*
  * BKEYS:
  *
@@ -142,6 +144,8 @@
 
 /* Btree key comparison/iteration */
 
+#define MAX_BSETS		4U
+
 struct btree_iter {
 	size_t size, used;
 	struct btree_iter_set {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index aaec186f7ba6..218d486259a3 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -223,8 +223,9 @@ void bch_btree_node_read(struct btree *b)
 	struct closure cl;
 	struct bio *bio;
 
+	trace_bcache_btree_read(b);
+
 	closure_init_stack(&cl);
-	pr_debug("%s", pbtree(b));
 
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_rw	= REQ_META|READ_SYNC;
@@ -234,7 +235,6 @@ void bch_btree_node_read(struct btree *b)
 
 	bch_bio_map(bio, b->sets[0].data);
 
-	trace_bcache_btree_read(bio);
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
 
@@ -343,7 +343,6 @@ static void do_btree_node_write(struct btree *b)
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		continue_at(cl, btree_node_write_done, NULL);
@@ -351,7 +350,6 @@ static void do_btree_node_write(struct btree *b)
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
@@ -363,10 +361,13 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
 
+	trace_bcache_btree_write(b);
+
 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
 	BUG_ON(b->sets->data->seq != i->seq);
+	bch_check_key_order(b, i);
 
 	cancel_delayed_work(&b->work);
 
@@ -376,12 +377,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
-	bch_check_key_order(b, i);
-
 	do_btree_node_write(b);
 
-	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
-
 	b->written += set_blocks(i, b->c);
 	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
 			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
@@ -752,6 +749,8 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 	int ret = -ENOMEM;
 	struct btree *i;
 
+	trace_bcache_btree_cache_cannibalize(c);
+
 	if (!cl)
 		return ERR_PTR(-ENOMEM);
 
@@ -770,7 +769,6 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct bkey *k,
 		return ERR_PTR(-EAGAIN);
 	}
 
-	/* XXX: tracepoint */
 	c->try_harder = cl;
 	c->try_harder_start = local_clock();
 retry:
@@ -956,13 +954,14 @@ static void btree_node_free(struct btree *b, struct btree_op *op)
 {
 	unsigned i;
 
+	trace_bcache_btree_node_free(b);
+
 	/*
 	 * The BUG_ON() in btree_node_get() implies that we must have a write
 	 * lock on parent to free or even invalidate a node
 	 */
 	BUG_ON(op->lock <= b->level);
 	BUG_ON(b == b->c->root);
-	pr_debug("bucket %s", pbtree(b));
 
 	if (btree_node_dirty(b))
 		btree_complete_write(b, btree_current_write(b));
@@ -1012,12 +1011,16 @@ struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
 	bch_bset_init_next(b);
 
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc(b);
 	return b;
 err_free:
 	bch_bucket_free(c, &k.key);
 	__bkey_put(c, &k.key);
 err:
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc_fail(b);
 	return b;
 }
 
@@ -1254,7 +1257,7 @@ static void btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	btree_node_free(r->b, op);
 	up_write(&r->b->lock);
 
-	pr_debug("coalesced %u nodes", nodes);
+	trace_bcache_btree_gc_coalesce(nodes);
 
 	gc->nodes--;
 	nodes--;
@@ -1479,8 +1482,7 @@ static void bch_btree_gc(struct closure *cl)
 	struct btree_op op;
 	uint64_t start_time = local_clock();
 
-	trace_bcache_gc_start(c->sb.set_uuid);
-	blktrace_msg_all(c, "Starting gc");
+	trace_bcache_gc_start(c);
 
 	memset(&stats, 0, sizeof(struct gc_stat));
 	closure_init_stack(&writes);
@@ -1496,9 +1498,7 @@ static void bch_btree_gc(struct closure *cl)
 	closure_sync(&writes);
 
 	if (ret) {
-		blktrace_msg_all(c, "Stopped gc");
 		pr_warn("gc failed!");
-
 		continue_at(cl, bch_btree_gc, bch_gc_wq);
 	}
 
@@ -1519,8 +1519,7 @@ static void bch_btree_gc(struct closure *cl)
 	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
 
-	blktrace_msg_all(c, "Finished gc");
-	trace_bcache_gc_end(c->sb.set_uuid);
+	trace_bcache_gc_end(c);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1901,12 +1900,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 
 	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
 
-	pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-		 pbtree(b), n1->sets[0].data->keys);
-
 	if (split) {
 		unsigned keys = 0;
 
+		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+
 		n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
 		if (IS_ERR(n2))
 			goto err_free1;
@@ -1941,8 +1939,11 @@ static int btree_split(struct btree *b, struct btree_op *op)
 		bch_keylist_add(&op->keys, &n2->key);
 		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
-	} else
+	} else {
+		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+
 		bch_btree_insert_keys(n1, op);
+	}
 
 	bch_keylist_add(&op->keys, &n1->key);
 	bch_btree_node_write(n1, &op->cl);
@@ -2117,6 +2118,8 @@ void bch_btree_set_root(struct btree *b)
 {
 	unsigned i;
 
+	trace_bcache_btree_set_root(b);
+
 	BUG_ON(!b->written);
 
 	for (i = 0; i < KEY_PTRS(&b->key); i++)
@@ -2130,7 +2133,6 @@ void bch_btree_set_root(struct btree *b)
 	__bkey_put(b->c, &b->key);
 
 	bch_journal_meta(b->c, NULL);
-	pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
 }
 
 /* Cache lookup */
@@ -2216,7 +2218,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
 		n->bi_end_io	= bch_cache_read_endio;
 		n->bi_private	= &s->cl;
 
-		trace_bcache_cache_hit(n);
 		__bch_submit_bbio(n, b->c);
 	}
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index d285cd49104c..0f6d69658b61 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
 	struct bio *p = bio->bi_private;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 970d819d4350..5ca22149b749 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 /*
  * Journal replay/recovery:
  *
@@ -300,7 +302,8 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list,
 		for (k = i->j.start;
 		     k < end(&i->j);
 		     k = bkey_next(k)) {
-			pr_debug("%s", pkey(k));
+			trace_bcache_journal_replay_key(k);
+
 			bkey_copy(op->keys.top, k);
 			bch_keylist_push(&op->keys);
 
@@ -712,7 +715,8 @@ void bch_journal(struct closure *cl)
 	spin_lock(&c->journal.lock);
 
 	if (journal_full(&c->journal)) {
-		/* XXX: tracepoint */
+		trace_bcache_journal_full(c);
+
 		closure_wait(&c->journal.wait, cl);
 
 		journal_reclaim(c);
@@ -728,13 +732,15 @@ void bch_journal(struct closure *cl)
 
 	if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
 	    b > c->journal.blocks_free) {
-		/* XXX: If we were inserting so many keys that they won't fit in
+		trace_bcache_journal_entry_full(c);
+
+		/*
+		 * XXX: If we were inserting so many keys that they won't fit in
 		 * an _empty_ journal write, we'll deadlock. For now, handle
 		 * this in bch_keylist_realloc() - but something to think about.
 		 */
 		BUG_ON(!w->data->keys);
 
-		/* XXX: tracepoint */
 		BUG_ON(!closure_wait(&w->wait, cl));
 
 		closure_flush(&c->journal.io);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 8589512c972e..04f6b97ffda6 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 struct moving_io {
 	struct keybuf_key	*w;
 	struct search		s;
@@ -49,9 +51,8 @@ static void write_moving_finish(struct closure *cl)
 	while (bv-- != bio->bi_io_vec)
 		__free_page(bv->bv_page);
 
-	pr_debug("%s %s", io->s.op.insert_collision
-		 ? "collision moving" : "moved",
-		 pkey(&io->w->key));
+	if (io->s.op.insert_collision)
+		trace_bcache_gc_copy_collision(&io->w->key);
 
 	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
@@ -94,8 +95,6 @@ static void write_moving(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 
 	if (!s->error) {
-		trace_bcache_write_moving(&io->bio.bio);
-
 		moving_init(io);
 
 		io->bio.bio.bi_sector	= KEY_START(&io->w->key);
@@ -122,7 +121,6 @@ static void read_moving_submit(struct closure *cl)
 	struct moving_io *io = container_of(s, struct moving_io, s);
 	struct bio *bio = &io->bio.bio;
 
-	trace_bcache_read_moving(bio);
 	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
 	continue_at(cl, write_moving, bch_gc_wq);
@@ -162,7 +160,7 @@ static void read_moving(struct closure *cl)
 		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
 			goto err;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_gc_copy(&w->key);
 
 		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e5ff12e52d5b..695469958c1e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -530,10 +530,9 @@ static void bch_insert_data_loop(struct closure *cl)
 		if (KEY_CSUM(k))
 			bio_csum(n, k);
 
-		pr_debug("%s", pkey(k));
+		trace_bcache_cache_insert(k);
 		bch_keylist_push(&op->keys);
 
-		trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
 		n->bi_rw |= REQ_WRITE;
 		bch_submit_bbio(n, op->c, k, 0);
 	} while (n != bio);
@@ -784,11 +783,8 @@ static void request_read_error(struct closure *cl)
 	int i;
 
 	if (s->recoverable) {
-		/* The cache read failed, but we can retry from the backing
-		 * device.
-		 */
-		pr_debug("recovering at sector %llu",
-			 (uint64_t) s->orig_bio->bi_sector);
+		/* Retry from the backing device: */
+		trace_bcache_read_retry(s->orig_bio);
 
 		s->error = 0;
 		bv = s->bio.bio.bi_io_vec;
@@ -806,7 +802,6 @@ static void request_read_error(struct closure *cl)
 
 		/* XXX: invalidate cache */
 
-		trace_bcache_read_retry(&s->bio.bio);
 		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
 	}
 
@@ -899,6 +894,7 @@ static void request_read_done_bh(struct closure *cl)
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+	trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
 	if (s->error)
 		continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -969,7 +965,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	s->cache_miss = miss;
 	bio_get(s->op.cache_bio);
 
-	trace_bcache_cache_miss(s->orig_bio);
 	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
 	return ret;
@@ -1040,15 +1035,15 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (should_writeback(dc, s->orig_bio))
 		s->writeback = true;
 
+	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
+
 	if (!s->writeback) {
 		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
 						   dc->disk.bio_split);
 
-		trace_bcache_writethrough(s->orig_bio);
 		closure_bio_submit(bio, cl, s->d);
 	} else {
 		s->op.cache_bio = bio;
-		trace_bcache_writeback(s->orig_bio);
 		bch_writeback_add(dc, bio_sectors(bio));
 	}
 out:
@@ -1058,7 +1053,6 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	s->op.skip = true;
 	s->op.cache_bio = s->orig_bio;
 	bio_get(s->op.cache_bio);
-	trace_bcache_write_skip(s->orig_bio);
 
 	if ((bio->bi_rw & REQ_DISCARD) &&
 	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1082,10 @@ static void request_nodata(struct cached_dev *dc, struct search *s)
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
 	int i;
+	long rand;
 
 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@@ -1106,7 +1101,13 @@ int bch_get_congested(struct cache_set *c)
 
 	i += CONGESTED_MAX;
 
-	return i <= 0 ? 1 : fract_exp_two(i, 6);
+	if (i > 0)
+		i = fract_exp_two(i, 6);
+
+	rand = get_random_int();
+	i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+	return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1127,8 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 {
 	struct cache_set *c = s->op.c;
 	struct bio *bio = &s->bio.bio;
-
-	long rand;
-	int cutoff = bch_get_congested(c);
 	unsigned mode = cache_mode(dc, bio);
+	unsigned sectors, congested = bch_get_congested(c);
 
 	if (atomic_read(&dc->disk.detaching) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1146,14 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		goto skip;
 	}
 
-	if (!cutoff) {
-		cutoff = dc->sequential_cutoff >> 9;
+	if (!congested && !dc->sequential_cutoff)
+		goto rescale;
 
-		if (!cutoff)
-			goto rescale;
-
-		if (mode == CACHE_MODE_WRITEBACK &&
-		    (bio->bi_rw & REQ_WRITE) &&
-		    (bio->bi_rw & REQ_SYNC))
-			goto rescale;
-	}
+	if (!congested &&
+	    mode == CACHE_MODE_WRITEBACK &&
+	    (bio->bi_rw & REQ_WRITE) &&
+	    (bio->bi_rw & REQ_SYNC))
+		goto rescale;
 
 	if (dc->sequential_merge) {
 		struct io *i;
@@ -1192,12 +1188,19 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		add_sequential(s->task);
 	}
 
-	rand = get_random_int();
-	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+	sectors = max(s->task->sequential_io,
+		      s->task->sequential_io_avg) >> 9;
 
-	if (cutoff <= (int) (max(s->task->sequential_io,
-				 s->task->sequential_io_avg) >> 9))
+	if (dc->sequential_cutoff &&
+	    sectors >= dc->sequential_cutoff >> 9) {
+		trace_bcache_bypass_sequential(s->orig_bio);
 		goto skip;
+	}
+
+	if (congested && sectors >= congested) {
+		trace_bcache_bypass_congested(s->orig_bio);
+		goto skip;
+	}
 
 rescale:
 	bch_rescale_priorities(c, bio_sectors(bio));
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 254d9ab5707c..57dc4784f4f4 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -30,7 +30,7 @@ struct search {
 };
 
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index e53f89988b08..47bc13745068 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -11,6 +11,7 @@
 #include "debug.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
@@ -543,7 +544,6 @@ void bch_prio_write(struct cache *ca)
 
 	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
 		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-	blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 
 	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
 		long bucket;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 29228b8a6ffe..f5c2d8695230 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 
 static const char * const cache_replacement_policies[] = {
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 983f9bb411bc..7f4f38aa16ae 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
@@ -9,18 +10,42 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 577393e38c3a..e02780545f12 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
 
 struct closure;
 
-#include <trace/events/bcache.h>
-
 #ifdef CONFIG_BCACHE_EDEBUG
 
 #define atomic_dec_bug(v)	BUG_ON(atomic_dec_return(v) < 0)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 2714ed3991d1..82f6d4577be2 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -10,6 +10,8 @@
 #include "btree.h"
 #include "debug.h"
 
+#include <trace/events/bcache.h>
+
 static struct workqueue_struct *dirty_wq;
 
 static void read_dirty(struct closure *);
@@ -236,10 +238,12 @@ static void write_dirty_finish(struct closure *cl)
 		for (i = 0; i < KEY_PTRS(&w->key); i++)
 			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
 
-		pr_debug("clearing %s", pkey(&w->key));
 		bch_btree_insert(&op, dc->disk.c);
 		closure_sync(&op.cl);
 
+		if (op.insert_collision)
+			trace_bcache_writeback_collision(&w->key);
+
 		atomic_long_inc(op.insert_collision
 				? &dc->disk.c->writeback_keys_failed
 				: &dc->disk.c->writeback_keys_done);
@@ -275,7 +279,6 @@ static void write_dirty(struct closure *cl)
 	io->bio.bi_bdev		= io->dc->bdev;
 	io->bio.bi_end_io	= dirty_endio;
 
-	trace_bcache_write_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +299,6 @@ static void read_dirty_submit(struct closure *cl)
 {
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-	trace_bcache_read_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty, dirty_wq);
@@ -352,7 +354,7 @@ static void read_dirty(struct closure *cl)
 		if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
 			goto err_free;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_writeback(&w->key);
 
 		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
 
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 3cc5a0b278c3..c9952b36fcea 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -9,9 +9,7 @@
 struct search;
 
 DECLARE_EVENT_CLASS(bcache_request,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio),
 
 	TP_STRUCT__entry(
@@ -22,7 +20,6 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__field(dev_t,		orig_sector		)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
 	),
 
 	TP_fast_assign(
@@ -33,36 +30,66 @@ DECLARE_EVENT_CLASS(bcache_request,
 		__entry->orig_sector	= bio->bi_sector - 16;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
+	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm,
-		  __entry->orig_major, __entry->orig_minor,
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
 		  (unsigned long long)__entry->orig_sector)
 );
 
+DECLARE_EVENT_CLASS(bkey,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k),
+
+	TP_STRUCT__entry(
+		__field(u32,	size				)
+		__field(u32,	inode				)
+		__field(u64,	offset				)
+		__field(bool,	dirty				)
+	),
+
+	TP_fast_assign(
+		__entry->inode	= KEY_INODE(k);
+		__entry->offset	= KEY_OFFSET(k);
+		__entry->size	= KEY_SIZE(k);
+		__entry->dirty	= KEY_DIRTY(k);
+	),
+
+	TP_printk("%u:%llu len %u dirty %u", __entry->inode,
+		  __entry->offset, __entry->size, __entry->dirty)
+);
+
+DECLARE_EVENT_CLASS(btree_node,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+	),
+
+	TP_printk("bucket %zu", __entry->bucket)
+);
+
+/* request.c */
+
 DEFINE_EVENT(bcache_request, bcache_request_start,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio)
 );
 
 DEFINE_EVENT(bcache_request, bcache_request_end,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio)
 );
 
 DECLARE_EVENT_CLASS(bcache_bio,
-
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio),
 
 	TP_STRUCT__entry(
@@ -70,7 +97,6 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__field(sector_t,	sector			)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
 	),
 
 	TP_fast_assign(
@@ -78,191 +104,295 @@ DECLARE_EVENT_CLASS(bcache_bio,
 		__entry->sector		= bio->bi_sector;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d  %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_printk("%d,%d  %s %llu + %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector, __entry->nr_sector)
 );
 
-
-DEFINE_EVENT(bcache_bio, bcache_passthrough,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_hit,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_miss,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_read_retry,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_writethrough,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_writeback,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_write_skip,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_btree_read,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_btree_write,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_write_dirty,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_read_dirty,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_write_moving,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_read_moving,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DECLARE_EVENT_CLASS(bcache_cache_bio,
-
-	TP_PROTO(struct bio *bio,
-		 sector_t orig_sector,
-		 struct block_device* orig_bdev),
-
-	TP_ARGS(bio, orig_sector, orig_bdev),
+TRACE_EVENT(bcache_read,
+	TP_PROTO(struct bio *bio, bool hit, bool bypass),
+	TP_ARGS(bio, hit, bypass),
 
 	TP_STRUCT__entry(
 		__field(dev_t,		dev			)
-		__field(dev_t,		orig_dev		)
 		__field(sector_t,	sector			)
-		__field(sector_t,	orig_sector		)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
+		__field(bool,		cache_hit		)
+		__field(bool,		bypass			)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->orig_dev	= orig_bdev->bd_dev;
 		__entry->sector		= bio->bi_sector;
-		__entry->orig_sector	= orig_sector;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->cache_hit = hit;
+		__entry->bypass = bypass;
 	),
 
-	TP_printk("%d,%d  %s %llu + %u [%s] (from %d,%d %llu)",
+	TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm,
-		  MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
-		  (unsigned long long)__entry->orig_sector)
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->cache_hit, __entry->bypass)
 );
 
-DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
-
-	TP_PROTO(struct bio *bio,
-		 sector_t orig_sector,
-		 struct block_device *orig_bdev),
-
-	TP_ARGS(bio, orig_sector, orig_bdev)
-);
-
-DECLARE_EVENT_CLASS(bcache_gc,
-
-	TP_PROTO(uint8_t *uuid),
-
-	TP_ARGS(uuid),
+TRACE_EVENT(bcache_write,
+	TP_PROTO(struct bio *bio, bool writeback, bool bypass),
+	TP_ARGS(bio, writeback, bypass),
 
 	TP_STRUCT__entry(
-		__field(uint8_t *,	uuid)
+		__field(dev_t,		dev			)
+		__field(sector_t,	sector			)
+		__field(unsigned int,	nr_sector		)
+		__array(char,		rwbs,	6		)
+		__field(bool,		writeback		)
+		__field(bool,		bypass			)
 	),
 
 	TP_fast_assign(
-		__entry->uuid		= uuid;
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->writeback = writeback;
+		__entry->bypass = bypass;
+	),
+
+	TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->writeback, __entry->bypass)
+);
+
+DEFINE_EVENT(bcache_bio, bcache_read_retry,
+	TP_PROTO(struct bio *bio),
+	TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bkey, bcache_cache_insert,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+/* Journal */
+
+DECLARE_EVENT_CLASS(cache_set,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c),
+
+	TP_STRUCT__entry(
+		__array(char,		uuid,	16 )
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->uuid, c->sb.set_uuid, 16);
 	),
 
 	TP_printk("%pU", __entry->uuid)
 );
 
-
-DEFINE_EVENT(bcache_gc, bcache_gc_start,
-
-	     TP_PROTO(uint8_t *uuid),
-
-	     TP_ARGS(uuid)
+DEFINE_EVENT(bkey, bcache_journal_replay_key,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
 );
 
-DEFINE_EVENT(bcache_gc, bcache_gc_end,
+DEFINE_EVENT(cache_set, bcache_journal_full,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
-	     TP_PROTO(uint8_t *uuid),
+DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
-	     TP_ARGS(uuid)
+DEFINE_EVENT(bcache_bio, bcache_journal_write,
+	TP_PROTO(struct bio *bio),
+	TP_ARGS(bio)
+);
+
+/* Btree */
+
+DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_read,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+TRACE_EVENT(bcache_btree_write,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+		__field(unsigned,	block			)
+		__field(unsigned,	keys			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->block	= b->written;
+		__entry->keys	= b->sets[b->nsets].data->keys;
+	),
+
+	TP_printk("bucket %zu", __entry->bucket)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_node_free,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+TRACE_EVENT(bcache_btree_gc_coalesce,
+	TP_PROTO(unsigned nodes),
+	TP_ARGS(nodes),
+
+	TP_STRUCT__entry(
+		__field(unsigned,	nodes			)
+	),
+
+	TP_fast_assign(
+		__entry->nodes	= nodes;
+	),
+
+	TP_printk("coalesced %u nodes", __entry->nodes)
+);
+
+DEFINE_EVENT(cache_set, bcache_gc_start,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
+
+DEFINE_EVENT(cache_set, bcache_gc_end,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
+
+DEFINE_EVENT(bkey, bcache_gc_copy,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+DEFINE_EVENT(bkey, bcache_gc_copy_collision,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+DECLARE_EVENT_CLASS(btree_split,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+		__field(unsigned,	keys			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->keys	= keys;
+	),
+
+	TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
+);
+
+DEFINE_EVENT(btree_split, bcache_btree_node_split,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys)
+);
+
+DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_set_root,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+/* Allocator */
+
+TRACE_EVENT(bcache_alloc_invalidate,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca),
+
+	TP_STRUCT__entry(
+		__field(unsigned,	free			)
+		__field(unsigned,	free_inc		)
+		__field(unsigned,	free_inc_size		)
+		__field(unsigned,	unused			)
+	),
+
+	TP_fast_assign(
+		__entry->free		= fifo_used(&ca->free);
+		__entry->free_inc	= fifo_used(&ca->free_inc);
+		__entry->free_inc_size	= ca->free_inc.size;
+		__entry->unused		= fifo_used(&ca->unused);
+	),
+
+	TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
+		  __entry->free_inc, __entry->free_inc_size, __entry->unused)
+);
+
+TRACE_EVENT(bcache_alloc_fail,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca),
+
+	TP_STRUCT__entry(
+		__field(unsigned,	free			)
+		__field(unsigned,	free_inc		)
+		__field(unsigned,	unused			)
+		__field(unsigned,	blocked			)
+	),
+
+	TP_fast_assign(
+		__entry->free		= fifo_used(&ca->free);
+		__entry->free_inc	= fifo_used(&ca->free_inc);
+		__entry->unused		= fifo_used(&ca->unused);
+		__entry->blocked	= atomic_read(&ca->set->prio_blocked);
+	),
+
+	TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
+		  __entry->free_inc, __entry->unused, __entry->blocked)
+);
+
+/* Background writeback */
+
+DEFINE_EVENT(bkey, bcache_writeback,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+DEFINE_EVENT(bkey, bcache_writeback_collision,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
 );
 
 #endif /* _TRACE_BCACHE_H */

From 85b1492ee113486d871de7676a61f506a43ca475 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Tue, 14 May 2013 20:33:16 -0700
Subject: [PATCH 036/320] bcache: Rip out pkey()/pbtree()

Old gcc doesnt like the struct hack, and it is kind of ugly. So finish
off the work to convert pr_debug() statements to tracepoints, and delete
pkey()/pbtree().

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bset.c      | 16 ++++++++++----
 drivers/md/bcache/btree.c     | 21 ++++++------------
 drivers/md/bcache/btree.h     |  7 ++++++
 drivers/md/bcache/debug.c     | 40 ++++++++++++++++++++++-------------
 drivers/md/bcache/debug.h     | 11 ++--------
 drivers/md/bcache/super.c     |  5 +++--
 drivers/md/bcache/trace.c     |  2 ++
 include/trace/events/bcache.h | 33 +++++++++++++++++++++++++++++
 8 files changed, 90 insertions(+), 45 deletions(-)

diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index cb4578a327b9..e9399ed7f688 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -78,6 +78,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
 bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 {
 	unsigned i;
+	char buf[80];
 
 	if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)))
 		goto bad;
@@ -102,7 +103,8 @@ bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 
 	return false;
 bad:
-	cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k));
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k));
 	return true;
 }
 
@@ -162,10 +164,16 @@ bool bch_ptr_bad(struct btree *b, const struct bkey *k)
 #ifdef CONFIG_BCACHE_EDEBUG
 bug:
 	mutex_unlock(&b->c->bucket_lock);
-	btree_bug(b,
+
+	{
+		char buf[80];
+
+		bch_bkey_to_text(buf, sizeof(buf), k);
+		btree_bug(b,
 "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-		  pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
-		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+			  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+			  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	}
 	return true;
 #endif
 }
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 218d486259a3..53a0f4ef4e32 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1770,7 +1770,7 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 {
 	struct bset *i = b->sets[b->nsets].data;
 	struct bkey *m, *prev;
-	const char *status = "insert";
+	unsigned status = BTREE_INSERT_STATUS_INSERT;
 
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
 	BUG_ON(b->level && !KEY_PTRS(k));
@@ -1803,17 +1803,17 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 			goto insert;
 
 		/* prev is in the tree, if we merge we're done */
-		status = "back merging";
+		status = BTREE_INSERT_STATUS_BACK_MERGE;
 		if (prev &&
 		    bch_bkey_try_merge(b, prev, k))
 			goto merged;
 
-		status = "overwrote front";
+		status = BTREE_INSERT_STATUS_OVERWROTE;
 		if (m != end(i) &&
 		    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
 			goto copy;
 
-		status = "front merge";
+		status = BTREE_INSERT_STATUS_FRONT_MERGE;
 		if (m != end(i) &&
 		    bch_bkey_try_merge(b, k, m))
 			goto copy;
@@ -1823,16 +1823,12 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 insert:	shift_keys(b, m, k);
 copy:	bkey_copy(m, k);
 merged:
-	bch_check_keys(b, "%s for %s at %s: %s", status,
-		       op_type(op), pbtree(b), pkey(k));
-	bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status,
-				op_type(op), pbtree(b), pkey(k));
+	bch_check_keys(b, "%u for %s", status, op_type(op));
 
 	if (b->level && !KEY_OFFSET(k))
 		btree_current_write(b)->prio_blocked++;
 
-	pr_debug("%s for %s at %s: %s", status,
-		 op_type(op), pbtree(b), pkey(k));
+	trace_bcache_btree_insert_key(b, k, op->type, status);
 
 	return true;
 }
@@ -2234,9 +2230,6 @@ int bch_btree_search_recurse(struct btree *b, struct btree_op *op)
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0));
 
-	pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode,
-		 (uint64_t) bio->bi_sector);
-
 	do {
 		k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
 		if (!k) {
@@ -2302,8 +2295,6 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 			if (buf->key_predicate(buf, k)) {
 				struct keybuf_key *w;
 
-				pr_debug("%s", pkey(k));
-
 				spin_lock(&buf->lock);
 
 				w = array_alloc(&buf->freelist);
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 809bd77847a2..2b016b93cad4 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -271,6 +271,13 @@ struct btree_op {
 	BKEY_PADDED(replace);
 };
 
+enum {
+	BTREE_INSERT_STATUS_INSERT,
+	BTREE_INSERT_STATUS_BACK_MERGE,
+	BTREE_INSERT_STATUS_OVERWROTE,
+	BTREE_INSERT_STATUS_FRONT_MERGE,
+};
+
 void bch_btree_op_init_stack(struct btree_op *);
 
 static inline void rw_lock(bool w, struct btree *b, int level)
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index ae6096c6845d..82e3a07771ec 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -47,11 +47,10 @@ const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
 	return "";
 }
 
-struct keyprint_hack bch_pkey(const struct bkey *k)
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
 {
 	unsigned i = 0;
-	struct keyprint_hack r;
-	char *out = r.s, *end = r.s + KEYHACK_SIZE;
+	char *out = buf, *end = buf + size;
 
 #define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
 
@@ -75,16 +74,14 @@ struct keyprint_hack bch_pkey(const struct bkey *k)
 	if (KEY_CSUM(k))
 		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
 #undef p
-	return r;
+	return out - buf;
 }
 
-struct keyprint_hack bch_pbtree(const struct btree *b)
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b)
 {
-	struct keyprint_hack r;
-
-	snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
-		 b->level, b->c->root ? b->c->root->level : -1);
-	return r;
+	return scnprintf(buf, size, "%zu level %i/%i",
+			 PTR_BUCKET_NR(b->c, &b->key, 0),
+			 b->level, b->c->root ? b->c->root->level : -1);
 }
 
 #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
@@ -100,10 +97,12 @@ static void dump_bset(struct btree *b, struct bset *i)
 {
 	struct bkey *k;
 	unsigned j;
+	char buf[80];
 
 	for (k = i->start; k < end(i); k = bkey_next(k)) {
+		bch_bkey_to_text(buf, sizeof(buf), k);
 		printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
-		       (uint64_t *) k - i->d, i->keys, pkey(k));
+		       (uint64_t *) k - i->d, i->keys, buf);
 
 		for (j = 0; j < KEY_PTRS(k); j++) {
 			size_t n = PTR_BUCKET_NR(b->c, k, j);
@@ -252,6 +251,7 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
 				   va_list args)
 {
 	unsigned i;
+	char buf[80];
 
 	console_lock();
 
@@ -262,7 +262,8 @@ static void vdump_bucket_and_panic(struct btree *b, const char *fmt,
 
 	console_unlock();
 
-	panic("at %s\n", pbtree(b));
+	bch_btree_to_text(buf, sizeof(buf), b);
+	panic("at %s\n", buf);
 }
 
 void bch_check_key_order_msg(struct btree *b, struct bset *i,
@@ -337,6 +338,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 {
 	struct dump_iterator *i = file->private_data;
 	ssize_t ret = 0;
+	char kbuf[80];
 
 	while (size) {
 		struct keybuf_key *w;
@@ -359,7 +361,8 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 		if (!w)
 			break;
 
-		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key));
+		bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
+		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
 		bch_keybuf_del(&i->keys, w);
 	}
 
@@ -526,10 +529,17 @@ static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
 			     k < end(i);
 			     k = bkey_next(k), l = bkey_next(l))
 				if (bkey_cmp(k, l) ||
-				    KEY_SIZE(k) != KEY_SIZE(l))
+				    KEY_SIZE(k) != KEY_SIZE(l)) {
+					char buf1[80];
+					char buf2[80];
+
+					bch_bkey_to_text(buf1, sizeof(buf1), k);
+					bch_bkey_to_text(buf2, sizeof(buf2), l);
+
 					pr_err("key %zi differs: %s != %s",
 					       (uint64_t *) k - i->d,
-					       pkey(k), pkey(l));
+					       buf1, buf2);
+				}
 
 			for (j = 0; j < 3; j++) {
 				pr_err("**** Set %i ****", j);
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index f9378a218148..1c39b5a2489b 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -3,15 +3,8 @@
 
 /* Btree/bkey debug printing */
 
-#define KEYHACK_SIZE 80
-struct keyprint_hack {
-	char s[KEYHACK_SIZE];
-};
-
-struct keyprint_hack bch_pkey(const struct bkey *k);
-struct keyprint_hack bch_pbtree(const struct btree *b);
-#define pkey(k)		(&bch_pkey(k).s[0])
-#define pbtree(b)	(&bch_pbtree(b).s[0])
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b);
 
 #ifdef CONFIG_BCACHE_EDEBUG
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 47bc13745068..f24c2e0cbb1c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -343,6 +343,7 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
 	struct closure *cl = &c->uuid_write.cl;
 	struct uuid_entry *u;
 	unsigned i;
+	char buf[80];
 
 	BUG_ON(!parent);
 	closure_lock(&c->uuid_write, parent);
@@ -363,8 +364,8 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
 			break;
 	}
 
-	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read",
-		 pkey(&c->uuid_bucket));
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
 
 	for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
 		if (!bch_is_zero(u->uuid, 16))
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 7f4f38aa16ae..f7b6c197f90f 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -40,6 +40,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index c9952b36fcea..5ebda976ea93 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -305,6 +305,39 @@ DEFINE_EVENT(bkey, bcache_gc_copy_collision,
 	TP_ARGS(k)
 );
 
+TRACE_EVENT(bcache_btree_insert_key,
+	TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status),
+	TP_ARGS(b, k, op, status),
+
+	TP_STRUCT__entry(
+		__field(u64,	btree_node			)
+		__field(u32,	btree_level			)
+		__field(u32,	inode				)
+		__field(u64,	offset				)
+		__field(u32,	size				)
+		__field(u8,	dirty				)
+		__field(u8,	op				)
+		__field(u8,	status				)
+	),
+
+	TP_fast_assign(
+		__entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->btree_level = b->level;
+		__entry->inode	= KEY_INODE(k);
+		__entry->offset	= KEY_OFFSET(k);
+		__entry->size	= KEY_SIZE(k);
+		__entry->dirty	= KEY_DIRTY(k);
+		__entry->op = op;
+		__entry->status = status;
+	),
+
+	TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u",
+		  __entry->status, __entry->op,
+		  __entry->btree_node, __entry->btree_level,
+		  __entry->inode, __entry->offset,
+		  __entry->size, __entry->dirty)
+);
+
 DECLARE_EVENT_CLASS(btree_split,
 	TP_PROTO(struct btree *b, unsigned keys),
 	TP_ARGS(b, keys),

From 6ded34d1a54c046a45db071d3cb7b37bd0a4a31f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Sat, 11 May 2013 15:59:37 -0700
Subject: [PATCH 037/320] bcache: Improve lazy sorting

The old lazy sorting code was kind of hacky - rewrite in a way that
mathematically makes more sense; the idea is that the size of the sets
of keys in a btree node should increase by a more or less fixed ratio
from smallest to biggest.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/bset.c   | 40 ++++++++++++++++++++++----------------
 drivers/md/bcache/super.c  |  2 ++
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 59c15e09e4dd..6fa5a1e33c49 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -828,6 +828,7 @@ struct cache_set {
 	 */
 	struct mutex		sort_lock;
 	struct bset		*sort;
+	unsigned		sort_crit_factor;
 
 	/* List of buckets we're currently writing data to */
 	struct list_head	data_buckets;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e9399ed7f688..a0f190ac17a4 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1092,33 +1092,39 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
 	new->sets->size = 0;
 }
 
+#define SORT_CRIT	(4096 / sizeof(uint64_t))
+
 void bch_btree_sort_lazy(struct btree *b)
 {
-	if (b->nsets) {
-		unsigned i, j, keys = 0, total;
+	unsigned crit = SORT_CRIT;
+	int i;
 
-		for (i = 0; i <= b->nsets; i++)
-			keys += b->sets[i].data->keys;
+	/* Don't sort if nothing to do */
+	if (!b->nsets)
+		goto out;
 
-		total = keys;
+	/* If not a leaf node, always sort */
+	if (b->level) {
+		bch_btree_sort(b);
+		return;
+	}
 
-		for (j = 0; j < b->nsets; j++) {
-			if (keys * 2 < total ||
-			    keys < 1000) {
-				bch_btree_sort_partial(b, j);
-				return;
-			}
+	for (i = b->nsets - 1; i >= 0; --i) {
+		crit *= b->c->sort_crit_factor;
 
-			keys -= b->sets[j].data->keys;
-		}
-
-		/* Must sort if b->nsets == 3 or we'll overflow */
-		if (b->nsets >= (MAX_BSETS - 1) - b->level) {
-			bch_btree_sort(b);
+		if (b->sets[i].data->keys < crit) {
+			bch_btree_sort_partial(b, i);
 			return;
 		}
 	}
 
+	/* Sort if we'd overflow */
+	if (b->nsets + 1 == MAX_BSETS) {
+		bch_btree_sort(b);
+		return;
+	}
+
+out:
 	bset_build_written_tree(b);
 }
 
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f24c2e0cbb1c..3c8474161e8e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1375,6 +1375,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 		c->btree_pages = max_t(int, c->btree_pages / 4,
 				       BTREE_MAX_PAGES);
 
+	c->sort_crit_factor = int_sqrt(c->btree_pages);
+
 	mutex_init(&c->bucket_lock);
 	mutex_init(&c->sort_lock);
 	spin_lock_init(&c->sort_time_lock);

From 444fc0b6b167ed164e7436621a9d095e042644dd Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Sat, 11 May 2013 17:07:26 -0700
Subject: [PATCH 038/320] bcache: Initialize sectors_dirty when attaching

Previously, dirty_data wouldn't get initialized until the first garbage
collection... which was a bit of a problem for background writeback (as
the PD controller keys off of it) and also confusing for users.

This is also prep work for making background writeback aware of raid5/6
stripes.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bcache.h    |  2 +-
 drivers/md/bcache/btree.c     | 29 +---------------------------
 drivers/md/bcache/super.c     |  1 +
 drivers/md/bcache/writeback.c | 36 +++++++++++++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6fa5a1e33c49..d099d8894c2f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -438,7 +438,6 @@ struct bcache_device {
 	atomic_t		detaching;
 
 	atomic_long_t		sectors_dirty;
-	unsigned long		sectors_dirty_gc;
 	unsigned long		sectors_dirty_last;
 	long			sectors_dirty_derivative;
 
@@ -1225,6 +1224,7 @@ void bch_cache_set_stop(struct cache_set *);
 struct cache_set *bch_cache_set_alloc(struct cache_sb *);
 void bch_btree_cache_free(struct cache_set *);
 int bch_btree_cache_alloc(struct cache_set *);
+void bch_sectors_dirty_init(struct cached_dev *);
 void bch_cached_dev_writeback_init(struct cached_dev *);
 void bch_moving_init_cache_set(struct cache_set *);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 53a0f4ef4e32..230c3a6d9be2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1119,11 +1119,8 @@ static int btree_gc_mark_node(struct btree *b, unsigned *keys,
 		gc->nkeys++;
 
 		gc->data += KEY_SIZE(k);
-		if (KEY_DIRTY(k)) {
+		if (KEY_DIRTY(k))
 			gc->dirty += KEY_SIZE(k);
-			if (d)
-				d->sectors_dirty_gc += KEY_SIZE(k);
-		}
 	}
 
 	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
@@ -1377,7 +1374,6 @@ static void btree_gc_start(struct cache_set *c)
 {
 	struct cache *ca;
 	struct bucket *b;
-	struct bcache_device **d;
 	unsigned i;
 
 	if (!c->gc_mark_valid)
@@ -1395,12 +1391,6 @@ static void btree_gc_start(struct cache_set *c)
 				SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
 		}
 
-	for (d = c->devices;
-	     d < c->devices + c->nr_uuids;
-	     d++)
-		if (*d)
-			(*d)->sectors_dirty_gc = 0;
-
 	mutex_unlock(&c->bucket_lock);
 }
 
@@ -1409,7 +1399,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
 	size_t available = 0;
 	struct bucket *b;
 	struct cache *ca;
-	struct bcache_device **d;
 	unsigned i;
 
 	mutex_lock(&c->bucket_lock);
@@ -1452,22 +1441,6 @@ size_t bch_btree_gc_finish(struct cache_set *c)
 		}
 	}
 
-	for (d = c->devices;
-	     d < c->devices + c->nr_uuids;
-	     d++)
-		if (*d) {
-			unsigned long last =
-				atomic_long_read(&((*d)->sectors_dirty));
-			long difference = (*d)->sectors_dirty_gc - last;
-
-			pr_debug("sectors dirty off by %li", difference);
-
-			(*d)->sectors_dirty_last += difference;
-
-			atomic_long_set(&((*d)->sectors_dirty),
-					(*d)->sectors_dirty_gc);
-		}
-
 	mutex_unlock(&c->bucket_lock);
 	return available;
 }
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3c8474161e8e..dbfa1c38e85e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -961,6 +961,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	atomic_set(&dc->count, 1);
 
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
+		bch_sectors_dirty_init(dc);
 		atomic_set(&dc->has_dirty, 1);
 		atomic_inc(&dc->count);
 		bch_writeback_queue(dc);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 82f6d4577be2..553949eefd51 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -377,6 +377,42 @@ static void read_dirty(struct closure *cl)
 	refill_dirty(cl);
 }
 
+/* Init */
+
+static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op,
+					struct cached_dev *dc)
+{
+	struct bkey *k;
+	struct btree_iter iter;
+
+	bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 0));
+	while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad)))
+		if (!b->level) {
+			if (KEY_INODE(k) > dc->disk.id)
+				break;
+
+			if (KEY_DIRTY(k))
+				atomic_long_add(KEY_SIZE(k),
+						&dc->disk.sectors_dirty);
+		} else {
+			btree(sectors_dirty_init, k, b, op, dc);
+			if (KEY_INODE(k) > dc->disk.id)
+				break;
+
+			cond_resched();
+		}
+
+	return 0;
+}
+
+void bch_sectors_dirty_init(struct cached_dev *dc)
+{
+	struct btree_op op;
+
+	bch_btree_op_init_stack(&op);
+	btree_root(sectors_dirty_init, dc->disk.c, &op, dc);
+}
+
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
 	closure_init_unlocked(&dc->writeback);

From 279afbad4e54acbd61bf88a54a73af3bbfdeb5dd Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Wed, 5 Jun 2013 06:21:07 -0700
Subject: [PATCH 039/320] bcache: Track dirty data by stripe

To make background writeback aware of raid5/6 stripes, we first need to
track the amount of dirty data within each stripe - we do this by
breaking up the existing sectors_dirty into per stripe atomic_ts

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bcache.h    | 10 ++++-----
 drivers/md/bcache/btree.c     | 20 ++++++++++++------
 drivers/md/bcache/request.c   |  3 ++-
 drivers/md/bcache/super.c     | 32 ++++++++++++++++++++++++----
 drivers/md/bcache/sysfs.c     |  5 +++--
 drivers/md/bcache/writeback.c | 40 +++++++++++++++++++++++++++++------
 drivers/md/bcache/writeback.h | 21 ++++++++++++++++++
 7 files changed, 105 insertions(+), 26 deletions(-)
 create mode 100644 drivers/md/bcache/writeback.h

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d099d8894c2f..dbddef0cdb59 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -437,7 +437,10 @@ struct bcache_device {
 	/* If nonzero, we're detaching/unregistering from cache set */
 	atomic_t		detaching;
 
-	atomic_long_t		sectors_dirty;
+	uint64_t		nr_stripes;
+	unsigned		stripe_size_bits;
+	atomic_t		*stripe_sectors_dirty;
+
 	unsigned long		sectors_dirty_last;
 	long			sectors_dirty_derivative;
 
@@ -1159,9 +1162,6 @@ static inline void wake_up_allocators(struct cache_set *c)
 
 /* Forward declarations */
 
-void bch_writeback_queue(struct cached_dev *);
-void bch_writeback_add(struct cached_dev *, unsigned);
-
 void bch_count_io_errors(struct cache *, int, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
 			      int, const char *);
@@ -1224,8 +1224,6 @@ void bch_cache_set_stop(struct cache_set *);
 struct cache_set *bch_cache_set_alloc(struct cache_sb *);
 void bch_btree_cache_free(struct cache_set *);
 int bch_btree_cache_alloc(struct cache_set *);
-void bch_sectors_dirty_init(struct cached_dev *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
 void bch_moving_init_cache_set(struct cache_set *);
 
 int bch_cache_allocator_start(struct cache *ca);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 230c3a6d9be2..b93cf56260a4 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -24,6 +24,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
@@ -1599,14 +1600,14 @@ static bool fix_overlapping_extents(struct btree *b,
 				    struct btree_iter *iter,
 				    struct btree_op *op)
 {
-	void subtract_dirty(struct bkey *k, int sectors)
+	void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
 	{
-		struct bcache_device *d = b->c->devices[KEY_INODE(k)];
-
-		if (KEY_DIRTY(k) && d)
-			atomic_long_sub(sectors, &d->sectors_dirty);
+		if (KEY_DIRTY(k))
+			bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+						     offset, -sectors);
 	}
 
+	uint64_t old_offset;
 	unsigned old_size, sectors_found = 0;
 
 	while (1) {
@@ -1618,6 +1619,7 @@ static bool fix_overlapping_extents(struct btree *b,
 		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
 			continue;
 
+		old_offset = KEY_START(k);
 		old_size = KEY_SIZE(k);
 
 		/*
@@ -1673,7 +1675,7 @@ static bool fix_overlapping_extents(struct btree *b,
 
 			struct bkey *top;
 
-			subtract_dirty(k, KEY_SIZE(insert));
+			subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
 
 			if (bkey_written(b, k)) {
 				/*
@@ -1720,7 +1722,7 @@ static bool fix_overlapping_extents(struct btree *b,
 			}
 		}
 
-		subtract_dirty(k, old_size - KEY_SIZE(k));
+		subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
 	}
 
 check_failed:
@@ -1796,6 +1798,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 insert:	shift_keys(b, m, k);
 copy:	bkey_copy(m, k);
 merged:
+	if (KEY_DIRTY(k))
+		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+					     KEY_START(k), KEY_SIZE(k));
+
 	bch_check_keys(b, "%u for %s", status, op_type(op));
 
 	if (b->level && !KEY_OFFSET(k))
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 695469958c1e..017c95fced8e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/cgroup.h>
 #include <linux/module.h>
@@ -1044,7 +1045,7 @@ static void request_write(struct cached_dev *dc, struct search *s)
 		closure_bio_submit(bio, cl, s->d);
 	} else {
 		s->op.cache_bio = bio;
-		bch_writeback_add(dc, bio_sectors(bio));
+		bch_writeback_add(dc);
 	}
 out:
 	closure_call(&s->op.cl, bch_insert_data, NULL, cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index dbfa1c38e85e..8c73f0c7f28a 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
@@ -744,13 +745,35 @@ static void bcache_device_free(struct bcache_device *d)
 		mempool_destroy(d->unaligned_bvec);
 	if (d->bio_split)
 		bioset_free(d->bio_split);
+	if (is_vmalloc_addr(d->stripe_sectors_dirty))
+		vfree(d->stripe_sectors_dirty);
+	else
+		kfree(d->stripe_sectors_dirty);
 
 	closure_debug_destroy(&d->cl);
 }
 
-static int bcache_device_init(struct bcache_device *d, unsigned block_size)
+static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+			      sector_t sectors)
 {
 	struct request_queue *q;
+	size_t n;
+
+	if (!d->stripe_size_bits)
+		d->stripe_size_bits = 31;
+
+	d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
+		d->stripe_size_bits;
+
+	if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
+		return -ENOMEM;
+
+	n = d->nr_stripes * sizeof(atomic_t);
+	d->stripe_sectors_dirty = n < PAGE_SIZE << 6
+		? kzalloc(n, GFP_KERNEL)
+		: vzalloc(n);
+	if (!d->stripe_sectors_dirty)
+		return -ENOMEM;
 
 	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
 	    !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -760,6 +783,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
 	    !(q = blk_alloc_queue(GFP_KERNEL)))
 		return -ENOMEM;
 
+	set_capacity(d->disk, sectors);
 	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
 
 	d->disk->major		= bcache_major;
@@ -1047,7 +1071,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
 		hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
 	}
 
-	ret = bcache_device_init(&dc->disk, block_size);
+	ret = bcache_device_init(&dc->disk, block_size,
+			 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
 	if (ret)
 		return ret;
 
@@ -1146,11 +1171,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
 
 	kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
-	if (bcache_device_init(d, block_bytes(c)))
+	if (bcache_device_init(d, block_bytes(c), u->sectors))
 		goto err;
 
 	bcache_device_attach(d, c, u - c->uuids);
-	set_capacity(d->disk, u->sectors);
 	bch_flash_dev_request_init(d);
 	add_disk(d->disk);
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index f5c2d8695230..cf8d91ec3238 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -9,6 +9,7 @@
 #include "sysfs.h"
 #include "btree.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/blkdev.h>
 #include <linux/sort.h>
@@ -128,7 +129,7 @@ SHOW(__bch_cached_dev)
 		char derivative[20];
 		char target[20];
 		bch_hprint(dirty,
-		       atomic_long_read(&dc->disk.sectors_dirty) << 9);
+			   bcache_dev_sectors_dirty(&dc->disk) << 9);
 		bch_hprint(derivative,	dc->writeback_rate_derivative << 9);
 		bch_hprint(target,	dc->writeback_rate_target << 9);
 
@@ -144,7 +145,7 @@ SHOW(__bch_cached_dev)
 	}
 
 	sysfs_hprint(dirty_data,
-		     atomic_long_read(&dc->disk.sectors_dirty) << 9);
+		     bcache_dev_sectors_dirty(&dc->disk) << 9);
 
 	var_printf(sequential_merge,	"%i");
 	var_hprint(sequential_cutoff);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 553949eefd51..dd815475c524 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -9,6 +9,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "writeback.h"
 
 #include <trace/events/bcache.h>
 
@@ -38,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc)
 
 	int change = 0;
 	int64_t error;
-	int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty);
+	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
 	int64_t derivative = dirty - dc->disk.sectors_dirty_last;
 
 	dc->disk.sectors_dirty_last = dirty;
@@ -183,10 +184,8 @@ void bch_writeback_queue(struct cached_dev *dc)
 	}
 }
 
-void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
+void bch_writeback_add(struct cached_dev *dc)
 {
-	atomic_long_add(sectors, &dc->disk.sectors_dirty);
-
 	if (!atomic_read(&dc->has_dirty) &&
 	    !atomic_xchg(&dc->has_dirty, 1)) {
 		atomic_inc(&dc->count);
@@ -205,6 +204,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
 	}
 }
 
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+				  uint64_t offset, int nr_sectors)
+{
+	struct bcache_device *d = c->devices[inode];
+	unsigned stripe_size, stripe_offset;
+	uint64_t stripe;
+
+	if (!d)
+		return;
+
+	stripe_size = 1 << d->stripe_size_bits;
+	stripe = offset >> d->stripe_size_bits;
+	stripe_offset = offset & (stripe_size - 1);
+
+	while (nr_sectors) {
+		int s = min_t(unsigned, abs(nr_sectors),
+			      stripe_size - stripe_offset);
+
+		if (nr_sectors < 0)
+			s = -s;
+
+		atomic_add(s, d->stripe_sectors_dirty + stripe);
+		nr_sectors -= s;
+		stripe_offset = 0;
+		stripe++;
+	}
+}
+
 /* Background writeback - IO loop */
 
 static void dirty_io_destructor(struct closure *cl)
@@ -392,8 +419,9 @@ static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op,
 				break;
 
 			if (KEY_DIRTY(k))
-				atomic_long_add(KEY_SIZE(k),
-						&dc->disk.sectors_dirty);
+				bcache_dev_sectors_dirty_add(b->c, dc->disk.id,
+							     KEY_START(k),
+							     KEY_SIZE(k));
 		} else {
 			btree(sectors_dirty_init, k, b, op, dc);
 			if (KEY_INODE(k) > dc->disk.id)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
new file mode 100644
index 000000000000..5ce9771df047
--- /dev/null
+++ b/drivers/md/bcache/writeback.h
@@ -0,0 +1,21 @@
+#ifndef _BCACHE_WRITEBACK_H
+#define _BCACHE_WRITEBACK_H
+
+static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
+{
+	uint64_t i, ret = 0;
+
+	for (i = 0; i < d->nr_stripes; i++)
+		ret += atomic_read(d->stripe_sectors_dirty + i);
+
+	return ret;
+}
+
+void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bch_writeback_queue(struct cached_dev *);
+void bch_writeback_add(struct cached_dev *);
+
+void bch_sectors_dirty_init(struct cached_dev *dc);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+
+#endif

From 72c270612bd33192fa836ad0f2939af1ca218292 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Wed, 5 Jun 2013 06:24:39 -0700
Subject: [PATCH 040/320] bcache: Write out full stripes

Now that we're tracking dirty data per stripe, we can add two
optimizations for raid5/6:

 * If a stripe is already dirty, force writes to that stripe to
   writeback mode - to help build up full stripes of dirty data

 * When flushing dirty data, preferentially write out full stripes first
   if there are any.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/bcache.h    |  3 +--
 drivers/md/bcache/btree.c     | 19 ++++++++-------
 drivers/md/bcache/btree.h     |  9 +++----
 drivers/md/bcache/debug.c     |  4 ++--
 drivers/md/bcache/movinggc.c  |  5 ++--
 drivers/md/bcache/request.c   | 23 ++++++------------
 drivers/md/bcache/sysfs.c     |  8 +++++++
 drivers/md/bcache/writeback.c | 44 +++++++++++++++++++++++++++++++++--
 drivers/md/bcache/writeback.h | 43 ++++++++++++++++++++++++++++++++++
 9 files changed, 121 insertions(+), 37 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index dbddef0cdb59..342ba86c6e4f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -387,8 +387,6 @@ struct keybuf_key {
 typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
 
 struct keybuf {
-	keybuf_pred_fn		*key_predicate;
-
 	struct bkey		last_scanned;
 	spinlock_t		lock;
 
@@ -532,6 +530,7 @@ struct cached_dev {
 	unsigned		sequential_merge:1;
 	unsigned		verify:1;
 
+	unsigned		partial_stripes_expensive:1;
 	unsigned		writeback_metadata:1;
 	unsigned		writeback_running:1;
 	unsigned char		writeback_percent;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index b93cf56260a4..09fb8a2f43da 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2252,7 +2252,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
 }
 
 static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
-				   struct keybuf *buf, struct bkey *end)
+				   struct keybuf *buf, struct bkey *end,
+				   keybuf_pred_fn *pred)
 {
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, &buf->last_scanned);
@@ -2271,7 +2272,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 			if (bkey_cmp(&buf->last_scanned, end) >= 0)
 				break;
 
-			if (buf->key_predicate(buf, k)) {
+			if (pred(buf, k)) {
 				struct keybuf_key *w;
 
 				spin_lock(&buf->lock);
@@ -2290,7 +2291,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 			if (!k)
 				break;
 
-			btree(refill_keybuf, k, b, op, buf, end);
+			btree(refill_keybuf, k, b, op, buf, end, pred);
 			/*
 			 * Might get an error here, but can't really do anything
 			 * and it'll get logged elsewhere. Just read what we
@@ -2308,7 +2309,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 }
 
 void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
-			  struct bkey *end)
+		       struct bkey *end, keybuf_pred_fn *pred)
 {
 	struct bkey start = buf->last_scanned;
 	struct btree_op op;
@@ -2316,7 +2317,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 
 	cond_resched();
 
-	btree_root(refill_keybuf, c, &op, buf, end);
+	btree_root(refill_keybuf, c, &op, buf, end, pred);
 	closure_sync(&op.cl);
 
 	pr_debug("found %s keys from %llu:%llu to %llu:%llu",
@@ -2402,7 +2403,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
 
 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
 					     struct keybuf *buf,
-					     struct bkey *end)
+					     struct bkey *end,
+					     keybuf_pred_fn *pred)
 {
 	struct keybuf_key *ret;
 
@@ -2416,15 +2418,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
 			break;
 		}
 
-		bch_refill_keybuf(c, buf, end);
+		bch_refill_keybuf(c, buf, end, pred);
 	}
 
 	return ret;
 }
 
-void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
+void bch_keybuf_init(struct keybuf *buf)
 {
-	buf->key_predicate	= fn;
 	buf->last_scanned	= MAX_KEY;
 	buf->keys		= RB_ROOT;
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 2b016b93cad4..f66d69a7baf1 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -391,13 +391,14 @@ void bch_moving_gc(struct closure *);
 int bch_btree_check(struct cache_set *, struct btree_op *);
 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
 
-void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
+void bch_keybuf_init(struct keybuf *);
+void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
+		       keybuf_pred_fn *);
 bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
 				  struct bkey *);
 void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
 struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
-					  struct keybuf *, struct bkey *);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
+					  struct bkey *, keybuf_pred_fn *);
 
 #endif
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 82e3a07771ec..1c8fd319846e 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -357,7 +357,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 		if (i->bytes)
 			break;
 
-		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
+		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
 		if (!w)
 			break;
 
@@ -380,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file)
 
 	file->private_data = i;
 	i->c = c;
-	bch_keybuf_init(&i->keys, dump_pred);
+	bch_keybuf_init(&i->keys);
 	i->keys.last_scanned = KEY(0, 0, 0);
 
 	return 0;
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 04f6b97ffda6..a241e9fd4f7f 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -136,7 +136,8 @@ static void read_moving(struct closure *cl)
 	/* XXX: if we error, background writeback could stall indefinitely */
 
 	while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
-		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
+		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
+					   &MAX_KEY, moving_pred);
 		if (!w)
 			break;
 
@@ -248,5 +249,5 @@ void bch_moving_gc(struct closure *cl)
 
 void bch_moving_init_cache_set(struct cache_set *c)
 {
-	bch_keybuf_init(&c->moving_gc_keys, moving_pred);
+	bch_keybuf_init(&c->moving_gc_keys);
 }
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 017c95fced8e..17bd59704eba 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -22,8 +22,6 @@
 
 #define CUTOFF_CACHE_ADD	95
 #define CUTOFF_CACHE_READA	90
-#define CUTOFF_WRITEBACK	50
-#define CUTOFF_WRITEBACK_SYNC	75
 
 struct kmem_cache *bch_search_cache;
 
@@ -998,17 +996,6 @@ static void cached_dev_write_complete(struct closure *cl)
 	cached_dev_bio_complete(cl);
 }
 
-static bool should_writeback(struct cached_dev *dc, struct bio *bio)
-{
-	unsigned threshold = (bio->bi_rw & REQ_SYNC)
-		? CUTOFF_WRITEBACK_SYNC
-		: CUTOFF_WRITEBACK;
-
-	return !atomic_read(&dc->disk.detaching) &&
-		cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
-		dc->disk.c->gc_stats.in_use < threshold;
-}
-
 static void request_write(struct cached_dev *dc, struct search *s)
 {
 	struct closure *cl = &s->cl;
@@ -1030,12 +1017,16 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	if (bio->bi_rw & REQ_DISCARD)
 		goto skip;
 
+	if (should_writeback(dc, s->orig_bio,
+			     cache_mode(dc, bio),
+			     s->op.skip)) {
+		s->op.skip = false;
+		s->writeback = true;
+	}
+
 	if (s->op.skip)
 		goto skip;
 
-	if (should_writeback(dc, s->orig_bio))
-		s->writeback = true;
-
 	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 
 	if (!s->writeback) {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index cf8d91ec3238..70c6dff0d0cd 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -81,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse);
 rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
+read_attribute(stripe_size);
+read_attribute(partial_stripes_expensive);
+
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(discard);
@@ -147,6 +150,9 @@ SHOW(__bch_cached_dev)
 	sysfs_hprint(dirty_data,
 		     bcache_dev_sectors_dirty(&dc->disk) << 9);
 
+	sysfs_hprint(stripe_size,	(1 << dc->disk.stripe_size_bits) << 9);
+	var_printf(partial_stripes_expensive,	"%u");
+
 	var_printf(sequential_merge,	"%i");
 	var_hprint(sequential_cutoff);
 	var_hprint(readahead);
@@ -286,6 +292,8 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_writeback_rate_d_smooth,
 	&sysfs_writeback_rate_debug,
 	&sysfs_dirty_data,
+	&sysfs_stripe_size,
+	&sysfs_partial_stripes_expensive,
 	&sysfs_sequential_cutoff,
 	&sysfs_sequential_merge,
 	&sysfs_clear_stats,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index dd815475c524..d81ee5ccc726 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -108,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
 	return KEY_DIRTY(k);
 }
 
+static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
+{
+	uint64_t stripe;
+	unsigned nr_sectors = KEY_SIZE(k);
+	struct cached_dev *dc = container_of(buf, struct cached_dev,
+					     writeback_keys);
+	unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
+
+	if (!KEY_DIRTY(k))
+		return false;
+
+	stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
+	while (1) {
+		if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
+		    stripe_size)
+			return false;
+
+		if (nr_sectors <= stripe_size)
+			return true;
+
+		nr_sectors -= stripe_size;
+		stripe++;
+	}
+}
+
 static void dirty_init(struct keybuf_key *w)
 {
 	struct dirty_io *io = w->private;
@@ -152,7 +177,22 @@ static void refill_dirty(struct closure *cl)
 		searched_from_start = true;
 	}
 
-	bch_refill_keybuf(dc->disk.c, buf, &end);
+	if (dc->partial_stripes_expensive) {
+		uint64_t i;
+
+		for (i = 0; i < dc->disk.nr_stripes; i++)
+			if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
+			    1 << dc->disk.stripe_size_bits)
+				goto full_stripes;
+
+		goto normal_refill;
+full_stripes:
+		bch_refill_keybuf(dc->disk.c, buf, &end,
+				  dirty_full_stripe_pred);
+	} else {
+normal_refill:
+		bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
+	}
 
 	if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
 		/* Searched the entire btree  - delay awhile */
@@ -446,7 +486,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
-	bch_keybuf_init(&dc->writeback_keys, dirty_pred);
+	bch_keybuf_init(&dc->writeback_keys);
 
 	dc->writeback_metadata		= true;
 	dc->writeback_running		= true;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 5ce9771df047..c91f61bb95b6 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -1,6 +1,9 @@
 #ifndef _BCACHE_WRITEBACK_H
 #define _BCACHE_WRITEBACK_H
 
+#define CUTOFF_WRITEBACK	40
+#define CUTOFF_WRITEBACK_SYNC	70
+
 static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 {
 	uint64_t i, ret = 0;
@@ -11,6 +14,46 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 	return ret;
 }
 
+static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
+					   uint64_t offset,
+					   unsigned nr_sectors)
+{
+	uint64_t stripe = offset >> d->stripe_size_bits;
+
+	while (1) {
+		if (atomic_read(d->stripe_sectors_dirty + stripe))
+			return true;
+
+		if (nr_sectors <= 1 << d->stripe_size_bits)
+			return false;
+
+		nr_sectors -= 1 << d->stripe_size_bits;
+		stripe++;
+	}
+}
+
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+				    unsigned cache_mode, bool would_skip)
+{
+	unsigned in_use = dc->disk.c->gc_stats.in_use;
+
+	if (cache_mode != CACHE_MODE_WRITEBACK ||
+	    atomic_read(&dc->disk.detaching) ||
+	    in_use > CUTOFF_WRITEBACK_SYNC)
+		return false;
+
+	if (dc->partial_stripes_expensive &&
+	    bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
+				    bio_sectors(bio)))
+		return true;
+
+	if (would_skip)
+		return false;
+
+	return bio->bi_rw & REQ_SYNC ||
+		in_use <= CUTOFF_WRITEBACK;
+}
+
 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
 void bch_writeback_queue(struct cached_dev *);
 void bch_writeback_add(struct cached_dev *);

From bd206b51bca1e0a1c5f4b00218d56213a1f6d3bd Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 20 May 2013 00:04:35 +0900
Subject: [PATCH 041/320] doc: Fix typo in documentation/bcache.txt

Correct spelling typo in documentation/bcache.txt

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 Documentation/bcache.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt
index b3a7e7d384f6..c3365f26b2d9 100644
--- a/Documentation/bcache.txt
+++ b/Documentation/bcache.txt
@@ -181,7 +181,7 @@ want for getting the best possible numbers when benchmarking.
 
    In practice this isn't an issue because as soon as a write comes along it'll
    cause the btree node to be split, and you need almost no write traffic for
-   this to not show up enough to be noticable (especially since bcache's btree
+   this to not show up enough to be noticeable (especially since bcache's btree
    nodes are huge and index large regions of the device). But when you're
    benchmarking, if you're trying to warm the cache by reading a bunch of data
    and there's no other traffic - that can be a problem.
@@ -222,7 +222,7 @@ running
   it's in passthrough mode or caching).
 
 sequential_cutoff
-  A sequential IO will bypass the cache once it passes this threshhold; the
+  A sequential IO will bypass the cache once it passes this threshold; the
   most recent 128 IOs are tracked so sequential IO can be detected even when
   it isn't all done at once.
 
@@ -296,7 +296,7 @@ cache_miss_collisions
   since the synchronization for cache misses was rewritten)
 
 cache_readaheads
-  Count of times readahead occured.
+  Count of times readahead occurred.
 
 SYSFS - CACHE SET:
 
@@ -362,7 +362,7 @@ unregister
 SYSFS - CACHE SET INTERNAL:
 
 This directory also exposes timings for a number of internal operations, with
-separate files for average duration, average frequency, last occurence and max
+separate files for average duration, average frequency, last occurrence and max
 duration: garbage collection, btree read, btree node sorts and btree splits.
 
 active_journal_entries
@@ -417,7 +417,7 @@ freelist_percent
   space.
 
 io_errors
-  Number of errors that have occured, decayed by io_error_halflife.
+  Number of errors that have occurred, decayed by io_error_halflife.
 
 metadata_written
   Sum of all non data writes (btree writes and all other metadata).

From a25c32bedeff3573b53572b87bcafe10ec5b75a9 Mon Sep 17 00:00:00 2001
From: Gabriel de Perthuis <g2p.code@gmail.com>
Date: Fri, 7 Jun 2013 23:27:01 +0200
Subject: [PATCH 042/320] bcache: Send a uevent with a cached device's UUID

Signed-off-by: Gabriel de Perthuis <g2p.code@gmail.com>
---
 drivers/md/bcache/super.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 8c73f0c7f28a..a104dad2c7fa 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -825,6 +825,11 @@ static void calc_cached_dev_sectors(struct cache_set *c)
 void bch_cached_dev_run(struct cached_dev *dc)
 {
 	struct bcache_device *d = &dc->disk;
+	char *env[] = {
+		"DRIVER=bcache",
+		kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
+		NULL
+	};
 
 	if (atomic_xchg(&dc->running, 1))
 		return;
@@ -841,10 +846,11 @@ void bch_cached_dev_run(struct cached_dev *dc)
 
 	add_disk(d->disk);
 	bd_link_disk_holder(dc->bdev, dc->disk.disk);
-#if 0
-	char *env[] = { "SYMLINK=label" , NULL };
+	/* won't show up in the uevent file, use udevadm monitor -e instead
+	 * only class / kset properties are persistent */
 	kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
-#endif
+	kfree(env[1]);
+
 	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
 	    sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
 		pr_debug("error creating sysfs link");

From ab9e14002e271eba41f7f9ab7e9b03cac4adc22d Mon Sep 17 00:00:00 2001
From: Gabriel de Perthuis <g2p.code@gmail.com>
Date: Sun, 9 Jun 2013 00:54:48 +0200
Subject: [PATCH 043/320] bcache: Send label uevents

Signed-off-by: Gabriel de Perthuis <g2p.code@gmail.com>
Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/super.c | 9 ++++++++-
 drivers/md/bcache/sysfs.c | 9 +++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index a104dad2c7fa..cff2d182dfb0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -825,12 +825,18 @@ static void calc_cached_dev_sectors(struct cache_set *c)
 void bch_cached_dev_run(struct cached_dev *dc)
 {
 	struct bcache_device *d = &dc->disk;
+	char buf[SB_LABEL_SIZE + 1];
 	char *env[] = {
 		"DRIVER=bcache",
 		kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
-		NULL
+		NULL,
+		NULL,
 	};
 
+	memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
+	buf[SB_LABEL_SIZE] = '\0';
+	env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
+
 	if (atomic_xchg(&dc->running, 1))
 		return;
 
@@ -850,6 +856,7 @@ void bch_cached_dev_run(struct cached_dev *dc)
 	 * only class / kset properties are persistent */
 	kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
 	kfree(env[1]);
+	kfree(env[2]);
 
 	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
 	    sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 70c6dff0d0cd..dd3f00a42729 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -178,6 +178,7 @@ STORE(__cached_dev)
 					     disk.kobj);
 	unsigned v = size;
 	struct cache_set *c;
+	struct kobj_uevent_env *env;
 
 #define d_strtoul(var)		sysfs_strtoul(var, dc->var)
 #define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
@@ -222,6 +223,7 @@ STORE(__cached_dev)
 	}
 
 	if (attr == &sysfs_label) {
+		/* note: endlines are preserved */
 		memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
 		bch_write_bdev_super(dc, NULL);
 		if (dc->disk.c) {
@@ -229,6 +231,13 @@ STORE(__cached_dev)
 			       buf, SB_LABEL_SIZE);
 			bch_uuid_write(dc->disk.c);
 		}
+		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
+		add_uevent_var(env, "DRIVER=bcache");
+		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
+		add_uevent_var(env, "CACHED_LABEL=%s", buf);
+		kobject_uevent_env(
+			&disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
+		kfree(env);
 	}
 
 	if (attr == &sysfs_attach) {

From cecd628d9a9966ed0af1237df5cc5818945fe9f2 Mon Sep 17 00:00:00 2001
From: Gabriel de Perthuis <g2p.code@gmail.com>
Date: Thu, 27 Jun 2013 02:12:07 +0200
Subject: [PATCH 044/320] bcache: Refresh usage docs

Mention udev autoregistration, symlinks.  Write down some sysfs paths.

Signed-off-by: Gabriel de Perthuis <g2p.code@gmail.com>
Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 Documentation/bcache.txt | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt
index c3365f26b2d9..32b6c3189d98 100644
--- a/Documentation/bcache.txt
+++ b/Documentation/bcache.txt
@@ -46,29 +46,33 @@ you format your backing devices and cache device at the same time, you won't
 have to manually attach:
   make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
 
-To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register:
+bcache-tools now ships udev rules, and bcache devices are known to the kernel
+immediately.  Without udev, you can manually register devices like this:
 
   echo /dev/sdb > /sys/fs/bcache/register
   echo /dev/sdc > /sys/fs/bcache/register
 
-To register your bcache devices automatically, you could add something like
-this to an init script:
+Registering the backing device makes the bcache device show up in /dev; you can
+now format it and use it as normal. But the first time using a new bcache
+device, it'll be running in passthrough mode until you attach it to a cache.
+See the section on attaching.
 
-  echo /dev/sd* > /sys/fs/bcache/register_quiet
+The devices show up as:
 
-It'll look for bcache superblocks and ignore everything that doesn't have one.
+  /dev/bcache<N>
 
-Registering the backing device makes the bcache show up in /dev; you can now
-format it and use it as normal. But the first time using a new bcache device,
-it'll be running in passthrough mode until you attach it to a cache. See the
-section on attaching.
+As well as (with udev):
 
-The devices show up at /dev/bcacheN, and can be controlled via sysfs from
-/sys/block/bcacheN/bcache:
+  /dev/bcache/by-uuid/<uuid>
+  /dev/bcache/by-label/<label>
+
+To get started:
 
   mkfs.ext4 /dev/bcache0
   mount /dev/bcache0 /mnt
 
+You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+
 Cache devices are managed as sets; multiple caches per set isn't supported yet
 but will allow for mirroring of metadata and dirty data in the future. Your new
 cache set shows up as /sys/fs/bcache/<UUID>
@@ -80,11 +84,11 @@ must be attached to your cache set to enable caching. Attaching a backing
 device to a cache set is done thusly, with the UUID of the cache set in
 /sys/fs/bcache:
 
-  echo <UUID> > /sys/block/bcache0/bcache/attach
+  echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
 
 This only has to be done once. The next time you reboot, just reregister all
 your bcache devices. If a backing device has data in a cache somewhere, the
-/dev/bcache# device won't be created until the cache shows up - particularly
+/dev/bcache<N> device won't be created until the cache shows up - particularly
 important if you have writeback caching turned on.
 
 If you're booting up and your cache device is gone and never coming back, you
@@ -191,6 +195,9 @@ want for getting the best possible numbers when benchmarking.
 
 SYSFS - BACKING DEVICE:
 
+Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
+(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
+
 attach
   Echo the UUID of a cache set to this file to enable caching.
 
@@ -300,6 +307,8 @@ cache_readaheads
 
 SYSFS - CACHE SET:
 
+Available at /sys/fs/bcache/<cset-uuid>
+
 average_key_size
   Average data per key in the btree.
 
@@ -390,6 +399,8 @@ trigger_gc
 
 SYSFS - CACHE DEVICE:
 
+Available at /sys/block/<cdev>/bcache
+
 block_size
   Minimum granularity of writes - should match hardware sector size.
 

From 26ea8f9239a062a47cfef38ea9c63409906366ff Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 25 Jun 2013 16:50:03 +0200
Subject: [PATCH 045/320] drbd: Do not sleep inside rcu

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_receiver.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 4222affff488..adee58e19e83 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1039,6 +1039,8 @@ static int conn_connect(struct drbd_tconn *tconn)
 	rcu_read_lock();
 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		kref_get(&mdev->kref);
+		rcu_read_unlock();
+
 		/* Prevent a race between resync-handshake and
 		 * being promoted to Primary.
 		 *
@@ -1049,8 +1051,6 @@ static int conn_connect(struct drbd_tconn *tconn)
 		mutex_lock(mdev->state_mutex);
 		mutex_unlock(mdev->state_mutex);
 
-		rcu_read_unlock();
-
 		if (discard_my_data)
 			set_bit(DISCARD_MY_DATA, &mdev->flags);
 		else

From 6110d70bdf99f9d0448f1f61798542e3b123b42a Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Tue, 25 Jun 2013 16:50:04 +0200
Subject: [PATCH 046/320] drbd: fix error return code in drbd_init()

Fix to return a negative error code from the error handling
case instead of 0, as returned elsewhere in this function.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a5dca6affcbb..49040a336949 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2762,8 +2762,6 @@ int __init drbd_init(void)
 	/*
 	 * allocate all necessary structs
 	 */
-	err = -ENOMEM;
-
 	init_waitqueue_head(&drbd_pp_wait);
 
 	drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@ int __init drbd_init(void)
 	if (err)
 		goto fail;
 
+	err = -ENOMEM;
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@ int __init drbd_init(void)
 fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
-		/* currently always the case */
 		printk(KERN_ERR "drbd: ran out of memory\n");
 	else
 		printk(KERN_ERR "drbd: initialization failure\n");

From f9eb7bf424e766e00bbc6d69fd7eaaf4bd003cf9 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@linbit.com>
Date: Tue, 25 Jun 2013 16:50:05 +0200
Subject: [PATCH 047/320] drbd: Fix rcu_read_lock balance on error path

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_nl.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9e3f441e7e84..0936d6aabef9 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2658,7 +2658,6 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		const struct sib_info *sib)
 {
 	struct state_info *si = NULL; /* for sizeof(si->member); */
-	struct net_conf *nc;
 	struct nlattr *nla;
 	int got_ldev;
 	int err = 0;
@@ -2688,13 +2687,19 @@ int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
 		goto nla_put_failure;
 
 	rcu_read_lock();
-	if (got_ldev)
-		if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-			goto nla_put_failure;
+	if (got_ldev) {
+		struct disk_conf *disk_conf;
 
-	nc = rcu_dereference(mdev->tconn->net_conf);
-	if (nc)
-		err = net_conf_to_skb(skb, nc, exclude_sensitive);
+		disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+	}
+	if (!err) {
+		struct net_conf *nc;
+
+		nc = rcu_dereference(mdev->tconn->net_conf);
+		if (nc)
+			err = net_conf_to_skb(skb, nc, exclude_sensitive);
+	}
 	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;

From 28e448bb30d0f3fc7daa652d2d3a30adaf9e171b Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 25 Jun 2013 16:50:06 +0200
Subject: [PATCH 048/320] drbd: Ignore the exit code of a fence-peer handler if
 it returns too late

In case the connection was established and lost again before
the a fence-peer handler returns, ignore the exit code of this
instance. (And use the exit code of the later started instance)

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_int.h   |  1 +
 drivers/block/drbd/drbd_nl.c    | 15 +++++++++++++--
 drivers/block/drbd/drbd_state.c |  4 +++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f943aacfdad8..f104328d6325 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -832,6 +832,7 @@ struct drbd_tconn {			/* is a resource from the config file */
 	unsigned susp_nod:1;		/* IO suspended because no data */
 	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
 	struct mutex cstate_mutex;	/* Protects graceful disconnects */
+	unsigned int connect_cnt;	/* Inc each time a connection is established */
 
 	unsigned long flags;
 	struct net_conf *net_conf;	/* content protected by rcu */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 0936d6aabef9..e25803b447ff 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -417,6 +417,7 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+	unsigned int connect_cnt;
 	union drbd_state mask = { };
 	union drbd_state val = { };
 	enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 		return false;
 	}
 
+	spin_lock_irq(&tconn->req_lock);
+	connect_cnt = tconn->connect_cnt;
+	spin_unlock_irq(&tconn->req_lock);
+
 	fp = highest_fencing_policy(tconn);
 	switch (fp) {
 	case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@ bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 	   here, because we might were able to re-establish the connection in the
 	   meantime. */
 	spin_lock_irq(&tconn->req_lock);
-	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
-		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
+		if (tconn->connect_cnt != connect_cnt)
+			/* In case the connection was established and droped
+			   while the fence-peer handler was running, ignore it */
+			conn_info(tconn, "Ignoring fence-peer exit code\n");
+		else
+			_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	}
 	spin_unlock_irq(&tconn->req_lock);
 
 	return conn_highest_pdsk(tconn) <= D_OUTDATED;
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 90c5be2b1d30..216d47b7e88b 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1115,8 +1115,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
 		drbd_thread_restart_nowait(&mdev->tconn->receiver);
 
 	/* Resume AL writing if we get a connection */
-	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
 		drbd_resume_al(mdev);
+		mdev->tconn->connect_cnt++;
+	}
 
 	/* remember last attach time so request_timer_fn() won't
 	 * kill newly established sessions while we are still trying to thaw

From e96c96333fe5a4f252cc4e1d7edde8ee7dce7dfe Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 25 Jun 2013 16:50:07 +0200
Subject: [PATCH 049/320] drbd: Constants should be UPPERCASE

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_int.h      |  7 ++++++-
 drivers/block/drbd/drbd_nl.c       | 20 ++++++++++----------
 drivers/block/drbd/drbd_receiver.c |  6 +++---
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f104328d6325..4519d63350fb 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1467,7 +1467,12 @@ extern void drbd_suspend_io(struct drbd_conf *mdev);
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
+enum determine_dev_size {
+	DS_ERROR = -1,
+	DS_UNCHANGED = 0,
+	DS_SHRUNK = 1,
+	DS_GREW = 2
+};
 extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index e25803b447ff..45d127522e0a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -835,7 +835,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	char ppb[10];
 
 	int md_moved, la_size_changed;
-	enum determine_dev_size rv = unchanged;
+	enum determine_dev_size rv = DS_UNCHANGED;
 
 	/* race:
 	 * application request passes inc_ap_bio,
@@ -878,7 +878,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 				    "Leaving size unchanged at size = %lu KB\n",
 				    (unsigned long)size);
 			}
-			rv = dev_size_error;
+			rv = DS_ERROR;
 		}
 		/* racy, see comments above. */
 		drbd_set_my_capacity(mdev, size);
@@ -886,7 +886,7 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 	}
-	if (rv == dev_size_error)
+	if (rv == DS_ERROR)
 		goto out;
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
@@ -905,16 +905,16 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
 				     "size changed", BM_LOCKED_MASK);
 		if (err) {
-			rv = dev_size_error;
+			rv = DS_ERROR;
 			goto out;
 		}
 		drbd_md_mark_dirty(mdev);
 	}
 
 	if (size > la_size_sect)
-		rv = grew;
+		rv = DS_GREW;
 	if (size < la_size_sect)
-		rv = shrunk;
+		rv = DS_SHRUNK;
 out:
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
@@ -1619,10 +1619,10 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
 	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == dev_size_error) {
+	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
-	} else if (dd == grew)
+	} else if (dd == DS_GREW)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
@@ -2387,13 +2387,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	dd = drbd_determine_dev_size(mdev, ddsf);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
-	if (dd == dev_size_error) {
+	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
 	}
 
 	if (mdev->state.conn == C_CONNECTED) {
-		if (dd == grew)
+		if (dd == DS_GREW)
 			set_bit(RESIZE_PENDING, &mdev->flags);
 
 		drbd_send_uuids(mdev);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index adee58e19e83..26852b87b034 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3545,7 +3545,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 {
 	struct drbd_conf *mdev;
 	struct p_sizes *p = pi->data;
-	enum determine_dev_size dd = unchanged;
+	enum determine_dev_size dd = DS_UNCHANGED;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
@@ -3619,7 +3619,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 	if (get_ldev(mdev)) {
 		dd = drbd_determine_dev_size(mdev, ddsf);
 		put_ldev(mdev);
-		if (dd == dev_size_error)
+		if (dd == DS_ERROR)
 			return -EIO;
 		drbd_md_sync(mdev);
 	} else {
@@ -3647,7 +3647,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 			drbd_send_sizes(mdev, 0, ddsf);
 		}
 		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
-		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
+		    (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
 			if (mdev->state.pdsk >= D_INCONSISTENT &&
 			    mdev->state.disk >= D_INCONSISTENT) {
 				if (ddsf & DDSF_NO_RESYNC)

From d752b2696072ed52fd5afab08b601e2220a3b87e Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Tue, 25 Jun 2013 16:50:08 +0200
Subject: [PATCH 050/320] drbd: Allow online change of al-stripes and
 al-stripe-size

Allow to change the AL layout with an resize operation. For that
the reisze command gets two new fields: al_stripes and al_stripe_size.

In order to make the operation crash save:
1) Lock out all IO and MD-IO
2) Write the super block with MDF_PRIMARY_IND clear
3) write the bitmap to the new location (all zeros, since
   we allow only while connected)
4) Initialize the new AL-area
5) Write the super block with the restored MDF_PRIMARY_IND.
6) Unfreeze all IO

Since the AL-layout has no influence on the protocol, this operation
needs to be beforemed on both sides of a resource (if intended).

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_actlog.c   |  21 +++++
 drivers/block/drbd/drbd_int.h      |   7 +-
 drivers/block/drbd/drbd_main.c     |  57 +++++++-----
 drivers/block/drbd/drbd_nl.c       | 137 +++++++++++++++++++++++------
 drivers/block/drbd/drbd_receiver.c |   2 +-
 include/linux/drbd.h               |   6 +-
 include/linux/drbd_genl.h          |   2 +
 include/linux/drbd_limits.h        |   9 ++
 8 files changed, 188 insertions(+), 53 deletions(-)

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6608076dc39e..28c73ca320a8 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
 	wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+	struct al_transaction_on_disk *al = buffer;
+	struct drbd_md *md = &mdev->ldev->md;
+	sector_t al_base = md->md_offset + md->al_offset;
+	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+	int i;
+
+	memset(al, 0, 4096);
+	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+	for (i = 0; i < al_size_4k; i++) {
+		int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4519d63350fb..2d7f608d181c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1133,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1468,12 +1469,15 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
 enum determine_dev_size {
+	DS_ERROR_SHRINK = -3,
+	DS_ERROR_SPACE_MD = -2,
 	DS_ERROR = -1,
 	DS_UNCHANGED = 0,
 	DS_SHRUNK = 1,
 	DS_GREW = 2
 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1639,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 49040a336949..55635edf563b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2879,34 +2879,14 @@ struct meta_data_on_disk {
 	u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
- * @mdev:	DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-	struct meta_data_on_disk *buffer;
+	struct meta_data_on_disk *buffer = b;
 	sector_t sector;
 	int i;
 
-	/* Don't accidentally change the DRBD meta data layout. */
-	BUILD_BUG_ON(UI_SIZE != 4);
-	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
-
-	del_timer(&mdev->md_sync_timer);
-	/* timer may be rearmed by drbd_md_mark_dirty() now. */
-	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
-		return;
-
-	/* We use here D_FAILED and not D_ATTACHING because we try to write
-	 * metadata even if we detach due to a disk failure! */
-	if (!get_ldev_if_state(mdev, D_FAILED))
-		return;
-
-	buffer = drbd_md_get_buffer(mdev);
-	if (!buffer)
-		goto out;
-
 	memset(buffer, 0, sizeof(*buffer));
 
 	buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2935,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
 		dev_err(DEV, "meta data update failed!\n");
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
 	}
+}
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:	DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+	struct meta_data_on_disk *buffer;
+
+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
+	del_timer(&mdev->md_sync_timer);
+	/* timer may be rearmed by drbd_md_mark_dirty() now. */
+	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+		return;
+
+	/* We use here D_FAILED and not D_ATTACHING because we try to write
+	 * metadata even if we detach due to a disk failure! */
+	if (!get_ldev_if_state(mdev, D_FAILED))
+		return;
+
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
+	drbd_md_write(mdev, buffer);
 
 	/* Update mdev->ldev->md.la_size_sect,
 	 * since we updated it on metadata. */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 45d127522e0a..8cc1e640f485 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -827,12 +827,17 @@ void drbd_resume_io(struct drbd_conf *mdev)
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size_sect, u_size;
+	struct drbd_md *md = &mdev->ldev->md;
+	u32 prev_al_stripe_size_4k;
+	u32 prev_al_stripes;
 	sector_t size;
 	char ppb[10];
+	void *buffer;
 
 	int md_moved, la_size_changed;
 	enum determine_dev_size rv = DS_UNCHANGED;
@@ -847,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(mdev);
+	buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+	if (!buffer) {
+		drbd_resume_io(mdev);
+		return DS_ERROR;
+	}
 
 	/* no wait necessary anymore, actually we could assert that */
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -855,7 +865,17 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	prev_size = mdev->ldev->md.md_size_sect;
 	la_size_sect = mdev->ldev->md.la_size_sect;
 
-	/* TODO: should only be some assert here, not (re)init... */
+	if (rs) {
+		/* rs is non NULL if we should change the AL layout only */
+
+		prev_al_stripes = md->al_stripes;
+		prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+		md->al_stripes = rs->al_stripes;
+		md->al_stripe_size_4k = rs->al_stripe_size / 4;
+		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+	}
+
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
 	rcu_read_lock();
@@ -863,6 +883,21 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+	if (size < la_size_sect) {
+		if (rs && u_size == 0) {
+			/* Remove "rs &&" later. This check should always be active, but
+			   right now the receiver expects the permissive behavior */
+			dev_warn(DEV, "Implicit shrink not allowed. "
+				 "Use --size=%llus for explicit shrink.\n",
+				 (unsigned long long)size);
+			rv = DS_ERROR_SHRINK;
+		}
+		if (u_size > size)
+			rv = DS_ERROR_SPACE_MD;
+		if (rv != DS_UNCHANGED)
+			goto err_out;
+	}
+
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
 		int err;
@@ -886,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 	}
-	if (rv == DS_ERROR)
-		goto out;
+	if (rv <= DS_ERROR)
+		goto err_out;
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
-	if (la_size_changed || md_moved) {
-		int err;
+	if (la_size_changed || md_moved || rs) {
+		u32 prev_flags;
 
 		drbd_al_shrink(mdev); /* All extents inactive. */
+
+		prev_flags = md->flags;
+		md->flags &= ~MDF_PRIMARY_IND;
+		drbd_md_write(mdev, buffer);
+
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-				     "size changed", BM_LOCKED_MASK);
-		if (err) {
-			rv = DS_ERROR;
-			goto out;
-		}
-		drbd_md_mark_dirty(mdev);
+		drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+			       "size changed", BM_LOCKED_MASK);
+		drbd_initialize_al(mdev, buffer);
+
+		md->flags = prev_flags;
+		drbd_md_write(mdev, buffer);
+
+		if (rs)
+			dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+				 md->al_stripes, md->al_stripe_size_4k * 4);
 	}
 
 	if (size > la_size_sect)
 		rv = DS_GREW;
 	if (size < la_size_sect)
 		rv = DS_SHRUNK;
-out:
+
+	if (0) {
+	err_out:
+		if (rs) {
+			md->al_stripes = prev_al_stripes;
+			md->al_stripe_size_4k = prev_al_stripe_size_4k;
+			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+			drbd_md_set_sector_offsets(mdev, mdev->ldev);
+		}
+	}
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
+	drbd_md_put_buffer(mdev);
 	drbd_resume_io(mdev);
 
 	return rv;
@@ -1618,8 +1672,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == DS_ERROR) {
+	dd = drbd_determine_dev_size(mdev, 0, NULL);
+	if (dd <= DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
 	} else if (dd == DS_GREW)
@@ -2316,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
+	bool change_al_layout = false;
 	enum dds_flags ddsf;
 	sector_t u_size;
 	int err;
@@ -2326,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto fail;
 
+	mdev = adm_ctx.mdev;
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
 	memset(&rs, 0, sizeof(struct resize_parms));
+	rs.al_stripes = mdev->ldev->md.al_stripes;
+	rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
-			goto fail;
+			goto fail_ldev;
 		}
 	}
 
-	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (mdev->state.role == R_SECONDARY &&
 	    mdev->state.peer == R_SECONDARY) {
 		retcode = ERR_NO_PRIMARY;
-		goto fail;
-	}
-
-	if (!get_ldev(mdev)) {
-		retcode = ERR_NO_DISK;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2369,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+	    mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+		if (al_size_k > (16 * 1024 * 1024)) {
+			retcode = ERR_MD_LAYOUT_TOO_BIG;
+			goto fail_ldev;
+		}
+
+		if (al_size_k < MD_32kB_SECT/2) {
+			retcode = ERR_MD_LAYOUT_TOO_SMALL;
+			goto fail_ldev;
+		}
+
+		if (mdev->state.conn != C_CONNECTED) {
+			retcode = ERR_MD_LAYOUT_CONNECTED;
+			goto fail_ldev;
+		}
+
+		change_al_layout = true;
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
@@ -2384,12 +2463,18 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determine_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
 	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
+	} else if (dd == DS_ERROR_SPACE_MD) {
+		retcode = ERR_MD_LAYOUT_NO_FIT;
+		goto fail;
+	} else if (dd == DS_ERROR_SHRINK) {
+		retcode = ERR_IMPLICIT_SHRINK;
+		goto fail;
 	}
 
 	if (mdev->state.conn == C_CONNECTED) {
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 26852b87b034..cc29cd3bf78b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -3617,7 +3617,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determine_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf, NULL);
 		put_ldev(mdev);
 		if (dd == DS_ERROR)
 			return -EIO;
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1b4d4ee1168f..de7d74ab3de6 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -177,7 +177,11 @@ enum drbd_ret_code {
 	ERR_NEED_APV_100	= 163,
 	ERR_NEED_ALLOW_TWO_PRI  = 164,
 	ERR_MD_UNCLEAN          = 165,
-
+	ERR_MD_LAYOUT_CONNECTED = 166,
+	ERR_MD_LAYOUT_TOO_BIG   = 167,
+	ERR_MD_LAYOUT_TOO_SMALL = 168,
+	ERR_MD_LAYOUT_NO_FIT    = 169,
+	ERR_IMPLICIT_SHRINK     = 170,
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
 };
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index d0d8fac8a6e4..e8c44572b8cb 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -181,6 +181,8 @@ GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
 	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
 	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
+	__u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF)
+	__u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF)
 )
 
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 1fedf2b17cc8..17e50bb00521 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -215,4 +215,13 @@
 #define DRBD_ALWAYS_ASBP_DEF	0
 #define DRBD_USE_RLE_DEF	1
 
+#define DRBD_AL_STRIPES_MIN     1
+#define DRBD_AL_STRIPES_MAX     1024
+#define DRBD_AL_STRIPES_DEF     1
+#define DRBD_AL_STRIPES_SCALE   '1'
+
+#define DRBD_AL_STRIPE_SIZE_MIN   4
+#define DRBD_AL_STRIPE_SIZE_MAX   16777216
+#define DRBD_AL_STRIPE_SIZE_DEF   32
+#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
 #endif

From e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Wed, 26 Jun 2013 17:25:38 -0700
Subject: [PATCH 051/320] bcache: FUA fixes

Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node
writes have... weird ordering requirements.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/btree.c   | 25 ++++++++++++++++++++++---
 drivers/md/bcache/journal.c |  2 +-
 drivers/md/bcache/request.c | 13 ++++++++++++-
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 09fb8a2f43da..a6ad49ac5f2b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -328,10 +328,25 @@ static void do_btree_node_write(struct btree *b)
 
 	b->bio->bi_end_io	= btree_node_write_endio;
 	b->bio->bi_private	= &b->io.cl;
-	b->bio->bi_rw	= REQ_META|WRITE_SYNC;
-	b->bio->bi_size	= set_blocks(i, b->c) * block_bytes(b->c);
+	b->bio->bi_rw		= REQ_META|WRITE_SYNC|REQ_FUA;
+	b->bio->bi_size		= set_blocks(i, b->c) * block_bytes(b->c);
 	bch_bio_map(b->bio, i);
 
+	/*
+	 * If we're appending to a leaf node, we don't technically need FUA -
+	 * this write just needs to be persisted before the next journal write,
+	 * which will be marked FLUSH|FUA.
+	 *
+	 * Similarly if we're writing a new btree root - the pointer is going to
+	 * be in the next journal entry.
+	 *
+	 * But if we're writing a new btree node (that isn't a root) or
+	 * appending to a non leaf btree node, we need either FUA or a flush
+	 * when we write the parent with the new pointer. FUA is cheaper than a
+	 * flush, and writes appending to leaf nodes aren't blocking anything so
+	 * just make all btree node writes FUA to keep things sane.
+	 */
+
 	bkey_copy(&k.key, &b->key);
 	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
 
@@ -2092,6 +2107,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c)
 void bch_btree_set_root(struct btree *b)
 {
 	unsigned i;
+	struct closure cl;
+
+	closure_init_stack(&cl);
 
 	trace_bcache_btree_set_root(b);
 
@@ -2107,7 +2125,8 @@ void bch_btree_set_root(struct btree *b)
 	b->c->root = b;
 	__bkey_put(b->c, &b->key);
 
-	bch_journal_meta(b->c, NULL);
+	bch_journal_meta(b->c, &cl);
+	closure_sync(&cl);
 }
 
 /* Cache lookup */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 5ca22149b749..4b250667bb7f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -620,7 +620,7 @@ static void journal_write_unlocked(struct closure *cl)
 		bio_reset(bio);
 		bio->bi_sector	= PTR_OFFSET(k, i);
 		bio->bi_bdev	= ca->bdev;
-		bio->bi_rw	= REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH;
+		bio->bi_rw	= REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
 		bio->bi_size	= sectors << 9;
 
 		bio->bi_end_io	= journal_write_endio;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 17bd59704eba..bcdf1f782c3e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1035,8 +1035,19 @@ static void request_write(struct cached_dev *dc, struct search *s)
 
 		closure_bio_submit(bio, cl, s->d);
 	} else {
-		s->op.cache_bio = bio;
 		bch_writeback_add(dc);
+
+		if (s->op.flush_journal) {
+			/* Also need to send a flush to the backing device */
+			s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
+							   dc->disk.bio_split);
+
+			bio->bi_size = 0;
+			bio->bi_vcnt = 0;
+			closure_bio_submit(bio, cl, s->d);
+		} else {
+			s->op.cache_bio = bio;
+		}
 	}
 out:
 	closure_call(&s->op.cl, bch_insert_data, NULL, cl);

From 36c9ea9837c1cb21c778781495101eaff7e5eb56 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Mon, 3 Jun 2013 13:04:56 -0700
Subject: [PATCH 052/320] bcache: Document shrinker reserve better

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
 drivers/md/bcache/btree.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index a6ad49ac5f2b..04636a70ffaa 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -622,6 +622,13 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
 	else if (!mutex_trylock(&c->bucket_lock))
 		return -1;
 
+	/*
+	 * It's _really_ critical that we don't free too many btree nodes - we
+	 * have to always leave ourselves a reserve. The reserve is how we
+	 * guarantee that allocating memory for a new btree node can always
+	 * succeed, so that inserting keys into the btree can always succeed and
+	 * IO can always make forward progress:
+	 */
 	nr /= c->btree_pages;
 	nr = min_t(unsigned long, nr, mca_can_free(c));
 

From f3059a54610f6c516c0942d58b9435921768ce2d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Wed, 15 May 2013 17:13:45 -0700
Subject: [PATCH 053/320] bcache: Delete fuzz tester

This code has rotted and it hasn't been used in ages anyways.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
 drivers/md/bcache/btree.c |   4 +-
 drivers/md/bcache/btree.h |   2 -
 drivers/md/bcache/debug.c | 148 --------------------------------------
 3 files changed, 2 insertions(+), 152 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 04636a70ffaa..e0cca3673d0f 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -135,7 +135,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i)
 	return crc ^ 0xffffffffffffffffULL;
 }
 
-void bch_btree_node_read_done(struct btree *b)
+static void bch_btree_node_read_done(struct btree *b)
 {
 	const char *err = "bad btree header";
 	struct bset *i = b->sets[0].data;
@@ -1834,7 +1834,7 @@ copy:	bkey_copy(m, k);
 	return true;
 }
 
-bool bch_btree_insert_keys(struct btree *b, struct btree_op *op)
+static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op)
 {
 	bool ret = false;
 	struct bkey *k;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index f66d69a7baf1..3333d3723633 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -369,7 +369,6 @@ static inline bool should_split(struct btree *b)
 }
 
 void bch_btree_node_read(struct btree *);
-void bch_btree_node_read_done(struct btree *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_cannibalize_unlock(struct cache_set *, struct closure *);
@@ -378,7 +377,6 @@ struct btree *bch_btree_node_alloc(struct cache_set *, int, struct closure *);
 struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
 				int, struct btree_op *);
 
-bool bch_btree_insert_keys(struct btree *, struct btree_op *);
 bool bch_btree_insert_check_key(struct btree *, struct btree_op *,
 				   struct bio *);
 int bch_btree_insert(struct btree_op *, struct cache_set *);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 1c8fd319846e..ba2ceee0fdbb 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -412,149 +412,6 @@ void bch_debug_init_cache_set(struct cache_set *c)
 
 #endif
 
-/* Fuzz tester has rotted: */
-#if 0
-
-static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
-			  const char *buffer, size_t size)
-{
-	void dump(struct btree *b)
-	{
-		struct bset *i;
-
-		for (i = b->sets[0].data;
-		     index(i, b) < btree_blocks(b) &&
-		     i->seq == b->sets[0].data->seq;
-		     i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c))
-			dump_bset(b, i);
-	}
-
-	struct cache_sb *sb;
-	struct cache_set *c;
-	struct btree *all[3], *b, *fill, *orig;
-	int j;
-
-	struct btree_op op;
-	bch_btree_op_init_stack(&op);
-
-	sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL);
-	if (!sb)
-		return -ENOMEM;
-
-	sb->bucket_size = 128;
-	sb->block_size = 4;
-
-	c = bch_cache_set_alloc(sb);
-	if (!c)
-		return -ENOMEM;
-
-	for (j = 0; j < 3; j++) {
-		BUG_ON(list_empty(&c->btree_cache));
-		all[j] = list_first_entry(&c->btree_cache, struct btree, list);
-		list_del_init(&all[j]->list);
-
-		all[j]->key = KEY(0, 0, c->sb.bucket_size);
-		bkey_copy_key(&all[j]->key, &MAX_KEY);
-	}
-
-	b = all[0];
-	fill = all[1];
-	orig = all[2];
-
-	while (1) {
-		for (j = 0; j < 3; j++)
-			all[j]->written = all[j]->nsets = 0;
-
-		bch_bset_init_next(b);
-
-		while (1) {
-			struct bset *i = write_block(b);
-			struct bkey *k = op.keys.top;
-			unsigned rand;
-
-			bkey_init(k);
-			rand = get_random_int();
-
-			op.type = rand & 1
-				? BTREE_INSERT
-				: BTREE_REPLACE;
-			rand >>= 1;
-
-			SET_KEY_SIZE(k, bucket_remainder(c, rand));
-			rand >>= c->bucket_bits;
-			rand &= 1024 * 512 - 1;
-			rand += c->sb.bucket_size;
-			SET_KEY_OFFSET(k, rand);
-#if 0
-			SET_KEY_PTRS(k, 1);
-#endif
-			bch_keylist_push(&op.keys);
-			bch_btree_insert_keys(b, &op);
-
-			if (should_split(b) ||
-			    set_blocks(i, b->c) !=
-			    __set_blocks(i, i->keys + 15, b->c)) {
-				i->csum = csum_set(i);
-
-				memcpy(write_block(fill),
-				       i, set_bytes(i));
-
-				b->written += set_blocks(i, b->c);
-				fill->written = b->written;
-				if (b->written == btree_blocks(b))
-					break;
-
-				bch_btree_sort_lazy(b);
-				bch_bset_init_next(b);
-			}
-		}
-
-		memcpy(orig->sets[0].data,
-		       fill->sets[0].data,
-		       btree_bytes(c));
-
-		bch_btree_sort(b);
-		fill->written = 0;
-		bch_btree_node_read_done(fill);
-
-		if (b->sets[0].data->keys != fill->sets[0].data->keys ||
-		    memcmp(b->sets[0].data->start,
-			   fill->sets[0].data->start,
-			   b->sets[0].data->keys * sizeof(uint64_t))) {
-			struct bset *i = b->sets[0].data;
-			struct bkey *k, *l;
-
-			for (k = i->start,
-			     l = fill->sets[0].data->start;
-			     k < end(i);
-			     k = bkey_next(k), l = bkey_next(l))
-				if (bkey_cmp(k, l) ||
-				    KEY_SIZE(k) != KEY_SIZE(l)) {
-					char buf1[80];
-					char buf2[80];
-
-					bch_bkey_to_text(buf1, sizeof(buf1), k);
-					bch_bkey_to_text(buf2, sizeof(buf2), l);
-
-					pr_err("key %zi differs: %s != %s",
-					       (uint64_t *) k - i->d,
-					       buf1, buf2);
-				}
-
-			for (j = 0; j < 3; j++) {
-				pr_err("**** Set %i ****", j);
-				dump(all[j]);
-			}
-			panic("\n");
-		}
-
-		pr_info("fuzz complete: %i keys", b->sets[0].data->keys);
-	}
-}
-
-kobj_attribute_write(fuzz, btree_fuzz);
-#endif
-
 void bch_debug_exit(void)
 {
 	if (!IS_ERR_OR_NULL(debug))
@@ -564,11 +421,6 @@ void bch_debug_exit(void)
 int __init bch_debug_init(struct kobject *kobj)
 {
 	int ret = 0;
-#if 0
-	ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr);
-	if (ret)
-		return ret;
-#endif
 
 	debug = debugfs_create_dir("bcache", NULL);
 	return ret;

From 47cd2eb0ee05d9b1f8acd4808a1c829d63e93ac1 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Mon, 1 Jul 2013 12:19:55 -0700
Subject: [PATCH 054/320] bcache: Update email address

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5be702cc8449..58814f0f06bd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1621,7 +1621,7 @@ S:	Maintained
 F:	drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M:	Kent Overstreet <koverstreet@google.com>
+M:	Kent Overstreet <kmo@daterainc.com>
 L:	linux-bcache@vger.kernel.org
 W:	http://bcache.evilpiepirate.org
 S:	Maintained:

From 8e51e414a3c6d92ef2cc41720c67342a8e2c0bf7 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <koverstreet@google.com>
Date: Thu, 6 Jun 2013 18:15:57 -0700
Subject: [PATCH 055/320] bcache: Use standard utility code

Some of bcache's utility code has made it into the rest of the kernel,
so drop the bcache versions.

Bcache used to have a workaround for allocating from a bio set under
generic_make_request() (if you allocated more than once, the bios you
already allocated would get stuck on current->bio_list when you
submitted, and you'd risk deadlock) - bcache would mask out __GFP_WAIT
when allocating bios under generic_make_request() so that allocation
could fail and it could retry from workqueue. But bio_alloc_bioset() has
a workaround now, so we can drop this hack and the associated error
handling.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
---
 drivers/md/bcache/btree.c     |  7 +--
 drivers/md/bcache/debug.c     |  2 +-
 drivers/md/bcache/io.c        | 64 +++++++++-----------------
 drivers/md/bcache/movinggc.c  |  7 +--
 drivers/md/bcache/request.c   | 87 ++++++++---------------------------
 drivers/md/bcache/util.c      | 17 -------
 drivers/md/bcache/util.h      |  4 --
 drivers/md/bcache/writeback.c |  7 +--
 8 files changed, 51 insertions(+), 144 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index e0cca3673d0f..15b58239c683 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -350,7 +350,7 @@ static void do_btree_node_write(struct btree *b)
 	bkey_copy(&k.key, &b->key);
 	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
 
-	if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) {
+	if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
 		int j;
 		struct bio_vec *bv;
 		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -1865,7 +1865,7 @@ bool bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
 	    should_split(b))
 		goto out;
 
-	op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio));
+	op->replace = KEY(op->inode, bio_end_sector(bio), bio_sectors(bio));
 
 	SET_KEY_PTRS(&op->replace, 1);
 	get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t));
@@ -2194,9 +2194,6 @@ static int submit_partial_cache_hit(struct btree *b, struct btree_op *op,
 					 KEY_OFFSET(k) - bio->bi_sector);
 
 		n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-		if (!n)
-			return -EAGAIN;
-
 		if (n == bio)
 			op->lookup_done = true;
 
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index ba2ceee0fdbb..88e6411eab4f 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -199,7 +199,7 @@ void bch_data_verify(struct search *s)
 	if (!check)
 		return;
 
-	if (bch_bio_alloc_pages(check, GFP_NOIO))
+	if (bio_alloc_pages(check, GFP_NOIO))
 		goto out_put;
 
 	check->bi_rw		= READ_SYNC;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 0f6d69658b61..9056632995b1 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -68,13 +68,6 @@ static void bch_generic_make_request_hack(struct bio *bio)
  * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
  * bvec boundry; it is the caller's responsibility to ensure that @bio is not
  * freed before the split.
- *
- * If bch_bio_split() is running under generic_make_request(), it's not safe to
- * allocate more than one bio from the same bio set. Therefore, if it is running
- * under generic_make_request() it masks out __GFP_WAIT when doing the
- * allocation. The caller must check for failure if there's any possibility of
- * it being called from under generic_make_request(); it is then the caller's
- * responsibility to retry from a safe context (by e.g. punting to workqueue).
  */
 struct bio *bch_bio_split(struct bio *bio, int sectors,
 			  gfp_t gfp, struct bio_set *bs)
@@ -85,15 +78,6 @@ struct bio *bch_bio_split(struct bio *bio, int sectors,
 
 	BUG_ON(sectors <= 0);
 
-	/*
-	 * If we're being called from underneath generic_make_request() and we
-	 * already allocated any bios from this bio set, we risk deadlock if we
-	 * use the mempool. So instead, we possibly fail and let the caller punt
-	 * to workqueue or somesuch and retry in a safe context.
-	 */
-	if (current->bio_list)
-		gfp &= ~__GFP_WAIT;
-
 	if (sectors >= bio_sectors(bio))
 		return bio;
 
@@ -164,17 +148,18 @@ static unsigned bch_bio_max_sectors(struct bio *bio)
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
 				      queue_max_segments(q));
-	struct bio_vec *bv, *end = bio_iovec(bio) +
-		min_t(int, bio_segments(bio), max_segments);
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return min(ret, q->limits.max_discard_sectors);
 
 	if (bio_segments(bio) > max_segments ||
 	    q->merge_bvec_fn) {
+		struct bio_vec *bv;
+		int i, seg = 0;
+
 		ret = 0;
 
-		for (bv = bio_iovec(bio); bv < end; bv++) {
+		bio_for_each_segment(bv, bio, i) {
 			struct bvec_merge_data bvm = {
 				.bi_bdev	= bio->bi_bdev,
 				.bi_sector	= bio->bi_sector,
@@ -182,10 +167,14 @@ static unsigned bch_bio_max_sectors(struct bio *bio)
 				.bi_rw		= bio->bi_rw,
 			};
 
+			if (seg == max_segments)
+				break;
+
 			if (q->merge_bvec_fn &&
 			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
 				break;
 
+			seg++;
 			ret += bv->bv_len >> 9;
 		}
 	}
@@ -222,30 +211,10 @@ static void bch_bio_submit_split_endio(struct bio *bio, int error)
 	closure_put(cl);
 }
 
-static void __bch_bio_submit_split(struct closure *cl)
-{
-	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);
-	struct bio *bio = s->bio, *n;
-
-	do {
-		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
-				  GFP_NOIO, s->p->bio_split);
-		if (!n)
-			continue_at(cl, __bch_bio_submit_split, system_wq);
-
-		n->bi_end_io	= bch_bio_submit_split_endio;
-		n->bi_private	= cl;
-
-		closure_get(cl);
-		bch_generic_make_request_hack(n);
-	} while (n != bio);
-
-	continue_at(cl, bch_bio_submit_split_done, NULL);
-}
-
 void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 {
 	struct bio_split_hook *s;
+	struct bio *n;
 
 	if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD))
 		goto submit;
@@ -254,6 +223,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 		goto submit;
 
 	s = mempool_alloc(p->bio_split_hook, GFP_NOIO);
+	closure_init(&s->cl, NULL);
 
 	s->bio		= bio;
 	s->p		= p;
@@ -261,8 +231,18 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 	s->bi_private	= bio->bi_private;
 	bio_get(bio);
 
-	closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL);
-	return;
+	do {
+		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
+				  GFP_NOIO, s->p->bio_split);
+
+		n->bi_end_io	= bch_bio_submit_split_endio;
+		n->bi_private	= &s->cl;
+
+		closure_get(&s->cl);
+		bch_generic_make_request_hack(n);
+	} while (n != bio);
+
+	continue_at(&s->cl, bch_bio_submit_split_done, NULL);
 submit:
 	bch_generic_make_request_hack(bio);
 }
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index a241e9fd4f7f..1a3b4f4786c3 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -46,9 +46,10 @@ static void write_moving_finish(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, s.cl);
 	struct bio *bio = &io->bio.bio;
-	struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt);
+	struct bio_vec *bv;
+	int i;
 
-	while (bv-- != bio->bi_io_vec)
+	bio_for_each_segment_all(bv, bio, i)
 		__free_page(bv->bv_page);
 
 	if (io->s.op.insert_collision)
@@ -158,7 +159,7 @@ static void read_moving(struct closure *cl)
 		bio->bi_rw	= READ;
 		bio->bi_end_io	= read_moving_endio;
 
-		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
+		if (bio_alloc_pages(bio, GFP_KERNEL))
 			goto err;
 
 		trace_bcache_gc_copy(&w->key);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index bcdf1f782c3e..b6e74d3c8faf 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -509,10 +509,6 @@ static void bch_insert_data_loop(struct closure *cl)
 			goto err;
 
 		n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
-		if (!n) {
-			__bkey_put(op->c, k);
-			continue_at(cl, bch_insert_data_loop, bcache_wq);
-		}
 
 		n->bi_end_io	= bch_insert_data_endio;
 		n->bi_private	= cl;
@@ -821,53 +817,13 @@ static void request_read_done(struct closure *cl)
 	 */
 
 	if (s->op.cache_bio) {
-		struct bio_vec *src, *dst;
-		unsigned src_offset, dst_offset, bytes;
-		void *dst_ptr;
-
 		bio_reset(s->op.cache_bio);
 		s->op.cache_bio->bi_sector	= s->cache_miss->bi_sector;
 		s->op.cache_bio->bi_bdev	= s->cache_miss->bi_bdev;
 		s->op.cache_bio->bi_size	= s->cache_bio_sectors << 9;
 		bch_bio_map(s->op.cache_bio, NULL);
 
-		src = bio_iovec(s->op.cache_bio);
-		dst = bio_iovec(s->cache_miss);
-		src_offset = src->bv_offset;
-		dst_offset = dst->bv_offset;
-		dst_ptr = kmap(dst->bv_page);
-
-		while (1) {
-			if (dst_offset == dst->bv_offset + dst->bv_len) {
-				kunmap(dst->bv_page);
-				dst++;
-				if (dst == bio_iovec_idx(s->cache_miss,
-						s->cache_miss->bi_vcnt))
-					break;
-
-				dst_offset = dst->bv_offset;
-				dst_ptr = kmap(dst->bv_page);
-			}
-
-			if (src_offset == src->bv_offset + src->bv_len) {
-				src++;
-				if (src == bio_iovec_idx(s->op.cache_bio,
-						 s->op.cache_bio->bi_vcnt))
-					BUG();
-
-				src_offset = src->bv_offset;
-			}
-
-			bytes = min(dst->bv_offset + dst->bv_len - dst_offset,
-				    src->bv_offset + src->bv_len - src_offset);
-
-			memcpy(dst_ptr + dst_offset,
-			       page_address(src->bv_page) + src_offset,
-			       bytes);
-
-			src_offset	+= bytes;
-			dst_offset	+= bytes;
-		}
+		bio_copy_data(s->cache_miss, s->op.cache_bio);
 
 		bio_put(s->cache_miss);
 		s->cache_miss = NULL;
@@ -912,9 +868,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	struct bio *miss;
 
 	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-	if (!miss)
-		return -EAGAIN;
-
 	if (miss == bio)
 		s->op.lookup_done = true;
 
@@ -933,8 +886,9 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 		reada = min(dc->readahead >> 9,
 			    sectors - bio_sectors(miss));
 
-		if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev))
-			reada = bdev_sectors(miss->bi_bdev) - bio_end(miss);
+		if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev))
+			reada = bdev_sectors(miss->bi_bdev) -
+				bio_end_sector(miss);
 	}
 
 	s->cache_bio_sectors = bio_sectors(miss) + reada;
@@ -958,7 +912,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 		goto out_put;
 
 	bch_bio_map(s->op.cache_bio, NULL);
-	if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
+	if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
 		goto out_put;
 
 	s->cache_miss = miss;
@@ -1002,7 +956,7 @@ static void request_write(struct cached_dev *dc, struct search *s)
 	struct bio *bio = &s->bio.bio;
 	struct bkey start, end;
 	start = KEY(dc->disk.id, bio->bi_sector, 0);
-	end = KEY(dc->disk.id, bio_end(bio), 0);
+	end = KEY(dc->disk.id, bio_end_sector(bio), 0);
 
 	bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end);
 
@@ -1176,7 +1130,7 @@ static void check_should_skip(struct cached_dev *dc, struct search *s)
 		if (i->sequential + bio->bi_size > i->sequential)
 			i->sequential	+= bio->bi_size;
 
-		i->last			 = bio_end(bio);
+		i->last			 = bio_end_sector(bio);
 		i->jiffies		 = jiffies + msecs_to_jiffies(5000);
 		s->task->sequential_io	 = i->sequential;
 
@@ -1294,30 +1248,25 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
 				struct bio *bio, unsigned sectors)
 {
+	struct bio_vec *bv;
+	int i;
+
 	/* Zero fill bio */
 
-	while (bio->bi_idx != bio->bi_vcnt) {
-		struct bio_vec *bv = bio_iovec(bio);
+	bio_for_each_segment(bv, bio, i) {
 		unsigned j = min(bv->bv_len >> 9, sectors);
 
 		void *p = kmap(bv->bv_page);
 		memset(p + bv->bv_offset, 0, j << 9);
 		kunmap(bv->bv_page);
 
-		bv->bv_len	-= j << 9;
-		bv->bv_offset	+= j << 9;
-
-		if (bv->bv_len)
-			return 0;
-
-		bio->bi_sector	+= j;
-		bio->bi_size	-= j << 9;
-
-		bio->bi_idx++;
-		sectors		-= j;
+		sectors	-= j;
 	}
 
-	s->op.lookup_done = true;
+	bio_advance(bio, min(sectors << 9, bio->bi_size));
+
+	if (!bio->bi_size)
+		s->op.lookup_done = true;
 
 	return 0;
 }
@@ -1344,8 +1293,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
 		closure_call(&s->op.cl, btree_read_async, NULL, cl);
 	} else if (bio_has_data(bio) || s->op.skip) {
 		bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys,
-					     &KEY(d->id, bio->bi_sector, 0),
-					     &KEY(d->id, bio_end(bio), 0));
+					&KEY(d->id, bio->bi_sector, 0),
+					&KEY(d->id, bio_end_sector(bio), 0));
 
 		s->writeback	= true;
 		s->op.cache_bio	= bio;
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index da3a99e85b1e..98eb81159a22 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -228,23 +228,6 @@ start:		bv->bv_len	= min_t(size_t, PAGE_SIZE - bv->bv_offset,
 	}
 }
 
-int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp)
-{
-	int i;
-	struct bio_vec *bv;
-
-	bio_for_each_segment(bv, bio, i) {
-		bv->bv_page = alloc_page(gfp);
-		if (!bv->bv_page) {
-			while (bv-- != bio->bi_io_vec + bio->bi_idx)
-				__free_page(bv->bv_page);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
  * use permitted, subject to terms of PostgreSQL license; see.)
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index e02780545f12..1ae2a73ad85f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -564,12 +564,8 @@ static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
 	return x;
 }
 
-#define bio_end(bio)	((bio)->bi_sector + bio_sectors(bio))
-
 void bch_bio_map(struct bio *bio, void *base);
 
-int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp);
-
 static inline sector_t bdev_sectors(struct block_device *bdev)
 {
 	return bdev->bd_inode->i_size >> 9;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d81ee5ccc726..22cbff551628 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -285,9 +285,10 @@ static void write_dirty_finish(struct closure *cl)
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 	struct keybuf_key *w = io->bio.bi_private;
 	struct cached_dev *dc = io->dc;
-	struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt);
+	struct bio_vec *bv;
+	int i;
 
-	while (bv-- != io->bio.bi_io_vec)
+	bio_for_each_segment_all(bv, &io->bio, i)
 		__free_page(bv->bv_page);
 
 	/* This is kind of a dumb way of signalling errors. */
@@ -418,7 +419,7 @@ static void read_dirty(struct closure *cl)
 		io->bio.bi_rw		= READ;
 		io->bio.bi_end_io	= read_dirty_endio;
 
-		if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
+		if (bio_alloc_pages(&io->bio, GFP_KERNEL))
 			goto err_free;
 
 		trace_bcache_writeback(&w->key);

From add0c59d802e6118e51e21244c3871be35164e4b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 9 Jul 2013 16:17:39 -0700
Subject: [PATCH 056/320] cgroup: remove bcache_subsys_id which got added
 stealthily

cafe563591 ("bcache: A block layer cache") added a new cgroup
subsystem bcache_subsys without proper review and ack.  bcache_subsys
seems to use cgroup for group stats and per-group cache_mode
configuration.  This is very much the type of usage that we don't want
to allow.

Fortunately, CONFIG_CGROUP_BCACHE which enables bcache_subsys is
currently commented out, so this shouldn't have any upstream users.
Let's nip in the bud.  While at it, clarify in cgroup_subsys.h that no
new subsystem should be added without explicit acks from cgroup
maintainers.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: cgroups@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-bcache@vger.kernel.org
---
 include/linux/cgroup_subsys.h | 45 ++++++-----------------------------
 1 file changed, 7 insertions(+), 38 deletions(-)

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 6e7ec64b69ab..b613ffd402d1 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -1,86 +1,55 @@
-/* Add subsystem definitions of the form SUBSYS(<name>) in this
- * file. Surround each one by a line of comment markers so that
- * patches don't collide
+/*
+ * List of cgroup subsystems.
+ *
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
-
-/* */
-
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CPUSETS)
 SUBSYS(cpuset)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG)
 SUBSYS(debug)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT)
 SUBSYS(cpuacct)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_MEMCG)
 SUBSYS(mem_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE)
 SUBSYS(devices)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER)
 SUBSYS(freezer)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
 SUBSYS(net_cls)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(blkio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
 SUBSYS(perf)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
 SUBSYS(net_prio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
-
-/* */
-
-#ifdef CONFIG_CGROUP_BCACHE
-SUBSYS(bcache)
-#endif
-
-/* */
+/*
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
+ */

From 060810d7abaabcab282e062c595871d661561400 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 8 Jul 2013 14:15:51 +1000
Subject: [PATCH 057/320] drm/nouveau: fix locking issues in page flipping
 paths

b580c9e2b7ba5030a795aa2fb73b796523d65a78 introduced additional problems
while trying to solve issues that became apparent while porting to the
new reservation stuff.

The major problem was that the the previously mentioned patch took the
client mutex earlier than previously, but the pinning of new_bo can
can potentially cause a buffer move, which would result in attempting to
acquire the same mutex again.

This commit attempts to fix that "fix".

Thanks to Maarten for the tips on keeping lockdep happy and cooking :)

Reported-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c      |  2 +-
 drivers/gpu/drm/nouveau/nouveau_display.c | 53 +++++++++--------------
 drivers/gpu/drm/nouveau/nouveau_drm.c     |  3 --
 3 files changed, 22 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 4b1afb131380..85fed108d7e4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -973,7 +973,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int ret;
 
-	mutex_lock(&chan->cli->mutex);
+	mutex_lock_nested(&chan->cli->mutex, SINGLE_DEPTH_NESTING);
 
 	/* create temporary vmas for the transfer and attach them to the
 	 * old nouveau_mem node, these will get cleaned up after ttm has
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 708b2d1c0037..61fdef8eac49 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -524,9 +524,12 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	struct nouveau_page_flip_state *s;
 	struct nouveau_channel *chan = NULL;
 	struct nouveau_fence *fence;
-	struct list_head res;
-	struct ttm_validate_buffer res_val[2];
+	struct ttm_validate_buffer resv[2] = {
+		{ .bo = &old_bo->bo },
+		{ .bo = &new_bo->bo },
+	};
 	struct ww_acquire_ctx ticket;
+	LIST_HEAD(res);
 	int ret;
 
 	if (!drm->channel)
@@ -545,27 +548,19 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		chan = drm->channel;
 	spin_unlock(&old_bo->bo.bdev->fence_lock);
 
-	mutex_lock(&chan->cli->mutex);
-
 	if (new_bo != old_bo) {
 		ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM);
-		if (likely(!ret)) {
-			res_val[0].bo = &old_bo->bo;
-			res_val[1].bo = &new_bo->bo;
-			INIT_LIST_HEAD(&res);
-			list_add_tail(&res_val[0].head, &res);
-			list_add_tail(&res_val[1].head, &res);
-			ret = ttm_eu_reserve_buffers(&ticket, &res);
-			if (ret)
-				nouveau_bo_unpin(new_bo);
-		}
-	} else
-		ret = ttm_bo_reserve(&new_bo->bo, false, false, false, 0);
+		if (ret)
+			goto fail_free;
 
-	if (ret) {
-		mutex_unlock(&chan->cli->mutex);
-		goto fail_free;
+		list_add(&resv[1].head, &res);
 	}
+	list_add(&resv[0].head, &res);
+
+	mutex_lock(&chan->cli->mutex);
+	ret = ttm_eu_reserve_buffers(&ticket, &res);
+	if (ret)
+		goto fail_unpin;
 
 	/* Initialize a page flip struct */
 	*s = (struct nouveau_page_flip_state)
@@ -576,10 +571,8 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	/* Emit a page flip */
 	if (nv_device(drm->device)->card_type >= NV_50) {
 		ret = nv50_display_flip_next(crtc, fb, chan, 0);
-		if (ret) {
-			mutex_unlock(&chan->cli->mutex);
+		if (ret)
 			goto fail_unreserve;
-		}
 	}
 
 	ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
@@ -590,22 +583,18 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	/* Update the crtc struct and cleanup */
 	crtc->fb = fb;
 
-	if (old_bo != new_bo) {
-		ttm_eu_fence_buffer_objects(&ticket, &res, fence);
+	ttm_eu_fence_buffer_objects(&ticket, &res, fence);
+	if (old_bo != new_bo)
 		nouveau_bo_unpin(old_bo);
-	} else {
-		nouveau_bo_fence(new_bo, fence);
-		ttm_bo_unreserve(&new_bo->bo);
-	}
 	nouveau_fence_unref(&fence);
 	return 0;
 
 fail_unreserve:
-	if (old_bo != new_bo) {
-		ttm_eu_backoff_reservation(&ticket, &res);
+	ttm_eu_backoff_reservation(&ticket, &res);
+fail_unpin:
+	mutex_unlock(&chan->cli->mutex);
+	if (old_bo != new_bo)
 		nouveau_bo_unpin(new_bo);
-	} else
-		ttm_bo_unreserve(&new_bo->bo);
 fail_free:
 	kfree(s);
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 218a4b522fe5..4eca52b8d573 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -284,8 +284,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	return 0;
 }
 
-static struct lock_class_key drm_client_lock_class_key;
-
 static int
 nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 {
@@ -297,7 +295,6 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm);
 	if (ret)
 		return ret;
-	lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key);
 
 	dev->dev_private = drm;
 	drm->dev = dev;

From fdfb8332651db7a280851dfccfc4f0cff4bcd052 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Mon, 8 Jul 2013 14:50:54 +1000
Subject: [PATCH 058/320] drm/nouveau: fix some error-path leaks in fbcon
 handling code

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_display.c | 16 ++++++++++------
 drivers/gpu/drm/nouveau/nouveau_fbcon.c   |  1 +
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 61fdef8eac49..907d20ef6d4d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -138,7 +138,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
 {
 	struct nouveau_framebuffer *nouveau_fb;
 	struct drm_gem_object *gem;
-	int ret;
+	int ret = -ENOMEM;
 
 	gem = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
 	if (!gem)
@@ -146,15 +146,19 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
 
 	nouveau_fb = kzalloc(sizeof(struct nouveau_framebuffer), GFP_KERNEL);
 	if (!nouveau_fb)
-		return ERR_PTR(-ENOMEM);
+		goto err_unref;
 
 	ret = nouveau_framebuffer_init(dev, nouveau_fb, mode_cmd, nouveau_gem_object(gem));
-	if (ret) {
-		drm_gem_object_unreference(gem);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err;
 
 	return &nouveau_fb->base;
+
+err:
+	kfree(nouveau_fb);
+err_unref:
+	drm_gem_object_unreference(gem);
+	return ERR_PTR(ret);
 }
 
 static const struct drm_mode_config_funcs nouveau_mode_config_funcs = {
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 9352010030e9..4c1bc061fae2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -385,6 +385,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 	mutex_unlock(&dev->struct_mutex);
 	if (chan)
 		nouveau_bo_vma_del(nvbo, &fbcon->nouveau_fb.vma);
+	nouveau_bo_unmap(nvbo);
 out_unpin:
 	nouveau_bo_unpin(nvbo);
 out_unref:

From 8dda53fca24789acf891ef31a3c5e03332b43cce Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Jul 2013 12:35:55 +1000
Subject: [PATCH 059/320] drm/nv50/kms: fix pin refcnt leaks

Weren't critical previously, the buffers would go away anyway.  But with
recent changes to core drm/ttm lockdep will get pissed off now, so let's
fix it.

Reported-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/nouveau/nv50_display.c | 46 ++++++++++++++++++++------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 54dc6355b0c2..8b40a36c1b57 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -355,6 +355,7 @@ struct nv50_oimm {
 
 struct nv50_head {
 	struct nouveau_crtc base;
+	struct nouveau_bo *image;
 	struct nv50_curs curs;
 	struct nv50_sync sync;
 	struct nv50_ovly ovly;
@@ -517,9 +518,10 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 {
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_head *head = nv50_head(crtc);
 	struct nv50_sync *sync = nv50_sync(crtc);
-	int head = nv_crtc->index, ret;
 	u32 *push;
+	int ret;
 
 	swap_interval <<= 4;
 	if (swap_interval == 0)
@@ -537,7 +539,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			return ret;
 
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-		OUT_RING  (chan, NvEvoSema0 + head);
+		OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
 		OUT_RING  (chan, sync->addr ^ 0x10);
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
 		OUT_RING  (chan, sync->data + 1);
@@ -546,7 +548,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		OUT_RING  (chan, sync->data);
 	} else
 	if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
-		u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
 		ret = RING_SPACE(chan, 12);
 		if (ret)
 			return ret;
@@ -565,7 +567,7 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 	} else
 	if (chan) {
-		u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
 		ret = RING_SPACE(chan, 10);
 		if (ret)
 			return ret;
@@ -630,6 +632,8 @@ nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	evo_mthd(push, 0x0080, 1);
 	evo_data(push, 0x00000000);
 	evo_kick(push, sync);
+
+	nouveau_bo_ref(nv_fb->nvbo, &head->image);
 	return 0;
 }
 
@@ -1038,18 +1042,17 @@ static int
 nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
 {
 	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+	struct nv50_head *head = nv50_head(crtc);
 	int ret;
 
 	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
-	if (ret)
-		return ret;
-
-	if (old_fb) {
-		nvfb = nouveau_framebuffer(old_fb);
-		nouveau_bo_unpin(nvfb->nvbo);
+	if (ret == 0) {
+		if (head->image)
+			nouveau_bo_unpin(head->image);
+		nouveau_bo_ref(nvfb->nvbo, &head->image);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int
@@ -1198,6 +1201,15 @@ nv50_crtc_lut_load(struct drm_crtc *crtc)
 	}
 }
 
+static void
+nv50_crtc_disable(struct drm_crtc *crtc)
+{
+	struct nv50_head *head = nv50_head(crtc);
+	if (head->image)
+		nouveau_bo_unpin(head->image);
+	nouveau_bo_ref(NULL, &head->image);
+}
+
 static int
 nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 		     uint32_t handle, uint32_t width, uint32_t height)
@@ -1271,18 +1283,29 @@ nv50_crtc_destroy(struct drm_crtc *crtc)
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv50_disp *disp = nv50_disp(crtc->dev);
 	struct nv50_head *head = nv50_head(crtc);
+
 	nv50_dmac_destroy(disp->core, &head->ovly.base);
 	nv50_pioc_destroy(disp->core, &head->oimm.base);
 	nv50_dmac_destroy(disp->core, &head->sync.base);
 	nv50_pioc_destroy(disp->core, &head->curs.base);
+
+	/*XXX: this shouldn't be necessary, but the core doesn't call
+	 *     disconnect() during the cleanup paths
+	 */
+	if (head->image)
+		nouveau_bo_unpin(head->image);
+	nouveau_bo_ref(NULL, &head->image);
+
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
 	if (nv_crtc->cursor.nvbo)
 		nouveau_bo_unpin(nv_crtc->cursor.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+
 	nouveau_bo_unmap(nv_crtc->lut.nvbo);
 	if (nv_crtc->lut.nvbo)
 		nouveau_bo_unpin(nv_crtc->lut.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+
 	drm_crtc_cleanup(crtc);
 	kfree(crtc);
 }
@@ -1296,6 +1319,7 @@ static const struct drm_crtc_helper_funcs nv50_crtc_hfunc = {
 	.mode_set_base = nv50_crtc_mode_set_base,
 	.mode_set_base_atomic = nv50_crtc_mode_set_base_atomic,
 	.load_lut = nv50_crtc_lut_load,
+	.disable = nv50_crtc_disable,
 };
 
 static const struct drm_crtc_funcs nv50_crtc_func = {

From 4f3855997c5d2bf6443853800eb1a03f61ad9140 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Sun, 7 Jul 2013 10:37:35 +0200
Subject: [PATCH 060/320] drm/nouveau: do not unpin in nouveau_gem_object_del

This should no longer be required, and is harmful for framebuffer pinning.
Also add a warning if unpin causes the pin count to drop below 0.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  | 7 +++++--
 drivers/gpu/drm/nouveau/nouveau_gem.c | 6 ------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 85fed108d7e4..824a98811de6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -148,6 +148,7 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 
 	if (unlikely(nvbo->gem))
 		DRM_ERROR("bo %p still attached to GEM object\n", bo);
+	WARN_ON(nvbo->pin_refcnt > 0);
 	nv10_bo_put_tile_region(dev, nvbo->tile, NULL);
 	kfree(nvbo);
 }
@@ -340,13 +341,15 @@ nouveau_bo_unpin(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	struct ttm_buffer_object *bo = &nvbo->bo;
-	int ret;
+	int ret, ref;
 
 	ret = ttm_bo_reserve(bo, false, false, false, 0);
 	if (ret)
 		return ret;
 
-	if (--nvbo->pin_refcnt)
+	ref = --nvbo->pin_refcnt;
+	WARN_ON_ONCE(ref < 0);
+	if (ref)
 		goto out;
 
 	nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index e72d09c068a8..830cb7bad922 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -50,12 +50,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem)
 		return;
 	nvbo->gem = NULL;
 
-	/* Lockdep hates you for doing reserve with gem object lock held */
-	if (WARN_ON_ONCE(nvbo->pin_refcnt)) {
-		nvbo->pin_refcnt = 1;
-		nouveau_bo_unpin(nvbo);
-	}
-
 	if (gem->import_attach)
 		drm_prime_gem_destroy(gem, nvbo->bo.sg);
 

From 06b237ef3903a0a5e33785105ea5203153323dd8 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Sun, 7 Jul 2013 10:53:37 +0200
Subject: [PATCH 061/320] drm/nouveau: bump fence timeout to 15 seconds

calim didn't like 150 seconds timeout, so lower the timeout for him.
15 seconds should still be plenty.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 1680d9187bab..be3149932c2d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -143,7 +143,7 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 	int ret;
 
 	fence->channel  = chan;
-	fence->timeout  = jiffies + (3 * DRM_HZ);
+	fence->timeout  = jiffies + (15 * DRM_HZ);
 	fence->sequence = ++fctx->sequence;
 
 	ret = fctx->emit(fence);

From 00fc6f6f731efb7b76b839598e494b01890d901d Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 9 Jul 2013 14:20:15 +1000
Subject: [PATCH 062/320] drm/nouveau: use dedicated channel for async moves on
 GT/GF chipsets.

The moves themselves were generally async to graphics previously, with
the exception that if the "main" channel is used to synchronise a
page flip at the same time, it can end up blocked for a noticable amount
of time for large buffer moves.

Not really critical, and there's better ways of handling this, but they
are all rather invasive, so this is fine for now.

Based on a patch by Maarten Lankhorst addressing the same issue.

Reported-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Acked-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  |  6 +++---
 drivers/gpu/drm/nouveau/nouveau_drm.c | 12 ++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 824a98811de6..459a44550ce5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -581,7 +581,7 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 	int ret = RING_SPACE(chan, 2);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
-		OUT_RING  (chan, handle);
+		OUT_RING  (chan, handle & 0x0000ffff);
 		FIRE_RING (chan);
 	}
 	return ret;
@@ -1017,7 +1017,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
-		{  "COPY", 0, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{ "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init },
 		{ "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init },
@@ -1037,7 +1037,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 		struct nouveau_channel *chan;
 		u32 handle = (mthd->engine << 16) | mthd->oclass;
 
-		if (mthd->init == nve0_bo_move_init)
+		if (mthd->engine)
 			chan = drm->cechan;
 		else
 			chan = drm->channel;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 4eca52b8d573..61972668fd05 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -192,6 +192,18 @@ nouveau_accel_init(struct nouveau_drm *drm)
 
 		arg0 = NVE0_CHANNEL_IND_ENGINE_GR;
 		arg1 = 1;
+	} else
+	if (device->chipset >= 0xa3 &&
+	    device->chipset != 0xaa &&
+	    device->chipset != 0xac) {
+		ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE,
+					  NVDRM_CHAN + 1, NvDmaFB, NvDmaTT,
+					  &drm->cechan);
+		if (ret)
+			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
+
+		arg0 = NvDmaFB;
+		arg1 = NvDmaTT;
 	} else {
 		arg0 = NvDmaFB;
 		arg1 = NvDmaTT;

From 9b234db37825860b833ba110673301e2009f0546 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Sun, 7 Jul 2013 10:45:43 +0200
Subject: [PATCH 063/320] drm/nouveau: add falcon interrupt handler

This prevents 100% cpu usage on fermi cards when the exit interrupt
from the secret scrubber is not acked.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/core/engine/bsp/nvc0.c    |  1 +
 .../gpu/drm/nouveau/core/engine/bsp/nve0.c    |  1 +
 drivers/gpu/drm/nouveau/core/engine/falcon.c  | 19 +++++++++++++++++++
 .../gpu/drm/nouveau/core/engine/ppp/nvc0.c    |  1 +
 drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c |  1 +
 drivers/gpu/drm/nouveau/core/engine/vp/nve0.c |  1 +
 .../drm/nouveau/core/include/engine/falcon.h  |  2 ++
 7 files changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
index 262c9f5f5f60..ce860de43e61 100644
--- a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
@@ -90,6 +90,7 @@ nvc0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00008000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nvc0_bsp_cclass;
 	nv_engine(priv)->sclass = nvc0_bsp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
index c46882c83982..ba6aeca0285e 100644
--- a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
@@ -90,6 +90,7 @@ nve0_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00008000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nve0_bsp_cclass;
 	nv_engine(priv)->sclass = nve0_bsp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/falcon.c b/drivers/gpu/drm/nouveau/core/engine/falcon.c
index 3c7a31f7590e..e03fc8e4dc1d 100644
--- a/drivers/gpu/drm/nouveau/core/engine/falcon.c
+++ b/drivers/gpu/drm/nouveau/core/engine/falcon.c
@@ -23,6 +23,25 @@
 #include <engine/falcon.h>
 #include <subdev/timer.h>
 
+void
+nouveau_falcon_intr(struct nouveau_subdev *subdev)
+{
+	struct nouveau_falcon *falcon = (void *)subdev;
+	u32 dispatch = nv_ro32(falcon, 0x01c);
+	u32 intr = nv_ro32(falcon, 0x008) & dispatch & ~(dispatch >> 16);
+
+	if (intr & 0x00000010) {
+		nv_debug(falcon, "ucode halted\n");
+		nv_wo32(falcon, 0x004, 0x00000010);
+		intr &= ~0x00000010;
+	}
+
+	if (intr)  {
+		nv_error(falcon, "unhandled intr 0x%08x\n", intr);
+		nv_wo32(falcon, 0x004, intr);
+	}
+}
+
 u32
 _nouveau_falcon_rd32(struct nouveau_object *object, u64 addr)
 {
diff --git a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
index 98072c1ff360..73719aaa62d6 100644
--- a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
@@ -90,6 +90,7 @@ nvc0_ppp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00000002;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nvc0_ppp_cclass;
 	nv_engine(priv)->sclass = nvc0_ppp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
index 1879229b60eb..ac1f62aace72 100644
--- a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
@@ -90,6 +90,7 @@ nvc0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00020000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nvc0_vp_cclass;
 	nv_engine(priv)->sclass = nvc0_vp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
index d28ecbf7bc49..d4c3108479c9 100644
--- a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
@@ -90,6 +90,7 @@ nve0_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00020000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nve0_vp_cclass;
 	nv_engine(priv)->sclass = nve0_vp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h
index 1edec386ab36..181aa7da524d 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h
@@ -72,6 +72,8 @@ int nouveau_falcon_create_(struct nouveau_object *, struct nouveau_object *,
 			   struct nouveau_oclass *, u32, bool, const char *,
 			   const char *, int, void **);
 
+void nouveau_falcon_intr(struct nouveau_subdev *subdev);
+
 #define _nouveau_falcon_dtor _nouveau_engine_dtor
 int  _nouveau_falcon_init(struct nouveau_object *);
 int  _nouveau_falcon_fini(struct nouveau_object *, bool);

From 0108bc808107b97e101b15af9705729626be6447 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Date: Sun, 7 Jul 2013 10:40:19 +0200
Subject: [PATCH 064/320] drm/nouveau: do not allow negative sizes for now

The API allows up to 64-bits allocations, but size is handled as int
inside nouveau almost everywhere. Until this is fixed it's better to
prevent negative sizes.

The 256 kB before INT_MAX is paranoia, because of the large page
aligning below that could flip it above INT_MAX.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 459a44550ce5..4e7ee5f4155c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -198,6 +198,12 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	size_t acc_size;
 	int ret;
 	int type = ttm_bo_type_device;
+	int max_size = INT_MAX & ~((1 << drm->client.base.vm->vmm->lpg_shift) - 1);
+
+	if (size <= 0 || size > max_size) {
+		nv_warn(drm, "skipped size %x\n", (u32)size);
+		return -EINVAL;
+	}
 
 	if (sg)
 		type = ttm_bo_type_sg;

From 9c23b7d3d6bda41e2a27375df705485523a96dc8 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul@freescale.com>
Date: Wed, 10 Jul 2013 06:26:13 +0000
Subject: [PATCH 065/320] crypto: caam - Fixed the memory out of bound
 overwrite issue

When kernel is compiled with CONFIG_SLUB_DEBUG=y and
CRYPTO_MANAGER_DISABLE_TESTS=n, during kernel bootup, the kernel
reports error given below. The root cause is that in function
hash_digest_key(), for allocating descriptor, insufficient memory was
being allocated. The required number of descriptor words apart from
input and output pointers are 8 (instead of 6).

=============================================================================
BUG dma-kmalloc-32 (Not tainted): Redzone overwritten
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: 0xdec5dec0-0xdec5dec3. First byte 0x0 instead of 0xcc
INFO: Allocated in ahash_setkey+0x60/0x594 age=7 cpu=1 pid=1257
        __kmalloc+0x154/0x1b4
        ahash_setkey+0x60/0x594
        test_hash+0x260/0x5a0
        alg_test_hash+0x48/0xb0
        alg_test+0x84/0x228
        cryptomgr_test+0x4c/0x54
        kthread+0x98/0x9c
        ret_from_kernel_thread+0x64/0x6c
INFO: Slab 0xc0bd0ba0 objects=19 used=2 fp=0xdec5d0d0 flags=0x0081
INFO: Object 0xdec5dea0 @offset=3744 fp=0x5c200014

Bytes b4 dec5de90: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a
........ZZZZZZZZ
Object dec5dea0: b0 80 00 0a 84 41 00 0d f0 40 00 00 00 67 3f c0
.....A...@...g?.
Object dec5deb0: 00 00 00 50 2c 14 00 50 f8 40 00 00 1e c5 d0 00
...P,..P.@......
Redzone dec5dec0: 00 00 00 14                                      ....
Padding dec5df68: 5a 5a 5a 5a 5a 5a 5a 5a
ZZZZZZZZ
Call Trace:
[dec65b60] [c00071b4] show_stack+0x4c/0x168 (unreliable)
[dec65ba0] [c00d4ec8] check_bytes_and_report+0xe4/0x11c
[dec65bd0] [c00d507c] check_object+0x17c/0x23c
[dec65bf0] [c0550a00] free_debug_processing+0xf4/0x294
[dec65c20] [c0550bdc] __slab_free+0x3c/0x294
[dec65c80] [c03f0744] ahash_setkey+0x4e0/0x594
[dec65cd0] [c01ef138] test_hash+0x260/0x5a0
[dec65e50] [c01ef4c0] alg_test_hash+0x48/0xb0
[dec65e70] [c01eecc4] alg_test+0x84/0x228
[dec65ee0] [c01ec640] cryptomgr_test+0x4c/0x54
[dec65ef0] [c005adc0] kthread+0x98/0x9c
[dec65f40] [c000e1ac] ret_from_kernel_thread+0x64/0x6c
FIX dma-kmalloc-32: Restoring 0xdec5dec0-0xdec5dec3=0xcc

Change-Id: I0c7a1048053e811025d1c3b487940f87345c8f5d
Signed-off-by: Vakul Garg <vakul@freescale.com>
CC: <stable@vger.kernel.org> #3.9
Reviewed-by: Geanta Neag Horia Ioan-B05471 <horia.geanta@freescale.com>
Reviewed-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
Tested-by: Fleming Andrew-AFLEMING <AFLEMING@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamhash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 5996521a1caf..84573b4d6f92 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -429,7 +429,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
 	dma_addr_t src_dma, dst_dma;
 	int ret = 0;
 
-	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+	desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
 	if (!desc) {
 		dev_err(jrdev, "unable to allocate key input memory\n");
 		return -ENOMEM;

From 2e26af79b7e24e9644a6c16ad4dca61501fb4b3f Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Tue, 7 May 2013 14:54:33 +0800
Subject: [PATCH 066/320] vhost-net: Always access vq->private_data under vq
 mutex

Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 027be91db139..99f8d63491aa 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -346,12 +346,11 @@ static void handle_tx(struct vhost_net *net)
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
 	bool zcopy, zcopy_used;
 
-	/* TODO: check that we are running from vhost_worker? */
-	sock = rcu_dereference_check(vq->private_data, 1);
-	if (!sock)
-		return;
-
 	mutex_lock(&vq->mutex);
+	sock = vq->private_data;
+	if (!sock)
+		goto out;
+
 	vhost_disable_notify(&net->dev, vq);
 
 	hdr_size = nvq->vhost_hlen;
@@ -461,7 +460,7 @@ static void handle_tx(struct vhost_net *net)
 			break;
 		}
 	}
-
+out:
 	mutex_unlock(&vq->mutex);
 }
 
@@ -570,14 +569,14 @@ static void handle_rx(struct vhost_net *net)
 	s16 headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
-	/* TODO: check that we are running from vhost_worker? */
-	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
-
-	if (!sock)
-		return;
+	struct socket *sock;
 
 	mutex_lock(&vq->mutex);
+	sock = vq->private_data;
+	if (!sock)
+		goto out;
 	vhost_disable_notify(&net->dev, vq);
+
 	vhost_hlen = nvq->vhost_hlen;
 	sock_hlen = nvq->sock_hlen;
 
@@ -652,7 +651,7 @@ static void handle_rx(struct vhost_net *net)
 			break;
 		}
 	}
-
+out:
 	mutex_unlock(&vq->mutex);
 }
 

From e7802212ea4bbbd5db99181942a19ab36ca4b914 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Tue, 7 May 2013 14:54:35 +0800
Subject: [PATCH 067/320] vhost-scsi: Always access vq->private_data under vq
 mutex

Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/scsi.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 4264840ef7dc..45365396dbbc 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -896,19 +896,15 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	int head, ret;
 	u8 target;
 
+	mutex_lock(&vq->mutex);
 	/*
 	 * We can handle the vq only after the endpoint is setup by calling the
 	 * VHOST_SCSI_SET_ENDPOINT ioctl.
-	 *
-	 * TODO: Check that we are running from vhost_worker which acts
-	 * as read-side critical section for vhost kind of RCU.
-	 * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
 	 */
-	vs_tpg = rcu_dereference_check(vq->private_data, 1);
+	vs_tpg = vq->private_data;
 	if (!vs_tpg)
-		return;
+		goto out;
 
-	mutex_lock(&vq->mutex);
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
@@ -1058,6 +1054,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	vhost_scsi_free_cmd(cmd);
 err_cmd:
 	vhost_scsi_send_bad_target(vs, vq, head, out);
+out:
 	mutex_unlock(&vq->mutex);
 }
 

From 22fa90c7fb479694d6affebc049d21f06b714be6 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Tue, 7 May 2013 14:54:36 +0800
Subject: [PATCH 068/320] vhost: Remove custom vhost rcu usage

Now, vq->private_data is always accessed under vq mutex. No need to play
the vhost rcu trick.

Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/net.c   | 16 ++++++----------
 drivers/vhost/scsi.c  |  6 ++----
 drivers/vhost/test.c  |  6 ++----
 drivers/vhost/vhost.h | 10 ++--------
 4 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 99f8d63491aa..969a85960e9f 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -15,7 +15,6 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
-#include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 
@@ -749,8 +748,7 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
 	struct socket *sock;
 
-	sock = rcu_dereference_protected(vq->private_data,
-					 lockdep_is_held(&vq->mutex));
+	sock = vq->private_data;
 	if (!sock)
 		return 0;
 
@@ -763,10 +761,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
 	struct socket *sock;
 
 	mutex_lock(&vq->mutex);
-	sock = rcu_dereference_protected(vq->private_data,
-					 lockdep_is_held(&vq->mutex));
+	sock = vq->private_data;
 	vhost_net_disable_vq(n, vq);
-	rcu_assign_pointer(vq->private_data, NULL);
+	vq->private_data = NULL;
 	mutex_unlock(&vq->mutex);
 	return sock;
 }
@@ -922,8 +919,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	}
 
 	/* start polling new socket */
-	oldsock = rcu_dereference_protected(vq->private_data,
-					    lockdep_is_held(&vq->mutex));
+	oldsock = vq->private_data;
 	if (sock != oldsock) {
 		ubufs = vhost_net_ubuf_alloc(vq,
 					     sock && vhost_sock_zcopy(sock));
@@ -933,7 +929,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 		}
 
 		vhost_net_disable_vq(n, vq);
-		rcu_assign_pointer(vq->private_data, sock);
+		vq->private_data = sock;
 		r = vhost_init_used(vq);
 		if (r)
 			goto err_used;
@@ -967,7 +963,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	return 0;
 
 err_used:
-	rcu_assign_pointer(vq->private_data, oldsock);
+	vq->private_data = oldsock;
 	vhost_net_enable_vq(n, vq);
 	if (ubufs)
 		vhost_net_ubuf_put_wait_and_free(ubufs);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 45365396dbbc..35ab0ce98414 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1223,9 +1223,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
 		       sizeof(vs->vs_vhost_wwpn));
 		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
 			vq = &vs->vqs[i].vq;
-			/* Flushing the vhost_work acts as synchronize_rcu */
 			mutex_lock(&vq->mutex);
-			rcu_assign_pointer(vq->private_data, vs_tpg);
+			vq->private_data = vs_tpg;
 			vhost_init_used(vq);
 			mutex_unlock(&vq->mutex);
 		}
@@ -1304,9 +1303,8 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
 	if (match) {
 		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
 			vq = &vs->vqs[i].vq;
-			/* Flushing the vhost_work acts as synchronize_rcu */
 			mutex_lock(&vq->mutex);
-			rcu_assign_pointer(vq->private_data, NULL);
+			vq->private_data = NULL;
 			mutex_unlock(&vq->mutex);
 		}
 	}
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index a73ea217f24d..339eae85859a 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
-#include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 
@@ -200,9 +199,8 @@ static long vhost_test_run(struct vhost_test *n, int test)
 		priv = test ? n : NULL;
 
 		/* start polling new socket */
-		oldpriv = rcu_dereference_protected(vq->private_data,
-						    lockdep_is_held(&vq->mutex));
-		rcu_assign_pointer(vq->private_data, priv);
+		oldpriv = vq->private_data;
+		vq->private_data = priv;
 
 		r = vhost_init_used(&n->vqs[index]);
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 42298cd23c73..4465ed5f316d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -103,14 +103,8 @@ struct vhost_virtqueue {
 	struct iovec iov[UIO_MAXIOV];
 	struct iovec *indirect;
 	struct vring_used_elem *heads;
-	/* We use a kind of RCU to access private pointer.
-	 * All readers access it from worker, which makes it possible to
-	 * flush the vhost_work instead of synchronize_rcu. Therefore readers do
-	 * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
-	 * vhost_work execution acts instead of rcu_read_lock() and the end of
-	 * vhost_work execution acts instead of rcu_read_unlock().
-	 * Writers use virtqueue mutex. */
-	void __rcu *private_data;
+	/* Protected by virtqueue mutex. */
+	void *private_data;
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;

From 0692282181e248328c226c2520994fc06dfd65bf Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 11 Jul 2013 13:35:40 +0200
Subject: [PATCH 069/320] drm/i915: fix up readout of the lvds dither bit on
 gen2/3

It's in the PFIT_CONTROL register, but very much associated with the
lvds encoder. So move the readout for it (in the case of an otherwise
disabled pfit) from the pipe to the lvds encoder's get_config
function.

Otherwise we get a pipe state mismatch if we use pipe B for a non-lvds
output and we've left the dither bit enabled behind us. This can
happen if the BIOS has set the bit (some seem to unconditionally do
that, even in the complete absence of an lvds port), but not enabled
pipe B at boot-up. Then we won't clear the pfit control register since
we can only touch that if the pfit is associated with our pipe in the
crtc configuration - we could trample over the pfit state of the other
pipe otherwise since it's shared. Once pipe B is enabled we notice
that the 6to8 dither bit is set and complain about the mismatch.

Note that testing indicates that we don't actually need to set this
bit when the pfit is disabled, dithering on 18bpp panels seems to work
regardless. But ripping that code out is not something for a bugfix
meant for -rc kernels.

v2: While at it clarify the logic in i9xx_get_pfit_config, spurred by
comments from Chris on irc.

v3: Use Chris suggestion to make the control flow in
i9xx_get_pfit_config easier to understand.

v4: Kill the extra line, spotted by Chris.

Reported-by: Knut Petersen <Knut_Petersen@t-online.de>
Cc: Knut Petersen <Knut_Petersen@t-online.de>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
References: http://lists.freedesktop.org/archives/intel-gfx/2013-July/030092.html
Tested-by: Knut Petersen <Knut_Petersen@t-online.de>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 11 ++++-------
 drivers/gpu/drm/i915/intel_lvds.c    |  7 +++++++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 85f3eb74d2b7..c59335ce84f4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4913,22 +4913,19 @@ static void i9xx_get_pfit_config(struct intel_crtc *crtc,
 	uint32_t tmp;
 
 	tmp = I915_READ(PFIT_CONTROL);
+	if (!(tmp & PFIT_ENABLE))
+		return;
 
+	/* Check whether the pfit is attached to our pipe. */
 	if (INTEL_INFO(dev)->gen < 4) {
 		if (crtc->pipe != PIPE_B)
 			return;
-
-		/* gen2/3 store dither state in pfit control, needs to match */
-		pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE;
 	} else {
 		if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT))
 			return;
 	}
 
-	if (!(tmp & PFIT_ENABLE))
-		return;
-
-	pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL);
+	pipe_config->gmch_pfit.control = tmp;
 	pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS);
 	if (INTEL_INFO(dev)->gen < 5)
 		pipe_config->gmch_pfit.lvds_border_bits =
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 2abb2d3c727b..12079983c8ab 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -109,6 +109,13 @@ static void intel_lvds_get_config(struct intel_encoder *encoder,
 		flags |= DRM_MODE_FLAG_PVSYNC;
 
 	pipe_config->adjusted_mode.flags |= flags;
+
+	/* gen2/3 store dither state in pfit control, needs to match */
+	if (INTEL_INFO(dev)->gen < 4) {
+		tmp = I915_READ(PFIT_CONTROL);
+
+		pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE;
+	}
 }
 
 /* The LVDS pin pair needs to be on before the DPLLs are enabled.

From 6aa8f1a6ca41c49721d2de4e048d3da8d06411f9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 18:04:21 -0700
Subject: [PATCH 070/320] bcache: Fix a dumb race

In the far-too-complicated closure code - closures can have destructors,
for probably dubious reasons; they get run after the closure is no
longer waiting on anything but before dropping the parent ref, intended
just for freeing whatever memory the closure is embedded in.

Trouble is, when remaining goes to 0 and we've got nothing more to run -
we also have to unlock the closure, setting remaining to -1. If there's
a destructor, that unlock isn't doing anything - nobody could be trying
to lock it if we're about to free it - but if the unlock _is needed...
that check for a destructor was racy. Argh.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/closure.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index bd05a9a8c7cf..9aba2017f0d1 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
 		} else {
 			struct closure *parent = cl->parent;
 			struct closure_waitlist *wait = closure_waitlist(cl);
+			closure_fn *destructor = cl->fn;
 
 			closure_debug_destroy(cl);
 
+			smp_mb();
 			atomic_set(&cl->remaining, -1);
 
 			if (wait)
 				closure_wake_up(wait);
 
-			if (cl->fn)
-				cl->fn(cl);
+			if (destructor)
+				destructor(cl);
 
 			if (parent)
 				closure_put(parent);

From d2a65ce2ac224413b291307201c5dafb03aa90d7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 5 Jul 2013 09:05:46 +0300
Subject: [PATCH 071/320] bcache: check for allocation failures

There is a missing NULL check after the kzalloc().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
---
 drivers/md/bcache/sysfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index dd3f00a42729..12a2c2846f99 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -232,6 +232,8 @@ STORE(__cached_dev)
 			bch_uuid_write(dc->disk.c);
 		}
 		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
+		if (!env)
+			return -ENOMEM;
 		add_uevent_var(env, "DRIVER=bcache");
 		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
 		add_uevent_var(env, "CACHED_LABEL=%s", buf);

From 54d12f2b4fd0f218590d1490b41a18d0e2328a9a Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 18:44:40 -0700
Subject: [PATCH 072/320] bcache: Advertise that flushes are supported

Whoops - bcache's flush/FUA was mostly correct, but flushes get filtered
out unless we say we support them...

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/request.c | 8 +++++++-
 drivers/md/bcache/super.c   | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index b6e74d3c8faf..786a1a4f74d8 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -488,6 +488,12 @@ static void bch_insert_data_loop(struct closure *cl)
 		bch_queue_gc(op->c);
 	}
 
+	/*
+	 * Journal writes are marked REQ_FLUSH; if the original write was a
+	 * flush, it'll wait on the journal write.
+	 */
+	bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
+
 	do {
 		unsigned i;
 		struct bkey *k;
@@ -710,7 +716,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
 	s->task			= current;
 	s->orig_bio		= bio;
 	s->write		= (bio->bi_rw & REQ_WRITE) != 0;
-	s->op.flush_journal	= (bio->bi_rw & REQ_FLUSH) != 0;
+	s->op.flush_journal	= (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
 	s->op.skip		= (bio->bi_rw & REQ_DISCARD) != 0;
 	s->recoverable		= 1;
 	s->start_time		= jiffies;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index cff2d182dfb0..728fdc673f31 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -806,6 +806,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 	set_bit(QUEUE_FLAG_NONROT,	&d->disk->queue->queue_flags);
 	set_bit(QUEUE_FLAG_DISCARD,	&d->disk->queue->queue_flags);
 
+	blk_queue_flush(q, REQ_FLUSH|REQ_FUA);
+
 	return 0;
 }
 

From c9502ea4424b31728703d113fc6b30bfead14633 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 21:25:02 -0700
Subject: [PATCH 073/320] bcache: Fix a sysfs splat on shutdown

If we stopped a bcache device when we were already detaching (or
something like that), bcache_device_unlink() would try to remove a
symlink from sysfs that was already gone because the bcache dev kobject
had already been removed from sysfs.

So keep track of whether we've removed stuff from sysfs.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/super.c  | 11 ++++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 342ba86c6e4f..68f1ded81ae0 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -434,6 +434,7 @@ struct bcache_device {
 
 	/* If nonzero, we're detaching/unregistering from cache set */
 	atomic_t		detaching;
+	int			flush_done;
 
 	uint64_t		nr_stripes;
 	unsigned		stripe_size_bits;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 728fdc673f31..7a1dcdb2536e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -706,7 +706,8 @@ static void bcache_device_detach(struct bcache_device *d)
 		atomic_set(&d->detaching, 0);
 	}
 
-	bcache_device_unlink(d);
+	if (!d->flush_done)
+		bcache_device_unlink(d);
 
 	d->c->devices[d->id] = NULL;
 	closure_put(&d->c->caching);
@@ -1055,6 +1056,14 @@ static void cached_dev_flush(struct closure *cl)
 	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 	struct bcache_device *d = &dc->disk;
 
+	mutex_lock(&bch_register_lock);
+	d->flush_done = 1;
+
+	if (d->c)
+		bcache_device_unlink(d);
+
+	mutex_unlock(&bch_register_lock);
+
 	bch_cache_accounting_destroy(&dc->accounting);
 	kobject_del(&d->kobj);
 

From 5caa52afc5abd1396e4af720469abb5843a71eb8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 21:03:25 -0700
Subject: [PATCH 074/320] bcache: Shutdown fix

Stopping a cache set is supposed to make it stop attached backing
devices, but somewhere along the way that code got lost. Fixing this
mainly has the effect of fixing our reboot notifier.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/super.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 7a1dcdb2536e..f6a62174e8f6 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1354,18 +1354,22 @@ static void cache_set_flush(struct closure *cl)
 static void __cache_set_unregister(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
-	struct cached_dev *dc, *t;
+	struct cached_dev *dc;
 	size_t i;
 
 	mutex_lock(&bch_register_lock);
 
-	if (test_bit(CACHE_SET_UNREGISTERING, &c->flags))
-		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
-			bch_cached_dev_detach(dc);
-
 	for (i = 0; i < c->nr_uuids; i++)
-		if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i]))
-			bcache_device_stop(c->devices[i]);
+		if (c->devices[i]) {
+			if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+			    test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+				dc = container_of(c->devices[i],
+						  struct cached_dev, disk);
+				bch_cached_dev_detach(dc);
+			} else {
+				bcache_device_stop(c->devices[i]);
+			}
+		}
 
 	mutex_unlock(&bch_register_lock);
 

From faa5673617656ee58369a3cfe4a312cfcdc59c81 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Thu, 11 Jul 2013 22:42:14 -0700
Subject: [PATCH 075/320] bcache: Journal replay fix

The journal replay code starts by finding something that looks like a
valid journal entry, then it does a binary search over the unchecked
region of the journal for the journal entries with the highest sequence
numbers.

Trouble is, the logic was wrong - journal_read_bucket() returns true if
it found journal entries we need, but if the range of journal entries
we're looking for loops around the end of the journal - in that case
journal_read_bucket() could return true when it hadn't found the highest
sequence number we'd seen yet, and in that case the binary search did
the wrong thing. Whoops.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/journal.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 4b250667bb7f..ba95ab84b2be 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -184,9 +184,14 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
 		pr_debug("starting binary search, l %u r %u", l, r);
 
 		while (l + 1 < r) {
-			m = (l + r) >> 1;
+			seq = list_entry(list->prev, struct journal_replay,
+					 list)->j.seq;
 
-			if (read_bucket(m))
+			m = (l + r) >> 1;
+			read_bucket(m);
+
+			if (seq != list_entry(list->prev, struct journal_replay,
+					      list)->j.seq)
 				l = m;
 			else
 				r = m;

From 29ebf465b9050f241c4433a796a32e6c896a9dcd Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Thu, 11 Jul 2013 19:43:21 -0700
Subject: [PATCH 076/320] bcache: Fix GC_SECTORS_USED() calculation

Part of the job of garbage collection is to add up however many sectors
of live data it finds in each bucket, but that doesn't work very well if
it doesn't reset GC_SECTORS_USED() when it starts. Whoops.

This wouldn't have broken anything horribly, but allocation tries to
preferentially reclaim buckets that are mostly empty and that's not
gonna work with an incorrect GC_SECTORS_USED() value.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v3.10
---
 drivers/md/bcache/btree.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 15b58239c683..ee372884c405 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1410,8 +1410,10 @@ static void btree_gc_start(struct cache_set *c)
 	for_each_cache(ca, c, i)
 		for_each_bucket(b, ca) {
 			b->gc_gen = b->gen;
-			if (!atomic_read(&b->pin))
+			if (!atomic_read(&b->pin)) {
 				SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+				SET_GC_SECTORS_USED(b, 0);
+			}
 		}
 
 	mutex_unlock(&c->bucket_lock);

From 79826c35eb99cd3c0873b8396f45fa26c87fb0b0 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kmo@daterainc.com>
Date: Wed, 10 Jul 2013 18:31:58 -0700
Subject: [PATCH 077/320] bcache: Allocation kthread fixes

The alloc kthread should've been using try_to_freeze() - and also there
was the potential for the alloc kthread to get woken up after it had
shut down, which would have been bad.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
---
 drivers/md/bcache/alloc.c  | 18 ++++++++----------
 drivers/md/bcache/bcache.h |  4 ----
 drivers/md/bcache/super.c  | 11 +++++++----
 3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index b54b73b9b2b7..e45f5575fd4d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -63,6 +63,7 @@
 #include "bcache.h"
 #include "btree.h"
 
+#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/random.h>
 #include <trace/events/bcache.h>
@@ -363,11 +364,10 @@ do {									\
 			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
-		if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) {	\
-			closure_put(&ca->set->cl);			\
+		if (kthread_should_stop())				\
 			return 0;					\
-		}							\
 									\
+		try_to_freeze();					\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
 	}								\
@@ -547,14 +547,12 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
 
 int bch_cache_allocator_start(struct cache *ca)
 {
-	ca->alloc_thread = kthread_create(bch_allocator_thread,
-					  ca, "bcache_allocator");
-	if (IS_ERR(ca->alloc_thread))
-		return PTR_ERR(ca->alloc_thread);
-
-	closure_get(&ca->set->cl);
-	wake_up_process(ca->alloc_thread);
+	struct task_struct *k = kthread_run(bch_allocator_thread,
+					    ca, "bcache_allocator");
+	if (IS_ERR(k))
+		return PTR_ERR(k);
 
+	ca->alloc_thread = k;
 	return 0;
 }
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 68f1ded81ae0..b39f6f0b45f2 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -664,13 +664,9 @@ struct gc_stat {
  * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
  * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
  * flushing dirty data).
- *
- * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down
- * the allocation thread.
  */
 #define CACHE_SET_UNREGISTERING		0
 #define	CACHE_SET_STOPPING		1
-#define	CACHE_SET_STOPPING_2		2
 
 struct cache_set {
 	struct closure		cl;
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f6a62174e8f6..547c4c57b052 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -16,6 +16,7 @@
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/reboot.h>
@@ -1329,11 +1330,9 @@ static void cache_set_free(struct closure *cl)
 static void cache_set_flush(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
+	struct cache *ca;
 	struct btree *b;
-
-	/* Shut down allocator threads */
-	set_bit(CACHE_SET_STOPPING_2, &c->flags);
-	wake_up_allocators(c);
+	unsigned i;
 
 	bch_cache_accounting_destroy(&c->accounting);
 
@@ -1348,6 +1347,10 @@ static void cache_set_flush(struct closure *cl)
 		if (btree_node_dirty(b))
 			bch_btree_node_write(b, NULL);
 
+	for_each_cache(ca, c, i)
+		if (ca->alloc_thread)
+			kthread_stop(ca->alloc_thread);
+
 	closure_return(cl);
 }
 

From 21d8a4756af5fdf4a42e79a77cf3b6f52678d443 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 12 Jul 2013 08:07:30 +0200
Subject: [PATCH 078/320] drm/i915: fix pfit regression for non-autoscaled
 resolutions

I.e. for letter/pillarboxing. For those cases we need to adjust the
mode a bit, but Jesse gmch pfit refactoring in

commit 2dd24552cab40ea829ba3fda890eeafd2c4816d8
Author: Jesse Barnes <jbarnes@virtuousgeek.org>
Date:   Thu Apr 25 12:55:01 2013 -0700

    drm/i915: factor out GMCH panel fitting code and use for eDP v3

broke that by reordering the computation of the gmch pfit state with
the block of code that prepared the adjusted mode for it and told the
modeset core not to overwrite the adjusted mode with default settings.

We might want to switch around the core code to just fill in defaults,
but this code predates the pipe_config modeset rework. And in the old
crtc helpers we did not have a suitable spot to do this.

Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Hans de Bruin <jmdebruin@xmsnet.nl>
Reported-and-tested-by: Hans de Bruin <jmdebruin@xmsnet.nl>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_lvds.c  | 5 +----
 drivers/gpu/drm/i915/intel_panel.c | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 12079983c8ab..9a797c987537 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -297,14 +297,11 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder,
 
 		intel_pch_panel_fitting(intel_crtc, pipe_config,
 					intel_connector->panel.fitting_mode);
-		return true;
 	} else {
 		intel_gmch_panel_fitting(intel_crtc, pipe_config,
 					 intel_connector->panel.fitting_mode);
-	}
 
-	drm_mode_set_crtcinfo(adjusted_mode, 0);
-	pipe_config->timings_set = true;
+	}
 
 	/*
 	 * XXX: It would be nice to support lower refresh rates on the
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 80bea1d3209f..45010bb5d132 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -194,6 +194,9 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc,
 	    adjusted_mode->vdisplay == mode->vdisplay)
 		goto out;
 
+	drm_mode_set_crtcinfo(adjusted_mode, 0);
+	pipe_config->timings_set = true;
+
 	switch (fitting_mode) {
 	case DRM_MODE_SCALE_CENTER:
 		/*

From 913ffdb54366f94eec65c656cae8c6e00e1ab1b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 11 Jul 2013 16:34:48 -0700
Subject: [PATCH 079/320] cgroup: replace task_cgroup_path_from_hierarchy()
 with task_cgroup_path()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

task_cgroup_path_from_hierarchy() was added for the planned new users
and none of the currently planned users wants to know about multiple
hierarchies.  This patch drops the multiple hierarchy part and makes
it always return the path in the first non-dummy hierarchy.

As unified hierarchy will always have id 1, this is guaranteed to
return the path for the unified hierarchy if mounted; otherwise, it
will return the path from the hierarchy which happens to occupy the
lowest hierarchy id, which will usually be the first hierarchy mounted
after boot.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Jan Kaluža <jkaluza@redhat.com>
---
 include/linux/cgroup.h |  3 +--
 kernel/cgroup.c        | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index fd097ecfcd97..21cfaff7e002 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -540,8 +540,7 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-				    char *buf, size_t buflen);
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e5583d10a325..afb8d53ca6c7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1846,36 +1846,43 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 EXPORT_SYMBOL_GPL(cgroup_path);
 
 /**
- * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
+ * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
  * @task: target task
- * @hierarchy_id: the hierarchy to look up @task's cgroup from
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
- * copy its path into @buf.  This function grabs cgroup_mutex and shouldn't
- * be used inside locks used by cgroup controller callbacks.
+ * Determine @task's cgroup on the first (the one with the lowest non-zero
+ * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
+ * function grabs cgroup_mutex and shouldn't be used inside locks used by
+ * cgroup controller callbacks.
+ *
+ * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
  */
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-				    char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
 	struct cgroupfs_root *root;
-	struct cgroup *cgrp = NULL;
-	int ret = -ENOENT;
+	struct cgroup *cgrp;
+	int hierarchy_id = 1, ret = 0;
+
+	if (buflen < 2)
+		return -ENAMETOOLONG;
 
 	mutex_lock(&cgroup_mutex);
 
-	root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
+	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
+
 	if (root) {
 		cgrp = task_cgroup_from_root(task, root);
 		ret = cgroup_path(cgrp, buf, buflen);
+	} else {
+		/* if no hierarchy exists, everyone is in "/" */
+		memcpy(buf, "/", 2);
 	}
 
 	mutex_unlock(&cgroup_mutex);
-
 	return ret;
 }
-EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
+EXPORT_SYMBOL_GPL(task_cgroup_path);
 
 /*
  * Control Group taskset

From bcf53de4e60d9000b82f541d654529e2902a4c2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= <marcheu@chromium.org>
Date: Fri, 12 Jul 2013 13:54:41 -0700
Subject: [PATCH 080/320] drm/i915: Preserve the DDI_A_4_LANES bit from the
 bios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise the DDI_A_4_LANES bit gets lost and we can't use > 2 lanes
on eDP. This fixes eDP on hsw with > 2 lanes.

Also s/port_reversal/saved_port_bits/ since the current name is
confusing.

Signed-off-by: Stéphane Marchesin <marcheu@chromium.org>
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_ddi.c | 10 ++++++----
 drivers/gpu/drm/i915/intel_drv.h |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 324211ac9c55..b042ee5c4070 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -301,7 +301,7 @@ static void intel_ddi_mode_set(struct drm_encoder *encoder,
 		struct intel_digital_port *intel_dig_port =
 			enc_to_dig_port(encoder);
 
-		intel_dp->DP = intel_dig_port->port_reversal |
+		intel_dp->DP = intel_dig_port->saved_port_bits |
 			       DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
 		intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
 
@@ -1109,7 +1109,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder)
 		 * enabling the port.
 		 */
 		I915_WRITE(DDI_BUF_CTL(port),
-			   intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE);
+			   intel_dig_port->saved_port_bits |
+			   DDI_BUF_CTL_ENABLE);
 	} else if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
@@ -1347,8 +1348,9 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 	intel_encoder->get_config = intel_ddi_get_config;
 
 	intel_dig_port->port = port;
-	intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) &
-					DDI_BUF_PORT_REVERSAL;
+	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+					  (DDI_BUF_PORT_REVERSAL |
+					   DDI_A_4_LANES);
 	intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
 
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c8c9b6f48230..b7d6e09456ce 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -504,7 +504,7 @@ struct intel_dp {
 struct intel_digital_port {
 	struct intel_encoder base;
 	enum port port;
-	u32 port_reversal;
+	u32 saved_port_bits;
 	struct intel_dp dp;
 	struct intel_hdmi hdmi;
 };

From f5786b8e934e77f76f689c515baa582ff5a196ec Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 21 Jun 2013 15:36:11 +0200
Subject: [PATCH 081/320] ARM i.MX53: Fix UART pad configuration

The current default pad configuration for UART RX and TX pads sets a 360k
pull-down and writes 1 to a reserved bit (1 << 0). It doesn't seem right to
me that in idle state, the UART has to keep the signal high against a
pull-down resistor.

This patch instead sets a 100k pull-up, which incidentally corresponds to the
register reset value for all but one (MX53_PAD_KEY_ROW0__UART4_RXD_MUX) pad,
and removes the write to the reserved bit.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/boot/dts/imx53.dtsi | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 3895fbba8fce..569aa9f2c4ed 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -725,15 +725,15 @@ MX53_PAD_GPIO_1__PWM2_PWMO	0x80000000
 				uart1 {
 					pinctrl_uart1_1: uart1grp-1 {
 						fsl,pins = <
-							MX53_PAD_CSI0_DAT10__UART1_TXD_MUX 0x1c5
-							MX53_PAD_CSI0_DAT11__UART1_RXD_MUX 0x1c5
+							MX53_PAD_CSI0_DAT10__UART1_TXD_MUX 0x1e4
+							MX53_PAD_CSI0_DAT11__UART1_RXD_MUX 0x1e4
 						>;
 					};
 
 					pinctrl_uart1_2: uart1grp-2 {
 						fsl,pins = <
-							MX53_PAD_PATA_DIOW__UART1_TXD_MUX  0x1c5
-							MX53_PAD_PATA_DMACK__UART1_RXD_MUX 0x1c5
+							MX53_PAD_PATA_DIOW__UART1_TXD_MUX  0x1e4
+							MX53_PAD_PATA_DMACK__UART1_RXD_MUX 0x1e4
 						>;
 					};
 
@@ -748,8 +748,8 @@ MX53_PAD_PATA_IORDY__UART1_RTS	 0x1c5
 				uart2 {
 					pinctrl_uart2_1: uart2grp-1 {
 						fsl,pins = <
-							MX53_PAD_PATA_BUFFER_EN__UART2_RXD_MUX 0x1c5
-							MX53_PAD_PATA_DMARQ__UART2_TXD_MUX     0x1c5
+							MX53_PAD_PATA_BUFFER_EN__UART2_RXD_MUX 0x1e4
+							MX53_PAD_PATA_DMARQ__UART2_TXD_MUX     0x1e4
 						>;
 					};
 
@@ -766,17 +766,17 @@ MX53_PAD_PATA_INTRQ__UART2_CTS		0x1c5
 				uart3 {
 					pinctrl_uart3_1: uart3grp-1 {
 						fsl,pins = <
-							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1c5
-							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5
-							MX53_PAD_PATA_DA_1__UART3_CTS	  0x1c5
-							MX53_PAD_PATA_DA_2__UART3_RTS	  0x1c5
+							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1e4
+							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1e4
+							MX53_PAD_PATA_DA_1__UART3_CTS	  0x1e4
+							MX53_PAD_PATA_DA_2__UART3_RTS	  0x1e4
 						>;
 					};
 
 					pinctrl_uart3_2: uart3grp-2 {
 						fsl,pins = <
-							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1c5
-							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5
+							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1e4
+							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1e4
 						>;
 					};
 
@@ -785,8 +785,8 @@ MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5
 				uart4 {
 					pinctrl_uart4_1: uart4grp-1 {
 						fsl,pins = <
-							MX53_PAD_KEY_COL0__UART4_TXD_MUX 0x1c5
-							MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1c5
+							MX53_PAD_KEY_COL0__UART4_TXD_MUX 0x1e4
+							MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1e4
 						>;
 					};
 				};
@@ -794,8 +794,8 @@ MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1c5
 				uart5 {
 					pinctrl_uart5_1: uart5grp-1 {
 						fsl,pins = <
-							MX53_PAD_KEY_COL1__UART5_TXD_MUX 0x1c5
-							MX53_PAD_KEY_ROW1__UART5_RXD_MUX 0x1c5
+							MX53_PAD_KEY_COL1__UART5_TXD_MUX 0x1e4
+							MX53_PAD_KEY_ROW1__UART5_RXD_MUX 0x1e4
 						>;
 					};
 				};

From bf81e2f1006d10694bcb62c1a001f3a786b902ac Mon Sep 17 00:00:00 2001
From: Markus Pargmann <mpa@pengutronix.de>
Date: Sun, 23 Jun 2013 10:51:10 +0200
Subject: [PATCH 082/320] ARM: imx27: Fix documentation for SPLL clock

spll_gate was added with commit b7eed2076183994dbda2c19bc7fba99b65a135e3
"ARM: imx27: add a clock gate to activate SPLL clock".

spll_gate is missing in the devicetree clock documentation for imx27. This
patch adds it to the list of clocks in the documentation.

Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 Documentation/devicetree/bindings/clock/imx27-clock.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.txt b/Documentation/devicetree/bindings/clock/imx27-clock.txt
index ab1a56e9de9d..7a2070393732 100644
--- a/Documentation/devicetree/bindings/clock/imx27-clock.txt
+++ b/Documentation/devicetree/bindings/clock/imx27-clock.txt
@@ -98,6 +98,7 @@ clocks and IDs.
 	fpm                  83
 	mpll_osc_sel         84
 	mpll_sel             85
+	spll_gate	     86
 
 Examples:
 

From 0d5ca6d973b54f76eaccf86116dd0c66b073afc5 Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Sun, 23 Jun 2013 17:40:35 +0400
Subject: [PATCH 083/320] ARM: i.MX27: Typo fix

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/mach-imx/mx27.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/mx27.h b/arch/arm/mach-imx/mx27.h
index e074616d54ca..8a65f192e7f3 100644
--- a/arch/arm/mach-imx/mx27.h
+++ b/arch/arm/mach-imx/mx27.h
@@ -135,7 +135,7 @@
 #define MX27_INT_GPT4		(NR_IRQS_LEGACY + 4)
 #define MX27_INT_RTIC		(NR_IRQS_LEGACY + 5)
 #define MX27_INT_CSPI3		(NR_IRQS_LEGACY + 6)
-#define MX27_INT_SDHC		(NR_IRQS_LEGACY + 7)
+#define MX27_INT_MSHC		(NR_IRQS_LEGACY + 7)
 #define MX27_INT_GPIO		(NR_IRQS_LEGACY + 8)
 #define MX27_INT_SDHC3		(NR_IRQS_LEGACY + 9)
 #define MX27_INT_SDHC2		(NR_IRQS_LEGACY + 10)

From ceac9b9214df539ca814a784c2af94f554bc78d4 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 26 Jun 2013 15:08:48 +0200
Subject: [PATCH 084/320] ARM i.MX6Q: Fix IOMUXC GPR1 defines for ENET_CLK_SEL
 and IPU1/2_MUX

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index dab34a1deb2c..b1521e82fecf 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -103,15 +103,15 @@
 #define IMX6Q_GPR1_EXC_MON_MASK			BIT(22)
 #define IMX6Q_GPR1_EXC_MON_OKAY			0x0
 #define IMX6Q_GPR1_EXC_MON_SLVE			BIT(22)
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_MASK		BIT(21)
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_IOMUX		BIT(21)
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK		BIT(20)
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX		BIT(20)
-#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK		BIT(19)
+#define IMX6Q_GPR1_ENET_CLK_SEL_MASK		BIT(21)
+#define IMX6Q_GPR1_ENET_CLK_SEL_PAD		0
+#define IMX6Q_GPR1_ENET_CLK_SEL_ANATOP		BIT(21)
+#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK		BIT(20)
 #define IMX6Q_GPR1_MIPI_IPU2_MUX_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX		BIT(19)
+#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX		BIT(20)
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK		BIT(19)
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET		0x0
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX		BIT(19)
 #define IMX6Q_GPR1_PCIE_TEST_PD			BIT(18)
 #define IMX6Q_GPR1_IPU_VPU_MUX_MASK		BIT(17)
 #define IMX6Q_GPR1_IPU_VPU_MUX_IPU1		0x0

From 3b79cd15bfc5f1ddb5e387310fa3dbb09b81b552 Mon Sep 17 00:00:00 2001
From: Liu Ying <Ying.Liu@freescale.com>
Date: Wed, 3 Jul 2013 15:29:06 +0800
Subject: [PATCH 085/320] ARM: i.MX6Q: correct emi_sel clock muxing

The correct muxing for emi_sel clock should be
2b'00 - 396M PFD
2b'01 - PLL3
2b'10 - AXI clk root
2b'11 - 352M PFD

This patch corrects the muxing in the clock driver.

Signed-off-by: Liu Ying <Ying.Liu@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Acked-by: Dirk Behme <dirk.behme@de.bosch.com>
---
 arch/arm/mach-imx/clk-imx6q.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index 4282e99f5ca1..86567d980b07 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -199,7 +199,8 @@ static const char *pcie_axi_sels[]	= { "axi", "ahb", };
 static const char *ssi_sels[]		= { "pll3_pfd2_508m", "pll3_pfd3_454m", "pll4_post_div", };
 static const char *usdhc_sels[]	= { "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *enfc_sels[]	= { "pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", "pll2_pfd2_396m", };
-static const char *emi_sels[]		= { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", };
+static const char *emi_sels[]		= { "pll2_pfd2_396m", "pll3_usb_otg", "axi", "pll2_pfd0_352m", };
+static const char *emi_slow_sels[]      = { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *vdo_axi_sels[]	= { "axi", "ahb", };
 static const char *vpu_axi_sels[]	= { "axi", "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *cko1_sels[]	= { "pll3_usb_otg", "pll2_bus", "pll1_sys", "pll5_video_div",
@@ -392,7 +393,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[usdhc4_sel]       = imx_clk_mux("usdhc4_sel",       base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
 	clk[enfc_sel]         = imx_clk_mux("enfc_sel",         base + 0x2c, 16, 2, enfc_sels,         ARRAY_SIZE(enfc_sels));
 	clk[emi_sel]          = imx_clk_mux("emi_sel",          base + 0x1c, 27, 2, emi_sels,          ARRAY_SIZE(emi_sels));
-	clk[emi_slow_sel]     = imx_clk_mux("emi_slow_sel",     base + 0x1c, 29, 2, emi_sels,          ARRAY_SIZE(emi_sels));
+	clk[emi_slow_sel]     = imx_clk_mux("emi_slow_sel",     base + 0x1c, 29, 2, emi_slow_sels,     ARRAY_SIZE(emi_slow_sels));
 	clk[vdo_axi_sel]      = imx_clk_mux("vdo_axi_sel",      base + 0x18, 11, 1, vdo_axi_sels,      ARRAY_SIZE(vdo_axi_sels));
 	clk[vpu_axi_sel]      = imx_clk_mux("vpu_axi_sel",      base + 0x18, 14, 2, vpu_axi_sels,      ARRAY_SIZE(vpu_axi_sels));
 	clk[cko1_sel]         = imx_clk_mux("cko1_sel",         base + 0x60, 0,  4, cko1_sels,         ARRAY_SIZE(cko1_sels));

From 66acaf3f897c9245f5456d884e5ae401700cbc45 Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Mon, 1 Jul 2013 15:46:05 +0800
Subject: [PATCH 086/320] ARM: mxs: saif0 is the clock provider to sgtl5000

These systems all use saif0 as the mclock provider to codec sgtl5000.
Reflect that in device tree source, so that sgtl5000 can find the clock
by calling clk_get().

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/boot/dts/imx28-apx4devkit.dts | 2 +-
 arch/arm/boot/dts/imx28-evk.dts        | 2 +-
 arch/arm/boot/dts/imx28-m28evk.dts     | 2 +-
 arch/arm/boot/dts/imx28.dtsi           | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/imx28-apx4devkit.dts b/arch/arm/boot/dts/imx28-apx4devkit.dts
index 43bf3c796cba..0e7fed47bd8d 100644
--- a/arch/arm/boot/dts/imx28-apx4devkit.dts
+++ b/arch/arm/boot/dts/imx28-apx4devkit.dts
@@ -147,7 +147,7 @@ sgtl5000: codec@0a {
 					reg = <0x0a>;
 					VDDA-supply = <&reg_3p3v>;
 					VDDIO-supply = <&reg_3p3v>;
-
+					clocks = <&saif0>;
 				};
 
 				pcf8563: rtc@51 {
diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts
index 1f0d38d7b16f..e035f4664b97 100644
--- a/arch/arm/boot/dts/imx28-evk.dts
+++ b/arch/arm/boot/dts/imx28-evk.dts
@@ -195,7 +195,7 @@ sgtl5000: codec@0a {
 					reg = <0x0a>;
 					VDDA-supply = <&reg_3p3v>;
 					VDDIO-supply = <&reg_3p3v>;
-
+					clocks = <&saif0>;
 				};
 
 				at24@51 {
diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts
index 880df2f13be8..44d9da57736e 100644
--- a/arch/arm/boot/dts/imx28-m28evk.dts
+++ b/arch/arm/boot/dts/imx28-m28evk.dts
@@ -184,7 +184,7 @@ sgtl5000: codec@0a {
 					reg = <0x0a>;
 					VDDA-supply = <&reg_3p3v>;
 					VDDIO-supply = <&reg_3p3v>;
-
+					clocks = <&saif0>;
 				};
 
 				eeprom: eeprom@51 {
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 6a8acb01b1d3..9524a0571281 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -837,6 +837,7 @@ saif0: saif@80042000 {
 				compatible = "fsl,imx28-saif";
 				reg = <0x80042000 0x2000>;
 				interrupts = <59 80>;
+				#clock-cells = <0>;
 				clocks = <&clks 53>;
 				dmas = <&dma_apbx 4>;
 				dma-names = "rx-tx";

From 4f71612ee3a1b2d15c8246d926a40c4f7d21cc3b Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Wed, 10 Jul 2013 14:05:44 +0800
Subject: [PATCH 087/320] ARM: imx: fix vf610 enet module clock selection

The fec/enet driver calculates MDC rate with the formula below.

  ref_freq / ((MII_SPEED + 1) x 2)

The ref_freq here is the fec internal module clock, which is missing
from clk-vf610 clock driver right now.  And clk-vf610 driver mistakenly
supplies RMII clock (50 MHz) as the source to fec.  This results in the
situation that fec driver gets ref_freq as 50 MHz, while physically it
runs at 66 MHz (fec module clock physically sources from ipg which runs
at 66 MHz).  That's why software expects MDC runs at 2.5 MHz, while the
measurement tells it runs at 3.3 MHz.  And this causes the PHY KSZ8041
keeps swithing between Full and Half mode as below.

  libphy: 400d0000.etherne:00 - Link is Up - 100/Full
  libphy: 400d0000.etherne:00 - Link is Up - 100/Half
  libphy: 400d0000.etherne:00 - Link is Up - 100/Full
  libphy: 400d0000.etherne:00 - Link is Up - 100/Half
  libphy: 400d0000.etherne:00 - Link is Up - 100/Full
  libphy: 400d0000.etherne:00 - Link is Up - 100/Half

Add the missing module clock for ENET0 and ENET1, and correct the clock
supplying in device tree to fix above issue.

Thanks to Alison Wang <b18965@freescale.com> for debugging the issue.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/boot/dts/vf610.dtsi            | 8 ++++----
 arch/arm/mach-imx/clk-vf610.c           | 2 ++
 include/dt-bindings/clock/vf610-clock.h | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi
index e1eb7dadda80..67d929cf9804 100644
--- a/arch/arm/boot/dts/vf610.dtsi
+++ b/arch/arm/boot/dts/vf610.dtsi
@@ -442,8 +442,8 @@ fec0: ethernet@400d0000 {
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d0000 0x1000>;
 				interrupts = <0 78 0x04>;
-				clocks = <&clks VF610_CLK_ENET>,
-					<&clks VF610_CLK_ENET>,
+				clocks = <&clks VF610_CLK_ENET0>,
+					<&clks VF610_CLK_ENET0>,
 					<&clks VF610_CLK_ENET>;
 				clock-names = "ipg", "ahb", "ptp";
 				status = "disabled";
@@ -453,8 +453,8 @@ fec1: ethernet@400d1000 {
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d1000 0x1000>;
 				interrupts = <0 79 0x04>;
-				clocks = <&clks VF610_CLK_ENET>,
-					<&clks VF610_CLK_ENET>,
+				clocks = <&clks VF610_CLK_ENET1>,
+					<&clks VF610_CLK_ENET1>,
 					<&clks VF610_CLK_ENET>;
 				clock-names = "ipg", "ahb", "ptp";
 				status = "disabled";
diff --git a/arch/arm/mach-imx/clk-vf610.c b/arch/arm/mach-imx/clk-vf610.c
index d617c0b7c809..b169a396d93b 100644
--- a/arch/arm/mach-imx/clk-vf610.c
+++ b/arch/arm/mach-imx/clk-vf610.c
@@ -183,6 +183,8 @@ static void __init vf610_clocks_init(struct device_node *ccm_node)
 	clk[VF610_CLK_ENET_TS_SEL] = imx_clk_mux("enet_ts_sel", CCM_CSCMR2, 0, 3, enet_ts_sels, 7);
 	clk[VF610_CLK_ENET] = imx_clk_gate("enet", "enet_sel", CCM_CSCDR1, 24);
 	clk[VF610_CLK_ENET_TS] = imx_clk_gate("enet_ts", "enet_ts_sel", CCM_CSCDR1, 23);
+	clk[VF610_CLK_ENET0] = imx_clk_gate2("enet0", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(0));
+	clk[VF610_CLK_ENET1] = imx_clk_gate2("enet1", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(1));
 
 	clk[VF610_CLK_PIT] = imx_clk_gate2("pit", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(7));
 
diff --git a/include/dt-bindings/clock/vf610-clock.h b/include/dt-bindings/clock/vf610-clock.h
index 15e997fa78f2..4aa2b48cd151 100644
--- a/include/dt-bindings/clock/vf610-clock.h
+++ b/include/dt-bindings/clock/vf610-clock.h
@@ -158,6 +158,8 @@
 #define VF610_CLK_GPU_SEL		145
 #define VF610_CLK_GPU_EN		146
 #define VF610_CLK_GPU2D			147
-#define VF610_CLK_END			148
+#define VF610_CLK_ENET0			148
+#define VF610_CLK_ENET1			149
+#define VF610_CLK_END			150
 
 #endif /* __DT_BINDINGS_CLOCK_VF610_H */

From 15968f1bd4341cfa54023348a4e0b94798211742 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 11 Jul 2013 16:37:47 +0200
Subject: [PATCH 088/320] ARM i.MX53: mba53: Fix PWM backlight DT node

The i.MX53 PWM controller uses two cells to describe the PWM specifier.
Remove the extra unused values from the backlight DT node pwms property.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/boot/dts/imx53-mba53.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx53-mba53.dts b/arch/arm/boot/dts/imx53-mba53.dts
index aaa33bc99f78..a63090267941 100644
--- a/arch/arm/boot/dts/imx53-mba53.dts
+++ b/arch/arm/boot/dts/imx53-mba53.dts
@@ -27,7 +27,7 @@ reg_backlight: fixed@0 {
 
 	backlight {
 		compatible = "pwm-backlight";
-		pwms = <&pwm2 0 50000 0 0>;
+		pwms = <&pwm2 0 50000>;
 		brightness-levels = <0 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 92 96 100>;
 		default-brightness-level = <10>;
 		enable-gpios = <&gpio7 7 0>;

From ae7cc03e435fe86d95a3fbef52161dbd9f97db77 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 15 Jul 2013 00:39:41 -0700
Subject: [PATCH 089/320] ARM: multi_v7: Enabled omap4430 sdp nfsroot

By adding support for OCP2SCP, SPI and KS8851 I can also boot test
multi_v7_defconfig easily.

Note that if using an older u-boot, CONFIG_ARM_ATAG_DTB_COMPAT=y
may also be needed for the appended DTB based booting.

Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/configs/multi_v7_defconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index fe0bdc361d2c..120b08edb9a9 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -53,6 +53,7 @@ CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_OMAP_OCP2SCP=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI_PLATFORM=y
@@ -61,6 +62,7 @@ CONFIG_SATA_MV=y
 CONFIG_NETDEVICES=y
 CONFIG_SUN4I_EMAC=y
 CONFIG_NET_CALXEDA_XGMAC=y
+CONFIG_KS8851=y
 CONFIG_SMSC911X=y
 CONFIG_STMMAC_ETH=y
 CONFIG_MDIO_SUN4I=y
@@ -89,6 +91,7 @@ CONFIG_I2C_DESIGNWARE_PLATFORM=y
 CONFIG_I2C_SIRF=y
 CONFIG_I2C_TEGRA=y
 CONFIG_SPI=y
+CONFIG_SPI_OMAP24XX=y
 CONFIG_SPI_PL022=y
 CONFIG_SPI_SIRF=y
 CONFIG_SPI_TEGRA114=y

From 13782a7e2821f62efc747fd26605de4dc823ee29 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 7 Jun 2013 15:01:58 -0700
Subject: [PATCH 090/320] ARM: dts: Add missing vmmc2 regulator for twl

For some reason vmmc2 regulator is missing for twl.
Let's add it.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/twl4030.dtsi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/boot/dts/twl4030.dtsi b/arch/arm/boot/dts/twl4030.dtsi
index b3034da00a37..ae6a17aed9ee 100644
--- a/arch/arm/boot/dts/twl4030.dtsi
+++ b/arch/arm/boot/dts/twl4030.dtsi
@@ -47,6 +47,12 @@ vmmc1: regulator-vmmc1 {
 		regulator-max-microvolt = <3150000>;
 	};
 
+	vmmc2: regulator-vmmc2 {
+		compatible = "ti,twl4030-vmmc2";
+		regulator-min-microvolt = <1850000>;
+		regulator-max-microvolt = <3150000>;
+	};
+
 	vusb1v5: regulator-vusb1v5 {
 		compatible = "ti,twl4030-vusb1v5";
 	};

From f3ed0a17f0292300b3caca32d823ecd32554a667 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 11 Jul 2013 09:50:30 -0700
Subject: [PATCH 091/320] Thermal: x86 package temp thermal crash

On systems with no package MSR support this caused crash as there
is a bug in the logic to check presence of DTHERM and PTS feature
together. Added a change so that when there is no PTS support, module
doesn't get loaded. Even if some CPU comes online with the PTS
feature disabled, and other CPUs has this support, this patch
will still prevent such MSR accesses.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Reported-by: Daniel Walker <dwalker@fifo99.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/x86_pkg_temp_thermal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 5de56f671a9d..034604c021d6 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -511,7 +511,7 @@ static int get_core_online(unsigned int cpu)
 
 	/* Check if there is already an instance for this package */
 	if (!phdev) {
-		if (!cpu_has(c, X86_FEATURE_DTHERM) &&
+		if (!cpu_has(c, X86_FEATURE_DTHERM) ||
 					!cpu_has(c, X86_FEATURE_PTS))
 			return -ENODEV;
 		if (pkg_temp_thermal_device_add(cpu))
@@ -562,7 +562,7 @@ static struct notifier_block pkg_temp_thermal_notifier __refdata = {
 };
 
 static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
-	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM },
+	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);

From c7c1b3112e643d290471abd0f3b27ffeec6f28dd Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Tue, 18 Jun 2013 21:09:12 +0800
Subject: [PATCH 092/320] Thermal: x86_pkg_temp: fix krealloc() misuse in in
 pkg_temp_thermal_device_add()

If krealloc() returns NULL, it doesn't free the original. So any code
of the form 'foo = krealloc(foo, ...);' is almost certainly a bug.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/x86_pkg_temp_thermal.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 034604c021d6..cf172d50b5d3 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -394,6 +394,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
 	char buffer[30];
 	int thres_count;
 	u32 eax, ebx, ecx, edx;
+	u8 *temp;
 
 	cpuid(6, &eax, &ebx, &ecx, &edx);
 	thres_count = ebx & 0x07;
@@ -417,13 +418,14 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
 	spin_lock(&pkg_work_lock);
 	if (topology_physical_package_id(cpu) > max_phy_id)
 		max_phy_id = topology_physical_package_id(cpu);
-	pkg_work_scheduled = krealloc(pkg_work_scheduled,
-				(max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
-	if (!pkg_work_scheduled) {
+	temp = krealloc(pkg_work_scheduled,
+			(max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
+	if (!temp) {
 		spin_unlock(&pkg_work_lock);
 		err = -ENOMEM;
 		goto err_ret_free;
 	}
+	pkg_work_scheduled = temp;
 	pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
 	spin_unlock(&pkg_work_lock);
 

From cdcedd6981194e511cc206887db661d016069d68 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 15 Jul 2013 12:36:35 +0300
Subject: [PATCH 093/320] usb: dwc3: gadget: don't prevent gadget from being
 probed if we fail

In case we fail our ->udc_start() callback, we
should be ready to accept another modprobe following
the failed one.

We had forgotten to clear dwc->gadget_driver back
to NULL and, because of that, we were preventing
gadget driver modprobe from being retried.

Cc: <stable@vger.kernel.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/dwc3/gadget.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index b5e5b35df49c..f77083fedc68 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1584,6 +1584,7 @@ static int dwc3_gadget_start(struct usb_gadget *g,
 	__dwc3_gadget_ep_disable(dwc->eps[0]);
 
 err0:
+	dwc->gadget_driver = NULL;
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return ret;

From ae40d64b1f2db93d7b092e6425a2f716289fbd09 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Jun 2013 13:27:27 +0200
Subject: [PATCH 094/320] usb: gadget: at91_udc: call at91udc_of_init only when
 needed

This avoids a build error in at91sam9261_9g10_defconfig:

drivers/usb/gadget/at91_udc.c: In function 'at91udc_probe':
drivers/usb/gadget/at91_udc.c:1685:34: warning: 'flags' may be used uninitialized in this
function [-Wmaybe-uninitialized]
  board->vbus_active_low = (flags & OF_GPIO_ACTIVE_LOW) ? 1 : 0;
                                  ^
drivers/usb/gadget/at91_udc.c:1678:21: note: 'flags' was declared here
  enum of_gpio_flags flags;
                     ^

Making the call to at91udc_of_init conditinal also reduces
the object code size without sacrificing build coverage.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/at91_udc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 073b938f9135..2cbab1c9ff58 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -1725,7 +1725,7 @@ static int at91udc_probe(struct platform_device *pdev)
 	/* init software state */
 	udc = &controller;
 	udc->gadget.dev.parent = dev;
-	if (pdev->dev.of_node)
+	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
 		at91udc_of_init(udc, pdev->dev.of_node);
 	else
 		memcpy(&udc->board, dev->platform_data,

From 88ae742347831c47846ca7343e786abd6635752b Mon Sep 17 00:00:00 2001
From: Yuan-Hsin Chen <yhchen@faraday-tech.com>
Date: Fri, 21 Jun 2013 14:50:47 +0000
Subject: [PATCH 095/320] usb: gadget: fotg210-udc: remove __init and __exit

Remove __init and __exit from probe() and remove() and
would also fix the section mismatch issue.

Signed-off-by: Yuan-Hsin Chen <yhchen@faraday-tech.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/fotg210-udc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/fotg210-udc.c b/drivers/usb/gadget/fotg210-udc.c
index cce5535b1dc6..10cd18ddd0d4 100644
--- a/drivers/usb/gadget/fotg210-udc.c
+++ b/drivers/usb/gadget/fotg210-udc.c
@@ -1074,7 +1074,7 @@ static struct usb_gadget_ops fotg210_gadget_ops = {
 	.udc_stop		= fotg210_udc_stop,
 };
 
-static int __exit fotg210_udc_remove(struct platform_device *pdev)
+static int fotg210_udc_remove(struct platform_device *pdev)
 {
 	struct fotg210_udc *fotg210 = dev_get_drvdata(&pdev->dev);
 
@@ -1088,7 +1088,7 @@ static int __exit fotg210_udc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int __init fotg210_udc_probe(struct platform_device *pdev)
+static int fotg210_udc_probe(struct platform_device *pdev)
 {
 	struct resource *res, *ires;
 	struct fotg210_udc *fotg210 = NULL;

From 7628083227b6bc4a7e33d7c381d7a4e558424b6b Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <b.brezillon@overkiz.com>
Date: Tue, 25 Jun 2013 10:12:56 +0200
Subject: [PATCH 096/320] usb: gadget: at91_udc: prepare clk before calling
 enable

Replace clk_enable/disable with clk_prepare_enable/disable_unprepare to
avoid common clk framework warnings.

Signed-off-by: Boris BREZILLON <b.brezillon@overkiz.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/at91_udc.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 2cbab1c9ff58..d9a6add0c852 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -870,8 +870,8 @@ static void clk_on(struct at91_udc *udc)
 	if (udc->clocked)
 		return;
 	udc->clocked = 1;
-	clk_enable(udc->iclk);
-	clk_enable(udc->fclk);
+	clk_prepare_enable(udc->iclk);
+	clk_prepare_enable(udc->fclk);
 }
 
 static void clk_off(struct at91_udc *udc)
@@ -880,8 +880,8 @@ static void clk_off(struct at91_udc *udc)
 		return;
 	udc->clocked = 0;
 	udc->gadget.speed = USB_SPEED_UNKNOWN;
-	clk_disable(udc->fclk);
-	clk_disable(udc->iclk);
+	clk_disable_unprepare(udc->fclk);
+	clk_disable_unprepare(udc->iclk);
 }
 
 /*
@@ -1782,12 +1782,14 @@ static int at91udc_probe(struct platform_device *pdev)
 	}
 
 	/* don't do anything until we have both gadget driver and VBUS */
-	clk_enable(udc->iclk);
+	retval = clk_prepare_enable(udc->iclk);
+	if (retval)
+		goto fail1;
 	at91_udp_write(udc, AT91_UDP_TXVC, AT91_UDP_TXVC_TXVDIS);
 	at91_udp_write(udc, AT91_UDP_IDR, 0xffffffff);
 	/* Clear all pending interrupts - UDP may be used by bootloader. */
 	at91_udp_write(udc, AT91_UDP_ICR, 0xffffffff);
-	clk_disable(udc->iclk);
+	clk_disable_unprepare(udc->iclk);
 
 	/* request UDC and maybe VBUS irqs */
 	udc->udp_irq = platform_get_irq(pdev, 0);

From 1974d494dea05ea227cb42f5e918828801e237aa Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Thu, 27 Jun 2013 01:08:11 +0800
Subject: [PATCH 097/320] usb: dwc3: fix wrong bit mask in dwc3_event_type

Per dwc3 2.50a spec, the is_devspec bit is used to distinguish the
Device Endpoint-Specific Event or Device-Specific Event (DEVT). If the
bit is 1, the event is represented Device-Specific Event, then use
[7:1] bits as Device Specific Event to marked the type. It has 7 bits,
and we can see the reserved8_31 variable name which means from 8 to 31
bits marked reserved, actually there are 24 bits not 25 bits between
that. And 1 + 7 + 24 = 32, the event size is 4 byes.

So in dwc3_event_type, the bit mask should be:
is_devspec	[0]		1  bit
type		[7:1]		7  bits
reserved8_31	[31:8]		24 bits

This patch should be backported to kernels as old as 3.2, that contain
the commit 72246da40f3719af3bfd104a2365b32537c27d83 "usb: Introduce
DesignWare USB3 DRD Driver".

Cc: <stable@vger.kernel.org>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/dwc3/core.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index b69d322e3cab..27dad993b007 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -759,8 +759,8 @@ struct dwc3 {
 
 struct dwc3_event_type {
 	u32	is_devspec:1;
-	u32	type:6;
-	u32	reserved8_31:25;
+	u32	type:7;
+	u32	reserved8_31:24;
 } __packed;
 
 #define DWC3_DEPEVT_XFERCOMPLETE	0x01

From 315955d707b50c8aad20a32ec0dd4c9fe243cabe Mon Sep 17 00:00:00 2001
From: Ruchika Kharwar <ruchika@ti.com>
Date: Thu, 4 Jul 2013 00:59:34 -0500
Subject: [PATCH 098/320] usb: dwc3: fix the error returned with usb3_phy
 failure

When there is an error with the usb3_phy probe or absence, the error returned
is erroneously for usb2_phy.

Cc: <stable@vger.kernel.org>
Signed-off-by: Ruchika Kharwar <ruchika@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/dwc3/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index c35d49d39b76..358375e0b291 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -450,7 +450,7 @@ static int dwc3_probe(struct platform_device *pdev)
 	}
 
 	if (IS_ERR(dwc->usb3_phy)) {
-		ret = PTR_ERR(dwc->usb2_phy);
+		ret = PTR_ERR(dwc->usb3_phy);
 
 		/*
 		 * if -ENXIO is returned, it means PHY layer wasn't

From 5257a6332e9d02ed7a94381f553b9c05437baa5d Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Thu, 4 Jul 2013 21:45:02 +0800
Subject: [PATCH 099/320] usb: gadget: mv_u3d_core: fix to pass correct device
 identity to free_irq()

free_irq() expects the same device identity that was passed to
corresponding request_irq(), otherwise the IRQ is not freed.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/mv_u3d_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/mv_u3d_core.c b/drivers/usb/gadget/mv_u3d_core.c
index 07fdb3eaf48a..ec6a2d290398 100644
--- a/drivers/usb/gadget/mv_u3d_core.c
+++ b/drivers/usb/gadget/mv_u3d_core.c
@@ -1776,7 +1776,7 @@ static int mv_u3d_remove(struct platform_device *dev)
 	kfree(u3d->eps);
 
 	if (u3d->irq)
-		free_irq(u3d->irq, &dev->dev);
+		free_irq(u3d->irq, u3d);
 
 	if (u3d->cap_regs)
 		iounmap(u3d->cap_regs);
@@ -1974,7 +1974,7 @@ static int mv_u3d_probe(struct platform_device *dev)
 	return 0;
 
 err_unregister:
-	free_irq(u3d->irq, &dev->dev);
+	free_irq(u3d->irq, u3d);
 err_request_irq:
 err_get_irq:
 	kfree(u3d->status_req);

From 8047806e64ea7b33fcede5b93f7276568a6119e8 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 8 Jul 2013 22:47:37 -0700
Subject: [PATCH 100/320] usb: renesas_usbhs: gadget: remove extra check on
 udc_stop

usb_gadget_ops :: udc_stop might be called with driver = NULL since
511f3c5326eabe1ece35202a404c24c0aeacc246
(usb: gadget: udc-core: fix a regression during gadget driver unbinding)

Because of that, 2nd times insmod goes fail.
This patch fixes it up.

Reported-by: Yusuke Goda <yusuke.goda.sx@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/renesas_usbhs/mod_gadget.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index ed4949faa70d..805940c37353 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -855,10 +855,6 @@ static int usbhsg_gadget_stop(struct usb_gadget *gadget,
 	struct usbhsg_gpriv *gpriv = usbhsg_gadget_to_gpriv(gadget);
 	struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
 
-	if (!driver		||
-	    !driver->unbind)
-		return -EINVAL;
-
 	usbhsg_try_stop(priv, USBHSG_STATUS_REGISTERD);
 	gpriv->driver = NULL;
 

From 9d140f796f76d6a11fd27f07838285096e09b63b Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Tue, 9 Jul 2013 08:14:39 +0200
Subject: [PATCH 101/320] usb: gadget: Kconfig: Fix configfs-based RNDIS
 function build

USB_CONFIGFS_RNDIS depends on USB_U_RNDIS. Select it.

Acked-by: Michal Nazarewicz <mina86@mina86.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 62f6802f6e0f..5fce99a5d99e 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -639,6 +639,7 @@ config USB_CONFIGFS_RNDIS
 	depends on USB_CONFIGFS
 	depends on NET
 	select USB_U_ETHER
+	select USB_U_RNDIS
 	select USB_F_RNDIS
 	help
 	   Microsoft Windows XP bundles the "Remote NDIS" (RNDIS) protocol,

From 8744303eef570b4adc19a406b11418d21baa1c3c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 11 Jul 2013 15:54:00 +0200
Subject: [PATCH 102/320] usb: dwc3: USB_DWC3 should depend on HAS_DMA

If NO_DMA=y:

drivers/built-in.o: In function `dwc3_free_one_event_buffer':
drivers/usb/dwc3/core.c:132: undefined reference to `dma_free_coherent'
drivers/built-in.o: In function `dwc3_alloc_one_event_buffer':
drivers/usb/dwc3/core.c:154: undefined reference to `dma_alloc_coherent'
drivers/built-in.o: In function `dma_set_coherent_mask':
include/linux/dma-mapping.h:93: undefined reference to `dma_supported'
drivers/built-in.o: In function `dwc3_free_trb_pool':
drivers/usb/dwc3/gadget.c:407: undefined reference to `dma_free_coherent'
drivers/built-in.o: In function `dwc3_gadget_exit':
drivers/usb/dwc3/gadget.c:2693: undefined reference to `dma_free_coherent'
drivers/built-in.o: In function `dwc3_alloc_trb_pool':
drivers/usb/dwc3/gadget.c:391: undefined reference to `dma_alloc_coherent'
drivers/built-in.o: In function `dwc3_gadget_init':
drivers/usb/dwc3/gadget.c:2598: undefined reference to `dma_alloc_coherent'
drivers/usb/dwc3/gadget.c:2667: undefined reference to `dma_free_coherent'
drivers/usb/dwc3/gadget.c:2674: undefined reference to `dma_free_coherent'
drivers/usb/dwc3/gadget.c:2678: undefined reference to `dma_free_coherent'

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/dwc3/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 757aa18027d0..2378958ea63e 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -1,6 +1,6 @@
 config USB_DWC3
 	tristate "DesignWare USB3 DRD Core Support"
-	depends on (USB || USB_GADGET) && GENERIC_HARDIRQS
+	depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA
 	select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD
 	help
 	  Say Y or M here if your system has a Dual Role SuperSpeed

From 4713aec180b666a78bc658490870b61f6846d8a6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 11 Jul 2013 15:54:02 +0200
Subject: [PATCH 103/320] usb: gadget: USB_MV_UDC should depend on HAS_DMA

If NO_DMA=y:

drivers/built-in.o: In function `done':
drivers/usb/gadget/mv_udc_core.c:239: undefined reference to `dma_pool_free'
drivers/built-in.o: In function `build_dtd':
drivers/usb/gadget/mv_udc_core.c:371: undefined reference to `dma_pool_alloc'
drivers/built-in.o: In function `udc_prime_status':
drivers/usb/gadget/mv_udc_core.c:1465: undefined reference to `dma_map_single'
drivers/built-in.o: In function `mv_udc_remove':
drivers/usb/gadget/mv_udc_core.c:2087: undefined reference to `dma_pool_destroy'
drivers/usb/gadget/mv_udc_core.c:2090: undefined reference to `dma_free_coherent'
drivers/built-in.o: In function `mv_udc_probe':
drivers/usb/gadget/mv_udc_core.c:2190: undefined reference to `dma_alloc_coherent'
drivers/usb/gadget/mv_udc_core.c:2201: undefined reference to `dma_pool_create'
drivers/usb/gadget/mv_udc_core.c:2315: undefined reference to `dma_pool_destroy'
drivers/usb/gadget/mv_udc_core.c:2317: undefined reference to `dma_free_coherent'

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 5fce99a5d99e..1fd0e17d3706 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -328,7 +328,7 @@ config USB_S3C_HSUDC
 
 config USB_MV_UDC
 	tristate "Marvell USB2.0 Device Controller"
-	depends on GENERIC_HARDIRQS
+	depends on GENERIC_HARDIRQS && HAS_DMA
 	help
 	  Marvell Socs (including PXA and MMP series) include a high speed
 	  USB2.0 OTG controller, which can be configured as high speed or

From bfcbd020396202e7795d78f8d87845e4e225c3b1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 11 Jul 2013 15:54:03 +0200
Subject: [PATCH 104/320] usb: gadget: USB_FOTG210_UDC should depend on HAS_DMA

If NO_DMA=y:

drivers/built-in.o: In function `fotg210_start_dma':
drivers/usb/gadget/fotg210-udc.c:354: undefined reference to `dma_map_single'
drivers/usb/gadget/fotg210-udc.c:357: undefined reference to `dma_mapping_error'
drivers/usb/gadget/fotg210-udc.c:362: undefined reference to `dma_sync_single_for_cpu'
drivers/usb/gadget/fotg210-udc.c:376: undefined reference to `dma_unmap_single'

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 1fd0e17d3706..2c4e978cef04 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -193,6 +193,7 @@ config USB_FUSB300
 	   Faraday usb device controller FUSB300 driver
 
 config USB_FOTG210_UDC
+	depends on HAS_DMA
 	tristate "Faraday FOTG210 USB Peripheral Controller"
 	help
 	   Faraday USB2.0 OTG controller which can be configured as

From 91f6b84739e29069670f54651340d2c20e4e7399 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 11 Jul 2013 15:54:04 +0200
Subject: [PATCH 105/320] usb: gadget: USB_MV_U3D should depend on HAS_DMA

If NO_DMA=y:

drivers/built-in.o: In function `mv_u3d_done':
drivers/usb/gadget/mv_u3d_core.c:206: undefined reference to `dma_pool_free'
drivers/usb/gadget/mv_u3d_core.c:209: undefined reference to `dma_unmap_single'
drivers/built-in.o: In function `mv_u3d_build_trb_one':
drivers/usb/gadget/mv_u3d_core.c:311: undefined reference to `dma_pool_alloc'
drivers/built-in.o: In function `mv_u3d_req_to_trb':
drivers/usb/gadget/mv_u3d_core.c:480: undefined reference to `dma_map_single'
drivers/built-in.o: In function `mv_u3d_remove':
drivers/usb/gadget/mv_u3d_core.c:1770: undefined reference to `dma_pool_destroy'
drivers/usb/gadget/mv_u3d_core.c:1773: undefined reference to `dma_free_coherent'
drivers/built-in.o: In function `mv_u3d_probe':
drivers/usb/gadget/mv_u3d_core.c:1880: undefined reference to `dma_alloc_coherent'
drivers/usb/gadget/mv_u3d_core.c:1890: undefined reference to `dma_pool_create'
drivers/usb/gadget/mv_u3d_core.c:1984: undefined reference to `dma_pool_destroy'
drivers/usb/gadget/mv_u3d_core.c:1986: undefined reference to `dma_free_coherent'

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 2c4e978cef04..8e9368330b10 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -336,6 +336,7 @@ config USB_MV_UDC
 	  full speed USB peripheral.
 
 config USB_MV_U3D
+	depends on HAS_DMA
 	tristate "MARVELL PXA2128 USB 3.0 controller"
 	help
 	  MARVELL PXA2128 Processor series include a super speed USB3.0 device

From 908b961326b2ecfbf494c9b2f206847d925f269d Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 12 Jul 2013 11:06:21 -0400
Subject: [PATCH 106/320] usb: gadget: don't fail when DMA isn't present

When CONFIG_HAS_DMA isn't enabled, the UDC core gets build errors:

drivers/built-in.o: In function `dma_set_coherent_mask':
include/linux/dma-mapping.h:93: undefined reference to `dma_supported'
include/linux/dma-mapping.h:93: undefined reference to `dma_supported'
drivers/built-in.o: In function `usb_gadget_unmap_request':
drivers/usb/gadget/udc-core.c:91: undefined reference to `dma_unmap_sg'
drivers/usb/gadget/udc-core.c:96: undefined reference to `dma_unmap_single'
drivers/built-in.o: In function `usb_gadget_map_request':
drivers/usb/gadget/udc-core.c:62: undefined reference to `dma_map_sg'
drivers/usb/gadget/udc-core.c:71: undefined reference to `dma_map_single'
drivers/usb/gadget/udc-core.c:74: undefined reference to `dma_mapping_error'

Prevent this by protecting the DMA API routines with preprocessor tests.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/udc-core.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index ffd8fa541101..c28ac9872030 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -50,6 +50,8 @@ static DEFINE_MUTEX(udc_lock);
 
 /* ------------------------------------------------------------------------- */
 
+#ifdef	CONFIG_HAS_DMA
+
 int usb_gadget_map_request(struct usb_gadget *gadget,
 		struct usb_request *req, int is_in)
 {
@@ -99,6 +101,8 @@ void usb_gadget_unmap_request(struct usb_gadget *gadget,
 }
 EXPORT_SYMBOL_GPL(usb_gadget_unmap_request);
 
+#endif	/* CONFIG_HAS_DMA */
+
 /* ------------------------------------------------------------------------- */
 
 void usb_gadget_set_state(struct usb_gadget *gadget,
@@ -194,9 +198,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 	dev_set_name(&gadget->dev, "gadget");
 	gadget->dev.parent = parent;
 
+#ifdef	CONFIG_HAS_DMA
 	dma_set_coherent_mask(&gadget->dev, parent->coherent_dma_mask);
 	gadget->dev.dma_parms = parent->dma_parms;
 	gadget->dev.dma_mask = parent->dma_mask;
+#endif
 
 	if (release)
 		gadget->dev.release = release;

From 24e6bfd9d9d4e2f34dee59f0b477b72bdda5f800 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Mon, 15 Jul 2013 14:06:54 +0530
Subject: [PATCH 107/320] usb: phy: samsung: Fix an error message typo

The error message is common to both clk_get functions. Update it
accordingly.

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-samsung-usb2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/phy/phy-samsung-usb2.c b/drivers/usb/phy/phy-samsung-usb2.c
index 1011c16ade7e..758b86d0fcb3 100644
--- a/drivers/usb/phy/phy-samsung-usb2.c
+++ b/drivers/usb/phy/phy-samsung-usb2.c
@@ -388,7 +388,7 @@ static int samsung_usb2phy_probe(struct platform_device *pdev)
 		clk = devm_clk_get(dev, "otg");
 
 	if (IS_ERR(clk)) {
-		dev_err(dev, "Failed to get otg clock\n");
+		dev_err(dev, "Failed to get usbhost/otg clock\n");
 		return PTR_ERR(clk);
 	}
 

From 690c70bab1ebab0782cf9c316b726e2dddebc411 Mon Sep 17 00:00:00 2001
From: Ruchika Kharwar <ruchika@ti.com>
Date: Thu, 4 Jul 2013 00:21:19 -0500
Subject: [PATCH 108/320] usb: phy: omap-usb3: fix dpll clock index

Correction of the omap_usb3_dpll_params array when the sys_clk_rate is
20MHz.

Signed-off-by: Nikhil Devshatwar <nikhil.nd@ti.com>
Signed-off-by: Ruchika Kharwar <ruchika@ti.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-omap-usb3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/phy/phy-omap-usb3.c b/drivers/usb/phy/phy-omap-usb3.c
index efe6e1464f45..a2fb30bbb971 100644
--- a/drivers/usb/phy/phy-omap-usb3.c
+++ b/drivers/usb/phy/phy-omap-usb3.c
@@ -71,9 +71,9 @@ static struct usb_dpll_params omap_usb3_dpll_params[NUM_SYS_CLKS] = {
 	{1250, 5, 4, 20, 0},		/* 12 MHz */
 	{3125, 20, 4, 20, 0},		/* 16.8 MHz */
 	{1172, 8, 4, 20, 65537},	/* 19.2 MHz */
+	{1000, 7, 4, 10, 0},            /* 20 MHz */
 	{1250, 12, 4, 20, 0},		/* 26 MHz */
 	{3125, 47, 4, 20, 92843},	/* 38.4 MHz */
-	{1000, 7, 4, 10, 0},            /* 20 MHz */
 
 };
 

From 84bb08472520882394fe16d7a3548793302563de Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Sun, 9 Jun 2013 22:07:47 -0300
Subject: [PATCH 109/320] ARM: dts: imx51-babbage: Pass a real clock to the
 codec

On imx51_babbage the codec clock is activated via GPIO4_26.

Provide a real clock to the sgtl5000 codec via device tree.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 arch/arm/boot/dts/imx51-babbage.dts | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts
index 6dd9486c755b..ad3471ca17c7 100644
--- a/arch/arm/boot/dts/imx51-babbage.dts
+++ b/arch/arm/boot/dts/imx51-babbage.dts
@@ -61,6 +61,16 @@ sound {
 		mux-int-port = <2>;
 		mux-ext-port = <3>;
 	};
+
+	clocks {
+		clk_26M: codec_clock {
+			compatible = "fixed-clock";
+			reg=<0>;
+			#clock-cells = <0>;
+			clock-frequency = <26000000>;
+			gpios = <&gpio4 26 1>;
+		};
+	};
 };
 
 &esdhc1 {
@@ -229,6 +239,7 @@ MX51_PAD_GPIO1_6__GPIO1_6    0x100
 				MX51_PAD_EIM_A27__GPIO2_21   0x5
 				MX51_PAD_CSPI1_SS0__GPIO4_24 0x85
 				MX51_PAD_CSPI1_SS1__GPIO4_25 0x85
+				MX51_PAD_CSPI1_RDY__GPIO4_26 0x80000000
 			>;
 		};
 	};
@@ -255,7 +266,7 @@ &i2c2 {
 	sgtl5000: codec@0a {
 		compatible = "fsl,sgtl5000";
 		reg = <0x0a>;
-		clock-frequency = <26000000>;
+		clocks = <&clk_26M>;
 		VDDA-supply = <&vdig_reg>;
 		VDDIO-supply = <&vvideo_reg>;
 	};

From 991821c86c2fb6cc4104ce679247864dbc070a83 Mon Sep 17 00:00:00 2001
From: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>
Date: Mon, 15 Jul 2013 16:32:34 +0800
Subject: [PATCH 110/320] tracing: Use correct config guard CONFIG_STACK_TRACER

We should use CONFIG_STACK_TRACER to guard readme text
of stack tracer related file, not CONFIG_STACKTRACE.

Link: http://lkml.kernel.org/r/51E3B3A2.8080609@huawei.com

Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0cd500bffd9b..25b91afc29e0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3537,14 +3537,14 @@ static const char readme_msg[] =
 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
 	"\t\t\t  Read the contents for more information\n"
 #endif
-#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_STACK_TRACER
 	"  stack_trace\t\t- Shows the max stack trace when active\n"
 	"  stack_max_size\t- Shows current max stack size that was traced\n"
 	"\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
 #ifdef CONFIG_DYNAMIC_FTRACE
 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
 #endif
-#endif /* CONFIG_STACKTRACE */
+#endif /* CONFIG_STACK_TRACER */
 ;
 
 static ssize_t

From 94e791f522595e2d5276e6d27a5b3b57f4e1cd8d Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 15 Jul 2013 15:38:52 -0700
Subject: [PATCH 111/320] Thermal: x86_pkg_temp: Limit number of pkg temp zones

Although it is unlikley that physical package id is set to some
arbitary number, it is better to prevent in anycase. Since package
temp zones use this in thermal zone type and for allocation, added
a limit.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/x86_pkg_temp_thermal.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index cf172d50b5d3..810143a6aa33 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -54,6 +54,8 @@ MODULE_PARM_DESC(notify_delay_ms,
 * is some wrong values returned by cpuid for number of thresholds.
 */
 #define MAX_NUMBER_OF_TRIPS	2
+/* Limit number of package temp zones */
+#define MAX_PKG_TEMP_ZONE_IDS	256
 
 struct phy_dev_entry {
 	struct list_head list;
@@ -401,6 +403,9 @@ static int pkg_temp_thermal_device_add(unsigned int cpu)
 	if (!thres_count)
 		return -ENODEV;
 
+	if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS)
+		return -ENODEV;
+
 	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
 
 	err = get_tj_max(cpu, &tj_max);

From e085cad6c653e20e213a662ef32fb6191ae0197d Mon Sep 17 00:00:00 2001
From: Kukjin Kim <kgene.kim@samsung.com>
Date: Wed, 26 Jun 2013 22:29:44 +0900
Subject: [PATCH 112/320] ARM: EXYNOS: skip pm support on exynos5440

EXYNOS5440 doesn't support PM and current single image for EXYNOS
will be break without this patch. Actually, SSDK5440 cannot boot
without this so this should be merged during rc.

Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/pm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c
index 41c20692a13f..c679db577269 100644
--- a/arch/arm/mach-exynos/pm.c
+++ b/arch/arm/mach-exynos/pm.c
@@ -217,6 +217,9 @@ static __init int exynos_pm_drvinit(void)
 	struct clk *pll_base;
 	unsigned int tmp;
 
+	if (soc_is_exynos5440())
+		return 0;
+
 	s3c_pm_init();
 
 	/* All wakeup disable */
@@ -340,6 +343,9 @@ static struct syscore_ops exynos_pm_syscore_ops = {
 
 static __init int exynos_pm_syscore_init(void)
 {
+	if (soc_is_exynos5440())
+		return 0;
+
 	register_syscore_ops(&exynos_pm_syscore_ops);
 	return 0;
 }

From ea2761763739619ca49602d891ec3ffb33ffc71b Mon Sep 17 00:00:00 2001
From: Subash Patel <subash.rp@samsung.com>
Date: Tue, 16 Jul 2013 12:42:13 +0900
Subject: [PATCH 113/320] ARM: EXYNOS: change the PHYSMEM_BITS and SECTION_SIZE

On EXYNOS5440 there is DRAM on the 36-bit address range. Hence
this patch converts the MAX_PHYSMEM_BITS macro to 36 if LPAE is
enabled for the ARM architecture.

The conventional section size on EXYNOS is 256M due to sparsemem.
Since EXYNOS5440 has memory in multiples of 1G in 32-bit and 36-bit
range, this has now been modified to 31.

Signed-off-by: Subash Patel <subash.rp@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/include/mach/memory.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/mach-exynos/include/mach/memory.h b/arch/arm/mach-exynos/include/mach/memory.h
index 374ef2cf7152..2a4cdb7cb326 100644
--- a/arch/arm/mach-exynos/include/mach/memory.h
+++ b/arch/arm/mach-exynos/include/mach/memory.h
@@ -15,8 +15,13 @@
 
 #define PLAT_PHYS_OFFSET		UL(0x40000000)
 
+#ifndef CONFIG_ARM_LPAE
 /* Maximum of 256MiB in one bank */
 #define MAX_PHYSMEM_BITS	32
 #define SECTION_SIZE_BITS	28
+#else
+#define MAX_PHYSMEM_BITS	36
+#define SECTION_SIZE_BITS	31
+#endif
 
 #endif /* __ASM_ARCH_MEMORY_H */

From cfaf8ee2f91607c8955af68538ba849b125fb8d5 Mon Sep 17 00:00:00 2001
From: Thomas Abraham <thomas.ab@samsung.com>
Date: Tue, 16 Jul 2013 12:42:59 +0900
Subject: [PATCH 114/320] ARM: EXYNOS: Enable 64-bit DMA for EXYNOS5440 if LPAE
 is enabled

Allow 64-bit DMA addresses if LPAE is enabled on EXYNOS5440.

Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 855d4a7b462d..5952e68c76c4 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -92,6 +92,7 @@ config SOC_EXYNOS5440
 	bool "SAMSUNG EXYNOS5440"
 	default y
 	depends on ARCH_EXYNOS5
+	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ARCH_HAS_OPP
 	select HAVE_ARM_ARCH_TIMER
 	select AUTO_ZRELADDR

From eac27f04a71e1f39f196f7e520d16dcefc955d77 Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Thu, 11 Jul 2013 21:15:57 -0400
Subject: [PATCH 115/320] ata: Fix DVD not dectected at some platform with
 Wellsburg PCH

There is a patch b55f84e2d527182e7c611d466cd0bb6ddce201de "ata_piix: Fix DVD
 not dectected at some Haswell platforms" to fix an issue of DVD not
recognized on Haswell Desktop platform with Lynx Point.
Recently, it is also found the same issue at some platformas with Wellsburg PCH.

So deliver a similar patch to fix it by disables 32bit PIO in IDE mode.

Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/ata/ata_piix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index b52a10c8eeb9..513ad7ed0c99 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -330,7 +330,7 @@ static const struct pci_device_id piix_pci_tbl[] = {
 	/* SATA Controller IDE (Wellsburg) */
 	{ 0x8086, 0x8d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
 	/* SATA Controller IDE (Wellsburg) */
-	{ 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	{ 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb },
 	/* SATA Controller IDE (Wellsburg) */
 	{ 0x8086, 0x8d60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
 	/* SATA Controller IDE (Wellsburg) */

From 741532c4a995be11815cb72d4d7a48f442a22fea Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Tue, 18 Jun 2013 19:04:47 +0300
Subject: [PATCH 116/320] ARM: OMAP2+: Provide alias to USB PHY clock

Till the OMAP clocks are correctly defined in device tree, use
this temporary hack to provide clock alias to the USB PHY clocks.

Without this, USB Host & Ethernet will not be functional with
device tree boots on Panda and uEVM.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-generic.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index e5fbfed69aa2..be5d005ebad2 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -15,6 +15,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/irqdomain.h>
+#include <linux/clk.h>
 
 #include <asm/mach/arch.h>
 
@@ -35,6 +36,21 @@ static struct of_device_id omap_dt_match_table[] __initdata = {
 	{ }
 };
 
+/*
+ * Create alias for USB host PHY clock.
+ * Remove this when clock phandle can be provided via DT
+ */
+static void __init legacy_init_ehci_clk(char *clkname)
+{
+	int ret;
+
+	ret = clk_add_alias("main_clk", NULL, clkname, NULL);
+	if (ret) {
+		pr_err("%s:Failed to add main_clk alias to %s :%d\n",
+						__func__, clkname, ret);
+	}
+}
+
 static void __init omap_generic_init(void)
 {
 	omap_sdrc_init(NULL, NULL);
@@ -45,10 +61,15 @@ static void __init omap_generic_init(void)
 	 * HACK: call display setup code for selected boards to enable omapdss.
 	 * This will be removed when omapdss supports DT.
 	 */
-	if (of_machine_is_compatible("ti,omap4-panda"))
+	if (of_machine_is_compatible("ti,omap4-panda")) {
 		omap4_panda_display_init_of();
+		legacy_init_ehci_clk("auxclk3_ck");
+
+	}
 	else if (of_machine_is_compatible("ti,omap4-sdp"))
 		omap_4430sdp_display_init_of();
+	else if (of_machine_is_compatible("ti,omap5-uevm"))
+		legacy_init_ehci_clk("auxclk1_ck");
 }
 
 #ifdef CONFIG_SOC_OMAP2420

From 30f099cd88c8e92206c5e4ccf814ce0362db3d22 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Wed, 26 Jun 2013 09:58:16 +0800
Subject: [PATCH 117/320] ARM: edma: remove duplicated include from edma.c

Remove duplicated include.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/common/edma.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index a432e6c1dac1..39ad030ac0c7 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -26,7 +26,6 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/edma.h>
-#include <linux/err.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>

From 9c559708b6cda71d8fdabc3f98cb7e98edbd4f0b Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Fri, 12 Jul 2013 15:19:03 +0530
Subject: [PATCH 118/320] ARM: davinci: make file local variables static

Make file local variables static in mach-davinci.
This fixes sparse warnings of the form:

arch/arm/mach-davinci/dm355.c:863:27: warning: symbol 'dm355_venc_pdata' was not declared. Should it be static?

Cc: Prabhakar Lad <prabhakar.csengg@gmail.com>
Acked-by: Prabhakar Lad <prabhakar.csengg@gmail.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/board-dm365-evm.c | 2 +-
 arch/arm/mach-davinci/dm355.c           | 2 +-
 arch/arm/mach-davinci/dm365.c           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index afbc439f11d4..4cdb61c54459 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -505,7 +505,7 @@ static struct vpbe_output dm365evm_vpbe_outputs[] = {
 /*
  * Amplifiers on the board
  */
-struct ths7303_platform_data ths7303_pdata = {
+static struct ths7303_platform_data ths7303_pdata = {
 	.ch_1 = 3,
 	.ch_2 = 3,
 	.ch_3 = 3,
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 42ef53f62c6c..86100d179694 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -860,7 +860,7 @@ static struct platform_device dm355_vpbe_display = {
 	},
 };
 
-struct venc_platform_data dm355_venc_pdata = {
+static struct venc_platform_data dm355_venc_pdata = {
 	.setup_pinmux	= dm355_vpbe_setup_pinmux,
 	.setup_clock	= dm355_venc_setup_clock,
 };
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index fa7af5eda52d..dad28029ba9b 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -1349,7 +1349,7 @@ static struct platform_device dm365_vpbe_display = {
 	},
 };
 
-struct venc_platform_data dm365_venc_pdata = {
+static struct venc_platform_data dm365_venc_pdata = {
 	.setup_pinmux	= dm365_vpbe_setup_pinmux,
 	.setup_clock	= dm365_venc_setup_clock,
 };

From cf690331642010a837c026088588761f61528ddf Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Fri, 12 Jul 2013 19:19:59 +0530
Subject: [PATCH 119/320] ARM: davinci: defconfig: enable EDMA driver

Enable EDMA dmaengine driver by default in DaVinci
defconfig files as it is required for DMA support
on MMC/SD, SPI etc.

Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/configs/da8xx_omapl_defconfig | 2 ++
 arch/arm/configs/davinci_all_defconfig | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/arm/configs/da8xx_omapl_defconfig b/arch/arm/configs/da8xx_omapl_defconfig
index 7c868139bdb0..1571bea48bed 100644
--- a/arch/arm/configs/da8xx_omapl_defconfig
+++ b/arch/arm/configs/da8xx_omapl_defconfig
@@ -102,6 +102,8 @@ CONFIG_SND_SOC=m
 CONFIG_SND_DAVINCI_SOC=m
 # CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_DMADEVICES=y
+CONFIG_TI_EDMA=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_XFS_FS=m
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index c86fd75e181a..ab2f7378352c 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -162,6 +162,8 @@ CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_HEARTBEAT=m
 CONFIG_RTC_CLASS=y
+CONFIG_DMADEVICES=y
+CONFIG_TI_EDMA=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_XFS_FS=m

From 36ff66db3fb5642906e46e73ca9cf92f1c5974ff Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 27 Jun 2013 15:27:07 -0400
Subject: [PATCH 120/320] USB: move the definition of USB_MAXCHILDREN

The USB_MAXCHILDREN symbol is used in include/uapi/linux/usb/ch11.h, a
user-mode header, even though it is defined in include/linux/usb.h,
which is kernel-only.  This causes compile-time errors when user
programs try to #include linux/usb/ch11.h.

This patch fixes the problem by moving the definition of USB_MAXCHILDREN
into ch11.h.  It also gets rid of unneeded parentheses.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h           | 11 -----------
 include/uapi/linux/usb/ch11.h | 11 +++++++++++
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/usb.h b/include/linux/usb.h
index a232b7ece1f6..0eec2689b955 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -367,17 +367,6 @@ struct usb_bus {
 
 /* ----------------------------------------------------------------------- */
 
-/* This is arbitrary.
- * From USB 2.0 spec Table 11-13, offset 7, a hub can
- * have up to 255 ports. The most yet reported is 10.
- *
- * Current Wireless USB host hardware (Intel i1480 for example) allows
- * up to 22 devices to connect. Upcoming hardware might raise that
- * limit. Because the arrays need to add a bit for hub status data, we
- * do 31, so plus one evens out to four bytes.
- */
-#define USB_MAXCHILDREN		(31)
-
 struct usb_tt;
 
 enum usb_device_removable {
diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h
index 7692dc69ccf7..331499d597fa 100644
--- a/include/uapi/linux/usb/ch11.h
+++ b/include/uapi/linux/usb/ch11.h
@@ -11,6 +11,17 @@
 
 #include <linux/types.h>	/* __u8 etc */
 
+/* This is arbitrary.
+ * From USB 2.0 spec Table 11-13, offset 7, a hub can
+ * have up to 255 ports. The most yet reported is 10.
+ *
+ * Current Wireless USB host hardware (Intel i1480 for example) allows
+ * up to 22 devices to connect. Upcoming hardware might raise that
+ * limit. Because the arrays need to add a bit for hub status data, we
+ * use 31, so plus one evens out to four bytes.
+ */
+#define USB_MAXCHILDREN		31
+
 /*
  * Hub request types
  */

From e583d9db9960cf40e0bc8afee4946baa9d71596e Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 11 Jul 2013 14:58:04 -0400
Subject: [PATCH 121/320] USB: global suspend and remote wakeup don't mix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hub driver was recently changed to use "global" suspend for system
suspend transitions on non-SuperSpeed buses.  This means that we don't
suspend devices individually by setting the suspend feature on the
upstream hub port; instead devices all go into suspend automatically
when the root hub stops transmitting packets.  The idea was to save
time and to avoid certain kinds of wakeup races.

Now it turns out that many hubs are buggy; they don't relay wakeup
requests from a downstream port to their upstream port if the
downstream port's suspend feature is not set (depending on the speed
of the downstream port, whether or not the hub is enabled for remote
wakeup, and possibly other factors).

We can't have hubs dropping wakeup requests.  Therefore this patch
goes partway back to the old policy: It sets the suspend feature for a
port if the device attached to that port or any of its descendants is
enabled for wakeup.  People will still be able to benefit from the
time savings if they don't care about wakeup and leave it disabled on
all their devices.

In order to accomplish this, the patch adds a new field to the usb_hub
structure: wakeup_enabled_descendants is a count of how many devices
below a suspended hub are enabled for remote wakeup.  A corresponding
new subroutine determines the number of wakeup-enabled devices at or
below an arbitrary suspended USB device.

This should be applied to the 3.10 stable kernel.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-and-tested-by: Toralf Förster <toralf.foerster@gmx.de>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 39 +++++++++++++++++++++++++++++++--------
 drivers/usb/core/hub.h |  3 +++
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 4191db32f12c..c857471519e3 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2848,6 +2848,15 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev)
 				USB_CTRL_SET_TIMEOUT);
 }
 
+/* Count of wakeup-enabled devices at or below udev */
+static unsigned wakeup_enabled_descendants(struct usb_device *udev)
+{
+	struct usb_hub *hub = usb_hub_to_struct_hub(udev);
+
+	return udev->do_remote_wakeup +
+			(hub ? hub->wakeup_enabled_descendants : 0);
+}
+
 /*
  * usb_port_suspend - suspend a usb device's upstream port
  * @udev: device that's no longer in active use, not a root hub
@@ -2888,8 +2897,8 @@ static int usb_disable_function_remotewakeup(struct usb_device *udev)
  * Linux (2.6) currently has NO mechanisms to initiate that:  no khubd
  * timer, no SRP, no requests through sysfs.
  *
- * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get
- * suspended only when their bus goes into global suspend (i.e., the root
+ * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get
+ * suspended until their bus goes into global suspend (i.e., the root
  * hub is suspended).  Nevertheless, we change @udev->state to
  * USB_STATE_SUSPENDED as this is the device's "logical" state.  The actual
  * upstream port setting is stored in @udev->port_is_suspended.
@@ -2960,15 +2969,21 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 	/* see 7.1.7.6 */
 	if (hub_is_superspeed(hub->hdev))
 		status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3);
-	else if (PMSG_IS_AUTO(msg))
-		status = set_port_feature(hub->hdev, port1,
-						USB_PORT_FEAT_SUSPEND);
+
 	/*
 	 * For system suspend, we do not need to enable the suspend feature
 	 * on individual USB-2 ports.  The devices will automatically go
 	 * into suspend a few ms after the root hub stops sending packets.
 	 * The USB 2.0 spec calls this "global suspend".
+	 *
+	 * However, many USB hubs have a bug: They don't relay wakeup requests
+	 * from a downstream port if the port's suspend feature isn't on.
+	 * Therefore we will turn on the suspend feature if udev or any of its
+	 * descendants is enabled for remote wakeup.
 	 */
+	else if (PMSG_IS_AUTO(msg) || wakeup_enabled_descendants(udev) > 0)
+		status = set_port_feature(hub->hdev, port1,
+				USB_PORT_FEAT_SUSPEND);
 	else {
 		really_suspend = false;
 		status = 0;
@@ -3003,15 +3018,16 @@ int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
 		if (!PMSG_IS_AUTO(msg))
 			status = 0;
 	} else {
-		/* device has up to 10 msec to fully suspend */
 		dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n",
 				(PMSG_IS_AUTO(msg) ? "auto-" : ""),
 				udev->do_remote_wakeup);
-		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 		if (really_suspend) {
 			udev->port_is_suspended = 1;
+
+			/* device has up to 10 msec to fully suspend */
 			msleep(10);
 		}
+		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 	}
 
 	/*
@@ -3293,7 +3309,11 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
 	unsigned		port1;
 	int			status;
 
-	/* Warn if children aren't already suspended */
+	/*
+	 * Warn if children aren't already suspended.
+	 * Also, add up the number of wakeup-enabled descendants.
+	 */
+	hub->wakeup_enabled_descendants = 0;
 	for (port1 = 1; port1 <= hdev->maxchild; port1++) {
 		struct usb_device	*udev;
 
@@ -3303,6 +3323,9 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
 			if (PMSG_IS_AUTO(msg))
 				return -EBUSY;
 		}
+		if (udev)
+			hub->wakeup_enabled_descendants +=
+					wakeup_enabled_descendants(udev);
 	}
 
 	if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) {
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 6508e02b3dac..4e4790dea343 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -59,6 +59,9 @@ struct usb_hub {
 	struct usb_tt		tt;		/* Transaction Translator */
 
 	unsigned		mA_per_port;	/* current for each child */
+#ifdef	CONFIG_PM
+	unsigned		wakeup_enabled_descendants;
+#endif
 
 	unsigned		limited_power:1;
 	unsigned		quiescing:1;

From 84672369ffb98a51d4ddf74c20a23636da3ad615 Mon Sep 17 00:00:00 2001
From: Fernando Soto <fsoto@bluecatnetworks.com>
Date: Fri, 14 Jun 2013 23:13:35 +0000
Subject: [PATCH 122/320] Drivers: hv: vmbus: incorrect device name is printed
 when child device is unregistered
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whenever a device is unregistered in vmbus_device_unregister (drivers/hv/vmbus_drv.c), the device name in the log message may contain garbage as the memory has already been freed by the time pr_info is called. Log example:
 [ 3149.170475] hv_vmbus: child device àõsèè0_5 unregistered

By logging the message just before calling device_unregister, the correct device name is printed:
[ 3145.034652] hv_vmbus: child device vmbus_0_5 unregistered

Also changing register & unregister messages to debug to avoid unnecessarily cluttering the kernel log.

Signed-off-by: Fernando M Soto <fsoto@bluecatnetworks.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/vmbus_drv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index a2464bf07c49..e8e071fc1d6d 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -690,7 +690,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
 	if (ret)
 		pr_err("Unable to register child device\n");
 	else
-		pr_info("child device %s registered\n",
+		pr_debug("child device %s registered\n",
 			dev_name(&child_device_obj->device));
 
 	return ret;
@@ -702,14 +702,14 @@ int vmbus_device_register(struct hv_device *child_device_obj)
  */
 void vmbus_device_unregister(struct hv_device *device_obj)
 {
+	pr_debug("child device %s unregistered\n",
+		dev_name(&device_obj->device));
+
 	/*
 	 * Kick off the process of unregistering the device.
 	 * This will call vmbus_remove() and eventually vmbus_device_release()
 	 */
 	device_unregister(&device_obj->device);
-
-	pr_info("child device %s unregistered\n",
-		dev_name(&device_obj->device));
 }
 
 

From ed07ec93e83ec471d365ce084e43ad90fd205903 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sun, 14 Jul 2013 22:38:11 -0700
Subject: [PATCH 123/320] Drivers: hv: balloon: Fix a bug in the hot-add code

As we hot-add 128 MB chunks of memory, we wait to ensure that the memory
is onlined before attempting to hot-add the next chunk. If the udev rule for
memory hot-add is not executed within the allowed time, we would rollback the
state and abort further hot-add. Since the hot-add has succeeded and the only
failure is that the memory is not onlined within the allowed time, we should not
be rolling back the state. Fix this bug.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stable <stable@vger.kernel.org>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_balloon.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 4c605c70ebf9..61b7351df1d4 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -562,7 +562,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 				struct hv_hotadd_state *has)
 {
 	int ret = 0;
-	int i, nid, t;
+	int i, nid;
 	unsigned long start_pfn;
 	unsigned long processed_pfn;
 	unsigned long total_pfn = pfn_count;
@@ -607,14 +607,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 
 		/*
 		 * Wait for the memory block to be onlined.
+		 * Since the hot add has succeeded, it is ok to
+		 * proceed even if the pages in the hot added region
+		 * have not been "onlined" within the allowed time.
 		 */
-		t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-		if (t == 0) {
-			pr_info("hot_add memory timedout\n");
-			has->ha_end_pfn -= HA_CHUNK;
-			has->covered_end_pfn -=  processed_pfn;
-			break;
-		}
+		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
 
 	}
 

From c5e2254f8d63a6654149aa32ac5f2b7dd66a976d Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Sun, 14 Jul 2013 22:38:12 -0700
Subject: [PATCH 124/320] Drivers: hv: balloon: Do not post pressure status if
 interrupted

When we are posting pressure status, we may get interrupted and handle
the un-balloon operation. In this case just don't post the status as we
know the pressure status is stale.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_balloon.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 61b7351df1d4..deb5c25305af 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -975,6 +975,14 @@ static void post_status(struct hv_dynmem_device *dm)
 				dm->num_pages_ballooned +
 				compute_balloon_floor();
 
+	/*
+	 * If our transaction ID is no longer current, just don't
+	 * send the status. This can happen if we were interrupted
+	 * after we picked our transaction ID.
+	 */
+	if (status.hdr.trans_id != atomic_read(&trans_id))
+		return;
+
 	vmbus_sendpacket(dm->dev->channel, &status,
 				sizeof(struct dm_status),
 				(unsigned long)NULL,

From ed4bb9744b41d39ba35080c2390e201575121dc7 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Thu, 11 Jul 2013 12:03:31 -0700
Subject: [PATCH 125/320] Tools: hv: KVP: Fix a bug in IPV6 subnet enumeration

Each subnet string needs to be separated with a semicolon. Fix this bug.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/hv/hv_kvp_daemon.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index ca9fa4d32e07..07819bfa7dba 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1026,9 +1026,10 @@ kvp_get_ip_info(int family, char *if_name, int op,
 
 				if (sn_offset == 0)
 					strcpy(sn_str, cidr_mask);
-				else
+				else {
+					strcat((char *)ip_buffer->sub_net, ";");
 					strcat(sn_str, cidr_mask);
-				strcat((char *)ip_buffer->sub_net, ";");
+				}
 				sn_offset += strlen(sn_str) + 1;
 			}
 

From 7dcd2677ea912573d9ed4bcd629b0023b2d11505 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Wed, 17 Jul 2013 10:22:58 +0400
Subject: [PATCH 126/320] drm/i915: fix long-standing SNB regression in power
 consumption after resume v2

This patch fixes regression in power consumtion of sandy bridge gpu, which
exists since v3.6 Sometimes after resuming from s2ram gpu starts thinking that
it's extremely busy. After that it never reaches rc6 state.

Bug exists since kernel v3.6:

commit b4ae3f22d238617ca11610b29fde16cf8c0bc6e0
Author: Jesse Barnes <jbarnes@virtuousgeek.org>
Date:   Thu Jun 14 11:04:48 2012 -0700

    drm/i915: load boot context at driver init time

For some reason RC6 is already enabled at the beginning of resuming process.
Following initliaztion breaks some internal state and confuses RPS engine.
This patch disables RC6 at the beginnig of resume and initialization.

I've rearranged initialization sequence, because intel_disable_gt_powersave()
needs initialized force_wake_get/put and some locks from the dev_priv.

Note: The culprit in the initialization sequence seems to be the write
to MBCTL added in the above mentioned commit. The first version of
this patch just held a forcewake reference across the clock gating
init functions, which seems to have been enought to gather quite a few
positive test reports. But since that smelled a bit like ad-hoc
duct-tape v2 now just disables rps/rc6 across the entire hw setup.

References: https://bugs.freedesktop.org/show_bug.cgi?id=54089
References: https://bugzilla.kernel.org/show_bug.cgi?id=58971
References: https://patchwork.kernel.org/patch/2827634/ (patch v1)
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
[danvet: Add note about v1 vs. v2 of this patch and use standard
layout for the commit citation. Also add the tested-bys from v1 and a
cc: stable.]
Cc: stable@vger.kernel.org (Note: tiny conflict due to the addition of
the backlight lock in 3.11)
Tested-by: Alexander Kaltsas <alexkaltsas@gmail.com> (v1)
Tested-by: rocko <rockorequin@hotmail.com> (v1)
Tested-by: JohnMB <johnmbryant@sky.com> (v1)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c | 18 +++++++++---------
 drivers/gpu/drm/i915/intel_pm.c |  5 +++--
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index adb319b53ecd..5c0663f58aff 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1495,6 +1495,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev_priv->dev = dev;
 	dev_priv->info = info;
 
+	spin_lock_init(&dev_priv->irq_lock);
+	spin_lock_init(&dev_priv->gpu_error.lock);
+	spin_lock_init(&dev_priv->rps.lock);
+	spin_lock_init(&dev_priv->backlight.lock);
+	mutex_init(&dev_priv->dpio_lock);
+	mutex_init(&dev_priv->rps.hw_lock);
+	mutex_init(&dev_priv->modeset_restore_lock);
+
 	i915_dump_device_info(dev_priv);
 
 	if (i915_get_bridge_dev(dev)) {
@@ -1586,6 +1594,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	intel_irq_init(dev);
 	intel_gt_init(dev);
+	intel_gt_reset(dev);
 
 	/* Try to make sure MCHBAR is enabled before poking at it */
 	intel_setup_mchbar(dev);
@@ -1610,15 +1619,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!IS_I945G(dev) && !IS_I945GM(dev))
 		pci_enable_msi(dev->pdev);
 
-	spin_lock_init(&dev_priv->irq_lock);
-	spin_lock_init(&dev_priv->gpu_error.lock);
-	spin_lock_init(&dev_priv->rps.lock);
-	spin_lock_init(&dev_priv->backlight.lock);
-	mutex_init(&dev_priv->dpio_lock);
-
-	mutex_init(&dev_priv->rps.hw_lock);
-	mutex_init(&dev_priv->modeset_restore_lock);
-
 	dev_priv->num_plane = 1;
 	if (IS_VALLEYVIEW(dev))
 		dev_priv->num_plane = 2;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d10e6735771f..828c426b4b92 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5487,6 +5487,9 @@ void intel_gt_reset(struct drm_device *dev)
 		if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
 			__gen6_gt_force_wake_mt_reset(dev_priv);
 	}
+
+	/* BIOS often leaves RC6 enabled, but disable it for hw init */
+	intel_disable_gt_powersave(dev);
 }
 
 void intel_gt_init(struct drm_device *dev)
@@ -5495,8 +5498,6 @@ void intel_gt_init(struct drm_device *dev)
 
 	spin_lock_init(&dev_priv->gt_lock);
 
-	intel_gt_reset(dev);
-
 	if (IS_VALLEYVIEW(dev)) {
 		dev_priv->gt.force_wake_get = vlv_force_wake_get;
 		dev_priv->gt.force_wake_put = vlv_force_wake_put;

From 35c95375f69ceec721fea67a0532bc17ceb5cf64 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 17 Jul 2013 06:55:04 +0200
Subject: [PATCH 127/320] drm/i915: Sanitize shared dpll state

There seems to be no limit to the amount of gunk the firmware can
leave behind. Some platforms leave pch dplls on which are not in
active use at all. The example in the bug report is a Apple Macbook
Pro.

Note that this escape scrunity of the hw state checker until we've
tried to use this enabled, but unused pll since we did only check for
the inverse case of a in-used, but disabled pll.

v2: Add a WARN in the pll state checker which would have caught this
case.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66952
Reported-and-tested-by: shui yangwei <yangweix.shui@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c59335ce84f4..76b01c16a0e8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8314,6 +8314,8 @@ check_shared_dpll_state(struct drm_device *dev)
 		     pll->active, pll->refcount);
 		WARN(pll->active && !pll->on,
 		     "pll in active use but not on in sw tracking\n");
+		WARN(pll->on && !pll->active,
+		     "pll in on but not on in use in sw tracking\n");
 		WARN(pll->on != active,
 		     "pll on state mismatch (expected %i, found %i)\n",
 		     pll->on, active);
@@ -9814,8 +9816,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		}
 		pll->refcount = pll->active;
 
-		DRM_DEBUG_KMS("%s hw state readout: refcount %i\n",
-			      pll->name, pll->refcount);
+		DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n",
+			      pll->name, pll->refcount, pll->on);
 	}
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
@@ -9866,6 +9868,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 	struct drm_plane *plane;
 	struct intel_crtc *crtc;
 	struct intel_encoder *encoder;
+	int i;
 
 	intel_modeset_readout_hw_state(dev);
 
@@ -9881,6 +9884,18 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 		intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]");
 	}
 
+	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
+
+		if (!pll->on || pll->active)
+			continue;
+
+		DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name);
+
+		pll->disable(dev_priv, pll);
+		pll->on = false;
+	}
+
 	if (force_restore) {
 		/*
 		 * We need to use raw interfaces for restoring state to avoid

From 2e57f47d317dd035b18634b0c602272529368fcc Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 17 Jul 2013 12:14:40 +0100
Subject: [PATCH 128/320] drm/i915: Fix dereferencing invalid connectors in
 is_crtc_connector_off()

In commit e3de42b68478a8c95dd27520e9adead2af9477a5
Author: Imre Deak <imre.deak@intel.com>
Date:   Fri May 3 19:44:07 2013 +0200

    drm/i915: force full modeset if the connector is in DPMS OFF mode

a new function was added that walked over the set of connectors to see
if any of the currently associated CRTC was switched off. This function
walked an array of connectors, rather than the array of pointers to
connectors contained in the drm_mode_set - i.e. it was dereferencing far
past the end of the first connector. This only becomes an issue if we
attempt to use a clone mode (i.e. more than one connector per CRTC) such
that set->num_connectors > 1.

Reported-by: Timo Aaltonen <tjaalton@ubuntu.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65927
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Egbert Eich <eich@suse.de>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_display.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 76b01c16a0e8..76b034be2c7c 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8540,15 +8540,20 @@ static void intel_set_config_restore_state(struct drm_device *dev,
 }
 
 static bool
-is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors,
-		      int num_connectors)
+is_crtc_connector_off(struct drm_mode_set *set)
 {
 	int i;
 
-	for (i = 0; i < num_connectors; i++)
-		if (connectors[i].encoder &&
-		    connectors[i].encoder->crtc == crtc &&
-		    connectors[i].dpms != DRM_MODE_DPMS_ON)
+	if (set->num_connectors == 0)
+		return false;
+
+	if (WARN_ON(set->connectors == NULL))
+		return false;
+
+	for (i = 0; i < set->num_connectors; i++)
+		if (set->connectors[i]->encoder &&
+		    set->connectors[i]->encoder->crtc == set->crtc &&
+		    set->connectors[i]->dpms != DRM_MODE_DPMS_ON)
 			return true;
 
 	return false;
@@ -8561,10 +8566,8 @@ intel_set_config_compute_mode_changes(struct drm_mode_set *set,
 
 	/* We should be able to check here if the fb has the same properties
 	 * and then just flip_or_move it */
-	if (set->connectors != NULL &&
-	    is_crtc_connector_off(set->crtc, *set->connectors,
-				  set->num_connectors)) {
-			config->mode_changed = true;
+	if (is_crtc_connector_off(set)) {
+		config->mode_changed = true;
 	} else if (set->crtc->fb != set->fb) {
 		/* If we have no fb then treat it as a full mode set */
 		if (set->crtc->fb == NULL) {

From 53ce9a3364de0723b27d861de93bfc882f7db050 Mon Sep 17 00:00:00 2001
From: Niels de Vos <ndevos@redhat.com>
Date: Wed, 17 Jul 2013 14:53:53 +0200
Subject: [PATCH 129/320] fuse: readdirplus: fix dentry leak

In case d_lookup() returns a dentry with d_inode == NULL, the dentry is not
returned with dput(). This results in triggering a BUG() in
shrink_dcache_for_umount_subtree():

  BUG: Dentry ...{i=0,n=...} still in use (1) [unmount of fuse fuse]

[SzM: need to d_drop() as well]

Reported-by: Justin Clift <jclift@redhat.com>
Signed-off-by: Niels de Vos <ndevos@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Tested-by: Brian Foster <bfoster@redhat.com>
Tested-by: Niels de Vos <ndevos@redhat.com>
CC: stable@vger.kernel.org
---
 fs/fuse/dir.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0eda52738ec4..2ae5308174e2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1227,9 +1227,15 @@ static int fuse_direntplus_link(struct file *file,
 
 	name.hash = full_name_hash(name.name, name.len);
 	dentry = d_lookup(parent, &name);
-	if (dentry && dentry->d_inode) {
+	if (dentry) {
 		inode = dentry->d_inode;
-		if (get_node_id(inode) == o->nodeid) {
+		if (!inode) {
+			d_drop(dentry);
+		} else if (get_node_id(inode) != o->nodeid) {
+			err = d_invalidate(dentry);
+			if (err)
+				goto out;
+		} else {
 			struct fuse_inode *fi;
 			fi = get_fuse_inode(inode);
 			spin_lock(&fc->lock);
@@ -1242,9 +1248,6 @@ static int fuse_direntplus_link(struct file *file,
 			 */
 			goto found;
 		}
-		err = d_invalidate(dentry);
-		if (err)
-			goto out;
 		dput(dentry);
 		dentry = NULL;
 	}

From a28ef45cbb1e7fadd5159deb17b02de15c6e4aaf Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 17 Jul 2013 14:53:53 +0200
Subject: [PATCH 130/320] fuse: readdirplus: sanity checks

Add sanity checks before adding or updating an entry with data received
from readdirplus.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@vger.kernel.org
---
 fs/fuse/dir.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2ae5308174e2..bb6829720dd6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1223,6 +1223,12 @@ static int fuse_direntplus_link(struct file *file,
 		if (name.name[1] == '.' && name.len == 2)
 			return 0;
 	}
+
+	if (invalid_nodeid(o->nodeid))
+		return -EIO;
+	if (!fuse_valid_type(o->attr.mode))
+		return -EIO;
+
 	fc = get_fuse_conn(dir);
 
 	name.hash = full_name_hash(name.name, name.len);
@@ -1231,10 +1237,14 @@ static int fuse_direntplus_link(struct file *file,
 		inode = dentry->d_inode;
 		if (!inode) {
 			d_drop(dentry);
-		} else if (get_node_id(inode) != o->nodeid) {
+		} else if (get_node_id(inode) != o->nodeid ||
+			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
 			err = d_invalidate(dentry);
 			if (err)
 				goto out;
+		} else if (is_bad_inode(inode)) {
+			err = -EIO;
+			goto out;
 		} else {
 			struct fuse_inode *fi;
 			fi = get_fuse_inode(inode);

From 2914941e3178d84a216fc4eb85292dfef3b6d628 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 17 Jul 2013 14:53:53 +0200
Subject: [PATCH 131/320] fuse: readdirplus: fix instantiate

Fuse does instantiation slightly differently from NFS/CIFS which use
d_materialise_unique().

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@vger.kernel.org
---
 fs/fuse/dir.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bb6829720dd6..5dfbb5439e4e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1272,10 +1272,19 @@ static int fuse_direntplus_link(struct file *file,
 	if (!inode)
 		goto out;
 
-	alias = d_materialise_unique(dentry, inode);
-	err = PTR_ERR(alias);
-	if (IS_ERR(alias))
-		goto out;
+	if (S_ISDIR(inode->i_mode)) {
+		mutex_lock(&fc->inst_mutex);
+		alias = fuse_d_add_directory(dentry, inode);
+		mutex_unlock(&fc->inst_mutex);
+		err = PTR_ERR(alias);
+		if (IS_ERR(alias)) {
+			iput(inode);
+			goto out;
+		}
+	} else {
+		alias = d_splice_alias(inode, dentry);
+	}
+
 	if (alias) {
 		dput(dentry);
 		dentry = alias;

From fa2b7213600f8110ebac64acebc78a885b0594a0 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 17 Jul 2013 14:53:53 +0200
Subject: [PATCH 132/320] fuse: readdirplus: change attributes once

If we got the inode through fuse_iget() then the attributes are already
up-to-date.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5dfbb5439e4e..37d85e05b1f5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1252,6 +1252,10 @@ static int fuse_direntplus_link(struct file *file,
 			fi->nlookup++;
 			spin_unlock(&fc->lock);
 
+			fuse_change_attributes(inode, &o->attr,
+					       entry_attr_timeout(o),
+					       attr_version);
+
 			/*
 			 * The other branch to 'found' comes via fuse_iget()
 			 * which bumps nlookup inside
@@ -1291,9 +1295,6 @@ static int fuse_direntplus_link(struct file *file,
 	}
 
 found:
-	fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
-			       attr_version);
-
 	fuse_change_entry_timeout(dentry, o);
 
 	err = 0;

From c7263bcdc4d150e3c718b711a5f6fad496d9f662 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 17 Jul 2013 14:53:54 +0200
Subject: [PATCH 133/320] fuse: readdirplus: cleanup

Niels noted that we don't need the 'dentry = NULL' line.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: Niels de Vos <ndevos@redhat.com>
---
 fs/fuse/dir.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 37d85e05b1f5..72a5d5b04494 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1263,7 +1263,6 @@ static int fuse_direntplus_link(struct file *file,
 			goto found;
 		}
 		dput(dentry);
-		dentry = NULL;
 	}
 
 	dentry = d_alloc(parent, &name);
@@ -1299,8 +1298,7 @@ static int fuse_direntplus_link(struct file *file,
 
 	err = 0;
 out:
-	if (dentry)
-		dput(dentry);
+	dput(dentry);
 	return err;
 }
 

From 94a335dba34ff47cad3d6d0c29b452d43a1be3c8 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 17 Jul 2013 14:51:28 +0200
Subject: [PATCH 134/320] drm/i915: correctly restore fences with objects
 attached
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To avoid stalls we delay tiling changes and especially hold of
committing the new fence state for as long as possible.
Synchronization points are in the execbuf code and in our gtt fault
handler.

Unfortunately we've missed that tricky detail when adding proper fence
restore code in

commit 19b2dbde5732170a03bd82cc8bd442cf88d856f7
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jun 12 10:15:12 2013 +0100

    drm/i915: Restore fences after resume and GPU resets

The result was that we've restored fences for objects with no tiling,
since the object<->fence link still existed after resume. Now that
wouldn't have been too bad since any subsequent access would have
fixed things up, but if we've changed from tiled to untiled real havoc
happened:

The tiling stride is stored -1 in the fence register, so a stride of 0
resulted in all 1s in the top 32bits, and so a completely bogus fence
spanning everything from the start of the object to the top of the
GTT. The tell-tale in the register dumps looks like:

                 FENCE START 2: 0x0214d001
                 FENCE END 2: 0xfffff3ff

Bit 11 isn't set since the hw doesn't store it, even when writing all
1s (at least on my snb here).

To prevent such a gaffle in the future add a sanity check for fences
with an untiled object attached in i915_gem_write_fence.

v2: Fix the WARN, spotted by Chris.

v3: Trying to reuse get_fences looked ugly and obfuscated the code.
Instead reuse update_fence and to make it really dtrt also move the
fence dirty state clearing into update_fence.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Stéphane Marchesin <marcheu@chromium.org>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=60530
Cc: stable@vger.kernel.org (for 3.10 only)
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Tested-by: Matthew Garrett <matthew.garrett@nebula.com>
Tested-by: Björn Bidar <theodorstormgrade@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 97afd2639fb6..d9e2208cfe98 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2258,7 +2258,17 @@ void i915_gem_restore_fences(struct drm_device *dev)
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-		i915_gem_write_fence(dev, i, reg->obj);
+
+		/*
+		 * Commit delayed tiling changes if we have an object still
+		 * attached to the fence, otherwise just clear the fence.
+		 */
+		if (reg->obj) {
+			i915_gem_object_update_fence(reg->obj, reg,
+						     reg->obj->tiling_mode);
+		} else {
+			i915_gem_write_fence(dev, i, NULL);
+		}
 	}
 }
 
@@ -2795,6 +2805,10 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
 	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
 		mb();
 
+	WARN(obj && (!obj->stride || !obj->tiling_mode),
+	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+	     obj->stride, obj->tiling_mode);
+
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
@@ -2836,6 +2850,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 		fence->obj = NULL;
 		list_del_init(&fence->lru_list);
 	}
+	obj->fence_dirty = false;
 }
 
 static int
@@ -2965,7 +2980,6 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 		return 0;
 
 	i915_gem_object_update_fence(obj, reg, enable);
-	obj->fence_dirty = false;
 
 	return 0;
 }

From 146c3442f2dd0f50d9431aea5d0d10dfd97c9999 Mon Sep 17 00:00:00 2001
From: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>
Date: Mon, 15 Jul 2013 16:32:44 +0800
Subject: [PATCH 135/320] tracing: Use trace_seq_puts()/trace_seq_putc() where
 possible

For string without format specifiers, use trace_seq_puts()
or trace_seq_putc().

Link: http://lkml.kernel.org/r/51E3B3AC.1000605@huawei.com

Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
[ fixed a trace_seq_putc(s, " ") to trace_seq_putc(s, ' ') ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c           | 10 +++---
 kernel/trace/trace_events_filter.c   |  4 +--
 kernel/trace/trace_functions_graph.c | 52 ++++++++++++++--------------
 kernel/trace/trace_mmiotrace.c       |  8 ++---
 kernel/trace/trace_output.c          | 14 ++++----
 kernel/trace/trace_syscalls.c        |  2 +-
 6 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e444ff88f0a4..eef2e566b2e7 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 {
 	int ret;
 
-	ret = trace_seq_printf(s, "# compressed entry header\n");
-	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
-	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
-	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
-	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_puts(s, "# compressed entry header\n");
+	ret = trace_seq_puts(s, "\ttype_len    :    5 bits\n");
+	ret = trace_seq_puts(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_puts(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_putc(s, '\n');
 	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
 			       RINGBUF_TYPE_PADDING);
 	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0d883dc057d6..0c7b75a8acc8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -646,7 +646,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 	if (filter && filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
-		trace_seq_printf(s, "none\n");
+		trace_seq_puts(s, "none\n");
 	mutex_unlock(&event_mutex);
 }
 
@@ -660,7 +660,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
 	if (filter && filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
-		trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
+		trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
 	mutex_unlock(&event_mutex);
 }
 
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8388bc99f2ee..d56ae9bae00b 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -446,7 +446,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
 
 	/* First spaces to align center */
 	for (i = 0; i < spaces / 2; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
 
 	/* Last spaces to align center */
 	for (i = 0; i < spaces - (spaces / 2); i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
  ------------------------------------------
 
  */
-	ret = trace_seq_printf(s,
+	ret = trace_seq_puts(s,
 		" ------------------------------------------\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
 	if (ret == TRACE_TYPE_PARTIAL_LINE)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s, " => ");
+	ret = trace_seq_puts(s, " => ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
 	if (ret == TRACE_TYPE_PARTIAL_LINE)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s,
+	ret = trace_seq_puts(s,
 		"\n ------------------------------------------\n\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 			ret = print_graph_proc(s, pid);
 			if (ret == TRACE_TYPE_PARTIAL_LINE)
 				return TRACE_TYPE_PARTIAL_LINE;
-			ret = trace_seq_printf(s, " | ");
+			ret = trace_seq_puts(s, " | ");
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}
@@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 		return ret;
 
 	if (type == TRACE_GRAPH_ENT)
-		ret = trace_seq_printf(s, "==========>");
+		ret = trace_seq_puts(s, "==========>");
 	else
-		ret = trace_seq_printf(s, "<==========");
+		ret = trace_seq_puts(s, "<==========");
 
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
 	if (ret != TRACE_TYPE_HANDLED)
 		return ret;
 
-	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_putc(s, '\n');
 
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 		len += strlen(nsecs_str);
 	}
 
-	ret = trace_seq_printf(s, " us ");
+	ret = trace_seq_puts(s, " us ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	/* Print remaining spaces to fit the row's width */
 	for (i = len; i < 7; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
 	/* No real adata, just filling the column with spaces */
 	switch (duration) {
 	case DURATION_FILL_FULL:
-		ret = trace_seq_printf(s, "              |  ");
+		ret = trace_seq_puts(s, "              |  ");
 		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 	case DURATION_FILL_START:
-		ret = trace_seq_printf(s, "  ");
+		ret = trace_seq_puts(s, "  ");
 		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 	case DURATION_FILL_END:
-		ret = trace_seq_printf(s, " |");
+		ret = trace_seq_puts(s, " |");
 		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 	}
 
@@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
 	if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
 		/* Duration exceeded 100 msecs */
 		if (duration > 100000ULL)
-			ret = trace_seq_printf(s, "! ");
+			ret = trace_seq_puts(s, "! ");
 		/* Duration exceeded 10 msecs */
 		else if (duration > 10000ULL)
-			ret = trace_seq_printf(s, "+ ");
+			ret = trace_seq_puts(s, "+ ");
 	}
 
 	/*
@@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
 	 * to fill out the space.
 	 */
 	if (ret == -1)
-		ret = trace_seq_printf(s, "  ");
+		ret = trace_seq_puts(s, "  ");
 
 	/* Catching here any failure happenned above */
 	if (!ret)
@@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
 	if (ret != TRACE_TYPE_HANDLED)
 		return ret;
 
-	ret = trace_seq_printf(s, "|  ");
+	ret = trace_seq_puts(s, "|  ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
 
 	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
 		if (ret == TRACE_TYPE_PARTIAL_LINE)
 			return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = trace_seq_printf(s, " | ");
+		ret = trace_seq_puts(s, " | ");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 
 	/* Closing brace */
 	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 	 * belongs to, write out the function name.
 	 */
 	if (func_match) {
-		ret = trace_seq_printf(s, "}\n");
+		ret = trace_seq_puts(s, "}\n");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	} else {
@@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
 	/* Indentation */
 	if (depth > 0)
 		for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
-			ret = trace_seq_printf(s, " ");
+			ret = trace_seq_putc(s, ' ');
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}
 
 	/* The comment */
-	ret = trace_seq_printf(s, "/* ");
+	ret = trace_seq_puts(s, "/* ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
 		s->len--;
 	}
 
-	ret = trace_seq_printf(s, " */\n");
+	ret = trace_seq_puts(s, " */\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index a5e8f4878bfa..b3dcfb2f0fef 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
 	if (drv)
 		ret += trace_seq_printf(s, " %s\n", drv->name);
 	else
-		ret += trace_seq_printf(s, " \n");
+		ret += trace_seq_puts(s, " \n");
 	return ret;
 }
 
@@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter)
 	struct header_iter *hiter;
 	struct trace_seq *s = &iter->seq;
 
-	trace_seq_printf(s, "VERSION 20070824\n");
+	trace_seq_puts(s, "VERSION 20070824\n");
 
 	hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
 	if (!hiter)
@@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
 			(rw->value >> 0) & 0xff, rw->pc, 0);
 		break;
 	default:
-		ret = trace_seq_printf(s, "rw what?\n");
+		ret = trace_seq_puts(s, "rw what?\n");
 		break;
 	}
 	if (ret)
@@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
 			secs, usec_rem, m->map_id, 0UL, 0);
 		break;
 	default:
-		ret = trace_seq_printf(s, "map what?\n");
+		ret = trace_seq_puts(s, "map what?\n");
 		break;
 	}
 	if (ret)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bb922d9ee51b..34e7cbac0c9c 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
 
 	trace_assign_type(field, entry);
 
-	ret = trace_seq_printf(s, "%s", field->buf);
+	ret = trace_seq_puts(s, field->buf);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 			if (ret)
 				ret = trace_seq_puts(s, "??");
 			if (ret)
-				ret = trace_seq_puts(s, "\n");
+				ret = trace_seq_putc(s, '\n');
 			continue;
 		}
 		if (!ret)
 			break;
 		if (ret)
 			ret = seq_print_user_ip(s, mm, ip, sym_flags);
-		ret = trace_seq_puts(s, "\n");
+		ret = trace_seq_putc(s, '\n');
 	}
 
 	if (mm)
@@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
 	int ret;
 
 	if (!ip)
-		return trace_seq_printf(s, "0");
+		return trace_seq_putc(s, '0');
 
 	if (sym_flags & TRACE_ITER_SYM_OFFSET)
 		ret = seq_print_sym_offset(s, "%s", ip);
@@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
 		goto partial;
 
 	if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
-		if (!trace_seq_printf(s, " <-"))
+		if (!trace_seq_puts(s, " <-"))
 			goto partial;
 		if (!seq_print_ip_sym(s,
 				      field->parent_ip,
 				      flags))
 			goto partial;
 	}
-	if (!trace_seq_printf(s, "\n"))
+	if (!trace_seq_putc(s, '\n'))
 		goto partial;
 
 	return TRACE_TYPE_HANDLED;
@@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 		if (!seq_print_ip_sym(s, *p, flags))
 			goto partial;
-		if (!trace_seq_puts(s, "\n"))
+		if (!trace_seq_putc(s, '\n'))
 			goto partial;
 	}
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 322e16461072..061156215721 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
 	entry = syscall_nr_to_meta(syscall);
 
 	if (!entry) {
-		trace_seq_printf(s, "\n");
+		trace_seq_putc(s, '\n');
 		return TRACE_TYPE_HANDLED;
 	}
 

From d611851b421731e2afd9cb956daae001af57a423 Mon Sep 17 00:00:00 2001
From: "zhangwei(Jovi)" <jovi.zhangwei@huawei.com>
Date: Mon, 15 Jul 2013 16:32:50 +0800
Subject: [PATCH 136/320] tracing: Typo fix on ring buffer comments

There have some mismatch between comments with
real function name, update it.

This patch also add some missed function arguments
description.

Link: http://lkml.kernel.org/r/51E3B3B2.4080307@huawei.com

Signed-off-by: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index eef2e566b2e7..cc2f66f68dc5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
 }
 
 /**
- * check_pages - integrity check of buffer pages
+ * rb_check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
  * As a safety measure we check to make sure the data pages have not
@@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self,
 #endif
 
 /**
- * ring_buffer_alloc - allocate a new ring_buffer
+ * __ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
  *
@@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work)
  * ring_buffer_resize - resize the ring buffer
  * @buffer: the buffer to resize.
  * @size: the new size.
+ * @cpu_id: the cpu buffer to resize
  *
  * Minimum size is 2 * BUF_PAGE_SIZE.
  *
@@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  * expected.
  *
  * After a sequence of ring_buffer_read_prepare calls, the user is
- * expected to make at least one call to ring_buffer_prepare_sync.
+ * expected to make at least one call to ring_buffer_read_prepare_sync.
  * Afterwards, ring_buffer_read_start is invoked to get things going
  * for real.
  *
- * This overall must be paired with ring_buffer_finish.
+ * This overall must be paired with ring_buffer_read_finish.
  */
 struct ring_buffer_iter *
 ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
@@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
  * an intervening ring_buffer_read_prepare_sync must have been
  * performed.
  *
- * Must be paired with ring_buffer_finish.
+ * Must be paired with ring_buffer_read_finish.
  */
 void
 ring_buffer_read_start(struct ring_buffer_iter *iter)
@@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
 /**
- * ring_buffer_finish - finish reading the iterator of the buffer
+ * ring_buffer_read_finish - finish reading the iterator of the buffer
  * @iter: The iterator retrieved by ring_buffer_start
  *
  * This re-enables the recording to the buffer, and frees the
@@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
+ * @cpu: the cpu buffer to allocate.
  *
  * This function is used in conjunction with ring_buffer_read_page.
  * When reading a full page from the ring buffer, these functions
@@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
  * to swap with a page in the ring buffer.
  *
  * for example:
- *	rpage = ring_buffer_alloc_read_page(buffer);
+ *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
  *	if (!rpage)
  *		return error;
  *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);

From b8ebfd3f7113b63dda93d76bfec638c00e6bd514 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Jun 2013 19:02:04 +0200
Subject: [PATCH 137/320] tracing/function: Avoid perf_trace_buf_*() if
 event_function.perf_events is empty

perf_trace_buf_prepare() + perf_trace_buf_submit(head, task => NULL)
make no sense if hlist_empty(head). Change perf_ftrace_function_call()
to check event_function.perf_events beforehand.

Link: http://lkml.kernel.org/r/20130617170204.GA19803@redhat.com

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_event_perf.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 84b1e045faba..12df5573086e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -266,6 +266,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 	struct pt_regs regs;
 	int rctx;
 
+	head = this_cpu_ptr(event_function.perf_events);
+	if (hlist_empty(head))
+		return;
+
 #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
 		    sizeof(u64)) - sizeof(u32))
 
@@ -279,8 +283,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
-
-	head = this_cpu_ptr(event_function.perf_events);
 	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
 			      1, &regs, head, NULL);
 

From 421c7860c6e1989da3962fafdd6699316c9f8e20 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Jun 2013 19:02:07 +0200
Subject: [PATCH 138/320] tracing/syscall: Avoid perf_trace_buf_*() if
 sys_data->perf_events is empty

perf_trace_buf_prepare() + perf_trace_buf_submit(head, task => NULL)
make no sense if hlist_empty(head). Change perf_syscall_enter/exit()
to check sys_data->{enter,exit}_event->perf_events beforehand.

Link: http://lkml.kernel.org/r/20130617170207.GA19806@redhat.com

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_syscalls.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 061156215721..ac0085777fbd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -566,6 +566,10 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	if (!sys_data)
 		return;
 
+	head = this_cpu_ptr(sys_data->enter_event->perf_events);
+	if (hlist_empty(head))
+		return;
+
 	/* get the size after alignment with the u32 buffer size field */
 	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
@@ -583,8 +587,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-
-	head = this_cpu_ptr(sys_data->enter_event->perf_events);
 	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
@@ -642,6 +644,10 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	if (!sys_data)
 		return;
 
+	head = this_cpu_ptr(sys_data->exit_event->perf_events);
+	if (hlist_empty(head))
+		return;
+
 	/* We can probably do that at build time */
 	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
@@ -661,8 +667,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
-
-	head = this_cpu_ptr(sys_data->exit_event->perf_events);
 	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 

From cd92bf61d6d70bd3eb33b46d600e3f3eb9c5778a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 17 Jun 2013 19:02:11 +0200
Subject: [PATCH 139/320] tracing/perf: Move the PERF_MAX_TRACE_SIZE check into
 perf_trace_buf_prepare()

Every perf_trace_buf_prepare() caller does
WARN_ONCE(size > PERF_MAX_TRACE_SIZE, message) and "message" is
almost the same.

Shift this WARN_ONCE() into perf_trace_buf_prepare(). This changes
the meaning of _ONCE, but I think this is fine.

	- 4947014 2932448 10104832  17984294  1126b26 vmlinux
	+ 4948422 2932448 10104832  17985702  11270a6 vmlinux

on my build.

Link: http://lkml.kernel.org/r/20130617170211.GA19813@redhat.com

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/ftrace.h          |  4 ----
 kernel/trace/trace_event_perf.c |  4 ++++
 kernel/trace/trace_kprobe.c     |  6 ------
 kernel/trace/trace_syscalls.c   | 12 ------------
 kernel/trace/trace_uprobe.c     |  2 --
 5 files changed, 4 insertions(+), 24 deletions(-)

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index d615f78cc6b6..41a6643e2136 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -670,10 +670,6 @@ perf_trace_##call(void *__data, proto)					\
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE,		\
-		      "profile buffer not large enough"))		\
-		return;							\
-									\
 	entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(	\
 		__entry_size, event_call->event.type, &__regs, &rctx);	\
 	if (!entry)							\
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 12df5573086e..80c36bcf66e8 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 
 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
 
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+			"perf buffer not large enough"))
+		return NULL;
+
 	pc = preempt_count();
 
 	*rctxp = perf_swevent_get_recursion_context();
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ed6976493c8..ae6ce835b023 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1087,9 +1087,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
 	__size = sizeof(*entry) + tp->size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		     "profile buffer not large enough"))
-		return;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
@@ -1120,9 +1117,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
 	__size = sizeof(*entry) + tp->size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		     "profile buffer not large enough"))
-		return;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ac0085777fbd..8fd03657bc7d 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -575,10 +575,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		      "perf buffer not large enough"))
-		return;
-
 	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
 				sys_data->enter_event->event.type, regs, &rctx);
 	if (!rec)
@@ -652,14 +648,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	/*
-	 * Impossible, but be paranoid with the future
-	 * How to put this check outside runtime?
-	 */
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		"exit event has grown above perf buffer size"))
-		return;
-
 	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
 				sys_data->exit_event->event.type, regs, &rctx);
 	if (!rec)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d5d0cd368a56..a23d2d71188e 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -818,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
 
 	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return;
 
 	preempt_disable();
 	head = this_cpu_ptr(call->perf_events);

From a232e270dcb55a70ad3241bc6fc160fd9b5c9e6c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 9 Jul 2013 18:35:26 +0900
Subject: [PATCH 140/320] tracing/kprobe: Wait for disabling all running kprobe
 handlers

Wait for disabling all running kprobe handlers when a kprobe
event is disabled, since the caller, trace_remove_event_call()
supposes that a removing event is disabled completely by
disabling the event.
With this change, ftrace can ensure that there is no running
event handlers after disabling it.

Link: http://lkml.kernel.org/r/20130709093526.20138.93100.stgit@mhiramat-M0-7522

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index ae6ce835b023..3811487e7a7a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -243,11 +243,11 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
 static int
 disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 {
+	struct event_file_link *link = NULL;
+	int wait = 0;
 	int ret = 0;
 
 	if (file) {
-		struct event_file_link *link;
-
 		link = find_event_file_link(tp, file);
 		if (!link) {
 			ret = -EINVAL;
@@ -255,10 +255,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 		}
 
 		list_del_rcu(&link->list);
-		/* synchronize with kprobe_trace_func/kretprobe_trace_func */
-		synchronize_sched();
-		kfree(link);
-
+		wait = 1;
 		if (!list_empty(&tp->files))
 			goto out;
 
@@ -271,8 +268,22 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 			disable_kretprobe(&tp->rp);
 		else
 			disable_kprobe(&tp->rp.kp);
+		wait = 1;
 	}
  out:
+	if (wait) {
+		/*
+		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
+		 * to ensure disabled (all running handlers are finished).
+		 * This is not only for kfree(), but also the caller,
+		 * trace_remove_event_call() supposes it for releasing
+		 * event_call related objects, which will be accessed in
+		 * the kprobe_trace_func/kretprobe_trace_func.
+		 */
+		synchronize_sched();
+		kfree(link);	/* Ignored if link == NULL */
+	}
+
 	return ret;
 }
 

From 609e85a70bcd0eedf4ec60639dbcfb1ab011e054 Mon Sep 17 00:00:00 2001
From: Alexander Z Lam <azl@google.com>
Date: Wed, 10 Jul 2013 17:34:34 -0700
Subject: [PATCH 141/320] tracing: Fix error handling to ensure instances can
 always be removed

Remove debugfs directories for tracing instances during creation if an error
occurs causing the trace_array for that instance to not be added to
ftrace_trace_arrays. If the directory continues to exist after the error, it
cannot be removed because the respective trace_array is not in
ftrace_trace_arrays.

Link: http://lkml.kernel.org/r/1373502874-1706-2-git-send-email-azl@google.com

Cc: stable@vger.kernel.org # 3.10
Cc: Vaibhav Nagarnaik <vnagarnaik@google.com>
Cc: David Sharp <dhsharp@google.com>
Cc: Alexander Z Lam <lambchop468@gmail.com>
Signed-off-by: Alexander Z Lam <azl@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 25b91afc29e0..7c3da7bca05b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5973,8 +5973,10 @@ static int new_instance_create(const char *name)
 		goto out_free_tr;
 
 	ret = event_trace_add_tracer(tr->dir, tr);
-	if (ret)
+	if (ret) {
+		debugfs_remove_recursive(tr->dir);
 		goto out_free_tr;
+	}
 
 	init_tracer_debugfs(tr, tr->dir);
 

From f77d09a384676bde6445413949d9d2c508ff3e62 Mon Sep 17 00:00:00 2001
From: Alexander Z Lam <azl@google.com>
Date: Thu, 18 Jul 2013 11:18:44 -0700
Subject: [PATCH 142/320] tracing: Miscellaneous fixes for trace_array ref
 counting

Some error paths did not handle ref counting properly, and some trace files need
ref counting.

Link: http://lkml.kernel.org/r/1374171524-11948-1-git-send-email-azl@google.com

Cc: stable@vger.kernel.org # 3.10
Cc: Vaibhav Nagarnaik <vnagarnaik@google.com>
Cc: David Sharp <dhsharp@google.com>
Cc: Alexander Z Lam <lambchop468@gmail.com>
Signed-off-by: Alexander Z Lam <azl@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 24 ++++++++++++++++++------
 kernel/trace/trace_events.c | 21 +++++++++++++++++++--
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7c3da7bca05b..7d9ceab42564 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3008,7 +3008,6 @@ static int tracing_release(struct inode *inode, struct file *file)
 
 	iter = m->private;
 	tr = iter->tr;
-	trace_array_put(tr);
 
 	mutex_lock(&trace_types_lock);
 
@@ -3023,6 +3022,9 @@ static int tracing_release(struct inode *inode, struct file *file)
 	if (!iter->snapshot)
 		/* reenable tracing if it was previously enabled */
 		tracing_start_tr(tr);
+
+	__trace_array_put(tr);
+
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -3447,6 +3449,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 static int tracing_trace_options_open(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
+	int ret;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -3454,7 +3457,11 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file)
 	if (trace_array_get(tr) < 0)
 		return -ENODEV;
 
-	return single_open(file, tracing_trace_options_show, inode->i_private);
+	ret = single_open(file, tracing_trace_options_show, inode->i_private);
+	if (ret < 0)
+		trace_array_put(tr);
+
+	return ret;
 }
 
 static const struct file_operations tracing_iter_fops = {
@@ -3958,6 +3965,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter) {
 		ret = -ENOMEM;
+		__trace_array_put(tr);
 		goto out;
 	}
 
@@ -4704,21 +4712,24 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
 			ret = PTR_ERR(iter);
 	} else {
 		/* Writes still need the seq_file to hold the private data */
+		ret = -ENOMEM;
 		m = kzalloc(sizeof(*m), GFP_KERNEL);
 		if (!m)
-			return -ENOMEM;
+			goto out;
 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 		if (!iter) {
 			kfree(m);
-			return -ENOMEM;
+			goto out;
 		}
+		ret = 0;
+
 		iter->tr = tr;
 		iter->trace_buffer = &tc->tr->max_buffer;
 		iter->cpu_file = tc->cpu;
 		m->private = iter;
 		file->private_data = m;
 	}
-
+out:
 	if (ret < 0)
 		trace_array_put(tr);
 
@@ -5328,9 +5339,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 }
 
 static const struct file_operations tracing_stats_fops = {
-	.open		= tracing_open_generic,
+	.open		= tracing_open_generic_tc,
 	.read		= tracing_stats_read,
 	.llseek		= generic_file_llseek,
+	.release	= tracing_release_generic_tc,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7d854290bf81..7a75cb22eab7 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1218,6 +1218,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 
 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_open(struct inode *inode, struct file *file);
+static int ftrace_event_release(struct inode *inode, struct file *file);
 
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
@@ -1245,7 +1246,7 @@ static const struct file_operations ftrace_set_event_fops = {
 	.read = seq_read,
 	.write = ftrace_event_write,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = ftrace_event_release,
 };
 
 static const struct file_operations ftrace_enable_fops = {
@@ -1323,6 +1324,15 @@ ftrace_event_open(struct inode *inode, struct file *file,
 	return ret;
 }
 
+static int ftrace_event_release(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+
+	trace_array_put(tr);
+
+	return seq_release(inode, file);
+}
+
 static int
 ftrace_event_avail_open(struct inode *inode, struct file *file)
 {
@@ -1336,12 +1346,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
 {
 	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
 	struct trace_array *tr = inode->i_private;
+	int ret;
+
+	if (trace_array_get(tr) < 0)
+		return -ENODEV;
 
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
 		ftrace_clear_events(tr);
 
-	return ftrace_event_open(inode, file, seq_ops);
+	ret = ftrace_event_open(inode, file, seq_ops);
+	if (ret < 0)
+		trace_array_put(tr);
+	return ret;
 }
 
 static struct event_subsystem *

From 8f768993394a8c0d3801033c11fd86ce8c88dcac Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 18 Jul 2013 14:41:51 -0400
Subject: [PATCH 143/320] tracing: Add ref_data to function and fgraph tracer
 structs

The selftest for function and function graph tracers are defined as
__init, as they are only executed at boot up. The "tracer" structs
that are associated to those tracers are not setup as __init as they
are used after boot. To stop mismatch warnings, those structures
need to be annotated with __ref_data.

Currently, the tracer structures are defined to __read_mostly, as they
do not really change. But in the future they should be converted to
consts, but that will take a little work because they have a "next"
pointer that gets updated when they are registered. That will have to
wait till the next major release.

Link: http://lkml.kernel.org/r/1373596735.17876.84.camel@gandalf.local.home

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Reported-by: Chen Gang <gang.chen@asianux.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.h                 | 9 +++++++++
 kernel/trace/trace_functions.c       | 2 +-
 kernel/trace/trace_functions_graph.c | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a4f6e1828b6..57b7bb0d39b7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -680,6 +680,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
+/*
+ * Tracer data references selftest functions that only occur
+ * on boot up. These can be __init functions. Thus, when selftests
+ * are enabled, then the tracers need to reference __init functions.
+ */
+#define __tracer_data		__refdata
+#else
+/* Tracers are seldom changed. Optimize when selftests are disabled. */
+#define __tracer_data		__read_mostly
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b863f93b30f3..38fe1483c508 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)
 	return 0;
 }
 
-static struct tracer function_trace __read_mostly =
+static struct tracer function_trace __tracer_data =
 {
 	.name		= "function",
 	.init		= function_trace_init,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d56ae9bae00b..b5c09242683d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = {
 	.funcs		= &graph_functions
 };
 
-static struct tracer graph_trace __read_mostly = {
+static struct tracer graph_trace __tracer_data = {
 	.name		= "function_graph",
 	.open		= graph_trace_open,
 	.pipe_open	= graph_trace_open,

From 7710b639953b791610f0022a7d52d9801c93b969 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 18 Jul 2013 20:47:10 +0200
Subject: [PATCH 144/320] tracing: Simplify the iteration logic in
 f_start/f_next

f_next() looks overcomplicated, and it is not strictly correct
even if this doesn't matter.

Say, FORMAT_FIELD_SEPERATOR should not return NULL (means EOF)
if trace_get_fields() returns an empty list, we should simply
advance to FORMAT_PRINTFMT as we do when we find the end of list.

1. Change f_next() to return "struct list_head *" rather than
   "ftrace_event_field *", and change f_show() to do list_entry().

   This simplifies the code a bit, only f_show() needs to know
   about ftrace_event_field, and f_next() can play with ->prev
   directly

2. Change f_next() to not play with ->prev / return inside the
   switch() statement. It can simply set node = head/common_head,
   the prev-or-advance-to-the-next-magic below does all work.

While at it. f_start() looks overcomplicated too. I don't think
*pos == 0 makes sense as a separate case, just change this code
to do "while" instead of "do/while".

The patch also moves f_start() down, close to f_stop(). This is
purely cosmetic, just to make the locking added by the next patch
more clear/visible.

Link: http://lkml.kernel.org/r/20130718184710.GA4783@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 60 ++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 38 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7a75cb22eab7..76defd91f9b4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -826,59 +826,33 @@ enum {
 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_field *field;
 	struct list_head *common_head = &ftrace_common_fields;
 	struct list_head *head = trace_get_fields(call);
+	struct list_head *node = v;
 
 	(*pos)++;
 
 	switch ((unsigned long)v) {
 	case FORMAT_HEADER:
-		if (unlikely(list_empty(common_head)))
-			return NULL;
-
-		field = list_entry(common_head->prev,
-				   struct ftrace_event_field, link);
-		return field;
+		node = common_head;
+		break;
 
 	case FORMAT_FIELD_SEPERATOR:
-		if (unlikely(list_empty(head)))
-			return NULL;
-
-		field = list_entry(head->prev, struct ftrace_event_field, link);
-		return field;
+		node = head;
+		break;
 
 	case FORMAT_PRINTFMT:
 		/* all done */
 		return NULL;
 	}
 
-	field = v;
-	if (field->link.prev == common_head)
+	node = node->prev;
+	if (node == common_head)
 		return (void *)FORMAT_FIELD_SEPERATOR;
-	else if (field->link.prev == head)
+	else if (node == head)
 		return (void *)FORMAT_PRINTFMT;
-
-	field = list_entry(field->link.prev, struct ftrace_event_field, link);
-
-	return field;
-}
-
-static void *f_start(struct seq_file *m, loff_t *pos)
-{
-	loff_t l = 0;
-	void *p;
-
-	/* Start by showing the header */
-	if (!*pos)
-		return (void *)FORMAT_HEADER;
-
-	p = (void *)FORMAT_HEADER;
-	do {
-		p = f_next(m, p, &l);
-	} while (p && l < *pos);
-
-	return p;
+	else
+		return node;
 }
 
 static int f_show(struct seq_file *m, void *v)
@@ -904,8 +878,7 @@ static int f_show(struct seq_file *m, void *v)
 		return 0;
 	}
 
-	field = v;
-
+	field = list_entry(v, struct ftrace_event_field, link);
 	/*
 	 * Smartly shows the array type(except dynamic array).
 	 * Normal:
@@ -932,6 +905,17 @@ static int f_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static void *f_start(struct seq_file *m, loff_t *pos)
+{
+	void *p = (void *)FORMAT_HEADER;
+	loff_t l = 0;
+
+	while (l < *pos && p)
+		p = f_next(m, p, &l);
+
+	return p;
+}
+
 static void f_stop(struct seq_file *m, void *p)
 {
 }

From cd458ba9d5a5592d37b5145e560071e91ea762ac Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 18 Jul 2013 20:47:12 +0200
Subject: [PATCH 145/320] tracing: Do not (ab)use trace_seq in event_id_read()

event_id_read() has no reason to kmalloc "struct trace_seq"
(more than PAGE_SIZE!), it can use a small buffer instead.

Note: "if (*ppos) return 0" looks strange and even wrong,
simple_read_from_buffer() handles ppos != 0 case corrrectly.

And it seems that almost every user of trace_seq in this file
should be converted too. Unless you use seq_open(), trace_seq
buys nothing compared to the raw buffer, but it needs a bit
more memory and code.

Link: http://lkml.kernel.org/r/20130718184712.GA4786@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 76defd91f9b4..898f868833f2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -947,23 +947,14 @@ static ssize_t
 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
 	struct ftrace_event_call *call = filp->private_data;
-	struct trace_seq *s;
-	int r;
+	char buf[32];
+	int len;
 
 	if (*ppos)
 		return 0;
 
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
-	if (!s)
-		return -ENOMEM;
-
-	trace_seq_init(s);
-	trace_seq_printf(s, "%d\n", call->event.type);
-
-	r = simple_read_from_buffer(ubuf, cnt, ppos,
-				    s->buffer, s->len);
-	kfree(s);
-	return r;
+	len = sprintf(buf, "%d\n", call->event.type);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
 }
 
 static ssize_t

From a644a7e9587802eabb2e229177606f6a74a60fc1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 19 Jul 2013 16:20:36 +0200
Subject: [PATCH 146/320] tracing: Kill trace_array->waiter

Trivial. trace_array->waiter has no users since 6eaaa5d5
"tracing/core: use appropriate waiting on trace_pipe".

Link: http://lkml.kernel.org/r/20130719142036.GA1594@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 57b7bb0d39b7..e7d643b8a907 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -214,7 +214,6 @@ struct trace_array {
 	struct dentry		*event_dir;
 	struct list_head	systems;
 	struct list_head	events;
-	struct task_struct	*waiter;
 	int			ref;
 };
 

From e70e78e3c83b536730e31231dd9b979768d8df3c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 19 Jul 2013 17:36:44 +0200
Subject: [PATCH 147/320] tracing: Kill the unbalanced tr->ref++ in
 tracing_buffers_open()

tracing_buffers_open() does trace_array_get() and then it wrongly
inrcements tr->ref again under trace_types_lock. This means that
every caller leaks trace_array:

	# cd /sys/kernel/debug/tracing/
	# mkdir instances/X
	# true < instances/X/per_cpu/cpu0/trace_pipe_raw
	# rmdir instances/X
	rmdir: failed to remove `instances/X': Device or resource busy

Link: http://lkml.kernel.org/r/20130719153644.GA18899@redhat.com

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: stable@vger.kernel.org # 3.10
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7d9ceab42564..3f2477713aca 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4959,8 +4959,6 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 
 	mutex_lock(&trace_types_lock);
 
-	tr->ref++;
-
 	info->iter.tr		= tr;
 	info->iter.cpu_file	= tc->cpu;
 	info->iter.trace	= tr->current_trace;

From 29b7a47a9df2534ed62c4783dcf76153ceb76a73 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 12 Dec 2012 10:18:51 -0800
Subject: [PATCH 148/320] alpha: Convert print_symbol to %pSR

Use the new vsprintf extension to avoid any possible
message interleaving.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Joe Perches <joe@perches.com>
---
 arch/alpha/kernel/traps.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index be1fba334bd0..bd0665cdc840 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -66,8 +66,8 @@ dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
 {
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx    %s\n",
 	       regs->pc, regs->r26, regs->ps, print_tainted());
-	print_symbol("pc is at %s\n", regs->pc);
-	print_symbol("ra is at %s\n", regs->r26 );
+	printk("pc is at %pSR\n", (void *)regs->pc);
+	printk("ra is at %pSR\n", (void *)regs->r26);
 	printk("v0 = %016lx  t0 = %016lx  t1 = %016lx\n",
 	       regs->r0, regs->r1, regs->r2);
 	printk("t2 = %016lx  t3 = %016lx  t4 = %016lx\n",
@@ -132,9 +132,7 @@ dik_show_trace(unsigned long *sp)
 			continue;
 		if (tmp >= (unsigned long) &_etext)
 			continue;
-		printk("[<%lx>]", tmp);
-		print_symbol(" %s", tmp);
-		printk("\n");
+		printk("[<%lx>] %pSR\n", tmp, (void *)tmp);
 		if (i > 40) {
 			printk(" ...");
 			break;

From 00ee03092a6e4b1d0ddc6b861ebb9ed8d13cc29b Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Wed, 29 May 2013 18:51:28 +0800
Subject: [PATCH 149/320] alpha: kernel: using memcpy() instead of strcpy()

When sending message in send_secondary_console_msg(), the length is not
include the NUL byte, and also not copy NUL to 'ipc_buffer'.

When receive message in recv_secondary_console_msg(), the 'cnt' also
excludes NUL.

So when get string from ipc_buffer, it may not be NUL terminated.

Then use memcpy() instead of strcpy(), and set last byte NUL.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Chen Gang <gang.chen@asianux.com>
---
 arch/alpha/kernel/smp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 53b18a620e1c..4bc3c414d075 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -266,7 +266,8 @@ recv_secondary_console_msg(void)
 		else {
 			cp1 = (char *) &cpu->ipc_buffer[11];
 			cp2 = buf;
-			strcpy(cp2, cp1);
+			memcpy(cp2, cp1, cnt);
+			cp2[cnt] = '\0';
 			
 			while ((cp2 = strchr(cp2, '\r')) != 0) {
 				*cp2 = ' ';

From 91b678c8edb7d8abd444705df63b80af31ad13e8 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Wed, 29 May 2013 19:04:09 +0800
Subject: [PATCH 150/320] alpha: kernel: typo issue, using '1' instead of '11'

For sending message:

        *(unsigned int *)&cpu->ipc_buffer[0] = len;
        cp1 = (char *) &cpu->ipc_buffer[1];

But for receive message:

                cnt = cpu->ipc_buffer[0] >> 32;
                ...
                       cp1 = (char *) &cpu->ipc_buffer[11];

They are not pairs, it is typo issue of the redundency '1'.

So need use '1' instead of '11'.

Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Chen Gang <gang.chen@asianux.com>
---
 arch/alpha/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 4bc3c414d075..9dbbcb3b9146 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -264,7 +264,7 @@ recv_secondary_console_msg(void)
 		if (cnt <= 0 || cnt >= 80)
 			strcpy(buf, "<<< BOGUS MSG >>>");
 		else {
-			cp1 = (char *) &cpu->ipc_buffer[11];
+			cp1 = (char *) &cpu->ipc_buffer[1];
 			cp2 = buf;
 			memcpy(cp2, cp1, cnt);
 			cp2[cnt] = '\0';

From 8b3ed3c002666540d9782a5219fa75c8a3fba98f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 21 Jun 2013 18:17:22 +0100
Subject: [PATCH 151/320] alpha: locks: remove unused arch_*_relax operations

The arch_{spin,read,write}_relax macros are not used anywhere in the
kernel and are typically just aliases for cpu_relax().

This patch removes the unused definitions for Alpha.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/alpha/include/asm/spinlock.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index 3bba21e41b81..37b570d01202 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -168,8 +168,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock)
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ALPHA_SPINLOCK_H */

From 01350eb6c0d6fa7831b387d50e19830d8d0da354 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Sat, 13 Jul 2013 13:31:51 -0700
Subject: [PATCH 152/320] alpha: Add kcmp and finit_module syscalls

Reviewed-and-Tested-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/include/asm/unistd.h      | 3 +--
 arch/alpha/include/uapi/asm/unistd.h | 2 ++
 arch/alpha/kernel/systbls.S          | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 43baee17acdf..f2c94402e2c8 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,8 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-
-#define NR_SYSCALLS			506
+#define NR_SYSCALLS			508
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index 801d28bcea51..53ae7bb1bfd1 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -467,5 +467,7 @@
 #define __NR_sendmmsg			503
 #define __NR_process_vm_readv		504
 #define __NR_process_vm_writev		505
+#define __NR_kcmp			506
+#define __NR_finit_module		507
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 4284ec798ec9..dca9b3fb0071 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -524,6 +524,8 @@ sys_call_table:
 	.quad sys_sendmmsg
 	.quad sys_process_vm_readv
 	.quad sys_process_vm_writev		/* 505 */
+	.quad sys_kcmp
+	.quad sys_finit_module
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object

From a5c6eae4d6763e50101494b392ef6518c0ef96e9 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Wed, 10 Jul 2013 11:05:59 -0700
Subject: [PATCH 153/320] alpha: Modernize lib/mpi/longlong.h

Remove the compile warning for __udiv_qrnnd not having a prototype.
Use the __builtin_alpha_umulh introduced in gcc 4.0.

Reviewed-and-Tested-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 lib/mpi/longlong.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index d411355f238e..aac511417ad1 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -151,15 +151,12 @@ do { \
 #endif /* __a29k__ */
 
 #if defined(__alpha) && W_TYPE_SIZE == 64
-#define umul_ppmm(ph, pl, m0, m1) \
-do { \
-		UDItype __m0 = (m0), __m1 = (m1); \
-		__asm__ ("umulh %r1,%2,%0" \
-		: "=r" ((UDItype) ph) \
-		: "%rJ" (__m0), \
-			"rI" (__m1)); \
-		(pl) = __m0 * __m1; \
-	} while (0)
+#define umul_ppmm(ph, pl, m0, m1)			\
+do {							\
+	UDItype __m0 = (m0), __m1 = (m1);		\
+	(ph) = __builtin_alpha_umulh(__m0, __m1);	\
+	(pl) = __m0 * __m1;                             \
+} while (0)
 #define UMUL_TIME 46
 #ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
@@ -167,7 +164,7 @@ do { UDItype __r; \
 	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
 	(r) = __r; \
 } while (0)
-extern UDItype __udiv_qrnnd();
+extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
 #define UDIV_TIME 220
 #endif /* LONGLONG_STANDALONE */
 #endif /* __alpha */

From 6da7539734d4e0f5caeea1e0efbb74d7b83db1ca Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 11 Jul 2013 07:42:14 -0700
Subject: [PATCH 154/320] alpha: Improve atomic_add_unless

Use ll/sc loops instead of C loops around cmpxchg.
Update the atomic64_add_unless block comment to match the code.

Reviewed-and-Tested-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/include/asm/atomic.h | 60 ++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 23 deletions(-)

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index c2cbe4fc391c..0dc18fc4d925 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -186,17 +186,24 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
  */
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
+	int c, new, old;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldl_l	%[old],%[mem]\n"
+	"	cmpeq	%[old],%[u],%[c]\n"
+	"	addl	%[old],%[a],%[new]\n"
+	"	bne	%[c],2f\n"
+	"	stl_c	%[new],%[mem]\n"
+	"	beq	%[new],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u)
+	: "memory");
+	smp_mb();
+	return old;
 }
 
 
@@ -207,21 +214,28 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns true iff @v was not @u.
  */
 static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
+	long c, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[tmp],%[mem]\n"
+	"	cmpeq	%[tmp],%[u],%[c]\n"
+	"	addq	%[tmp],%[a],%[tmp]\n"
+	"	bne	%[c],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [tmp] "=&r"(tmp), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
+	: "memory");
+	smp_mb();
+	return !c;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)

From 748a76b5152e8f38edf8afbf6f4cd6e326ad9e62 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 11 Jul 2013 08:04:20 -0700
Subject: [PATCH 155/320] alpha: Implement atomic64_dec_if_positive

Reviewed-and-Tested-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/Kconfig              |  1 +
 arch/alpha/include/asm/atomic.h | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 837a1f2d8b96..082d9b4b5472 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -15,6 +15,7 @@ config ALPHA
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 0dc18fc4d925..78b03ef39f6f 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -238,6 +238,34 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 	return !c;
 }
 
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long old, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[old],%[mem]\n"
+	"	subq	%[old],1,%[tmp]\n"
+	"	ble	%[old],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [tmp] "=&r"(tmp)
+	: [mem] "m"(*v)
+	: "memory");
+	smp_mb();
+	return old - 1;
+}
+
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
 #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)

From 231b0bedf55325ee5a1c7bdd47d00131dafc9ab7 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Tue, 12 Apr 2011 14:45:12 -0700
Subject: [PATCH 156/320] alpha: Generate dwarf2 unwind info for various kernel
 entry points.

Having unwind info past the PALcode generated stack frame makes
debugging the kernel significantly easier.

Acked-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/kernel/entry.S | 399 +++++++++++++++++++++++++++-----------
 1 file changed, 288 insertions(+), 111 deletions(-)

diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index f62a994ef126..a969b95ee5ac 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -12,11 +12,32 @@
 
 	.text
 	.set noat
+	.cfi_sections	.debug_frame
 
 /* Stack offsets.  */
 #define SP_OFF			184
 #define SWITCH_STACK_SIZE	320
 
+.macro	CFI_START_OSF_FRAME	func
+	.align	4
+	.globl	\func
+	.type	\func,@function
+\func:
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
+	.cfi_rel_offset	$16, 24
+	.cfi_rel_offset	$17, 32
+	.cfi_rel_offset	$18, 40
+.endm
+
+.macro	CFI_END_OSF_FRAME	func
+	.cfi_endproc
+	.size	\func, . - \func
+.endm
+
 /*
  * This defines the normal kernel pt-regs layout.
  *
@@ -27,100 +48,158 @@
  * the palcode-provided values are available to the signal handler.
  */
 
-#define SAVE_ALL			\
-	subq	$sp, SP_OFF, $sp;	\
-	stq	$0, 0($sp);		\
-	stq	$1, 8($sp);		\
-	stq	$2, 16($sp);		\
-	stq	$3, 24($sp);		\
-	stq	$4, 32($sp);		\
-	stq	$28, 144($sp);		\
-	lda	$2, alpha_mv;		\
-	stq	$5, 40($sp);		\
-	stq	$6, 48($sp);		\
-	stq	$7, 56($sp);		\
-	stq	$8, 64($sp);		\
-	stq	$19, 72($sp);		\
-	stq	$20, 80($sp);		\
-	stq	$21, 88($sp);		\
-	ldq	$2, HAE_CACHE($2);	\
-	stq	$22, 96($sp);		\
-	stq	$23, 104($sp);		\
-	stq	$24, 112($sp);		\
-	stq	$25, 120($sp);		\
-	stq	$26, 128($sp);		\
-	stq	$27, 136($sp);		\
-	stq	$2, 152($sp);		\
-	stq	$16, 160($sp);		\
-	stq	$17, 168($sp);		\
+.macro	SAVE_ALL
+	subq	$sp, SP_OFF, $sp
+	.cfi_adjust_cfa_offset	SP_OFF
+	stq	$0, 0($sp)
+	stq	$1, 8($sp)
+	stq	$2, 16($sp)
+	stq	$3, 24($sp)
+	stq	$4, 32($sp)
+	stq	$28, 144($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_rel_offset $1, 8
+	.cfi_rel_offset	$2, 16
+	.cfi_rel_offset	$3, 24
+	.cfi_rel_offset	$4, 32
+	.cfi_rel_offset	$28, 144
+	lda	$2, alpha_mv
+	stq	$5, 40($sp)
+	stq	$6, 48($sp)
+	stq	$7, 56($sp)
+	stq	$8, 64($sp)
+	stq	$19, 72($sp)
+	stq	$20, 80($sp)
+	stq	$21, 88($sp)
+	ldq	$2, HAE_CACHE($2)
+	stq	$22, 96($sp)
+	stq	$23, 104($sp)
+	stq	$24, 112($sp)
+	stq	$25, 120($sp)
+	stq	$26, 128($sp)
+	stq	$27, 136($sp)
+	stq	$2, 152($sp)
+	stq	$16, 160($sp)
+	stq	$17, 168($sp)
 	stq	$18, 176($sp)
+	.cfi_rel_offset	$5, 40
+	.cfi_rel_offset	$6, 48
+	.cfi_rel_offset	$7, 56
+	.cfi_rel_offset	$8, 64
+	.cfi_rel_offset $19, 72
+	.cfi_rel_offset	$20, 80
+	.cfi_rel_offset	$21, 88
+	.cfi_rel_offset $22, 96
+	.cfi_rel_offset	$23, 104
+	.cfi_rel_offset	$24, 112
+	.cfi_rel_offset	$25, 120
+	.cfi_rel_offset	$26, 128
+	.cfi_rel_offset	$27, 136
+.endm
 
-#define RESTORE_ALL			\
-	lda	$19, alpha_mv;		\
-	ldq	$0, 0($sp);		\
-	ldq	$1, 8($sp);		\
-	ldq	$2, 16($sp);		\
-	ldq	$3, 24($sp);		\
-	ldq	$21, 152($sp);		\
-	ldq	$20, HAE_CACHE($19);	\
-	ldq	$4, 32($sp);		\
-	ldq	$5, 40($sp);		\
-	ldq	$6, 48($sp);		\
-	ldq	$7, 56($sp);		\
-	subq	$20, $21, $20;		\
-	ldq	$8, 64($sp);		\
-	beq	$20, 99f;		\
-	ldq	$20, HAE_REG($19);	\
-	stq	$21, HAE_CACHE($19);	\
-	stq	$21, 0($20);		\
-99:;					\
-	ldq	$19, 72($sp);		\
-	ldq	$20, 80($sp);		\
-	ldq	$21, 88($sp);		\
-	ldq	$22, 96($sp);		\
-	ldq	$23, 104($sp);		\
-	ldq	$24, 112($sp);		\
-	ldq	$25, 120($sp);		\
-	ldq	$26, 128($sp);		\
-	ldq	$27, 136($sp);		\
-	ldq	$28, 144($sp);		\
+.macro	RESTORE_ALL
+	lda	$19, alpha_mv
+	ldq	$0, 0($sp)
+	ldq	$1, 8($sp)
+	ldq	$2, 16($sp)
+	ldq	$3, 24($sp)
+	ldq	$21, 152($sp)
+	ldq	$20, HAE_CACHE($19)
+	ldq	$4, 32($sp)
+	ldq	$5, 40($sp)
+	ldq	$6, 48($sp)
+	ldq	$7, 56($sp)
+	subq	$20, $21, $20
+	ldq	$8, 64($sp)
+	beq	$20, 99f
+	ldq	$20, HAE_REG($19)
+	stq	$21, HAE_CACHE($19)
+	stq	$21, 0($20)
+99:	ldq	$19, 72($sp)
+	ldq	$20, 80($sp)
+	ldq	$21, 88($sp)
+	ldq	$22, 96($sp)
+	ldq	$23, 104($sp)
+	ldq	$24, 112($sp)
+	ldq	$25, 120($sp)
+	ldq	$26, 128($sp)
+	ldq	$27, 136($sp)
+	ldq	$28, 144($sp)
 	addq	$sp, SP_OFF, $sp
+	.cfi_restore	$0
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_adjust_cfa_offset	-SP_OFF
+.endm
+
+.macro	DO_SWITCH_STACK
+	bsr	$1, do_switch_stack
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
+	/* We don't really care about the FP registers for debugging.  */
+.endm
+
+.macro	UNDO_SWITCH_STACK
+	bsr	$1, undo_switch_stack
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-SWITCH_STACK_SIZE
+.endm
 
 /*
  * Non-syscall kernel entry points.
  */
 
-	.align	4
-	.globl	entInt
-	.ent	entInt
-entInt:
+CFI_START_OSF_FRAME entInt
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $19
 	jsr	$31, do_entInt
-.end entInt
+CFI_END_OSF_FRAME entInt
 
-	.align	4
-	.globl	entArith
-	.ent	entArith
-entArith:
+CFI_START_OSF_FRAME entArith
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $18
 	jsr	$31, do_entArith
-.end entArith
+CFI_END_OSF_FRAME entArith
 
-	.align	4
-	.globl	entMM
-	.ent	entMM
-entMM:
+CFI_START_OSF_FRAME entMM
 	SAVE_ALL
 /* save $9 - $15 so the inline exception code can manipulate them.  */
 	subq	$sp, 56, $sp
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -128,6 +207,13 @@ entMM:
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	addq	$sp, 56, $19
 /* handle the fault */
 	lda	$8, 0x3fff
@@ -142,28 +228,33 @@ entMM:
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	addq	$sp, 56, $sp
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 /* finish up the syscall as normal.  */
 	br	ret_from_sys_call
-.end entMM
+CFI_END_OSF_FRAME entMM
 
-	.align	4
-	.globl	entIF
-	.ent	entIF
-entIF:
+CFI_START_OSF_FRAME entIF
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $17
 	jsr	$31, do_entIF
-.end entIF
+CFI_END_OSF_FRAME entIF
 
-	.align	4
-	.globl	entUna
-	.ent	entUna
-entUna:
+CFI_START_OSF_FRAME entUna
 	lda	$sp, -256($sp)
+	.cfi_adjust_cfa_offset	256
 	stq	$0, 0($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_remember_state
 	ldq	$0, 256($sp)	/* get PS */
 	stq	$1, 8($sp)
 	stq	$2, 16($sp)
@@ -195,6 +286,32 @@ entUna:
 	stq	$28, 224($sp)
 	mov	$sp, $19
 	stq	$gp, 232($sp)
+	.cfi_rel_offset	$1, 1*8
+	.cfi_rel_offset	$2, 2*8
+	.cfi_rel_offset	$3, 3*8
+	.cfi_rel_offset	$4, 4*8
+	.cfi_rel_offset	$5, 5*8
+	.cfi_rel_offset	$6, 6*8
+	.cfi_rel_offset	$7, 7*8
+	.cfi_rel_offset	$8, 8*8
+	.cfi_rel_offset	$9, 9*8
+	.cfi_rel_offset	$10, 10*8
+	.cfi_rel_offset	$11, 11*8
+	.cfi_rel_offset	$12, 12*8
+	.cfi_rel_offset	$13, 13*8
+	.cfi_rel_offset	$14, 14*8
+	.cfi_rel_offset	$15, 15*8
+	.cfi_rel_offset	$19, 19*8
+	.cfi_rel_offset	$20, 20*8
+	.cfi_rel_offset	$21, 21*8
+	.cfi_rel_offset	$22, 22*8
+	.cfi_rel_offset	$23, 23*8
+	.cfi_rel_offset	$24, 24*8
+	.cfi_rel_offset	$25, 25*8
+	.cfi_rel_offset	$26, 26*8
+	.cfi_rel_offset	$27, 27*8
+	.cfi_rel_offset	$28, 28*8
+	.cfi_rel_offset	$29, 29*8
 	lda	$8, 0x3fff
 	stq	$31, 248($sp)
 	bic	$sp, $8, $8
@@ -228,16 +345,45 @@ entUna:
 	ldq	$28, 224($sp)
 	ldq	$gp, 232($sp)
 	lda	$sp, 256($sp)
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_restore	$29
+	.cfi_adjust_cfa_offset	-256
 	call_pal PAL_rti
-.end entUna
 
 	.align	4
-	.ent	entUnaUser
 entUnaUser:
+	.cfi_restore_state
 	ldq	$0, 0($sp)	/* restore original $0 */
 	lda	$sp, 256($sp)	/* pop entUna's stack frame */
+	.cfi_restore	$0
+	.cfi_adjust_cfa_offset	-256
 	SAVE_ALL		/* setup normal kernel stack */
 	lda	$sp, -56($sp)
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -245,6 +391,13 @@ entUnaUser:
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	lda	$8, 0x3fff
 	addq	$sp, 56, $19
 	bic	$sp, $8, $8
@@ -257,20 +410,25 @@ entUnaUser:
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	lda	$sp, 56($sp)
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 	br	ret_from_sys_call
-.end entUnaUser
+CFI_END_OSF_FRAME entUna
 
-	.align	4
-	.globl	entDbg
-	.ent	entDbg
-entDbg:
+CFI_START_OSF_FRAME entDbg
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $16
 	jsr	$31, do_entDbg
-.end entDbg
+CFI_END_OSF_FRAME entDbg
 
 /*
  * The system call entry point is special.  Most importantly, it looks
@@ -285,8 +443,12 @@ entDbg:
 
 	.align	4
 	.globl	entSys
-	.globl	ret_from_sys_call
-	.ent	entSys
+	.type	entSys, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
 entSys:
 	SAVE_ALL
 	lda	$8, 0x3fff
@@ -300,6 +462,9 @@ entSys:
 	stq	$17, SP_OFF+32($sp)
 	s8addq	$0, $5, $5
 	stq	$18, SP_OFF+40($sp)
+	.cfi_rel_offset	$16, SP_OFF+24
+	.cfi_rel_offset	$17, SP_OFF+32
+	.cfi_rel_offset	$18, SP_OFF+40
 	blbs	$3, strace
 	beq	$4, 1f
 	ldq	$27, 0($5)
@@ -310,6 +475,7 @@ entSys:
 	stq	$31, 72($sp)		/* a3=0 => no error */
 
 	.align	4
+	.globl	ret_from_sys_call
 ret_from_sys_call:
 	cmovne	$26, 0, $18		/* $18 = 0 => non-restartable */
 	ldq	$0, SP_OFF($sp)
@@ -324,10 +490,12 @@ ret_to_user:
 	and	$17, _TIF_WORK_MASK, $2
 	bne	$2, work_pending
 restore_all:
+	.cfi_remember_state
 	RESTORE_ALL
 	call_pal PAL_rti
 
 ret_to_kernel:
+	.cfi_restore_state
 	lda	$16, 7
 	call_pal PAL_swpipl
 	br restore_all
@@ -356,7 +524,6 @@ $ret_success:
 	stq	$0, 0($sp)
 	stq	$31, 72($sp)	/* a3=0 => no error */
 	br	ret_from_sys_call
-.end entSys
 
 /*
  * Do all cleanup when returning from all interrupts and system calls.
@@ -370,7 +537,7 @@ $ret_success:
  */
 
 	.align	4
-	.ent	work_pending
+	.type	work_pending, @function
 work_pending:
 	and	$17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2
 	bne	$2, $work_notifysig
@@ -387,23 +554,22 @@ $work_resched:
 
 $work_notifysig:
 	mov	$sp, $16
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, do_work_pending
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	restore_all
-.end work_pending
 
 /*
  * PTRACE syscall handler
  */
 
 	.align	4
-	.ent	strace
+	.type	strace, @function
 strace:
 	/* set up signal stack, call syscall_trace */
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_enter /* returns the syscall number */
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	/* get the arguments back.. */
 	ldq	$16, SP_OFF+24($sp)
@@ -431,9 +597,9 @@ ret_from_straced:
 $strace_success:
 	stq	$0, 0($sp)		/* save return value */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_leave
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	$31, ret_from_sys_call
 
 	.align	3
@@ -447,26 +613,31 @@ $strace_error:
 	stq	$0, 0($sp)
 	stq	$1, 72($sp)	/* a3 for return */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	mov	$18, $9		/* save old syscall number */
 	mov	$19, $10	/* save old a3 */
 	jsr	$26, syscall_trace_leave
 	mov	$9, $18
 	mov	$10, $19
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	mov	$31, $26	/* tell "ret_from_sys_call" we can restart */
 	br	ret_from_sys_call
-.end strace
+CFI_END_OSF_FRAME entSys
 
 /*
  * Save and restore the switch stack -- aka the balance of the user context.
  */
 
 	.align	4
-	.ent	do_switch_stack
+	.type	do_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 do_switch_stack:
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -510,10 +681,14 @@ do_switch_stack:
 	stt	$f0, 312($sp)	# save fpcr in slot of $f31
 	ldt	$f0, 64($sp)	# dont let "do_switch_stack" change fp state.
 	ret	$31, ($1), 1
-.end do_switch_stack
+	.cfi_endproc
+	.size	do_switch_stack, .-do_switch_stack
 
 	.align	4
-	.ent	undo_switch_stack
+	.type	undo_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 undo_switch_stack:
 	ldq	$9, 0($sp)
 	ldq	$10, 8($sp)
@@ -558,7 +733,8 @@ undo_switch_stack:
 	ldt	$f30, 304($sp)
 	lda	$sp, SWITCH_STACK_SIZE($sp)
 	ret	$31, ($1), 1
-.end undo_switch_stack
+	.cfi_endproc
+	.size	undo_switch_stack, .-undo_switch_stack
 
 /*
  * The meat of the context switch code.
@@ -566,17 +742,18 @@ undo_switch_stack:
 
 	.align	4
 	.globl	alpha_switch_to
-	.ent	alpha_switch_to
+	.type	alpha_switch_to, @function
+	.cfi_startproc
 alpha_switch_to:
-	.prologue 0
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	call_pal PAL_swpctx
 	lda	$8, 0x3fff
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	bic	$sp, $8, $8
 	mov	$17, $0
 	ret
-.end alpha_switch_to
+	.cfi_endproc
+	.size	alpha_switch_to, .-alpha_switch_to
 
 /*
  * New processes begin life here.

From e40600997249844933293859c2effa8b7468c97a Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Sat, 13 Jul 2013 16:21:05 -0700
Subject: [PATCH 157/320] alpha: Fix type compatibility warning for
 marvel_map_irq

Acked-by: Phil Carmody <pc+lkml@asdf.org>
Reviewed-and-Tested-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/kernel/sys_marvel.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 407accc80877..c92e389ff219 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -317,8 +317,9 @@ marvel_init_irq(void)
 }
 
 static int 
-marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 {
+	struct pci_dev *dev = (struct pci_dev *)cdev;
 	struct pci_controller *hose = dev->sysdata;
 	struct io7_port *io7_port = hose->sysdata;
 	struct io7 *io7 = io7_port->io7;

From 91531b0535e5556783a7da900cc115b5fec8e63f Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 18 Jul 2013 14:47:37 -0700
Subject: [PATCH 158/320] alpha: Use __builtin_alpha_rpcc

As introduced in gcc 3.2.

Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/kernel/time.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index e336694ca042..ea3395036556 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -105,9 +105,7 @@ void arch_irq_work_raise(void)
 
 static inline __u32 rpcc(void)
 {
-    __u32 result;
-    asm volatile ("rpcc %0" : "=r"(result));
-    return result;
+	return __builtin_alpha_rpcc();
 }
 
 int update_persistent_clock(struct timespec now)

From 984ac6c0c7ad612d3e7d045243805de0a2702142 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 28 Apr 2011 09:22:39 -0700
Subject: [PATCH 159/320] alpha: Don't if-out dp264_device_interrupt.

The code as written is correct, and will be used by QEMU emulation.

Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/kernel/sys_dp264.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 5bf401f7ea97..6c35159bc00e 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -190,9 +190,6 @@ static struct irq_chip clipper_irq_type = {
 static void
 dp264_device_interrupt(unsigned long vector)
 {
-#if 1
-	printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n");
-#else
 	unsigned long pld;
 	unsigned int i;
 
@@ -210,12 +207,7 @@ dp264_device_interrupt(unsigned long vector)
 			isa_device_interrupt(vector);
 		else
 			handle_irq(16 + i);
-#if 0
-		TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
-		tmp = TSUNAMI_cchip->dir0.csr;
-#endif
 	}
-#endif
 }
 
 static void 

From 46931bf67c188d026879e943fa0426704991bed9 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Thu, 21 Apr 2011 16:01:02 -0700
Subject: [PATCH 160/320] alpha: Force the user-visible HZ to a constant 1024.

This kernel/user split was done long ago for other architectures.

Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/include/asm/param.h      | 8 +++++---
 arch/alpha/include/uapi/asm/param.h | 7 -------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h
index bf46af51941b..a5b68b268bcf 100644
--- a/arch/alpha/include/asm/param.h
+++ b/arch/alpha/include/asm/param.h
@@ -3,7 +3,9 @@
 
 #include <uapi/asm/param.h>
 
-#define HZ		CONFIG_HZ
-#define USER_HZ		HZ
-# define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
+# undef HZ
+# define HZ		CONFIG_HZ
+# define USER_HZ	1024
+# define CLOCKS_PER_SEC	USER_HZ	/* frequency at which times() counts */
+
 #endif /* _ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h
index 29daed819ebd..dbcd9834af6d 100644
--- a/arch/alpha/include/uapi/asm/param.h
+++ b/arch/alpha/include/uapi/asm/param.h
@@ -1,13 +1,7 @@
 #ifndef _UAPI_ASM_ALPHA_PARAM_H
 #define _UAPI_ASM_ALPHA_PARAM_H
 
-/* ??? Gross.  I don't want to parameterize this, and supposedly the
-   hardware ignores reprogramming.  We also need userland buy-in to the 
-   change in HZ, since this is visible in the wait4 resources etc.  */
-
-#ifndef __KERNEL__
 #define HZ		1024
-#endif
 
 #define EXEC_PAGESIZE	8192
 
@@ -17,5 +11,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-
 #endif /* _UAPI_ASM_ALPHA_PARAM_H */

From dff64649e757870e9351e7d85917ae681d20ee54 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Fri, 19 Jul 2013 12:43:07 -0700
Subject: [PATCH 161/320] alpha: Use handle_percpu_irq for the timer interrupt

Signed-off-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 arch/alpha/kernel/irq_alpha.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index f433fc11877a..28e4429596f3 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -236,7 +236,7 @@ void __init
 init_rtc_irq(void)
 {
 	irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
-				      handle_simple_irq, "RTC");
+				      handle_percpu_irq, "RTC");
 	setup_irq(RTC_IRQ, &timer_irqaction);
 }
 

From e85843bec6c2ea7c10ec61238396891cc2b753a9 Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Fri, 19 Jul 2013 15:02:01 -0700
Subject: [PATCH 162/320] drm/i915: quirk no PCH_PWM_ENABLE for Dell XPS13
 backlight

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=47941
BugLink: https://bugs.launchpad.net/bugs/1163720
BugLink: https://bugs.launchpad.net/bugs/1162026

Some machines suffer from non-functional backlight controls if
BLM_PCH_PWM_ENABLE is set, so provide a quirk to avoid doing so.
Apply this quirk to Dell XPS 13 models.

Tested-by: Eric Griffith <EGriffith92@gmail.com>
Tested-by: Kent Baxley <kent.baxley@canonical.com>
Cc: <stable@vger.kernel.org> # v3.8+
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h      |  1 +
 drivers/gpu/drm/i915/intel_display.c | 16 ++++++++++++++++
 drivers/gpu/drm/i915/intel_panel.c   |  3 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a416645bcd23..204c3eca0fb2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -555,6 +555,7 @@ enum intel_sbi_destination {
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
+#define QUIRK_NO_PCH_PWM_ENABLE (1<<3)
 
 struct intel_fbdev;
 struct intel_fbc_work;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 76b034be2c7c..5fb305840db8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9400,6 +9400,17 @@ static void quirk_invert_brightness(struct drm_device *dev)
 	DRM_INFO("applying inverted panel brightness quirk\n");
 }
 
+/*
+ * Some machines (Dell XPS13) suffer broken backlight controls if
+ * BLM_PCH_PWM_ENABLE is set.
+ */
+static void quirk_no_pcm_pwm_enable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE;
+	DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n");
+}
+
 struct intel_quirk {
 	int device;
 	int subsystem_vendor;
@@ -9469,6 +9480,11 @@ static struct intel_quirk intel_quirks[] = {
 
 	/* Acer Aspire 4736Z */
 	{ 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
+
+	/* Dell XPS13 HD Sandy Bridge */
+	{ 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable },
+	/* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
+	{ 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 45010bb5d132..67e2c1f1c9a8 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -583,7 +583,8 @@ void intel_panel_enable_backlight(struct drm_device *dev,
 		POSTING_READ(reg);
 		I915_WRITE(reg, tmp | BLM_PWM_ENABLE);
 
-		if (HAS_PCH_SPLIT(dev)) {
+		if (HAS_PCH_SPLIT(dev) &&
+		    !(dev_priv->quirks & QUIRK_NO_PCH_PWM_ENABLE)) {
 			tmp = I915_READ(BLC_PWM_PCH_CTL1);
 			tmp |= BLM_PCH_PWM_ENABLE;
 			tmp &= ~BLM_PCH_OVERRIDE_ENABLE;

From a7cd1b8fea2f341b626b255d9898a5ca5fabbf0a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 19 Jul 2013 20:36:51 +0100
Subject: [PATCH 163/320] drm/i915: Serialize almost all register access

In theory, the different register blocks were meant to be only ever
touched when holding either the struct_mutex, mode_config.lock or even a
specific localised lock. This does not seem to be the case, and the
hardware reacts extremely badly if we attempt to concurrently access two
registers within the same cacheline.

The HSD suggests that we only need to do this workaround for display
range registers. However, upon review we need to serialize the multiple
stages in our register write functions - if only for preemption
protection.

Irrespective of the hardware requirements, the current io functions are
a little too loose with respect to the combination of pre- and
post-condition testing that we do in conjunction with the actual io. As
a result, we may be pre-empted and generate both false-postive and
false-negative errors.

Note well that this is a "90%" solution, there remains a few direct
users of ioread/iowrite which will be fixed up in the next few patches.
Since they are more invasive and that this simple change will prevent
almost all lockups on Haswell, we kept this patch simple to facilitate
backporting to stable.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=63914
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f4af1ca0fb62..6ddc5677ea2f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1253,21 +1253,21 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
 
 #define __i915_read(x, y) \
 u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
+	unsigned long irqflags; \
 	u##x val = 0; \
+	spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
 	if (IS_GEN5(dev_priv->dev)) \
 		ilk_dummy_write(dev_priv); \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		unsigned long irqflags; \
-		spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
 		if (dev_priv->forcewake_count == 0) \
 			dev_priv->gt.force_wake_get(dev_priv); \
 		val = read##y(dev_priv->regs + reg); \
 		if (dev_priv->forcewake_count == 0) \
 			dev_priv->gt.force_wake_put(dev_priv); \
-		spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
 	} else { \
 		val = read##y(dev_priv->regs + reg); \
 	} \
+	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
@@ -1280,8 +1280,10 @@ __i915_read(64, q)
 
 #define __i915_write(x, y) \
 void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
+	unsigned long irqflags; \
 	u32 __fifo_ret = 0; \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
@@ -1293,6 +1295,7 @@ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 		gen6_gt_check_fifodbg(dev_priv); \
 	} \
 	hsw_unclaimed_reg_check(dev_priv, reg); \
+	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
 }
 __i915_write(8, b)
 __i915_write(16, w)

From 9af588736f48401cca6a39a96a59fb994068237f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Mon, 27 May 2013 15:16:06 +0200
Subject: [PATCH 164/320] ARM: nomadik: Update MMC defconfigs

Enable MMC_UNSAFE_RESUME to accomplish a proper suspend/resume cycle
for SD/SDIO/(e)MMC.

ARMMMCI host driver supports clock gating through runtime PM, thus
MMC_CLKGATE is not needed. Moreover ARMMMCI can do scatter-gather which
means we can explicity disable MMC_BLOCK_BOUNCE, since it's default
enabled, to skip unnecessary bounce buffer copying.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/configs/nhk8815_defconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index 35f8cf299fa2..b605575f3225 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -95,7 +95,8 @@ CONFIG_I2C_NOMADIK=y
 CONFIG_DEBUG_GPIO=y
 # CONFIG_HWMON is not set
 CONFIG_MMC=y
-CONFIG_MMC_CLKGATE=y
+CONFIG_MMC_UNSAFE_RESUME=y
+# CONFIG_MMC_BLOCK_BOUNCE is not set
 CONFIG_MMC_ARMMMCI=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y

From 931f57378631c19545ec933d2df0f4fb46d16659 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 28 May 2013 16:40:29 +0200
Subject: [PATCH 165/320] ARM: nomadik: update defconfig base

Update the Nomadik defconfig enabling:
- GPIO keyboard input
- Regulators
- LED class driver
- LED heartbeat trigger

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/configs/nhk8815_defconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index b605575f3225..e4cb5a420424 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -48,7 +48,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_TESTS=m
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_NAND_ECC_SMC=y
 CONFIG_MTD_NAND=y
@@ -94,6 +93,7 @@ CONFIG_I2C_GPIO=y
 CONFIG_I2C_NOMADIK=y
 CONFIG_DEBUG_GPIO=y
 # CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
 CONFIG_MMC=y
 CONFIG_MMC_UNSAFE_RESUME=y
 # CONFIG_MMC_BLOCK_BOUNCE is not set

From fec57e15527ab2e3d22cfbf04f2b98f823f294ff Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 20 Jun 2013 13:48:16 +0200
Subject: [PATCH 166/320] ARM: nomadik: configure for NO_HZ and HRTIMERS

This enables tickless idle (NO_HZ_IDLE) and high resolution
timers for the Nomadik.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/configs/nhk8815_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index e4cb5a420424..263ae3869e32 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -1,6 +1,8 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14

From 0e970cec05635adbe7b686063e2548a8e4afb8f4 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Fri, 28 Jun 2013 17:27:02 +0200
Subject: [PATCH 167/320] gpio/omap: don't create an IRQ mapping for every GPIO
 on DT

When a GPIO is defined as an interrupt line using Device
Tree, a call to irq_create_of_mapping() is made that calls
irq_create_mapping(). So, is not necessary to do the mapping
for all OMAP GPIO lines and explicitly call irq_create_mapping()
on the driver probe() when booting with Device Tree.

Add a custom IRQ domain .map function handler that will be
called by irq_create_mapping() to map the GPIO lines used as IRQ.
This also allows to execute needed setup code such as configuring
a GPIO as input and enabling the GPIO bank.

Changes since v3:
  - Use bank->chip.of_node instead of_have_populated_dt() to check
    DT or legacy boot as suggested by Jean-Christophe PLAGNIOL-VILLARD

Changes since v2:
  - Unconditionally do the IRQ setup in the .map() function and
    only call irq_create_mapping() in the gpio chip init to avoid
    code duplication as suggested by Grant Likely.

Changes since v1:
  - Split the addition of the .map function handler and the
    automatic gpio request in two different patches.
  - Add GPIO IRQ setup logic to the irq domain mapping function.
  - Only call irq_create_mapping for every GPIO on legacy boot.
  - Only setup a GPIO IRQ on the .map function for DeviceTree boot.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Tested-by: Enric Balletbo i Serra <eballetbo@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-omap.c | 54 +++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index dfeb3a3a8f20..5e667ff91dc3 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1068,24 +1068,50 @@ static void omap_gpio_chip_init(struct gpio_bank *bank)
 
 	gpiochip_add(&bank->chip);
 
-	for (j = 0; j < bank->width; j++) {
-		int irq = irq_create_mapping(bank->domain, j);
-		irq_set_lockdep_class(irq, &gpio_lock_class);
-		irq_set_chip_data(irq, bank);
-		if (bank->is_mpuio) {
-			omap_mpuio_alloc_gc(bank, irq, bank->width);
-		} else {
-			irq_set_chip_and_handler(irq, &gpio_irq_chip,
-						 handle_simple_irq);
-			set_irq_flags(irq, IRQF_VALID);
-		}
-	}
+	/*
+	 * REVISIT these explicit calls to irq_create_mapping()
+	 * to do the GPIO to IRQ domain mapping for each GPIO in
+	 * the bank can be removed once all OMAP platforms have
+	 * been migrated to Device Tree boot only.
+	 * Since in DT boot irq_create_mapping() is called from
+	 * irq_create_of_mapping() only for the GPIO lines that
+	 * are used as interrupts.
+	 */
+	if (!bank->chip.of_node)
+		for (j = 0; j < bank->width; j++)
+			irq_create_mapping(bank->domain, j);
 	irq_set_chained_handler(bank->irq, gpio_irq_handler);
 	irq_set_handler_data(bank->irq, bank);
 }
 
 static const struct of_device_id omap_gpio_match[];
 
+static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
+			     irq_hw_number_t hwirq)
+{
+	struct gpio_bank *bank = d->host_data;
+
+	if (!bank)
+		return -EINVAL;
+
+	irq_set_lockdep_class(virq, &gpio_lock_class);
+	irq_set_chip_data(virq, bank);
+	if (bank->is_mpuio) {
+		omap_mpuio_alloc_gc(bank, virq, bank->width);
+	} else {
+		irq_set_chip_and_handler(virq, &gpio_irq_chip,
+					 handle_simple_irq);
+		set_irq_flags(virq, IRQF_VALID);
+	}
+
+	return 0;
+}
+
+static struct irq_domain_ops omap_gpio_irq_ops = {
+	.xlate  = irq_domain_xlate_onetwocell,
+	.map    = omap_gpio_irq_map,
+};
+
 static int omap_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1151,10 +1177,10 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	}
 
 	bank->domain = irq_domain_add_legacy(node, bank->width, irq_base,
-					     0, &irq_domain_simple_ops, NULL);
+					     0, &omap_gpio_irq_ops, bank);
 #else
 	bank->domain = irq_domain_add_linear(node, bank->width,
-					     &irq_domain_simple_ops, NULL);
+					     &omap_gpio_irq_ops, bank);
 #endif
 	if (!bank->domain) {
 		dev_err(dev, "Couldn't register an IRQ domain\n");

From b4419e1a15905191661ffe75ba2f9e649f5d565e Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Fri, 28 Jun 2013 17:27:03 +0200
Subject: [PATCH 168/320] gpio/omap: auto request GPIO as input if used as IRQ
 via DT

When an OMAP GPIO is used as an IRQ line, a call to gpio_request()
has to be made to initialize the OMAP GPIO bank before a driver
request the IRQ. Otherwise the call to request_irq() fails.

Drives should not be aware of this neither care wether an IRQ line
is a GPIO or not. They should just request the IRQ and this has to
be handled by the irq_chip driver.

With the current OMAP GPIO DT binding, if we define:

    gpio6: gpio@49058000 {
    	   compatible = "ti,omap3-gpio";
	   reg = <0x49058000 0x200>;
	   interrupts = <34>;
	   ti,hwmods = "gpio6";
	   gpio-controller;
	   #gpio-cells = <2>;
	   interrupt-controller;
	   #interrupt-cells = <2>;
    };

	   interrupt-parent = <&gpio6>;
           interrupts = <16 8>;

The GPIO is correctly mapped as an IRQ but a call to gpio_request()
is never made. Since a call to the custom IRQ domain .map function
handler is made for each GPIO used as an IRQ, the GPIO can be setup
and configured as input there automatically.

Changes since v3:
  - Use bank->chip.of_node instead of_have_populated_dt() to check
    DT or legacy boot as suggested by Jean-Christophe PLAGNIOL-VILLARD
  - Add a comment that this is just a temporary solution until and
    that it has to be removed once is handled by the IRQ core.

Changes since v2:
 - Only make the call to gpio_request_one() conditional in the DT
   case as suggested by Grant Likely.

Changes since v1:
  - Split the irq domain mapping function handler and the GPIO
    request in two different patches.

Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Tested-by: Enric Balletbo i Serra <eballetbo@gmail.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-omap.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 5e667ff91dc3..3a0c1606f885 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1090,6 +1090,8 @@ static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
 			     irq_hw_number_t hwirq)
 {
 	struct gpio_bank *bank = d->host_data;
+	int gpio;
+	int ret;
 
 	if (!bank)
 		return -EINVAL;
@@ -1104,6 +1106,22 @@ static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
 		set_irq_flags(virq, IRQF_VALID);
 	}
 
+	/*
+	 * REVISIT most GPIO IRQ chip drivers need to call
+	 * gpio_request() before a GPIO line can be used as an
+	 * IRQ. Ideally this should be handled by the IRQ core
+	 * but until then this has to be done on a per driver
+	 * basis. Remove this once this is managed by the core.
+	 */
+	if (bank->chip.of_node) {
+		gpio = irq_to_gpio(bank, hwirq);
+		ret = gpio_request_one(gpio, GPIOF_IN, NULL);
+		if (ret) {
+			dev_err(bank->dev, "Could not request GPIO%d\n", gpio);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 

From 949eb1a4d29dc75e0b5b16b03747886b52ecf854 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Date: Tue, 2 Jul 2013 21:46:30 +0200
Subject: [PATCH 169/320] gpio/omap: fix build error when OF_GPIO is not
 defined.

The OMAP GPIO driver check if the chip has an associated
Device Tree node using the struct gpio_chip of_node member.

But this is only build if CONFIG_OF_GPIO is defined which
leads to the following error when using omap1_defconfig:

linux/drivers/gpio/gpio-omap.c: In function 'omap_gpio_chip_init':
linux/drivers/gpio/gpio-omap.c:1080:17: error: 'struct gpio_chip' has no member named 'of_node'
linux/drivers/gpio/gpio-omap.c: In function 'omap_gpio_irq_map':
linux/drivers/gpio/gpio-omap.c:1116:16: error: 'struct gpio_chip' has no member named 'of_node'

Reported-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Javier Martinez Canillas <javier.martinez@collabora.co.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-omap.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 3a0c1606f885..c57244ef428b 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1037,6 +1037,18 @@ omap_mpuio_alloc_gc(struct gpio_bank *bank, unsigned int irq_start,
 			       IRQ_NOREQUEST | IRQ_NOPROBE, 0);
 }
 
+#if defined(CONFIG_OF_GPIO)
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+	return chip->of_node != NULL;
+}
+#else
+static inline bool omap_gpio_chip_boot_dt(struct gpio_chip *chip)
+{
+	return false;
+}
+#endif
+
 static void omap_gpio_chip_init(struct gpio_bank *bank)
 {
 	int j;
@@ -1077,7 +1089,7 @@ static void omap_gpio_chip_init(struct gpio_bank *bank)
 	 * irq_create_of_mapping() only for the GPIO lines that
 	 * are used as interrupts.
 	 */
-	if (!bank->chip.of_node)
+	if (!omap_gpio_chip_boot_dt(&bank->chip))
 		for (j = 0; j < bank->width; j++)
 			irq_create_mapping(bank->domain, j);
 	irq_set_chained_handler(bank->irq, gpio_irq_handler);
@@ -1113,7 +1125,7 @@ static int omap_gpio_irq_map(struct irq_domain *d, unsigned int virq,
 	 * but until then this has to be done on a per driver
 	 * basis. Remove this once this is managed by the core.
 	 */
-	if (bank->chip.of_node) {
+	if (omap_gpio_chip_boot_dt(&bank->chip)) {
 		gpio = irq_to_gpio(bank, hwirq);
 		ret = gpio_request_one(gpio, GPIOF_IN, NULL);
 		if (ret) {

From afe8ce9b29c487ea7f62faa936b6abb84e0b8815 Mon Sep 17 00:00:00 2001
From: Rohit Vaswani <rvaswani@codeaurora.org>
Date: Thu, 18 Jul 2013 13:07:14 -0700
Subject: [PATCH 170/320] drivers: gpio: msm: Fix the error condition for
 reading ngpio

of_property_read_u32 return 0 on success. The check was using a ! to
return error. Fix the if condition.

Signed-off-by: Rohit Vaswani <rvaswani@codeaurora.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Pankaj Jangra  <jangra.pankaj9@gmail.com>
Cc: "Bird, Tim" <Tim.Bird@sonymobile.com>
Signed-off-by: David Brown <davidb@codeaurora.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-msm-v2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c
index f4491a497cc8..c2fa77086eb5 100644
--- a/drivers/gpio/gpio-msm-v2.c
+++ b/drivers/gpio/gpio-msm-v2.c
@@ -378,7 +378,7 @@ static int msm_gpio_probe(struct platform_device *pdev)
 	int ret, ngpio;
 	struct resource *res;
 
-	if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
+	if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
 		dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__);
 		return -EINVAL;
 	}

From 181d1b9e31c668259d3798c521672afb8edd355c Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 21 Jul 2013 13:16:24 +0200
Subject: [PATCH 171/320] drm/i915: fix up gt init sequence fallout

The regression fix for gen6+ rps fallout

commit 7dcd2677ea912573d9ed4bcd629b0023b2d11505
Author: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date:   Wed Jul 17 10:22:58 2013 +0400

    drm/i915: fix long-standing SNB regression in power consumption after resume

unintentionally also changed the init sequence ordering between
gt_init and gt_reset - we need to reset BIOS damage like leftover
forcewake references before we run our own code. Otherwise we can get
nasty dmesg noise like

[drm:__gen6_gt_force_wake_mt_get] *ERROR* Timed out waiting for forcewake old ack to clear.

again. Since _reset suggests that we first need to have stuff
initialized (which isn't the case here) call it sanitze instead.

While at it also block out the rps disable introduced by the above
commit on ilk: We don't have any knowledge of ilk rps being broken in
similar ways. And the disable functions uses the default hw state
which is only read out when we're enabling rps. So essentially we've
been writing random grabage into that register.

Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: stable@vger.kernel.org
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c | 2 +-
 drivers/gpu/drm/i915/i915_drv.c | 4 ++--
 drivers/gpu/drm/i915/i915_drv.h | 2 +-
 drivers/gpu/drm/i915/intel_pm.c | 5 +++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5c0663f58aff..abf158da9b30 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1593,8 +1593,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	intel_detect_pch(dev);
 
 	intel_irq_init(dev);
+	intel_gt_sanitize(dev);
 	intel_gt_init(dev);
-	intel_gt_reset(dev);
 
 	/* Try to make sure MCHBAR is enabled before poking at it */
 	intel_setup_mchbar(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6ddc5677ea2f..45b3c030f483 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -706,7 +706,7 @@ static int i915_drm_thaw(struct drm_device *dev)
 {
 	int error = 0;
 
-	intel_gt_reset(dev);
+	intel_gt_sanitize(dev);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		mutex_lock(&dev->struct_mutex);
@@ -732,7 +732,7 @@ int i915_resume(struct drm_device *dev)
 
 	pci_set_master(dev->pdev);
 
-	intel_gt_reset(dev);
+	intel_gt_sanitize(dev);
 
 	/*
 	 * Platforms with opregion should have sane BIOS, older ones (gen3 and
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 204c3eca0fb2..d2ee3343c943 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1584,7 +1584,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged);
 extern void intel_irq_init(struct drm_device *dev);
 extern void intel_hpd_init(struct drm_device *dev);
 extern void intel_gt_init(struct drm_device *dev);
-extern void intel_gt_reset(struct drm_device *dev);
+extern void intel_gt_sanitize(struct drm_device *dev);
 
 void i915_error_state_free(struct kref *error_ref);
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 828c426b4b92..6a347f54d39f 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5476,7 +5476,7 @@ static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
 	gen6_gt_check_fifodbg(dev_priv);
 }
 
-void intel_gt_reset(struct drm_device *dev)
+void intel_gt_sanitize(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -5489,7 +5489,8 @@ void intel_gt_reset(struct drm_device *dev)
 	}
 
 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
-	intel_disable_gt_powersave(dev);
+	if (INTEL_INFO(dev)->gen >= 6)
+		intel_disable_gt_powersave(dev);
 }
 
 void intel_gt_init(struct drm_device *dev)

From c5f167d31530b9b1aa7beb469aeca10f869eff5a Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 3 Jul 2013 09:44:50 +0100
Subject: [PATCH 172/320] pinctrl: am33xx dt binding: correct include path

Using #include <include/...> is a bit odd. It happens to work because the DTC
flags include -Iarch/FOO/boot/dts as well as arch/FOO/boot/dts/include and
arch/FOO/boot/dts/include/dt-bindings is a symlink to include/dt-bindings.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Grant Likely <grant.likely@secretlab.ca>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/dt-bindings/pinctrl/am33xx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/dt-bindings/pinctrl/am33xx.h b/include/dt-bindings/pinctrl/am33xx.h
index 469e0325e6f4..2fbc804e1a45 100644
--- a/include/dt-bindings/pinctrl/am33xx.h
+++ b/include/dt-bindings/pinctrl/am33xx.h
@@ -5,7 +5,7 @@
 #ifndef _DT_BINDINGS_PINCTRL_AM33XX_H
 #define _DT_BINDINGS_PINCTRL_AM33XX_H
 
-#include <include/dt-bindings/pinctrl/omap.h>
+#include <dt-bindings/pinctrl/omap.h>
 
 /* am33xx specific mux bit defines */
 #undef PULL_ENA

From 42a708c9328b1b23ed5f415401975e37a64f2747 Mon Sep 17 00:00:00 2001
From: Qipan Li <Qipan.Li@csr.com>
Date: Thu, 4 Jul 2013 15:55:25 +0800
Subject: [PATCH 173/320] pinctrl: sirf: fix the pin number and mux bit for
 usp0

we missed a pin and related mux bit for usp pin group, this
patch fixes it.

Signed-off-by: Qipan Li <Qipan.Li@csr.com>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/sirf/pinctrl-atlas6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/sirf/pinctrl-atlas6.c b/drivers/pinctrl/sirf/pinctrl-atlas6.c
index 1fa39a444171..c641be9c5b80 100644
--- a/drivers/pinctrl/sirf/pinctrl-atlas6.c
+++ b/drivers/pinctrl/sirf/pinctrl-atlas6.c
@@ -496,7 +496,7 @@ static const unsigned sdmmc5_pins[] = { 24, 25, 26 };
 static const struct sirfsoc_muxmask usp0_muxmask[] = {
 	{
 		.group = 1,
-		.mask = BIT(19) | BIT(20) | BIT(21) | BIT(22),
+		.mask = BIT(19) | BIT(20) | BIT(21) | BIT(22) | BIT(23),
 	},
 };
 
@@ -507,7 +507,7 @@ static const struct sirfsoc_padmux usp0_padmux = {
 	.funcval = 0,
 };
 
-static const unsigned usp0_pins[] = { 51, 52, 53, 54 };
+static const unsigned usp0_pins[] = { 51, 52, 53, 54, 55 };
 
 static const struct sirfsoc_muxmask usp1_muxmask[] = {
 	{

From d58e9a02e1435744d80a99742d1135a6921318eb Mon Sep 17 00:00:00 2001
From: Qipan Li <Qipan.Li@csr.com>
Date: Thu, 4 Jul 2013 15:55:26 +0800
Subject: [PATCH 174/320] pinctrl: sirf: add usp0_uart_nostreamctrl pin group
 for usp-uart without flowctrl

this patch adds the lost pin group which supports to let USP0 to simulate
a UART without hardware flow control.

Signed-off-by: Qipan Li <Qipan.Li@csr.com>
Signed-off-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/boot/dts/atlas6.dtsi         |  6 ++++++
 drivers/pinctrl/sirf/pinctrl-atlas6.c | 20 ++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi
index 9866cd736dee..537041253db7 100644
--- a/arch/arm/boot/dts/atlas6.dtsi
+++ b/arch/arm/boot/dts/atlas6.dtsi
@@ -485,6 +485,12 @@ usp0 {
                                                 sirf,function = "usp0";
                                         };
                                 };
+				usp0_uart_nostreamctrl_pins_a: usp0@1 {
+                                        usp0 {
+                                                sirf,pins = "usp0_uart_nostreamctrl_grp";
+                                                sirf,function = "usp0_uart_nostreamctrl";
+                                        };
+                                };
                                 usp1_pins_a: usp1@0 {
                                         usp1 {
                                                 sirf,pins = "usp1grp";
diff --git a/drivers/pinctrl/sirf/pinctrl-atlas6.c b/drivers/pinctrl/sirf/pinctrl-atlas6.c
index c641be9c5b80..867c9681763c 100644
--- a/drivers/pinctrl/sirf/pinctrl-atlas6.c
+++ b/drivers/pinctrl/sirf/pinctrl-atlas6.c
@@ -509,6 +509,19 @@ static const struct sirfsoc_padmux usp0_padmux = {
 
 static const unsigned usp0_pins[] = { 51, 52, 53, 54, 55 };
 
+static const struct sirfsoc_muxmask usp0_uart_nostreamctrl_muxmask[] = {
+	{
+		.group = 1,
+		.mask = BIT(20) | BIT(21),
+	},
+};
+
+static const struct sirfsoc_padmux usp0_uart_nostreamctrl_padmux = {
+	.muxmask_counts = ARRAY_SIZE(usp0_uart_nostreamctrl_muxmask),
+	.muxmask = usp0_uart_nostreamctrl_muxmask,
+};
+
+static const unsigned usp0_uart_nostreamctrl_pins[] = { 52, 53 };
 static const struct sirfsoc_muxmask usp1_muxmask[] = {
 	{
 		.group = 0,
@@ -822,6 +835,8 @@ static const struct sirfsoc_pin_group sirfsoc_pin_groups[] = {
 	SIRFSOC_PIN_GROUP("uart2grp", uart2_pins),
 	SIRFSOC_PIN_GROUP("uart2_nostreamctrlgrp", uart2_nostreamctrl_pins),
 	SIRFSOC_PIN_GROUP("usp0grp", usp0_pins),
+	SIRFSOC_PIN_GROUP("usp0_uart_nostreamctrl_grp",
+					usp0_uart_nostreamctrl_pins),
 	SIRFSOC_PIN_GROUP("usp1grp", usp1_pins),
 	SIRFSOC_PIN_GROUP("i2c0grp", i2c0_pins),
 	SIRFSOC_PIN_GROUP("i2c1grp", i2c1_pins),
@@ -862,6 +877,8 @@ static const char * const uart0grp[] = { "uart0grp" };
 static const char * const uart1grp[] = { "uart1grp" };
 static const char * const uart2grp[] = { "uart2grp" };
 static const char * const uart2_nostreamctrlgrp[] = { "uart2_nostreamctrlgrp" };
+static const char * const usp0_uart_nostreamctrl_grp[] = {
+					"usp0_uart_nostreamctrl_grp" };
 static const char * const usp0grp[] = { "usp0grp" };
 static const char * const usp1grp[] = { "usp1grp" };
 static const char * const i2c0grp[] = { "i2c0grp" };
@@ -904,6 +921,9 @@ static const struct sirfsoc_pmx_func sirfsoc_pmx_functions[] = {
 	SIRFSOC_PMX_FUNCTION("uart2", uart2grp, uart2_padmux),
 	SIRFSOC_PMX_FUNCTION("uart2_nostreamctrl", uart2_nostreamctrlgrp, uart2_nostreamctrl_padmux),
 	SIRFSOC_PMX_FUNCTION("usp0", usp0grp, usp0_padmux),
+	SIRFSOC_PMX_FUNCTION("usp0_uart_nostreamctrl",
+						usp0_uart_nostreamctrl_grp,
+						usp0_uart_nostreamctrl_padmux),
 	SIRFSOC_PMX_FUNCTION("usp1", usp1grp, usp1_padmux),
 	SIRFSOC_PMX_FUNCTION("i2c0", i2c0grp, i2c0_padmux),
 	SIRFSOC_PMX_FUNCTION("i2c1", i2c1grp, i2c1_padmux),

From c8078de853d51e9b86a2f025ee281b5a81c80655 Mon Sep 17 00:00:00 2001
From: Barry Song <Baohua.Song@csr.com>
Date: Thu, 4 Jul 2013 15:55:27 +0800
Subject: [PATCH 175/320] arm/dts: sirf: fix the pingroup name mismatch between
 drivers and dts

in drivers/pinctrl/sirf, pingroup name is cko0 and cko1, but in dts, they
are cko0 and cko1_rst. this patch fixes the error in dts.

Signed-off-by: Barry Song <Baohua.Song@csr.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/boot/dts/atlas6.dtsi | 16 ++++++++--------
 arch/arm/boot/dts/prima2.dtsi | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi
index 537041253db7..a0f2721ea583 100644
--- a/arch/arm/boot/dts/atlas6.dtsi
+++ b/arch/arm/boot/dts/atlas6.dtsi
@@ -521,16 +521,16 @@ pulse_count {
                                                 sirf,function = "pulse_count";
                                         };
                                 };
-                                cko0_rst_pins_a: cko0_rst@0 {
-                                        cko0_rst {
-                                                sirf,pins = "cko0_rstgrp";
-                                                sirf,function = "cko0_rst";
+                                cko0_pins_a: cko0@0 {
+                                        cko0 {
+                                                sirf,pins = "cko0grp";
+                                                sirf,function = "cko0";
                                         };
                                 };
-                                cko1_rst_pins_a: cko1_rst@0 {
-                                        cko1_rst {
-                                                sirf,pins = "cko1_rstgrp";
-                                                sirf,function = "cko1_rst";
+                                cko1_pins_a: cko1@0 {
+                                        cko1 {
+                                                sirf,pins = "cko1grp";
+                                                sirf,function = "cko1";
                                         };
                                 };
 			};
diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi
index 05e9489cf95c..bbeb623fc2c6 100644
--- a/arch/arm/boot/dts/prima2.dtsi
+++ b/arch/arm/boot/dts/prima2.dtsi
@@ -515,16 +515,16 @@ pulse_count {
                                                 sirf,function = "pulse_count";
                                         };
                                 };
-                                cko0_rst_pins_a: cko0_rst@0 {
-                                        cko0_rst {
-                                                sirf,pins = "cko0_rstgrp";
-                                                sirf,function = "cko0_rst";
+                                cko0_pins_a: cko0@0 {
+                                        cko0 {
+                                                sirf,pins = "cko0grp";
+                                                sirf,function = "cko0";
                                         };
                                 };
-                                cko1_rst_pins_a: cko1_rst@0 {
-                                        cko1_rst {
-                                                sirf,pins = "cko1_rstgrp";
-                                                sirf,function = "cko1_rst";
+                                cko1_pins_a: cko1@0 {
+                                        cko1 {
+                                                sirf,pins = "cko1grp";
+                                                sirf,function = "cko1";
                                         };
                                 };
 			};

From 25f397a429dfa43f22c278d0119a60a343aa568f Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 19 Jul 2013 18:57:11 +0200
Subject: [PATCH 176/320] drm/crtc-helper: explicit DPMS on after modeset

Atm the crtc helper implementation of set_config has really
inconsisten semantics: If just an fb update is good enough, dpms state
will be left as-is, but if we do a full modeset we force everything to
dpms on.

This change has already been applied to the i915 modeset code in

commit e3de42b68478a8c95dd27520e9adead2af9477a5
Author: Imre Deak <imre.deak@intel.com>
Date:   Fri May 3 19:44:07 2013 +0200

    drm/i915: force full modeset if the connector is in DPMS OFF mode

which according to Greg KH seems to aim for a new record in most
Bugzilla: links in a commit message.

The history of this dpms forcing is pretty interesting. This patch
here is an almost-revert of

commit 811aaa55ba21ab37407018cfc01770d6b037d3fb
Author: Keith Packard <keithp@keithp.com>
Date:   Thu Feb 3 16:57:28 2011 -0800

    drm: Only set DPMS ON when actually configuring a mode

which fixed the bug of trying to dpms on disabled outputs, but
introduced the new discrepancy between an fb update only and full
modesets. The actual introduction of this goes back to

commit bf9dc102e284a5aa78c73fc9d72e11d5ccd8669f
Author: Keith Packard <keithp@keithp.com>
Date:   Fri Nov 26 10:45:58 2010 -0800

    drm: Set connector DPMS status to ON in drm_crtc_helper_set_config

And if you'd dig around in the i915 driver code there's even more fun
around forcing dpms on and losing our heads and temper of the
resulting inconsistencies. Especially the DP re-training code had tons
of funny stuff in it.

v2: So v1 totally blew up on resume on my radeon system here. After
much head-scraching I've figured out that the radeon resume functions
resumes the console system _before_ it actually restores all the
modeset state. And resuming the console systems means that fbdev doeas
an immediate ->set_par call.

Now up to this patch that ->set_par did absolutely nothing: All the
old sw state from pre-suspend was still around (since the modeset
reset wasn't done yet), which means that the set_config calls done as
a result of the ->set_par where all treated as no-ops (despite that
the real hw state was obviously something completely different).

Since v1 of this patch just added a bunch of ->dpms calls if the crtc
was enabled, those set_config calls suddenly stopped being no-ops. But
because the hw state wasn't restored the ->dpms callbacks resulted in
decent amounts of hilarity and eventual full hangs.

Since I can't review all kms drivers for such tricky ordering
constraints v2 opts for a different approach and forces a full modeset
if the connector dpms state isnt' DPMS_ON. Since the ->dpms callbacks
implemented by the modeset helpers update the connector->dpms property
we have the same effect of ensuring that the pipe is ultimately turned
on, even if we just end up updating the fb. This is the same approac
we ended up using in the intel driver.

Note that besides i915.ko only all other drivers eventually call
drm_helper_connector_dpms with the exception of vmwgfx, which does not
support dmps at all.

v3: Dave Airlie merged the broken first version of this patch, so
squash in the revert of

commit 372835a8527f85b3eff20a18c2c339e827dfd4e4
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Sat Jun 15 00:13:13 2013 +0200

    drm/crtc-helper: explicit DPMS on after modeset

Also fix up the spelling fail a bit in the commit message while at it.

Cc: Dave Airlie <airlied@redhat.com>
Reviewed-by: Alex Deucher <alexdeucher@gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67043
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc_helper.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 738a4294d820..6a647493ca7f 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -677,6 +677,11 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 					/* don't break so fail path works correct */
 					fail = 1;
 				break;
+
+				if (connector->dpms != DRM_MODE_DPMS_ON) {
+					DRM_DEBUG_KMS("connector dpms not on, full mode switch\n");
+					mode_changed = true;
+				}
 			}
 		}
 
@@ -754,6 +759,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 				ret = -EINVAL;
 				goto fail;
 			}
+			DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
+			for (i = 0; i < set->num_connectors; i++) {
+				DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
+					      drm_get_connector_name(set->connectors[i]));
+				set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
+			}
 		}
 		drm_helper_disable_unused_functions(dev);
 	} else if (fb_changed) {
@@ -771,22 +782,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/*
-	 * crtc set_config helpers implicit set the crtc and all connected
-	 * encoders to DPMS on for a full mode set. But for just an fb update it
-	 * doesn't do that. To not confuse userspace, do an explicit DPMS_ON
-	 * unconditionally. This will also ensure driver internal dpms state is
-	 * consistent again.
-	 */
-	if (set->crtc->enabled) {
-		DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
-		for (i = 0; i < set->num_connectors; i++) {
-			DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
-				      drm_get_connector_name(set->connectors[i]));
-			set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
-		}
-	}
-
 	kfree(save_connectors);
 	kfree(save_encoders);
 	kfree(save_crtcs);

From ace120dcf23b3bbba00d797a898481997381052f Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 16 Jul 2013 14:02:28 -0400
Subject: [PATCH 177/320] Thermal: Fix lockup of cpu_down()

Commit f1a18a105 "Thermal: CPU Package temperature thermal" had code
that did a get_online_cpus(), run a loop and then do a
put_online_cpus(). The problem is that the loop had an error exit that
would skip the put_online_cpus() part.

In the error exit part of the function, it also did a get_online_cpus(),
run a loop and then put_online_cpus(). The only way to get to the error
exit part is with get_online_cpus() already performed. If this error
condition is hit, the system will be prevented from taking CPUs offline.
The process taking the CPU offline will lock up hard.

Removing the get_online_cpus() removes the lockup as the hotplug CPU
refcount is back to zero.

This was bisected with ktest.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/thermal/x86_pkg_temp_thermal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 810143a6aa33..f36950e4134f 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -599,7 +599,6 @@ static int __init pkg_temp_thermal_init(void)
 	return 0;
 
 err_ret:
-	get_online_cpus();
 	for_each_online_cpu(i)
 		put_core_offline(i);
 	put_online_cpus();

From 90625070c4253377025878c4e82feed8b35c7116 Mon Sep 17 00:00:00 2001
From: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Date: Mon, 1 Jul 2013 23:56:25 -0300
Subject: [PATCH 178/320] usb: serial: cp210x: Add USB ID for Netgear Switches
 embedded serial adapter

This adds NetGear Managed Switch M4100 series, M5300 series, M7100 series
USB ID (0846:0110) to the cp210x driver. Without this, the serial
adapter is not recognized in Linux. Description was obtained from
an Netgear Eng.

Signed-off-by: Luiz Angelo Daros de Luca <luizluca@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index d6ef2f8da37d..55852fe13218 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -53,6 +53,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
 	{ USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
 	{ USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
+	{ USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */
 	{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
 	{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
 	{ USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */

From e7a6121f4929c17215f0cdca3726f4bf3e4e9529 Mon Sep 17 00:00:00 2001
From: Ren Bigcren <bigcren.ren@sonymobile.com>
Date: Tue, 2 Jul 2013 13:34:30 +0200
Subject: [PATCH 179/320] USB: storage: Add MicroVault Flash Drive to
 unusual_devs

The device report an error capacity when read_capacity_16().
Using read_capacity_10() can get the correct capacity.

Signed-off-by: Ren Bigcren <bigcren.ren@sonymobile.com>
Cc: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Oskar Andero <oskar.andero@sonymobile.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_devs.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 1799335288bd..c015f2c16729 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -665,6 +665,13 @@ UNUSUAL_DEV(  0x054c, 0x016a, 0x0000, 0x9999,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_FIX_INQUIRY ),
 
+/* Submitted by Ren Bigcren <bigcren.ren@sonymobile.com> */
+UNUSUAL_DEV(  0x054c, 0x02a5, 0x0100, 0x0100,
+		"Sony Corp.",
+		"MicroVault Flash Drive",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_READ_CAPACITY_16 ),
+
 /* floppy reports multiple luns */
 UNUSUAL_DEV(  0x055d, 0x2020, 0x0000, 0x0210,
 		"SAMSUNG",

From 58fc90db8261b571c026bb8bf23aad48a7233118 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B3hann=20B=2E=20Gu=C3=B0mundsson?= <johannbg@gmail.com>
Date: Thu, 4 Jul 2013 21:47:52 +0000
Subject: [PATCH 180/320] USB: misc: Add Manhattan Hi-Speed USB DVI Converter
 to sisusbvga
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jóhann B. Guðmundsson <johannbg@fedoraproject.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/sisusbvga/sisusb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index c21386ec5d35..de98906f786d 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -3247,6 +3247,7 @@ static const struct usb_device_id sisusb_table[] = {
 	{ USB_DEVICE(0x0711, 0x0903) },
 	{ USB_DEVICE(0x0711, 0x0918) },
 	{ USB_DEVICE(0x0711, 0x0920) },
+	{ USB_DEVICE(0x0711, 0x0950) },
 	{ USB_DEVICE(0x182d, 0x021c) },
 	{ USB_DEVICE(0x182d, 0x0269) },
 	{ }

From c38e83b6cc2adf80e3f091fd92cfbeacc9748347 Mon Sep 17 00:00:00 2001
From: Daniil Bolsun <dan.bolsun@gmail.com>
Date: Fri, 19 Jul 2013 10:21:23 +0300
Subject: [PATCH 181/320] USB: option: append Petatel NP10T device to GSM
 modems list

This patch was tested on 3.10.1 kernel.

Same models of Petatel NP10T modems have different device IDs.
Unfortunately they have no additional revision information on a board
which may treat them as different devices. Currently I've seen only
two NP10T devices with various IDs. Possibly Petatel NP10T list will
be appended upon devices with new IDs will appear.

Signed-off-by: Daniil Bolsun <dan.bolsun@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 5dd857de05b0..418746b557ad 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -444,7 +444,8 @@ static void option_instat_callback(struct urb *urb);
 
 /* Hyundai Petatel Inc. products */
 #define PETATEL_VENDOR_ID			0x1ff4
-#define PETATEL_PRODUCT_NP10T			0x600e
+#define PETATEL_PRODUCT_NP10T_600A		0x600a
+#define PETATEL_PRODUCT_NP10T_600E		0x600e
 
 /* TP-LINK Incorporated products */
 #define TPLINK_VENDOR_ID			0x2357
@@ -1329,7 +1330,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) },
 	{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
-	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) },
+	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) },
+	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) },
 	{ USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) },

From b579fa52f6be0b4157ca9cc5e94d44a2c89a7e95 Mon Sep 17 00:00:00 2001
From: Barry Grussling <barry@grussling.com>
Date: Fri, 19 Jul 2013 14:46:12 -0700
Subject: [PATCH 182/320] usb: cp210x support SEL C662 Vendor/Device

This patch adds support for the Schweitzer Engineering Laboratories
C662 USB cable based off the CP210x driver.

Signed-off-by: Barry Grussling <barry@grussling.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 55852fe13218..f68024ba726a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -149,6 +149,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
 	{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
 	{ USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */
 	{ USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */

From 7681156982026ebf7eafd7301eb0374d7648d068 Mon Sep 17 00:00:00 2001
From: Sami Rahman <sami.rahman@mmbresearch.com>
Date: Mon, 8 Jul 2013 14:28:55 -0400
Subject: [PATCH 183/320] USB: cp210x: add MMB and PI ZigBee USB Device Support

Added support for MMB Networks and Planet Innovation Ingeni ZigBee USB
devices using customized Silicon Labs' CP210x.c USB to UART bridge
drivers with PIDs: 88A4, 88A5.

Signed-off-by: Sami Rahman <sami.rahman@mmbresearch.com>
Tested-by: Sami Rahman <sami.rahman@mmbresearch.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index f68024ba726a..0eae4ba3760e 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -119,6 +119,8 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
 	{ USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */
 	{ USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */
+	{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
+	{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */

From 47a64a13d54f6c669b00542848d5550be3d3310e Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Tue, 9 Jul 2013 17:03:50 +0300
Subject: [PATCH 184/320] USB: EHCI: Fix resume signalling on remote wakeup

Set the ehci->resuming flag for the port we receive a remote
wakeup on so that resume signalling can be completed.

Without this, the root hub timer will not fire again to check
if the resume was completed and there will be a never-ending wait on
on the port.

This effect is only observed if the HUB IRQ IN does not come after we
have initiated the port resume.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-hub.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 2b702772d04d..6dce37555c4f 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -874,6 +874,7 @@ static int ehci_hub_control (
 				ehci->reset_done[wIndex] = jiffies
 						+ msecs_to_jiffies(20);
 				usb_hcd_start_port_resume(&hcd->self, wIndex);
+				set_bit(wIndex, &ehci->resuming_ports);
 				/* check the port again */
 				mod_timer(&ehci_to_hcd(ehci)->rh_timer,
 						ehci->reset_done[wIndex]);

From a0062d4e3a0a9cf3ba6a91bca7e66ec89defc228 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Thu, 11 Jul 2013 11:34:12 +0100
Subject: [PATCH 185/320] MAINTAINERS: Remove Grant Likely

Unfortunately, I'm no longer to spend the time needed on maintainership.
It is time for me to step aside and pass maintainership to other
engineers. I'm not disappearing from Linux development, but it would be
irresponsible for me to hold onto a job that I am unable to do.

v2: Leave my name on devicetree core code maintainership. Rob NAKed that
    part of the patch. :)

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Cc: Linux Walleij <linus.walleij@linaro.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Rob Herring <rob.herring@calxeda.com>
---
 MAINTAINERS | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bf61e04291ab..844d7e81b018 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3622,11 +3622,9 @@ F:	drivers/isdn/gigaset/
 F:	include/uapi/linux/gigaset_dev.h
 
 GPIO SUBSYSTEM
-M:	Grant Likely <grant.likely@linaro.org>
 M:	Linus Walleij <linus.walleij@linaro.org>
 S:	Maintained
 L:	linux-gpio@vger.kernel.org
-T:	git git://git.secretlab.ca/git/linux-2.6.git
 F:	Documentation/gpio.txt
 F:	drivers/gpio/
 F:	include/linux/gpio*
@@ -4472,8 +4470,6 @@ F:	drivers/irqchip/
 
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
-M:	Grant Likely <grant.likely@linaro.org>
-T:	git git://git.secretlab.ca/git/linux-2.6.git irqdomain/next
 S:	Maintained
 F:	Documentation/IRQ-domain.txt
 F:	include/linux/irqdomain.h
@@ -7746,7 +7742,6 @@ F:	drivers/clk/spear/
 
 SPI SUBSYSTEM
 M:	Mark Brown <broonie@kernel.org>
-M:	Grant Likely <grant.likely@linaro.org>
 L:	linux-spi@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
 Q:	http://patchwork.kernel.org/project/spi-devel-general/list/

From d0fb18c5c0caf2ed0eecf3d0145450ae708ed75a Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Fri, 19 Jul 2013 20:10:12 -0700
Subject: [PATCH 186/320] MAINTAINERS: Change device tree mailing list

New list on vger.kernel.org. The old list was a pain to moderate.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 844d7e81b018..bc286e44b575 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5882,7 +5882,7 @@ OMAP DEVICE TREE SUPPORT
 M:	Benoît Cousson <b-cousson@ti.com>
 M:	Tony Lindgren <tony@atomide.com>
 L:	linux-omap@vger.kernel.org
-L:	devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
+L:	devicetree@vger.kernel.org
 S:	Maintained
 F:	arch/arm/boot/dts/*omap*
 F:	arch/arm/boot/dts/*am3*
@@ -6046,7 +6046,7 @@ F:	drivers/i2c/busses/i2c-ocores.c
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Grant Likely <grant.likely@linaro.org>
 M:	Rob Herring <rob.herring@calxeda.com>
-L:	devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
+L:	devicetree@vger.kernel.org
 W:	http://fdt.secretlab.ca
 T:	git git://git.secretlab.ca/git/linux-2.6.git
 S:	Maintained

From f882820556af33b5aee5b9f0ba459620a9ab1c22 Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Fri, 19 Jul 2013 18:57:39 -0700
Subject: [PATCH 187/320] MAINTAINERS: Refactor device tree maintainership

Device tree bindings require a lot more attention than they used to.
We've got a group of volunteers willing to take over maintaining
bindings. This patch adds them to the MAINTAINERS file.

This group still needs to work out a process for maintainership and how
they are going to work together. I recommend that they set up a shared
tree on git.kernel.org that they each have commit access to similar to
the tip tree or the arm-soc tree, but it is up to them.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
Cc: Pawel Moll <pawel.moll@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Stephen Warren <swarren@wwwdotorg.org>
Cc: Ian Campbell <ian.campbell@citrix.com>
Cc: Rob Herring <rob.herring@calxeda.com>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 MAINTAINERS | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bc286e44b575..02064ac6b75e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6050,13 +6050,24 @@ L:	devicetree@vger.kernel.org
 W:	http://fdt.secretlab.ca
 T:	git git://git.secretlab.ca/git/linux-2.6.git
 S:	Maintained
-F:	Documentation/devicetree
-F:	drivers/of
+F:	drivers/of/
 F:	include/linux/of*.h
-F:	scripts/dtc
+F:	scripts/dtc/
 K:	of_get_property
 K:	of_match_table
 
+OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
+M:	Rob Herring <rob.herring@calxeda.com>
+M:	Pawel Moll <pawel.moll@arm.com>
+M:	Mark Rutland <mark.rutland@arm.com>
+M:	Stephen Warren <swarren@wwwdotorg.org>
+M:	Ian Campbell <ian.campbell@citrix.com>
+L:	devicetree@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/
+F:	arch/*/boot/dts/
+F:	include/dt-bindings/
+
 OPENRISC ARCHITECTURE
 M:	Jonas Bonn <jonas@southpole.se>
 W:	http://openrisc.net

From cdeb89943bfc301aa5711cbb5edc2c86e6630dab Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Mon, 22 Jul 2013 01:38:32 +0100
Subject: [PATCH 188/320] MAINTAINERS: Fix incorrect status tag

When I removed myself from the xilinx drivers I used the wrong tag. Fix
it.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 02064ac6b75e..ece549058c3f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4986,7 +4986,7 @@ F:	arch/powerpc/platforms/44x/
 
 LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
 L:	linuxppc-dev@lists.ozlabs.org
-S:	Unmaintained
+S:	Orphan
 F:	arch/powerpc/*/*virtex*
 F:	arch/powerpc/*/*/*virtex*
 
@@ -9294,7 +9294,7 @@ S:	Maintained
 F:	drivers/net/ethernet/xilinx/xilinx_axienet*
 
 XILINX SYSTEMACE DRIVER
-S:	Unmaintained
+S:	Orphan
 F:	drivers/block/xsysace.c
 
 XILINX UARTLITE SERIAL DRIVER

From 7d4ddbaa1466f0c06bbca8ad702f4f4b313b8f33 Mon Sep 17 00:00:00 2001
From: Gabor Juhos <juhosg@openwrt.org>
Date: Wed, 1 May 2013 10:53:54 +0200
Subject: [PATCH 189/320] of: add vendor prefix for Qualcomm Atheros, Inc.

This prefix will be used in various compatible properties
for the devices from Qualcomm Atheros, Inc.

Cc: Luis R. Rodriguez <rodrigue@qca.qualcomm.com>
Signed-off-by: Gabor Juhos <juhosg@openwrt.org>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index d5a79caec147..1e4aa2e2b617 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -43,6 +43,7 @@ nxp	NXP Semiconductors
 onnn	ON Semiconductor Corp.
 picochip	Picochip Ltd
 powervr	PowerVR (deprecated, use img)
+qca	Qualcomm Atheros, Inc.
 qcom	Qualcomm, Inc.
 ralink	Mediatek/Ralink Technology Corp.
 ramtron	Ramtron International

From cfd1ee3e37360ac6ec8c737e63e70c5d583f1e51 Mon Sep 17 00:00:00 2001
From: Zhangfei Gao <zhangfei.gao@linaro.org>
Date: Mon, 17 Jun 2013 13:04:59 +0800
Subject: [PATCH 190/320] of: add vendor prefixes for hisilicon

Signed-off-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 1e4aa2e2b617..366ce9b87240 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -26,6 +26,7 @@ est	ESTeem Wireless Modems
 fsl	Freescale Semiconductor
 GEFanuc	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 gef	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+hisilicon	Hisilicon Limited.
 hp	Hewlett Packard
 ibm	International Business Machines (IBM)
 idt	Integrated Device Technologies, Inc.

From c0cdfaa0a5e7a346ac2f661f63f543cdc5f7cbbe Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 23 Jun 2013 15:50:07 +0800
Subject: [PATCH 191/320] of/irq: Avoid calling list_first_entry() for empty
 list

list_first_entry() expects the list is not empty, we need to check if list is
empty before calling list_first_entry(). Thus use list_first_entry_or_null()
instead of list_first_entry().

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/irq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index a3c1c5aae6a9..5c645c7227b8 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -482,8 +482,9 @@ void __init of_irq_init(const struct of_device_id *matches)
 		}
 
 		/* Get the next pending parent that might have children */
-		desc = list_first_entry(&intc_parent_list, typeof(*desc), list);
-		if (list_empty(&intc_parent_list) || !desc) {
+		desc = list_first_entry_or_null(&intc_parent_list,
+						typeof(*desc), list);
+		if (!desc) {
 			pr_err("of_irq_init: children remain, but no parents\n");
 			break;
 		}

From cf9e2368655d86cd800e4d9fe65a407b39d29373 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 18 Jul 2013 12:24:10 +0200
Subject: [PATCH 192/320] of/irq: init struct resource to 0 in
 of_irq_to_resource()

It almost does not matter because most users use only the ->start member
of the struct. However if this struct is passed to a platform device
which is then added via platform_device_add() then the ->parent member is
also used.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/irq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 5c645c7227b8..1264923ade0f 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -345,6 +345,7 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
 	if (r && irq) {
 		const char *name = NULL;
 
+		memset(r, 0, sizeof(*r));
 		/*
 		 * Get optional "interrupts-names" property to add a name
 		 * to the resource.

From 74b843512906e60be24ed6aafa2fa2876f294fab Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Tue, 9 Jul 2013 16:27:24 +0200
Subject: [PATCH 193/320] pinctrl: sh-pfc: fix SDHI0 VccQ regulator on sh73a0
 with DT

The PFC pinctrl driver on sh73a0 is also regiatering a VccQ regulator for
SDHI0. However, its consumers list only included the platform-data based
SDHI device name. When booted with DT SDHI0 couldn't enable VccQ and
therefore was unusable. Fix this by adding a consumer with DT-based name.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski+renesas@gmail.com>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/sh-pfc/pfc-sh73a0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
index 7956df58d751..31f7d0e04aaa 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
@@ -3785,6 +3785,7 @@ static const struct regulator_desc sh73a0_vccq_mc0_desc = {
 
 static struct regulator_consumer_supply sh73a0_vccq_mc0_consumers[] = {
 	REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.0"),
+	REGULATOR_SUPPLY("vqmmc", "ee100000.sdhi"),
 };
 
 static const struct regulator_init_data sh73a0_vccq_mc0_init_data = {

From 745a39a9e6b2901cd341af01fc9bac78a9649e23 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 18 Jul 2013 09:24:37 -0400
Subject: [PATCH 194/320] drm/radeon: wait for 3D idle before using CP DMA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure the 3D engine is idle before using CP DMA for
bo copies.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Marek Olšák <maraeo@gmail.com>
---
 drivers/gpu/drm/radeon/r600.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 393880a09412..10f712e37003 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3166,7 +3166,7 @@ int r600_copy_cpdma(struct radeon_device *rdev,
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21);
+	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
@@ -3181,6 +3181,9 @@ int r600_copy_cpdma(struct radeon_device *rdev,
 		radeon_semaphore_free(rdev, &sem, NULL);
 	}
 
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, WAIT_3D_IDLE_bit);
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_bytes = size_in_bytes;
 		if (cur_size_in_bytes > 0x1fffff)

From 3e3e53f86bee87bb14474213c879595605e35112 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 18 Jul 2013 13:11:56 -0400
Subject: [PATCH 195/320] drm/radeon/vm: only align the pt base to 32k

fixes:
https://bugs.freedesktop.org/show_bug.cgi?id=67016

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_gart.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index d9d31a383276..6a51d943ccf4 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -466,7 +466,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 		size += rdev->vm_manager.max_pfn * 8;
 		size *= 2;
 		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-					      RADEON_VM_PTB_ALIGN(size),
+					      RADEON_GPU_PAGE_ALIGN(size),
 					      RADEON_VM_PTB_ALIGN_SIZE,
 					      RADEON_GEM_DOMAIN_VRAM);
 		if (r) {
@@ -621,7 +621,7 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
 	}
 
 retry:
-	pd_size = RADEON_VM_PTB_ALIGN(radeon_vm_directory_size(rdev));
+	pd_size = radeon_vm_directory_size(rdev);
 	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
 			     &vm->page_directory, pd_size,
 			     RADEON_VM_PTB_ALIGN_SIZE, false);
@@ -953,8 +953,8 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
 retry:
 		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
 				     &vm->page_tables[pt_idx],
-				     RADEON_VM_PTB_ALIGN(RADEON_VM_PTE_COUNT * 8),
-				     RADEON_VM_PTB_ALIGN_SIZE, false);
+				     RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false);
 
 		if (r == -ENOMEM) {
 			r = radeon_vm_evict(rdev, vm);

From 34be8c9af7b8728465963740fc11136ae90dfc36 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 18 Jul 2013 11:13:53 -0400
Subject: [PATCH 196/320] drm/radeon: fix endian issues with DP handling (v3)

The atom interpreter expects data in LE format, so
swap the message buffer as apprioriate.

v2: properly handle non-dw aligned byte counts.
v3: properly handle remainder

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Dong He <hedonghust@gmail.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/atombios_dp.c | 43 +++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 064023bed480..32501f6ec991 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -44,6 +44,41 @@ static char *pre_emph_names[] = {
 };
 
 /***** radeon AUX functions *****/
+
+/* Atom needs data in little endian format
+ * so swap as appropriate when copying data to
+ * or from atom. Note that atom operates on
+ * dw units.
+ */
+static void radeon_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+{
+#ifdef __BIG_ENDIAN
+	u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
+	u32 *dst32, *src32;
+	int i;
+
+	memcpy(src_tmp, src, num_bytes);
+	src32 = (u32 *)src_tmp;
+	dst32 = (u32 *)dst_tmp;
+	if (to_le) {
+		for (i = 0; i < ((num_bytes + 3) / 4); i++)
+			dst32[i] = cpu_to_le32(src32[i]);
+		memcpy(dst, dst_tmp, num_bytes);
+	} else {
+		u8 dws = num_bytes & ~3;
+		for (i = 0; i < ((num_bytes + 3) / 4); i++)
+			dst32[i] = le32_to_cpu(src32[i]);
+		memcpy(dst, dst_tmp, dws);
+		if (num_bytes % 4) {
+			for (i = 0; i < (num_bytes % 4); i++)
+				dst[dws+i] = dst_tmp[dws+i];
+		}
+	}
+#else
+	memcpy(dst, src, num_bytes);
+#endif
+}
+
 union aux_channel_transaction {
 	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
 	PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
@@ -65,10 +100,10 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan,
 
 	base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1);
 
-	memcpy(base, send, send_bytes);
+	radeon_copy_swap(base, send, send_bytes, true);
 
-	args.v1.lpAuxRequest = 0 + 4;
-	args.v1.lpDataOut = 16 + 4;
+	args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4));
+	args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4));
 	args.v1.ucDataOutLen = 0;
 	args.v1.ucChannelID = chan->rec.i2c_id;
 	args.v1.ucDelay = delay / 10;
@@ -102,7 +137,7 @@ static int radeon_process_aux_ch(struct radeon_i2c_chan *chan,
 		recv_bytes = recv_size;
 
 	if (recv && recv_size)
-		memcpy(recv, base + 16, recv_bytes);
+		radeon_copy_swap(recv, base + 16, recv_bytes, false);
 
 	return recv_bytes;
 }

From f7929f34fa0e0bb6736a2484fdc07d77a1653081 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Fri, 19 Jul 2013 21:08:48 +0200
Subject: [PATCH 197/320] drm/radeon: Another card with wrong primary dac adj

Hello,
got another card with "too bright" problem:
Sapphire Radeon VE 7000 DDR (VGA+S-Video)

lspci -vnn:
01:00.0 VGA compatible controller [0300]: Advanced Micro Devices [AMD] nee ATI RV100 QY [Radeon 7000/VE] [1002:5159] (prog-if 00 [VGA controller])
        Subsystem: PC Partner Limited Sapphire Radeon VE 7000 DDR [174b:7c28]

The patch below fixes the problem for this card.
But I don't like the blacklist, couldn't some heuristic be used instead?
The interesting thing is that the manufacturer is the same as the other card
needing the same quirk. I wonder how many different types are broken this way.

The "wrong" ps2_pdac_adj value that comes from BIOS on this card is 0x300.

====================
drm/radeon: Add primary dac adj quirk for Sapphire Radeon VE 7000 DDR

Values from BIOS are wrong, causing too bright colors.
Use default values instead.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_combios.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 78edadc9e86b..8528b81cd64f 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -971,10 +971,14 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct
 	}
 
 	/* quirks */
-	/* Radeon 9100 (R200) */
-	if ((dev->pdev->device == 0x514D) &&
+	/* Radeon 7000 (RV100) */
+	if (((dev->pdev->device == 0x5159) &&
 	    (dev->pdev->subsystem_vendor == 0x174B) &&
-	    (dev->pdev->subsystem_device == 0x7149)) {
+	    (dev->pdev->subsystem_device == 0x7c28)) ||
+	/* Radeon 9100 (R200) */
+	   ((dev->pdev->device == 0x514D) &&
+	    (dev->pdev->subsystem_vendor == 0x174B) &&
+	    (dev->pdev->subsystem_device == 0x7149))) {
 		/* vbios value is bad, use the default */
 		found = 0;
 	}

From 03ed8cf9b28d886c64c7e705c7bb1a365fd8fb95 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 19 Jul 2013 17:44:43 -0400
Subject: [PATCH 198/320] drm/radeon: improve dac adjust heuristics for legacy
 pdac

Hopefully avoid more quirks in the future due to bogus
vbios dac data.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_combios.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 8528b81cd64f..485f82c9929d 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -965,8 +965,10 @@ struct radeon_encoder_primary_dac *radeon_combios_get_primary_dac_info(struct
 			dac = RBIOS8(dac_info + 0x3) & 0xf;
 			p_dac->ps2_pdac_adj = (bg << 8) | (dac);
 		}
-		/* if the values are all zeros, use the table */
-		if (p_dac->ps2_pdac_adj)
+		/* if the values are zeros, use the table */
+		if ((dac == 0) || (bg == 0))
+			found = 0;
+		else
 			found = 1;
 	}
 

From cef1d00cd56f600121ad121875655ad410a001b8 Mon Sep 17 00:00:00 2001
From: Mark Kettenis <kettenis@openbsd.org>
Date: Sun, 21 Jul 2013 16:44:09 -0400
Subject: [PATCH 199/320] drm/radeon: fix combios tables on older cards

Noticed that my old Radeon 7500 hung after printing

   drm: GPU not posted. posting now...

when it wasn't selected as the primary card the BIOS.  Some digging
revealed that it was hanging in combios_parse_mmio_table() while
parsing the ASIC INIT 3 table.  Looking at the BIOS ROM for the card,
it becomes obvious that there is no ASIC INIT 3 table in the BIOS.
The code is just processing random garbage.  No surprise it hangs!

Why do I say that there is no ASIC INIT 3 table is the BIOS?  This
table is found through the MISC INFO table.  The MISC INFO table can
be found at offset 0x5e in the COMBIOS header.  But the header is
smaller than that.  The COMBIOS header starts at offset 0x126.  The
standard PCI Data Structure (the bit that starts with 'PCIR') lives at
offset 0x180.  That means that the COMBIOS header can not be larger
than 0x5a bytes and therefore cannot contain a MISC INFO table.

I looked at a dozen or so BIOS images, some my own, some downloaded from:

    <http://www.techpowerup.com/vgabios/index.php?manufacturer=ATI&page=1>

It is fairly obvious that the size of the COMBIOS header can be found
at offset 0x6 of the header.  Not sure if it is a 16-bit number or
just an 8-bit number, but that doesn't really matter since the tables
seems to be always smaller than 256 bytes.

So I think combios_get_table_offset() should check if the requested
table is present.  This can be done by checking the offset against the
size of the header.  See the diff below.  The diff is against the WIP
OpenBSD codebase that roughly corresponds to Linux 3.8.13 at this
point.  But I don't think this bit of the code changed much since
then.

For what it is worth:

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_combios.c | 145 +++++++-----------------
 1 file changed, 41 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 485f82c9929d..68ce36056019 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -147,7 +147,7 @@ static uint16_t combios_get_table_offset(struct drm_device *dev,
 					 enum radeon_combios_table_offset table)
 {
 	struct radeon_device *rdev = dev->dev_private;
-	int rev;
+	int rev, size;
 	uint16_t offset = 0, check_offset;
 
 	if (!rdev->bios)
@@ -156,174 +156,106 @@ static uint16_t combios_get_table_offset(struct drm_device *dev,
 	switch (table) {
 		/* absolute offset tables */
 	case COMBIOS_ASIC_INIT_1_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0xc);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0xc;
 		break;
 	case COMBIOS_BIOS_SUPPORT_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x14);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x14;
 		break;
 	case COMBIOS_DAC_PROGRAMMING_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x2a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x2a;
 		break;
 	case COMBIOS_MAX_COLOR_DEPTH_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x2c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x2c;
 		break;
 	case COMBIOS_CRTC_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x2e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x2e;
 		break;
 	case COMBIOS_PLL_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x30);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x30;
 		break;
 	case COMBIOS_TV_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x32);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x32;
 		break;
 	case COMBIOS_DFP_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x34);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x34;
 		break;
 	case COMBIOS_HW_CONFIG_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x36);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x36;
 		break;
 	case COMBIOS_MULTIMEDIA_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x38);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x38;
 		break;
 	case COMBIOS_TV_STD_PATCH_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x3e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x3e;
 		break;
 	case COMBIOS_LCD_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x40);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x40;
 		break;
 	case COMBIOS_MOBILE_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x42);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x42;
 		break;
 	case COMBIOS_PLL_INIT_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x46);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x46;
 		break;
 	case COMBIOS_MEM_CONFIG_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x48);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x48;
 		break;
 	case COMBIOS_SAVE_MASK_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x4a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x4a;
 		break;
 	case COMBIOS_HARDCODED_EDID_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x4c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x4c;
 		break;
 	case COMBIOS_ASIC_INIT_2_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x4e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x4e;
 		break;
 	case COMBIOS_CONNECTOR_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x50);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x50;
 		break;
 	case COMBIOS_DYN_CLK_1_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x52);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x52;
 		break;
 	case COMBIOS_RESERVED_MEM_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x54);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x54;
 		break;
 	case COMBIOS_EXT_TMDS_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x58);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x58;
 		break;
 	case COMBIOS_MEM_CLK_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x5a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x5a;
 		break;
 	case COMBIOS_EXT_DAC_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x5c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x5c;
 		break;
 	case COMBIOS_MISC_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x5e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x5e;
 		break;
 	case COMBIOS_CRT_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x60);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x60;
 		break;
 	case COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x62);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x62;
 		break;
 	case COMBIOS_COMPONENT_VIDEO_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x64);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x64;
 		break;
 	case COMBIOS_FAN_SPEED_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x66);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x66;
 		break;
 	case COMBIOS_OVERDRIVE_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x68);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x68;
 		break;
 	case COMBIOS_OEM_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x6a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x6a;
 		break;
 	case COMBIOS_DYN_CLK_2_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x6c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x6c;
 		break;
 	case COMBIOS_POWER_CONNECTOR_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x6e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x6e;
 		break;
 	case COMBIOS_I2C_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x70);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x70;
 		break;
 		/* relative offset tables */
 	case COMBIOS_ASIC_INIT_3_TABLE:	/* offset from misc info */
@@ -439,11 +371,16 @@ static uint16_t combios_get_table_offset(struct drm_device *dev,
 		}
 		break;
 	default:
+		check_offset = 0;
 		break;
 	}
 
-	return offset;
+	size = RBIOS8(rdev->bios_header_start + 0x6);
+	/* check absolute offset tables */
+	if (table < COMBIOS_ASIC_INIT_3_TABLE && check_offset && check_offset < size)
+		offset = RBIOS16(rdev->bios_header_start + check_offset);
 
+	return offset;
 }
 
 bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)

From 42b9ab7ab798c1fc812bd2032cb506a93080130f Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Fri, 19 Jul 2013 15:56:41 +0900
Subject: [PATCH 200/320] libata: replace strict_strtol() with kstrtol()

The usage of strict_strtol() is not preferred, because
strict_strtol() is obsolete. Thus, kstrtol() should be
used.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-scsi.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 83c08907e042..b1e880a3c3da 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -206,8 +206,10 @@ static ssize_t ata_scsi_park_store(struct device *device,
 	unsigned long flags;
 	int rc;
 
-	rc = strict_strtol(buf, 10, &input);
-	if (rc || input < -2)
+	rc = kstrtol(buf, 10, &input);
+	if (rc)
+		return rc;
+	if (input < -2)
 		return -EINVAL;
 	if (input > ATA_TMOUT_MAX_PARK) {
 		rc = -EOVERFLOW;

From 8cb440ab709fb1f7e3f71a6159ebab9403760472 Mon Sep 17 00:00:00 2001
From: Jean-Francois Moine <moinejf@free.fr>
Date: Mon, 15 Jul 2013 10:14:26 +0200
Subject: [PATCH 201/320] pinctrl: pinctrl-single: fix compile warning when no
 CONFIG_PM

This warning has been introduced by the commit
 0f9bc4bcdf4f pinctrl: single: adopt pinctrl sleep mode management

Signed-off-by: Jean-Francois Moine <moinejf@free.fr>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-single.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 6866548fab31..7323cca440b5 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1483,6 +1483,7 @@ static int pcs_add_gpio_func(struct device_node *node, struct pcs_device *pcs)
 	return ret;
 }
 
+#ifdef CONFIG_PM
 static int pinctrl_single_suspend(struct platform_device *pdev,
 					pm_message_t state)
 {
@@ -1505,6 +1506,7 @@ static int pinctrl_single_resume(struct platform_device *pdev)
 
 	return pinctrl_force_default(pcs->pctl);
 }
+#endif
 
 static int pcs_probe(struct platform_device *pdev)
 {

From bb9696192826a7d9279caf872e95b41bc26c7eff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 22 Jul 2013 16:53:36 -0400
Subject: [PATCH 202/320] libata: make it clear that sata_inic162x is
 experimental

sata_inic162x never reached a state where it's reliable enough for
production use and data corruption is a relatively common occurrence.
Make the driver generate warning about the issues and mark the Kconfig
option as experimental.

If the situation doesn't improve, we'd be better off making it depend
on CONFIG_BROKEN.  Let's wait for several cycles and see if the kernel
message draws any attention.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Martin Braure de Calignon <braurede@free.fr>
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Reported-by: risc4all@yahoo.com
---
 drivers/ata/Kconfig         |  2 +-
 drivers/ata/sata_inic162x.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 80dc988f01e4..5cddaf86cd0a 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -107,7 +107,7 @@ config SATA_FSL
 	  If unsure, say N.
 
 config SATA_INIC162X
-	tristate "Initio 162x SATA support"
+	tristate "Initio 162x SATA support (Very Experimental)"
 	depends on PCI
 	help
 	  This option enables support for Initio 162x Serial ATA.
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index e45131748248..5c54d957370a 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -6,6 +6,18 @@
  *
  * This file is released under GPL v2.
  *
+ * **** WARNING ****
+ *
+ * This driver never worked properly and unfortunately data corruption is
+ * relatively common.  There isn't anyone working on the driver and there's
+ * no support from the vendor.  Do not use this driver in any production
+ * environment.
+ *
+ * http://thread.gmane.org/gmane.linux.debian.devel.bugs.rc/378525/focus=54491
+ * https://bugzilla.kernel.org/show_bug.cgi?id=60565
+ *
+ * *****************
+ *
  * This controller is eccentric and easily locks up if something isn't
  * right.  Documentation is available at initio's website but it only
  * documents registers (not programming model).
@@ -807,6 +819,8 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ata_print_version_once(&pdev->dev, DRV_VERSION);
 
+	dev_alert(&pdev->dev, "inic162x support is broken with common data corruption issues and will be disabled by default, contact linux-ide@vger.kernel.org if in production use\n");
+
 	/* alloc host */
 	host = ata_host_alloc_pinfo(&pdev->dev, ppi, NR_PORTS);
 	hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL);

From 2134ed4d614349b2b4e8d7bb593baa9179b8dd1e Mon Sep 17 00:00:00 2001
From: Dirk Brandewie <dirk.j.brandewie@intel.com>
Date: Thu, 18 Jul 2013 08:48:42 -0700
Subject: [PATCH 203/320] cpufreq / intel_pstate: Change to scale off of max
 P-state

Change to using max P-state instead of max turbo P-state.  This
change resolves two issues.

On a quiet system intel_pstate can fail to respond to a load change.

On CPU SKUs that have a limited number of P-states and no turbo range
intel_pstate fails to select the highest available P-state.

This change is suitable for stable v3.9+

References: https://bugzilla.kernel.org/show_bug.cgi?id=59481
Reported-and-tested-by: Arjan van de Ven <arjan@linux.intel.com>
Reported-and-tested-by: dsmythies@telus.net
Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 3.9+ <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b012d7600e1a..7cde885011ed 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -103,10 +103,10 @@ struct pstate_adjust_policy {
 static struct pstate_adjust_policy default_policy = {
 	.sample_rate_ms = 10,
 	.deadband = 0,
-	.setpoint = 109,
-	.p_gain_pct = 17,
+	.setpoint = 97,
+	.p_gain_pct = 20,
 	.d_gain_pct = 0,
-	.i_gain_pct = 4,
+	.i_gain_pct = 0,
 };
 
 struct perf_limits {
@@ -468,12 +468,12 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
 static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu)
 {
 	int32_t busy_scaled;
-	int32_t core_busy, turbo_pstate, current_pstate;
+	int32_t core_busy, max_pstate, current_pstate;
 
 	core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy);
-	turbo_pstate = int_tofp(cpu->pstate.turbo_pstate);
+	max_pstate = int_tofp(cpu->pstate.max_pstate);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
-	busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate));
+	busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
 	return fp_toint(busy_scaled);
 }

From 334ab91d5818256befbf083755d0bb9e7dbd2325 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Date: Tue, 9 Jul 2013 08:26:24 +0100
Subject: [PATCH 204/320] ARM: dts: STi: Fix pinconf setup for STiH416 serial2

This patch fixes a bug in pinctrl setup of serial2 device, Some of the
pins in the pinctrl node of serial2 do not belong to that
pin-controller. This patch divides them in the pins into there
respective pin controller nodes.

Without this patch serial on StiH416-B2000 Board will not work as it
fails with:

"st-pinctrl pin-controller-rear.3: failed to get pin(99) name
st-pinctrl pin-controller-rear.3: maps: function serial2 group serial2-0
num 4
pinconfig core: failed to register map default (3): no group/pin given"

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/boot/dts/stih416-pinctrl.dtsi | 10 +++++++++-
 arch/arm/boot/dts/stih416.dtsi         |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/stih416-pinctrl.dtsi b/arch/arm/boot/dts/stih416-pinctrl.dtsi
index 957b21a71b4b..0f246c979262 100644
--- a/arch/arm/boot/dts/stih416-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stih416-pinctrl.dtsi
@@ -166,6 +166,15 @@ PIO31: gpio@fee09000 {
 				reg		= <0x9000 0x100>;
 				st,bank-name	= "PIO31";
 			};
+
+			serial2-oe {
+				pinctrl_serial2_oe: serial2-1 {
+					st,pins {
+						output-enable	= <&PIO11 3 ALT2 OUT>;
+					};
+				};
+			};
+
 		};
 
 		pin-controller-rear {
@@ -218,7 +227,6 @@ pinctrl_serial2: serial2-0 {
 					st,pins {
 						tx	= <&PIO17 4 ALT2 OUT>;
 						rx	= <&PIO17 5 ALT2 IN>;
-						output-enable	= <&PIO11 3 ALT2 OUT>;
 					};
 				};
 			};
diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi
index 3cecd9689a49..1a0326ea7d07 100644
--- a/arch/arm/boot/dts/stih416.dtsi
+++ b/arch/arm/boot/dts/stih416.dtsi
@@ -79,7 +79,7 @@ serial2: serial@fed32000{
 			interrupts	= <0 197 0>;
 			clocks          = <&CLK_S_ICN_REG_0>;
 			pinctrl-names 	= "default";
-			pinctrl-0 	= <&pinctrl_serial2>;
+			pinctrl-0 	= <&pinctrl_serial2 &pinctrl_serial2_oe>;
 		};
 
 		/* SBC_UART1 */

From c9250073cdd54339a320b78719761d3ea33714fe Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Date: Tue, 9 Jul 2013 08:26:33 +0100
Subject: [PATCH 205/320] ARM: STi: Set correct ARM ERRATAs.

Some of the ARM_ERRATA selection is not done in the initial SOC support
patches. This patch selects 2 new ARM_ERRATA's and removes one which was
actually fixed.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
[olof: reorder new errata entries]
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-sti/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig
index d04e3bfe1918..835833e3c4f8 100644
--- a/arch/arm/mach-sti/Kconfig
+++ b/arch/arm/mach-sti/Kconfig
@@ -11,8 +11,9 @@ menuconfig ARCH_STI
 	select HAVE_SMP
 	select HAVE_ARM_SCU if SMP
 	select ARCH_REQUIRE_GPIOLIB
-	select ARM_ERRATA_720789
 	select ARM_ERRATA_754322
+	select ARM_ERRATA_764369
+	select ARM_ERRATA_775420
 	select PL310_ERRATA_753970 if CACHE_PL310
 	select PL310_ERRATA_769419 if CACHE_PL310
 	help

From ab116a4df4942c78c189d9b0744dd940ab9e00b9 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 10 Jul 2013 11:09:12 +0900
Subject: [PATCH 206/320] dmaengine: shdma: fix a build failure on platforms
 with no DMA support

On platforms with no support for the shdma dmaengine driver build is
currently failing with

drivers/built-in.o: In function `sh_mobile_sdhi_probe':
drivers/mmc/host/sh_mobile_sdhi.c:170: undefined reference to`shdma_chan_filter'

Fix the breakage by defining shdma_chan_filter to NULL in such
configurations.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski+renesas@gmail.com>
[horms+renesas@verge.net.au: Apply change to shdma-base.h instead of sh_dma.h]
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 include/linux/shdma-base.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index 382cf710ca9a..5b1c9848124c 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -124,6 +124,10 @@ void shdma_chan_remove(struct shdma_chan *schan);
 int shdma_init(struct device *dev, struct shdma_dev *sdev,
 		    int chan_num);
 void shdma_cleanup(struct shdma_dev *sdev);
+#if IS_ENABLED(CONFIG_SH_DMAE_BASE)
 bool shdma_chan_filter(struct dma_chan *chan, void *arg);
+#else
+#define shdma_chan_filter NULL
+#endif
 
 #endif

From 6287e7319870ec949fb809e4eb4154c2b05b221f Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Thu, 27 Jun 2013 22:42:36 -0400
Subject: [PATCH 207/320] ARM: footbridge: fix overlapping PCI mappings

Commit 8ef6e6201b26cb9fde79c1baa08145af6aca2815 (ARM: footbridge: use
fixed PCI i/o mapping) broke booting on my netwinder.  Before that,
everything boots fine.  Since then, it crashes on boot.

With earlyprintk, I see it BUG-ing like so:
kernel BUG at lib/ioremap.c:27!
Internal error: Oops - BUG: 0 [#1] ARM
...
[<c0139b54>] (ioremap_page_range+0x128/0x154) from [<c02e6a6c>] (dc21285_setup+0xd0/0x114)
[<c02e6a6c>] (dc21285_setup+0xd0/0x114) from [<c02e4874>] (pci_common_init+0xa0/0x298)
[<c02e4874>] (pci_common_init+0xa0/0x298) from [<c02e793c>] (netwinder_pci_init+0xc/0x18)
[<c02e793c>] (netwinder_pci_init+0xc/0x18) from [<c02e27d0>] (do_one_initcall+0xb4/0x180)
...

Russell points out it's because of overlapping PCI mappings that was
added with the aforementioned commit.  Rob thought the code would re-use
the static mapping, but that turns out to not be the case and instead
hits the BUG further down.

After deleting this hunk as suggested by Russel, the system boots up fine
again and all my PCI devices work (IDE, ethernet, the DC21285).

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Cc: stable@vger.kernel.org	# v3.5+
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-footbridge/dc21285.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index a7cd2cf5e08d..3490a24f969e 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -276,8 +276,6 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys)
 
 	sys->mem_offset  = DC21285_PCI_MEM;
 
-	pci_ioremap_io(0, DC21285_PCI_IO);
-
 	pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset);
 	pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 

From e64bf95e68bf92e7f6d906da4715b937cec5646c Mon Sep 17 00:00:00 2001
From: Rob Herring <rob.herring@calxeda.com>
Date: Sun, 21 Jul 2013 14:53:37 -0500
Subject: [PATCH 208/320] ARM: highbank: Only touch common coherency control
 register fields

Midway adds new register fields to the coherency control registers, so
writing absolute values will break on Midway. Change the register
accesses to only modify the necessary and common fields in order to
support both Midway and Highbank.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-highbank/highbank.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index dc5d6becd8c7..88815795fe26 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -115,6 +115,7 @@ static int highbank_platform_notifier(struct notifier_block *nb,
 {
 	struct resource *res;
 	int reg = -1;
+	u32 val;
 	struct device *dev = __dev;
 
 	if (event != BUS_NOTIFY_ADD_DEVICE)
@@ -141,10 +142,10 @@ static int highbank_platform_notifier(struct notifier_block *nb,
 		return NOTIFY_DONE;
 
 	if (of_property_read_bool(dev->of_node, "dma-coherent")) {
-		writel(0xff31, sregs_base + reg);
+		val = readl(sregs_base + reg);
+		writel(val | 0xff01, sregs_base + reg);
 		set_dma_ops(dev, &arm_coherent_dma_ops);
-	} else
-		writel(0, sregs_base + reg);
+	}
 
 	return NOTIFY_OK;
 }

From 7d148ef51a657fd04036c3ed7803da600dd0d451 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 22 Jul 2013 18:02:39 +0200
Subject: [PATCH 209/320] drm/i915: fix hdmi portclock limits

In

commit 325b9d048810f7689ec644595061c0b700e64bce
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Fri Apr 19 11:24:33 2013 +0200

    drm/i915: fixup 12bpc hdmi dotclock handling

I've errornously claimed that we don't yet support the hdmi 1.4
dotclocks > 225 MHz on Haswell. But a bug report and a closer look at
the wrpll table showed that we've supported port clocks up to 300MHz.

With the new code to dynamically compute wrpll limits we should have
no issues going up to the full 340 MHz range of hdmi 1.4, so let's
just use that to fix this regression. That'll allow 4k over hdmi for
free!

v2: Drop the random hunk that somehow slipped in.

v3: Cantiga has the original HDMI dotclock limit of 165MHz. And also
patch up the mode filtering. To do so extract the dotclock limits into
a little helper function.

v4: Use 300MHz (from Bspec) instead of 340MHz (upper limit for hdmi
1.3), apparently hw is not required to be able to drive the highest
dotclocks. Suggested by Damien.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67048
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67030
Tested-by: Andreas Reis <andreas.reis@gmail.com> (v2)
Cc: Damien Lespiau <damien.lespiau@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/intel_hdmi.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 98df2a0c85bd..2fd3fd5b943e 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -785,10 +785,22 @@ static void intel_disable_hdmi(struct intel_encoder *encoder)
 	}
 }
 
+static int hdmi_portclock_limit(struct intel_hdmi *hdmi)
+{
+	struct drm_device *dev = intel_hdmi_to_dev(hdmi);
+
+	if (IS_G4X(dev))
+		return 165000;
+	else if (IS_HASWELL(dev))
+		return 300000;
+	else
+		return 225000;
+}
+
 static int intel_hdmi_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
-	if (mode->clock > 165000)
+	if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector)))
 		return MODE_CLOCK_HIGH;
 	if (mode->clock < 20000)
 		return MODE_CLOCK_LOW;
@@ -806,6 +818,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode;
 	int clock_12bpc = pipe_config->requested_mode.clock * 3 / 2;
+	int portclock_limit = hdmi_portclock_limit(intel_hdmi);
 	int desired_bpp;
 
 	if (intel_hdmi->color_range_auto) {
@@ -829,7 +842,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 	 * outputs. We also need to check that the higher clock still fits
 	 * within limits.
 	 */
-	if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= 225000
+	if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= portclock_limit
 	    && HAS_PCH_SPLIT(dev)) {
 		DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
 		desired_bpp = 12*3;
@@ -846,7 +859,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 		pipe_config->pipe_bpp = desired_bpp;
 	}
 
-	if (adjusted_mode->clock > 225000) {
+	if (adjusted_mode->clock > portclock_limit) {
 		DRM_DEBUG_KMS("too high HDMI clock, rejecting mode\n");
 		return false;
 	}

From 82b2f495fba338d1e3098dde1df54944a9c19751 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 9 Jul 2013 15:16:06 +0100
Subject: [PATCH 210/320] arm64: virt: ensure visibility of __boot_cpu_mode

Secondary CPUs write to __boot_cpu_mode with caches disabled, and thus a
cached value of __boot_cpu_mode may be incoherent with that in memory.
This could lead to a failure to detect mismatched boot modes.

This patch adds flushing to ensure that writes by secondaries to
__boot_cpu_mode are made visible before we test against it.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <cdall@cs.columbia.edu>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/virt.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 439827271e3d..26e310c54344 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -21,6 +21,7 @@
 #define BOOT_CPU_MODE_EL2	(0x0e12b007)
 
 #ifndef __ASSEMBLY__
+#include <asm/cacheflush.h>
 
 /*
  * __boot_cpu_mode records what mode CPUs were booted in.
@@ -36,9 +37,20 @@ extern u32 __boot_cpu_mode[2];
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 phys_addr_t __hyp_get_vectors(void);
 
+static inline void sync_boot_mode(void)
+{
+	/*
+	 * As secondaries write to __boot_cpu_mode with caches disabled, we
+	 * must flush the corresponding cache entries to ensure the visibility
+	 * of their writes.
+	 */
+	__flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode));
+}
+
 /* Reports the availability of HYP mode */
 static inline bool is_hyp_mode_available(void)
 {
+	sync_boot_mode();
 	return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
 		__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
 }
@@ -46,6 +58,7 @@ static inline bool is_hyp_mode_available(void)
 /* Check if the bootloader has booted CPUs in different modes */
 static inline bool is_hyp_mode_mismatched(void)
 {
+	sync_boot_mode();
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 

From b0946fc84628b8d60e7a2034b48d1aff7da9d1df Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 23 Jul 2013 11:05:10 +0100
Subject: [PATCH 211/320] arm64: Fix definition of arm_pm_restart to match the
 declaration

Commit ff70130 (arm64: use common reboot infrastructure) converted the
arm_pm_restart declaration to the new reboot infrastructure but missed
the actual definition.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 1788bf6b471f..57fb55c44c90 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -81,7 +81,7 @@ void soft_restart(unsigned long addr)
 void (*pm_power_off)(void);
 EXPORT_SYMBOL_GPL(pm_power_off);
 
-void (*arm_pm_restart)(char str, const char *cmd);
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 EXPORT_SYMBOL_GPL(arm_pm_restart);
 
 void arch_cpu_idle_prepare(void)

From 96f15f29038e58e1b0a96483e2b369ff446becf1 Mon Sep 17 00:00:00 2001
From: Jeff Skirvin <jeffrey.d.skirvin@intel.com>
Date: Thu, 11 Jul 2013 17:18:58 -0700
Subject: [PATCH 212/320] [SCSI] isci: Fix a race condition in the SSP task
 management path

This commit fixes a race condition in the isci driver abort task and SSP
device task management path.  The race is caused when an I/O termination
in the SCU hardware is necessary because of an SSP target timeout condition,
and the check of the I/O end state races against the HW-termination-driven
end state.  The failure of the race meant that no TMF was sent to the device
to clean-up the pending I/O.

Signed-off-by: Jeff Skirvin <jeffrey.d.skirvin@intel.com>
Reviewed-by: Lukasz Dorau <lukasz.dorau@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 drivers/scsi/isci/task.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c
index 9bb020ac089c..0d30ca849e8f 100644
--- a/drivers/scsi/isci/task.c
+++ b/drivers/scsi/isci/task.c
@@ -491,6 +491,7 @@ int isci_task_abort_task(struct sas_task *task)
 	struct isci_tmf           tmf;
 	int                       ret = TMF_RESP_FUNC_FAILED;
 	unsigned long             flags;
+	int                       target_done_already = 0;
 
 	/* Get the isci_request reference from the task.  Note that
 	 * this check does not depend on the pending request list
@@ -505,9 +506,11 @@ int isci_task_abort_task(struct sas_task *task)
 	/* If task is already done, the request isn't valid */
 	if (!(task->task_state_flags & SAS_TASK_STATE_DONE) &&
 	    (task->task_state_flags & SAS_TASK_AT_INITIATOR) &&
-	    old_request)
+	    old_request) {
 		idev = isci_get_device(task->dev->lldd_dev);
-
+		target_done_already = test_bit(IREQ_COMPLETE_IN_TARGET,
+					       &old_request->flags);
+	}
 	spin_unlock(&task->task_state_lock);
 	spin_unlock_irqrestore(&ihost->scic_lock, flags);
 
@@ -561,7 +564,7 @@ int isci_task_abort_task(struct sas_task *task)
 
 	if (task->task_proto == SAS_PROTOCOL_SMP ||
 	    sas_protocol_ata(task->task_proto) ||
-	    test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags) ||
+	    target_done_already ||
 	    test_bit(IDEV_GONE, &idev->flags)) {
 
 		spin_unlock_irqrestore(&ihost->scic_lock, flags);

From c3ccb1d7cf4c4549151876dd37c0944a682fd9e1 Mon Sep 17 00:00:00 2001
From: Saurav Kashyap <saurav.kashyap@qlogic.com>
Date: Fri, 12 Jul 2013 14:47:51 -0400
Subject: [PATCH 213/320] [SCSI] qla2xxx: Properly set the tagging for
 commands.

This fixes a regression where Xyratex controllers and disks were lost by the
driver:

https://bugzilla.kernel.org/show_bug.cgi?id=59601

Reported-by: Jack Hill <jackhill@jackhill.us>
Signed-off-by: Saurav Kashyap <saurav.kashyap@qlogic.com>
Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 drivers/scsi/qla2xxx/qla_iocb.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 42ef481db942..ef0a5481b9dd 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -419,6 +419,8 @@ qla2x00_start_scsi(srb_t *sp)
 			    __constant_cpu_to_le16(CF_SIMPLE_TAG);
 			break;
 		}
+	} else {
+		cmd_pkt->control_flags = __constant_cpu_to_le16(CF_SIMPLE_TAG);
 	}
 
 	/* Load SCSI command packet. */
@@ -1307,11 +1309,11 @@ qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
 		    fcp_cmnd->task_attribute = TSK_ORDERED;
 		    break;
 		default:
-		    fcp_cmnd->task_attribute = 0;
+		    fcp_cmnd->task_attribute = TSK_SIMPLE;
 		    break;
 		}
 	} else {
-		fcp_cmnd->task_attribute = 0;
+		fcp_cmnd->task_attribute = TSK_SIMPLE;
 	}
 
 	cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */
@@ -1525,7 +1527,12 @@ qla24xx_start_scsi(srb_t *sp)
 		case ORDERED_QUEUE_TAG:
 			cmd_pkt->task = TSK_ORDERED;
 			break;
+		default:
+		    cmd_pkt->task = TSK_SIMPLE;
+		    break;
 		}
+	} else {
+		cmd_pkt->task = TSK_SIMPLE;
 	}
 
 	/* Load SCSI command packet. */

From c91bc6ccd13254826fdfceddba0f3b5e308aa93e Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <xtfeng@gmail.com>
Date: Tue, 23 Jul 2013 11:54:10 +0800
Subject: [PATCH 214/320] ahci: fix Null pointer dereference in
 achi_host_active()

commit b29900e6 (AHCI: Make distinct names for ports in /proc/interrupts)
introuded a regression, which resulted Null pointer dereference for achi
host with dummy ports. For ahci ports, when the port is dummy port, its
private_data will be NULL, as ata_dummy_port_ops doesn't support ->port_start.

changes in v2: use pp to check dummy ports, update comments

Reported-and-tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Xiaotian Feng <xtfeng@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: linux-ide@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/ata/ahci.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5064f3ea20f1..db4380d70031 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1146,11 +1146,18 @@ int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis)
 		return rc;
 
 	for (i = 0; i < host->n_ports; i++) {
+		const char* desc;
 		struct ahci_port_priv *pp = host->ports[i]->private_data;
 
+		/* pp is NULL for dummy ports */
+		if (pp)
+			desc = pp->irq_desc;
+		else
+			desc = dev_driver_string(host->dev);
+
 		rc = devm_request_threaded_irq(host->dev,
 			irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED,
-			pp->irq_desc, host->ports[i]);
+			desc, host->ports[i]);
 		if (rc)
 			goto out_free_irqs;
 	}

From 085b513f97d8d799d28491239be4b451bcd8c2c5 Mon Sep 17 00:00:00 2001
From: "Ewan D. Milne" <emilne@redhat.com>
Date: Fri, 2 Nov 2012 09:38:34 -0400
Subject: [PATCH 215/320] [SCSI] sd: fix crash when UA received on DIF enabled
 device

sd_prep_fn will allocate a larger CDB for the command via mempool_alloc
for devices using DIF type 2 protection.  This CDB was being freed
in sd_done, which results in a kernel crash if the command is retried
due to a UNIT ATTENTION.  This change moves the code to free the larger
CDB into sd_unprep_fn instead, which is invoked after the request is
complete.

It is no longer necessary to call scsi_print_command separately for
this case as the ->cmnd will no longer be NULL in the normal code path.

Also removed conditional test for DIF type 2 when freeing the larger
CDB because the protection_type could have been changed via sysfs while
the command was executing.

Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 drivers/scsi/sd.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 80f39b8b0223..86fcf2c313ad 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -838,10 +838,17 @@ static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
 
 static void sd_unprep_fn(struct request_queue *q, struct request *rq)
 {
+	struct scsi_cmnd *SCpnt = rq->special;
+
 	if (rq->cmd_flags & REQ_DISCARD) {
 		free_page((unsigned long)rq->buffer);
 		rq->buffer = NULL;
 	}
+	if (SCpnt->cmnd != rq->cmd) {
+		mempool_free(SCpnt->cmnd, sd_cdb_pool);
+		SCpnt->cmnd = NULL;
+		SCpnt->cmd_len = 0;
+	}
 }
 
 /**
@@ -1720,21 +1727,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
 		sd_dif_complete(SCpnt, good_bytes);
 
-	if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type)
-	    == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) {
-
-		/* We have to print a failed command here as the
-		 * extended CDB gets freed before scsi_io_completion()
-		 * is called.
-		 */
-		if (result)
-			scsi_print_command(SCpnt);
-
-		mempool_free(SCpnt->cmnd, sd_cdb_pool);
-		SCpnt->cmnd = NULL;
-		SCpnt->cmd_len = 0;
-	}
-
 	return good_bytes;
 }
 

From e4daf1ffbe6cc3b12aab4d604e627829e93e9914 Mon Sep 17 00:00:00 2001
From: Harshula Jayasuriya <harshula@redhat.com>
Date: Tue, 23 Jul 2013 14:21:35 +1000
Subject: [PATCH 216/320] nfsd: nfsd_open: when dentry_open returns an error do
 not propagate as struct file

The following call chain:
------------------------------------------------------------
nfs4_get_vfs_file
- nfsd_open
  - dentry_open
    - do_dentry_open
      - __get_file_write_access
        - get_write_access
          - return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
------------------------------------------------------------

can result in the following state:
------------------------------------------------------------
struct nfs4_file {
...
  fi_fds = {0xffff880c1fa65c80, 0xffffffffffffffe6, 0x0},
  fi_access = {{
      counter = 0x1
    }, {
      counter = 0x0
    }},
...
------------------------------------------------------------

1) First time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is
NULL, hence nfsd_open() is called where we get status set to an error
and fp->fi_fds[O_WRONLY] to -ETXTBSY. Thus we do not reach
nfs4_file_get_access() and fi_access[O_WRONLY] is not incremented.

2) Second time around, in nfs4_get_vfs_file() fp->fi_fds[O_WRONLY] is
NOT NULL (-ETXTBSY), so nfsd_open() is NOT called, but
nfs4_file_get_access() IS called and fi_access[O_WRONLY] is incremented.
Thus we leave a landmine in the form of the nfs4_file data structure in
an incorrect state.

3) Eventually, when __nfs4_file_put_access() is called it finds
fi_access[O_WRONLY] being non-zero, it decrements it and calls
nfs4_file_put_fd() which tries to fput -ETXTBSY.
------------------------------------------------------------
...
     [exception RIP: fput+0x9]
     RIP: ffffffff81177fa9  RSP: ffff88062e365c90  RFLAGS: 00010282
     RAX: ffff880c2b3d99cc  RBX: ffff880c2b3d9978  RCX: 0000000000000002
     RDX: dead000000100101  RSI: 0000000000000001  RDI: ffffffffffffffe6
     RBP: ffff88062e365c90   R8: ffff88041fe797d8   R9: ffff88062e365d58
     R10: 0000000000000008  R11: 0000000000000000  R12: 0000000000000001
     R13: 0000000000000007  R14: 0000000000000000  R15: 0000000000000000
     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
  #9 [ffff88062e365c98] __nfs4_file_put_access at ffffffffa0562334 [nfsd]
 #10 [ffff88062e365cc8] nfs4_file_put_access at ffffffffa05623ab [nfsd]
 #11 [ffff88062e365ce8] free_generic_stateid at ffffffffa056634d [nfsd]
 #12 [ffff88062e365d18] release_open_stateid at ffffffffa0566e4b [nfsd]
 #13 [ffff88062e365d38] nfsd4_close at ffffffffa0567401 [nfsd]
 #14 [ffff88062e365d88] nfsd4_proc_compound at ffffffffa0557f28 [nfsd]
 #15 [ffff88062e365dd8] nfsd_dispatch at ffffffffa054543e [nfsd]
 #16 [ffff88062e365e18] svc_process_common at ffffffffa04ba5a4 [sunrpc]
 #17 [ffff88062e365e98] svc_process at ffffffffa04babe0 [sunrpc]
 #18 [ffff88062e365eb8] nfsd at ffffffffa0545b62 [nfsd]
 #19 [ffff88062e365ee8] kthread at ffffffff81090886
 #20 [ffff88062e365f48] kernel_thread at ffffffff8100c14a
------------------------------------------------------------

Cc: stable@vger.kernel.org
Signed-off-by: Harshula Jayasuriya <harshula@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/vfs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ff6a0019b0b..c827acb0e943 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 			flags = O_WRONLY|O_LARGEFILE;
 	}
 	*filp = dentry_open(&path, flags, current_cred());
-	if (IS_ERR(*filp))
+	if (IS_ERR(*filp)) {
 		host_err = PTR_ERR(*filp);
-	else {
+		*filp = NULL;
+	} else {
 		host_err = ima_file_check(*filp, may_flags);
 
 		if (may_flags & NFSD_MAY_64BIT_COOKIE)

From ca24763588844b14f019ffc45c7df6d9e8f932c5 Mon Sep 17 00:00:00 2001
From: "Alexandr \\\"Sky\\\" Ivanov" <alexandr.sky@gmail.com>
Date: Tue, 23 Jul 2013 17:46:40 +0400
Subject: [PATCH 217/320] USB: option: add D-Link DWM-152/C1 and DWM-156/C1

Adding support for D-Link DWM-152/C1 and DWM-156/C1 devices.

DWM-152/C1:
T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  6 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=07d1 ProdID=3e01 Rev= 0.00
S:  Product=USB Configuration
S:  SerialNumber=1234567890ABCDEF
C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=81(I) Atr=03(Int.) MxPS=  64 Ivl=2ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage
E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms

DWM-156/C1:
T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  8 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=07d1 ProdID=3e02 Rev= 0.00
S:  Product=DataCard Device
S:  SerialNumber=1234567890ABCDEF
C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=81(I) Atr=03(Int.) MxPS=  64 Ivl=2ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms
I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage
E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms

Signed-off-by: Alexandr Ivanov <alexandr.sky@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 418746b557ad..9cc40031c23c 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1341,6 +1341,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);

From ed882c3e72d6037589baa4328585c2b0450e673c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= <vincent.stehle@freescale.com>
Date: Thu, 27 Jun 2013 14:49:27 +0200
Subject: [PATCH 218/320] ARM: keystone: fix compilation warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compilation warning:

  arch/arm/mach-keystone/keystone.c:74:2: warning: initialization from incompatible pointer type [enabled by default]
  arch/arm/mach-keystone/keystone.c:74:2: warning: (near initialization for ‘__mach_desc_KEYSTONE.restart’) [enabled by default]

Signed-off-by: Vincent Stehlé <vincent.stehle@freescale.com>
Cc: Robin Holt <holt@sgi.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: trivial@kernel.org
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-keystone/keystone.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index fe4d9ff93a7e..b661c5c2870a 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -49,7 +49,7 @@ static const char *keystone_match[] __initconst = {
 	NULL,
 };
 
-void keystone_restart(char mode, const char *cmd)
+void keystone_restart(enum reboot_mode mode, const char *cmd)
 {
 	u32 val;
 

From fe08bf9f46d6ae8e08de32d29234a2c928eebf8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= <vincent.stehle@freescale.com>
Date: Thu, 27 Jun 2013 14:42:41 +0200
Subject: [PATCH 219/320] ARM: zynq: fix compilation warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compilation warning:

  arch/arm/mach-zynq/common.c:110:2: warning: initialization from incompatible pointer type [enabled by default]
  arch/arm/mach-zynq/common.c:110:2: warning: (near initialization for ‘__mach_desc_XILINX_EP107.restart’) [enabled by default]

Signed-off-by: Vincent Stehlé <vincent.stehle@freescale.com>
Cc: Robin Holt <holt@sgi.com>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: trivial@kernel.org
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-zynq/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 5b799c29886e..5f252569c689 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -91,7 +91,7 @@ static void __init zynq_map_io(void)
 	zynq_scu_map_io();
 }
 
-static void zynq_system_reset(char mode, const char *cmd)
+static void zynq_system_reset(enum reboot_mode mode, const char *cmd)
 {
 	zynq_slcr_system_reset();
 }

From a829abf8daa2dcf8223a9284b76d221e61130e13 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 Jul 2013 17:51:20 +0200
Subject: [PATCH 220/320] ARM: pxa: propagate errors from regulator_enable() to
 pxamci

The em_x270_mci_setpower() and em_x270_usb_hub_init() functions
call regulator_enable(), which may return an error that must
be checked.

This changes the em_x270_usb_hub_init() function to bail out
if it fails, and changes the pxamci_platform_data->setpower
callback so that the a failed em_x270_mci_setpower call
can be propagated by the pxamci driver into the mmc core.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Mike Rapoport <mike@compulab.co.il>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Cc: Haojian Zhuang <haojian.zhuang@gmail.com>
Acked-by: Chris Ball <cjb@laptop.org>
[olof: fixed order of regulator_enable() and test in em_x270_usb_hub_init]
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-pxa/em-x270.c              | 17 +++++++++++++----
 arch/arm/mach-pxa/mainstone.c            |  3 ++-
 arch/arm/mach-pxa/pcm990-baseboard.c     |  3 ++-
 arch/arm/mach-pxa/poodle.c               |  4 +++-
 arch/arm/mach-pxa/spitz.c                |  4 +++-
 arch/arm/mach-pxa/stargate2.c            |  3 ++-
 drivers/mmc/host/pxamci.c                |  2 +-
 include/linux/platform_data/mmc-pxamci.h |  2 +-
 8 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index f6726bb4eb95..3a3362fa793e 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -477,16 +477,24 @@ static int em_x270_usb_hub_init(void)
 	/* USB Hub power-on and reset */
 	gpio_direction_output(usb_hub_reset, 1);
 	gpio_direction_output(GPIO9_USB_VBUS_EN, 0);
-	regulator_enable(em_x270_usb_ldo);
+	err = regulator_enable(em_x270_usb_ldo);
+	if (err)
+		goto err_free_rst_gpio;
+
 	gpio_set_value(usb_hub_reset, 0);
 	gpio_set_value(usb_hub_reset, 1);
 	regulator_disable(em_x270_usb_ldo);
-	regulator_enable(em_x270_usb_ldo);
+	err = regulator_enable(em_x270_usb_ldo);
+	if (err)
+		goto err_free_rst_gpio;
+
 	gpio_set_value(usb_hub_reset, 0);
 	gpio_set_value(GPIO9_USB_VBUS_EN, 1);
 
 	return 0;
 
+err_free_rst_gpio:
+	gpio_free(usb_hub_reset);
 err_free_vbus_gpio:
 	gpio_free(GPIO9_USB_VBUS_EN);
 err_free_usb_ldo:
@@ -592,7 +600,7 @@ static int em_x270_mci_init(struct device *dev,
 	return err;
 }
 
-static void em_x270_mci_setpower(struct device *dev, unsigned int vdd)
+static int em_x270_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -600,10 +608,11 @@ static void em_x270_mci_setpower(struct device *dev, unsigned int vdd)
 		int vdd_uV = (2000 + (vdd - __ffs(MMC_VDD_20_21)) * 100) * 1000;
 
 		regulator_set_voltage(em_x270_sdio_ldo, vdd_uV, vdd_uV);
-		regulator_enable(em_x270_sdio_ldo);
+		return regulator_enable(em_x270_sdio_ldo);
 	} else {
 		regulator_disable(em_x270_sdio_ldo);
 	}
+	return 0;
 }
 
 static void em_x270_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index d2c652318376..dd70343c8708 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -408,7 +408,7 @@ static int mainstone_mci_init(struct device *dev, irq_handler_t mstone_detect_in
 	return err;
 }
 
-static void mainstone_mci_setpower(struct device *dev, unsigned int vdd)
+static int mainstone_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -420,6 +420,7 @@ static void mainstone_mci_setpower(struct device *dev, unsigned int vdd)
 		printk(KERN_DEBUG "%s: off\n", __func__);
 		MST_MSCWR1 &= ~MST_MSCWR1_MMC_ON;
 	}
+	return 0;
 }
 
 static void mainstone_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index fb7f1d1627dc..13e5b00eae90 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -335,7 +335,7 @@ static int pcm990_mci_init(struct device *dev, irq_handler_t mci_detect_int,
 	return err;
 }
 
-static void pcm990_mci_setpower(struct device *dev, unsigned int vdd)
+static int pcm990_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data *p_d = dev->platform_data;
 	u8 val;
@@ -348,6 +348,7 @@ static void pcm990_mci_setpower(struct device *dev, unsigned int vdd)
 		val &= ~PCM990_CTRL_MMC2PWR;
 
 	pcm990_cpld_writeb(PCM990_CTRL_MMC2PWR, PCM990_CTRL_REG5);
+	return 0;
 }
 
 static void pcm990_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 711d37e26bd8..aedf053a1de5 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -258,7 +258,7 @@ static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int,
 	return err;
 }
 
-static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
+static int poodle_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -270,6 +270,8 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
 		gpio_set_value(POODLE_GPIO_SD_PWR1, 0);
 		gpio_set_value(POODLE_GPIO_SD_PWR, 0);
 	}
+
+	return 0;
 }
 
 static void poodle_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 2125df0444e7..4c29173026e8 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -598,7 +598,7 @@ static inline void spitz_spi_init(void) {}
  * NOTE: The card detect interrupt isn't debounced so we delay it by 250ms to
  * give the card a chance to fully insert/eject.
  */
-static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
+static int spitz_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -606,6 +606,8 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
 		spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, SCOOP_CPR_SD_3V);
 	else
 		spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, 0x0);
+
+	return 0;
 }
 
 static struct pxamci_platform_data spitz_mci_platform_data = {
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 88fde43c948c..62aea3e835f3 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -734,9 +734,10 @@ static int stargate2_mci_init(struct device *dev,
  *
  * Very simple control. Either it is on or off and is controlled by
  * a gpio pin */
-static void stargate2_mci_setpower(struct device *dev, unsigned int vdd)
+static int stargate2_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd);
+	return 0;
 }
 
 static void stargate2_mci_exit(struct device *dev, void *data)
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 847b1996ce8e..2c5a91bb8ec3 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -128,7 +128,7 @@ static inline int pxamci_set_power(struct pxamci_host *host,
 			       !!on ^ host->pdata->gpio_power_invert);
 	}
 	if (!host->vcc && host->pdata && host->pdata->setpower)
-		host->pdata->setpower(mmc_dev(host->mmc), vdd);
+		return host->pdata->setpower(mmc_dev(host->mmc), vdd);
 
 	return 0;
 }
diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h
index 9eb515bb799d..1706b3597ce0 100644
--- a/include/linux/platform_data/mmc-pxamci.h
+++ b/include/linux/platform_data/mmc-pxamci.h
@@ -12,7 +12,7 @@ struct pxamci_platform_data {
 	unsigned long detect_delay_ms;		/* delay in millisecond before detecting cards after interrupt */
 	int (*init)(struct device *, irq_handler_t , void *);
 	int (*get_ro)(struct device *);
-	void (*setpower)(struct device *, unsigned int);
+	int (*setpower)(struct device *, unsigned int);
 	void (*exit)(struct device *, void *);
 	int gpio_card_detect;			/* gpio detecting card insertion */
 	int gpio_card_ro;			/* gpio detecting read only toggle */

From a520a9b7edc940fc735c526470313cf963fc72da Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Tue, 23 Jul 2013 14:02:06 -0300
Subject: [PATCH 221/320] ARM: multi_v7_defconfig: Select USB chipidea driver

CONFIG_USB_EHCI_MXC selects the old i.mx USB driver, which does not support
device tree.

Select the USB chipidea driver instead, so that USB can be functional on i.mx.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/configs/multi_v7_defconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 120b08edb9a9..6e572c64cf5a 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -114,11 +114,12 @@ CONFIG_FB_SIMPLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_MXC=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_ISP1760_HCD=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_CHIPIDEA=y
+CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_AB8500_USB=y
 CONFIG_NOP_USB_XCEIV=y
 CONFIG_OMAP_USB2=y

From 4f3cc4809a98a165a9708b72b47de71643797bbd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 23 Jul 2013 12:53:39 -0400
Subject: [PATCH 222/320] NFSv4: Fix brainfart in attribute length calculation

The calculation of the attribute length was 4 bytes off.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Tested-by: Andre Heider <a.heider@gmail.com>
Reported-and-tested-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/nfs4xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c74d6168db99..3850b018815f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1118,11 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 				len, ((char *)p - (char *)q) + 4);
 		BUG();
 	}
-	len = (char *)p - (char *)q - (bmval_len << 2);
 	*q++ = htonl(bmval0);
 	*q++ = htonl(bmval1);
 	if (bmval_len == 3)
 		*q++ = htonl(bmval2);
+	len = (char *)p - (char *)(q + 1);
 	*q = htonl(len);
 
 /* out: */

From 69acbaac303e8cb948801a9ddd0ac24e86cc4a1b Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 8 Jul 2013 13:36:19 +0100
Subject: [PATCH 223/320] staging: comedi: COMEDI_CANCEL ioctl should wake up
 read/write

Comedi devices can do blocking read() or write() (or poll()) if an
asynchronous command has been set up, blocking for data (for read()) or
buffer space (for write()).  Various events associated with the
asynchronous command will wake up the blocked reader or writer (or
poller).  It is also possible to force the asynchronous command to
terminate by issuing a `COMEDI_CANCEL` ioctl.  That shuts down the
asynchronous command, but does not currently wake up the blocked reader
or writer (or poller).  If the blocked task could be woken up, it would
see that the command is no longer active and return.  The caller of the
`COMEDI_CANCEL` ioctl could attempt to wake up the blocked task by
sending a signal, but that's a nasty workaround.

Change `do_cancel_ioctl()` to wake up the wait queue after it returns
from `do_cancel()`.  `do_cancel()` can propagate an error return value
from the low-level comedi driver's cancel routine, but it always shuts
the command down regardless, so `do_cancel_ioctl()` can wake up he wait
queue regardless of the return value from `do_cancel()`.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/comedi_fops.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 8647518259f6..6cdef9d5f8d0 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1705,6 +1705,7 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg,
 			   void *file)
 {
 	struct comedi_subdevice *s;
+	int ret;
 
 	if (arg >= dev->n_subdevices)
 		return -EINVAL;
@@ -1721,7 +1722,11 @@ static int do_cancel_ioctl(struct comedi_device *dev, unsigned int arg,
 	if (s->busy != file)
 		return -EBUSY;
 
-	return do_cancel(dev, s);
+	ret = do_cancel(dev, s);
+	if (comedi_get_subdevice_runflags(s) & SRF_USER)
+		wake_up_interruptible(&s->async->wait_head);
+
+	return ret;
 }
 
 /*

From 4b18f08be01a7b3c7b6df497137b6e3cb28adaa3 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Fri, 5 Jul 2013 16:49:34 +0100
Subject: [PATCH 224/320] staging: comedi: fix a race between do_cmd_ioctl()
 and read/write

`do_cmd_ioctl()` is called with the comedi device's mutex locked to
process the `COMEDI_CMD` ioctl to set up comedi's asynchronous command
handling on a comedi subdevice.  `comedi_read()` and `comedi_write()`
are the `read` and `write` handlers for the comedi device, but do not
lock the mutex (for performance reasons, as some things can hold the
mutex for quite a long time).

There is a race condition if `comedi_read()` or `comedi_write()` is
running at the same time and for the same file object and comedi
subdevice as `do_cmd_ioctl()`.  `do_cmd_ioctl()` sets the subdevice's
`busy` pointer to the file object way before it sets the `SRF_RUNNING` flag
in the subdevice's `runflags` member.  `comedi_read() and
`comedi_write()` check the subdevice's `busy` pointer is pointing to the
current file object, then if the `SRF_RUNNING` flag is not set, will call
`do_become_nonbusy()` to shut down the asyncronous command.  Bad things
can happen if the asynchronous command is being shutdown and set up at
the same time.

To prevent the race, don't set the `busy` pointer until
after the `SRF_RUNNING` flag has been set.  Also, make sure the mutex is
held in `comedi_read()` and `comedi_write()` while calling
`do_become_nonbusy()` in order to avoid moving the race condition to a
point within that function.

Change some error handling `goto cleanup` statements in `do_cmd_ioctl()`
to simple `return -ERRFOO` statements as a result of changing when the
`busy` pointer is set.

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/comedi_fops.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 6cdef9d5f8d0..f4a197b2d1fd 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1413,22 +1413,19 @@ static int do_cmd_ioctl(struct comedi_device *dev,
 		DPRINTK("subdevice busy\n");
 		return -EBUSY;
 	}
-	s->busy = file;
 
 	/* make sure channel/gain list isn't too long */
 	if (cmd.chanlist_len > s->len_chanlist) {
 		DPRINTK("channel/gain list too long %u > %d\n",
 			cmd.chanlist_len, s->len_chanlist);
-		ret = -EINVAL;
-		goto cleanup;
+		return -EINVAL;
 	}
 
 	/* make sure channel/gain list isn't too short */
 	if (cmd.chanlist_len < 1) {
 		DPRINTK("channel/gain list too short %u < 1\n",
 			cmd.chanlist_len);
-		ret = -EINVAL;
-		goto cleanup;
+		return -EINVAL;
 	}
 
 	async->cmd = cmd;
@@ -1438,8 +1435,7 @@ static int do_cmd_ioctl(struct comedi_device *dev,
 	    kmalloc(async->cmd.chanlist_len * sizeof(int), GFP_KERNEL);
 	if (!async->cmd.chanlist) {
 		DPRINTK("allocation failed\n");
-		ret = -ENOMEM;
-		goto cleanup;
+		return -ENOMEM;
 	}
 
 	if (copy_from_user(async->cmd.chanlist, user_chanlist,
@@ -1491,6 +1487,9 @@ static int do_cmd_ioctl(struct comedi_device *dev,
 
 	comedi_set_subdevice_runflags(s, ~0, SRF_USER | SRF_RUNNING);
 
+	/* set s->busy _after_ setting SRF_RUNNING flag to avoid race with
+	 * comedi_read() or comedi_write() */
+	s->busy = file;
 	ret = s->do_cmd(dev, s);
 	if (ret == 0)
 		return 0;
@@ -2058,11 +2057,13 @@ static ssize_t comedi_write(struct file *file, const char __user *buf,
 
 		if (!comedi_is_subdevice_running(s)) {
 			if (count == 0) {
+				mutex_lock(&dev->mutex);
 				if (comedi_is_subdevice_in_error(s))
 					retval = -EPIPE;
 				else
 					retval = 0;
 				do_become_nonbusy(dev, s);
+				mutex_unlock(&dev->mutex);
 			}
 			break;
 		}
@@ -2161,11 +2162,13 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
 
 		if (n == 0) {
 			if (!comedi_is_subdevice_running(s)) {
+				mutex_lock(&dev->mutex);
 				do_become_nonbusy(dev, s);
 				if (comedi_is_subdevice_in_error(s))
 					retval = -EPIPE;
 				else
 					retval = 0;
+				mutex_unlock(&dev->mutex);
 				break;
 			}
 			if (file->f_flags & O_NONBLOCK) {
@@ -2203,9 +2206,11 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes,
 		buf += n;
 		break;		/* makes device work like a pipe */
 	}
-	if (comedi_is_subdevice_idle(s) &&
-	    async->buf_read_count - async->buf_write_count == 0) {
-		do_become_nonbusy(dev, s);
+	if (comedi_is_subdevice_idle(s)) {
+		mutex_lock(&dev->mutex);
+		if (async->buf_read_count - async->buf_write_count == 0)
+			do_become_nonbusy(dev, s);
+		mutex_unlock(&dev->mutex);
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&async->wait_head, &wait);

From c8145c56103099565be2e18aeac2a32f79361f3f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 22 Jul 2013 09:57:49 +0300
Subject: [PATCH 225/320] staging: frontier: use after free in disconnect()

usb_alphatrack_delete() frees "dev" so we can't use it on that path.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/frontier/alphatrack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c
index 5590ebf1da15..817f837b240d 100644
--- a/drivers/staging/frontier/alphatrack.c
+++ b/drivers/staging/frontier/alphatrack.c
@@ -827,11 +827,11 @@ static void usb_alphatrack_disconnect(struct usb_interface *intf)
 		mutex_unlock(&dev->mtx);
 		usb_alphatrack_delete(dev);
 	} else {
+		atomic_set(&dev->writes_pending, 0);
 		dev->intf = NULL;
 		mutex_unlock(&dev->mtx);
 	}
 
-	atomic_set(&dev->writes_pending, 0);
 	mutex_unlock(&disconnect_mutex);
 
 	dev_info(&intf->dev, "Alphatrack Surface #%d now disconnected\n",

From 3a349cee593a464b25f329a1b76635900187fdb0 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 14 Jul 2013 13:29:48 +0200
Subject: [PATCH 226/320] staging: drm/imx: drop "select OF_VIDEOMODE"

Commit ac4c1a9b33 ("staging: drm/imx: Add LDB support") added the
DRM_IMX_LDB Kconfig entry. That entry selects OF_VIDEOMODE. But there is
no Kconfig symbol named OF_VIDEOMODE. The select statement for that
symbol is a nop. Drop it.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/imx-drm/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig
index 22339059837f..bd0f2fd01db4 100644
--- a/drivers/staging/imx-drm/Kconfig
+++ b/drivers/staging/imx-drm/Kconfig
@@ -33,7 +33,6 @@ config DRM_IMX_TVE
 config DRM_IMX_LDB
 	tristate "Support for LVDS displays"
 	depends on DRM_IMX
-	select OF_VIDEOMODE
 	help
 	  Choose this to enable the internal LVDS Display Bridge (LDB)
 	  found on i.MX53 and i.MX6 processors.

From f2a6fed1ceaecf6054627f0ddbd4becf43c997fc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 22 Jul 2013 11:00:53 +0300
Subject: [PATCH 227/320] staging: gdm72xx: potential use after free in
 send_qos_list()

Sometimes free_qos_entry() sometimes frees its argument.  I have moved
the dereference of "entry" ahead on line to avoid a use after free.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/gdm72xx/gdm_qos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/gdm72xx/gdm_qos.c b/drivers/staging/gdm72xx/gdm_qos.c
index b795353e8348..cc3692439a5c 100644
--- a/drivers/staging/gdm72xx/gdm_qos.c
+++ b/drivers/staging/gdm72xx/gdm_qos.c
@@ -250,8 +250,8 @@ static void send_qos_list(struct nic *nic, struct list_head *head)
 
 	list_for_each_entry_safe(entry, n, head, list) {
 		list_del(&entry->list);
-		free_qos_entry(entry);
 		gdm_wimax_send_tx(entry->skb, entry->dev);
+		free_qos_entry(entry);
 	}
 }
 

From 644d478793c6594277f8ae76954da4ace7ac6f96 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Wed, 26 Jun 2013 15:28:39 +0300
Subject: [PATCH 228/320] staging: zram: protect zram_reset_device() call

Commit 9b3bb7abcdf2df0f1b2657e6cbc9d06bc2b3b36f (remove
zram_sysfs file (v2)) accidentally made zram_reset_device()
racy. Protect zram_reset_device() call with zram->lock.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Jerome Marchand <jmarchand@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/zram/zram_drv.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 82c7202fd5cc..e77fb6ea40c9 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -527,8 +527,11 @@ static void zram_reset_device(struct zram *zram)
 	size_t index;
 	struct zram_meta *meta;
 
-	if (!zram->init_done)
+	down_write(&zram->init_lock);
+	if (!zram->init_done) {
+		up_write(&zram->init_lock);
 		return;
+	}
 
 	meta = zram->meta;
 	zram->init_done = 0;
@@ -549,6 +552,7 @@ static void zram_reset_device(struct zram *zram)
 
 	zram->disksize = 0;
 	set_capacity(zram->disk, 0);
+	up_write(&zram->init_lock);
 }
 
 static void zram_init_device(struct zram *zram, struct zram_meta *meta)

From 72bb99cfe9c57d2044445fb34bbc95b4c0bae6f2 Mon Sep 17 00:00:00 2001
From: Karlis Ogsts <karlis.ogsts@sonymobile.com>
Date: Mon, 22 Jul 2013 13:51:42 -0700
Subject: [PATCH 229/320] staging: android: logger: Correct write offset reset
 on error

In the situation that a writer fails to copy data from userspace it will reset
the write offset to the value it had before it went to sleep. This discarding
any messages written while aquiring the mutex.

Therefore the reset offset needs to be retrieved after acquiring the mutex.

Cc: Android Kernel Team <kernel-team@android.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/logger.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c
index 080abf2faf97..a8c344422a77 100644
--- a/drivers/staging/android/logger.c
+++ b/drivers/staging/android/logger.c
@@ -469,7 +469,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			 unsigned long nr_segs, loff_t ppos)
 {
 	struct logger_log *log = file_get_log(iocb->ki_filp);
-	size_t orig = log->w_off;
+	size_t orig;
 	struct logger_entry header;
 	struct timespec now;
 	ssize_t ret = 0;
@@ -490,6 +490,8 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	mutex_lock(&log->mutex);
 
+	orig = log->w_off;
+
 	/*
 	 * Fix up any readers, pulling them forward to the first readable
 	 * entry after (what will be) the new write offset. We do this now

From 53a983c4f8b4246f5ff00567411be55967b0350a Mon Sep 17 00:00:00 2001
From: James Bottomley <JBottomley@Parallels.com>
Date: Sun, 9 Jun 2013 09:23:16 -0700
Subject: [PATCH 230/320] [SCSI] mvsas: Fix kernel panic on tile due to
 unaligned data access

slot->response is a 64 bit quantity (and accessed as such), but its alignment
is only 32 bits.  This doesn't cause a problem on x86, but apparently causes a
kernel panic on Tile:

Stack dump complete Kernel panic - not syncing:
 Kernel unalign fault running the idle task!
 Starting stack dump of tid 0, pid 0 (swapper) on cpu 1 at cycle 341586172541
   frame 0: 0xfffffff700140ee0 dump_stack+0x0/0x20 (sp 0xfffffe43ffedf420)
   frame 1: 0xfffffff700283270 panic+0x150/0x3a0 (sp 0xfffffe43ffedf420)
   frame 2: 0xfffffff70012bff8 jit_bundle_gen+0xfd8/0x27e0 (sp 0xfffffe43ffedf4c8)
   frame 3: 0xfffffff7003b5b68 do_unaligned+0xc0/0x5a0 (sp 0xfffffe43ffedf710)
   frame 4: 0xfffffff70044ca78 handle_interrupt+0x270/0x278 (sp 0xfffffe43ffedf840)
   <interrupt 17 while in kernel mode>
   frame 5: 0xfffffff7002ac370 mvs_slot_complete+0x5f0/0x12a0 (sp 0xfffffe43ffedfa90)
   frame 6: 0xfffffff7002abec0 mvs_slot_complete+0x140/0x12a0 (sp 0xfffffe43ffedfa90)
   frame 7: 0xfffffff7005cc840 mvs_int_rx+0x140/0x2a0 (sp 0xfffffe43ffedfb00)
   frame 8: 0xfffffff7005bbaf0 mvs_94xx_isr+0xd8/0x2b8 (sp 0xfffffe43ffedfb68)
   frame 9: 0xfffffff700658ba0 mvs_tasklet+0x128/0x1f8 (sp 0xfffffe43ffedfba8)
   frame 10: 0xfffffff7003e8230 tasklet_action+0x178/0x2c8 (sp 0xfffffe43ffedfbe0)
   frame 11: 0xfffffff700103850 __do_softirq+0x210/0x398 (sp 0xfffffe43ffedfc40)
   frame 12: 0xfffffff700180308 do_softirq+0xc8/0x140 (sp 0xfffffe43ffedfcd8)
   frame 13: 0xfffffff7000bd7f0 irq_exit+0xb0/0x158 (sp 0xfffffe43ffedfcf0)
   frame 14: 0xfffffff70013fa58 tile_dev_intr+0x1d8/0x2f0 (sp 0xfffffe43ffedfd00)
   frame 15: 0xfffffff70044ca78 handle_interrupt+0x270/0x278 (sp 0xfffffe43ffedfd40)
   <interrupt 30 while in kernel mode>
   frame 16: 0xfffffff700143e68 _cpu_idle_nap+0x0/0x18 (sp 0xfffffe43ffedffb0)
   frame 17: 0xfffffff700482480 cpu_idle+0x310/0x428 (sp 0xfffffe43ffedffb0)

Since the check is just for non-zero, split it to be two 32 bit accesses
(preserving speed in the fast path) and do a get_unaligned() in the slow path.

This is a modification of a wholly get_unaligned patch submitted by Paul Guo

Reported-by: Paul Guo <ggang@tilera.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 drivers/scsi/mvsas/mv_sas.c | 11 ++++++++---
 drivers/scsi/mvsas/mv_sas.h |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index f14665a6293d..6b1b4e91e53f 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1857,11 +1857,16 @@ int mvs_slot_complete(struct mvs_info *mvi, u32 rx_desc, u32 flags)
 		goto out;
 	}
 
-	/* error info record present */
-	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
+	/*
+	 * error info record present; slot->response is 32 bit aligned but may
+	 * not be 64 bit aligned, so check for zero in two 32 bit reads
+	 */
+	if (unlikely((rx_desc & RXQ_ERR)
+		     && (*((u32 *)slot->response)
+			 || *(((u32 *)slot->response) + 1)))) {
 		mv_dprintk("port %d slot %d rx_desc %X has error info"
 			"%016llX.\n", slot->port->sas_port.id, slot_idx,
-			 rx_desc, (u64)(*(u64 *)slot->response));
+			 rx_desc, get_unaligned_le64(slot->response));
 		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
 		tstat->resp = SAS_TASK_COMPLETE;
 		goto out;
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 60e2fb7f2dca..d6b19dc80bee 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -39,6 +39,7 @@
 #include <linux/irq.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <asm/unaligned.h>
 #include <scsi/libsas.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_tcq.h>

From 42577ca8c3616baaafdd8f167b2e1fb959026081 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 23 Jul 2013 16:49:24 +0100
Subject: [PATCH 231/320] Fix __wait_on_atomic_t() to call the action func if
 the counter != 0

Fix __wait_on_atomic_t() so that it calls the action func if the counter != 0
rather than if the counter is 0 so as to be analogous to __wait_on_bit().

Thanks to Yacine who found this by visual inspection.

This will affect FS-Cache in that it will could fail to sleep correctly when
trying to clean up after a netfs cookie is withdrawn.

Reported-by: Yacine Belkadi <yacine.belkadi.1@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
cc: Milosz Tanski <milosz@adfin.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/wait.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/wait.c b/kernel/wait.c
index ce0daa320a26..dec68bd4e9d8 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
 		prepare_to_wait(wq, &q->wait, mode);
 		val = q->key.flags;
 		if (atomic_read(val) == 0)
-			ret = (*action)(val);
+			break;
+		ret = (*action)(val);
 	} while (!ret && atomic_read(val) != 0);
 	finish_wait(wq, &q->wait);
 	return ret;

From 88d84ac97378c2f1d5fec9af1e8b7d9a662d6b00 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 19 Jul 2013 12:28:25 +0200
Subject: [PATCH 232/320] EDAC: Fix lockdep splat

Fix the following:

BUG: key ffff88043bdd0330 not in .data!
------------[ cut here ]------------
WARNING: at kernel/lockdep.c:2987 lockdep_init_map+0x565/0x5a0()
DEBUG_LOCKS_WARN_ON(1)
Modules linked in: glue_helper sb_edac(+) edac_core snd acpi_cpufreq lrw gf128mul ablk_helper iTCO_wdt evdev i2c_i801 dcdbas button cryptd pcspkr iTCO_vendor_support usb_common lpc_ich mfd_core soundcore mperf processor microcode
CPU: 2 PID: 599 Comm: modprobe Not tainted 3.10.0 #1
Hardware name: Dell Inc. Precision T3600/0PTTT9, BIOS A08 01/24/2013
 0000000000000009 ffff880439a1d920 ffffffff8160a9a9 ffff880439a1d958
 ffffffff8103d9e0 ffff88043af4a510 ffffffff81a16e11 0000000000000000
 ffff88043bdd0330 0000000000000000 ffff880439a1d9b8 ffffffff8103dacc
Call Trace:
  dump_stack
  warn_slowpath_common
  warn_slowpath_fmt
  lockdep_init_map
  ? trace_hardirqs_on_caller
  ? trace_hardirqs_on
  debug_mutex_init
  __mutex_init
  bus_register
  edac_create_sysfs_mci_device
  edac_mc_add_mc
  sbridge_probe
  pci_device_probe
  driver_probe_device
  __driver_attach
  ? driver_probe_device
  bus_for_each_dev
  driver_attach
  bus_add_driver
  driver_register
  __pci_register_driver
  ? 0xffffffffa0010fff
  sbridge_init
  ? 0xffffffffa0010fff
  do_one_initcall
  load_module
  ? unset_module_init_ro_nx
  SyS_init_module
  tracesys
---[ end trace d24a70b0d3ddf733 ]---
EDAC MC0: Giving out device to 'sbridge_edac.c' 'Sandy Bridge Socket#0': DEV 0000:3f:0e.0
EDAC sbridge: Driver loaded.

What happens is that bus_register needs a statically allocated lock_key
because the last is handed in to lockdep. However, struct mem_ctl_info
embeds struct bus_type (the whole struct, not a pointer to it) and the
whole thing gets dynamically allocated.

Fix this by using a statically allocated struct bus_type for the MC bus.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: stable@kernel.org # v3.10
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 drivers/edac/edac_mc.c       |  9 +++++++++
 drivers/edac/edac_mc_sysfs.c | 28 +++++++++++++++-------------
 drivers/edac/i5100_edac.c    |  2 +-
 include/linux/edac.h         |  7 ++++++-
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 27e86d938262..89e109022d78 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices);
  */
 static void const *edac_mc_owner;
 
+static struct bus_type mc_bus[EDAC_MAX_MCS];
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 			         unsigned len)
 {
@@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 	int ret = -EINVAL;
 	edac_dbg(0, "\n");
 
+	if (mci->mc_idx >= EDAC_MAX_MCS) {
+		pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
+		return -ENODEV;
+	}
+
 #ifdef CONFIG_EDAC_DEBUG
 	if (edac_debug_level >= 3)
 		edac_mc_dump_mci(mci);
@@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 	/* set load time so that error rate can be tracked */
 	mci->start_time = jiffies;
 
+	mci->bus = &mc_bus[mci->mc_idx];
+
 	if (edac_create_sysfs_mci_device(mci)) {
 		edac_mc_printk(mci, KERN_WARNING,
 			"failed to create sysfs device\n");
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index ef15a7e613bc..e7c32c4f7837 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
 		return -ENODEV;
 
 	csrow->dev.type = &csrow_attr_type;
-	csrow->dev.bus = &mci->bus;
+	csrow->dev.bus = mci->bus;
 	device_initialize(&csrow->dev);
 	csrow->dev.parent = &mci->dev;
 	csrow->mci = mci;
@@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci,
 	dimm->mci = mci;
 
 	dimm->dev.type = &dimm_attr_type;
-	dimm->dev.bus = &mci->bus;
+	dimm->dev.bus = mci->bus;
 	device_initialize(&dimm->dev);
 
 	dimm->dev.parent = &mci->dev;
@@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	 * The memory controller needs its own bus, in order to avoid
 	 * namespace conflicts at /sys/bus/edac.
 	 */
-	mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
-	if (!mci->bus.name)
+	mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
+	if (!mci->bus->name)
 		return -ENOMEM;
-	edac_dbg(0, "creating bus %s\n", mci->bus.name);
-	err = bus_register(&mci->bus);
+
+	edac_dbg(0, "creating bus %s\n", mci->bus->name);
+
+	err = bus_register(mci->bus);
 	if (err < 0)
 		return err;
 
@@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	device_initialize(&mci->dev);
 
 	mci->dev.parent = mci_pdev;
-	mci->dev.bus = &mci->bus;
+	mci->dev.bus = mci->bus;
 	dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
 	dev_set_drvdata(&mci->dev, mci);
 	pm_runtime_forbid(&mci->dev);
@@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	err = device_add(&mci->dev);
 	if (err < 0) {
 		edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
-		bus_unregister(&mci->bus);
-		kfree(mci->bus.name);
+		bus_unregister(mci->bus);
+		kfree(mci->bus->name);
 		return err;
 	}
 
@@ -1064,8 +1066,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	}
 fail2:
 	device_unregister(&mci->dev);
-	bus_unregister(&mci->bus);
-	kfree(mci->bus.name);
+	bus_unregister(mci->bus);
+	kfree(mci->bus->name);
 	return err;
 }
 
@@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci)
 {
 	edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
 	device_unregister(&mci->dev);
-	bus_unregister(&mci->bus);
-	kfree(mci->bus.name);
+	bus_unregister(mci->bus);
+	kfree(mci->bus->name);
 }
 
 static void mc_attr_release(struct device *dev)
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 1b635178cc44..157b934e8ce3 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci)
 	if (!i5100_debugfs)
 		return -ENODEV;
 
-	priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+	priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs);
 
 	if (!priv->debugfs)
 		return -ENOMEM;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 0b763276f619..5c6d7fbaf89e 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -622,7 +622,7 @@ struct edac_raw_error_desc {
  */
 struct mem_ctl_info {
 	struct device			dev;
-	struct bus_type			bus;
+	struct bus_type			*bus;
 
 	struct list_head link;	/* for global list of mem_ctl_info structs */
 
@@ -742,4 +742,9 @@ struct mem_ctl_info {
 #endif
 };
 
+/*
+ * Maximum number of memory controllers in the coherent fabric.
+ */
+#define EDAC_MAX_MCS	16
+
 #endif

From 94190301ffa059c2d127b3a67ec5d161d5c62681 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Fri, 28 Jun 2013 17:15:25 +0200
Subject: [PATCH 233/320] usb: option: add TP-LINK MA260
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 9cc40031c23c..bd02f257de13 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1334,6 +1334,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) },
 	{ USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	{ USB_DEVICE(TPLINK_VENDOR_ID, 0x9000),					/* TP-Link MA260 */
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) },	/* D-Link DWM-156 (variant) */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) },	/* D-Link DWM-156 (variant) */

From 0665f9f852b4ac05f2f62046a169f0f5b9212317 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Mon, 22 Jul 2013 14:37:39 +1000
Subject: [PATCH 234/320] drm/qxl: add delayed fb operations

Due to the nature of qxl hw we cannot queue operations while in an irq
context, so we queue these operations as best we can until atomic allocations
fail, and dequeue them later in a work queue.

Daniel looked over the locking on the list and agrees it should be sufficent.

The atomic allocs use no warn, as the last thing we want if we haven't memory
to allocate space for a printk in an irq context is more printks.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/qxl/qxl_drv.h |   1 +
 drivers/gpu/drm/qxl/qxl_fb.c  | 188 +++++++++++++++++++++++++++++-----
 2 files changed, 166 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index aacb791464a3..6a4106fb4c21 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -314,6 +314,7 @@ struct qxl_device {
 	struct workqueue_struct *gc_queue;
 	struct work_struct gc_work;
 
+	struct work_struct fb_work;
 };
 
 /* forward declaration for QXL_INFO_IO */
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index 76f39d88d684..88722f233430 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -37,12 +37,29 @@
 
 #define QXL_DIRTY_DELAY (HZ / 30)
 
+#define QXL_FB_OP_FILLRECT 1
+#define QXL_FB_OP_COPYAREA 2
+#define QXL_FB_OP_IMAGEBLIT 3
+
+struct qxl_fb_op {
+	struct list_head head;
+	int op_type;
+	union {
+		struct fb_fillrect fr;
+		struct fb_copyarea ca;
+		struct fb_image ib;
+	} op;
+	void *img_data;
+};
+
 struct qxl_fbdev {
 	struct drm_fb_helper helper;
 	struct qxl_framebuffer	qfb;
 	struct list_head	fbdev_list;
 	struct qxl_device	*qdev;
 
+	spinlock_t delayed_ops_lock;
+	struct list_head delayed_ops;
 	void *shadow;
 	int size;
 
@@ -164,8 +181,69 @@ static struct fb_deferred_io qxl_defio = {
 	.deferred_io	= qxl_deferred_io,
 };
 
-static void qxl_fb_fillrect(struct fb_info *info,
-			    const struct fb_fillrect *fb_rect)
+static void qxl_fb_delayed_fillrect(struct qxl_fbdev *qfbdev,
+				    const struct fb_fillrect *fb_rect)
+{
+	struct qxl_fb_op *op;
+	unsigned long flags;
+
+	op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN);
+	if (!op)
+		return;
+
+	op->op.fr = *fb_rect;
+	op->img_data = NULL;
+	op->op_type = QXL_FB_OP_FILLRECT;
+
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_add_tail(&op->head, &qfbdev->delayed_ops);
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+}
+
+static void qxl_fb_delayed_copyarea(struct qxl_fbdev *qfbdev,
+				    const struct fb_copyarea *fb_copy)
+{
+	struct qxl_fb_op *op;
+	unsigned long flags;
+
+	op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN);
+	if (!op)
+		return;
+
+	op->op.ca = *fb_copy;
+	op->img_data = NULL;
+	op->op_type = QXL_FB_OP_COPYAREA;
+
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_add_tail(&op->head, &qfbdev->delayed_ops);
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+}
+
+static void qxl_fb_delayed_imageblit(struct qxl_fbdev *qfbdev,
+				     const struct fb_image *fb_image)
+{
+	struct qxl_fb_op *op;
+	unsigned long flags;
+	uint32_t size = fb_image->width * fb_image->height * (fb_image->depth >= 8 ? fb_image->depth / 8 : 1);
+
+	op = kmalloc(sizeof(struct qxl_fb_op) + size, GFP_ATOMIC | __GFP_NOWARN);
+	if (!op)
+		return;
+
+	op->op.ib = *fb_image;
+	op->img_data = (void *)(op + 1);
+	op->op_type = QXL_FB_OP_IMAGEBLIT;
+
+	memcpy(op->img_data, fb_image->data, size);
+
+	op->op.ib.data = op->img_data;
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_add_tail(&op->head, &qfbdev->delayed_ops);
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+}
+
+static void qxl_fb_fillrect_internal(struct fb_info *info,
+				     const struct fb_fillrect *fb_rect)
 {
 	struct qxl_fbdev *qfbdev = info->par;
 	struct qxl_device *qdev = qfbdev->qdev;
@@ -203,17 +281,28 @@ static void qxl_fb_fillrect(struct fb_info *info,
 	qxl_draw_fill_rec.rect = rect;
 	qxl_draw_fill_rec.color = color;
 	qxl_draw_fill_rec.rop = rop;
-	if (!drm_can_sleep()) {
-		qxl_io_log(qdev,
-			"%s: TODO use RCU, mysterious locks with spin_lock\n",
-			__func__);
-		return;
-	}
+
 	qxl_draw_fill(&qxl_draw_fill_rec);
 }
 
-static void qxl_fb_copyarea(struct fb_info *info,
-			    const struct fb_copyarea *region)
+static void qxl_fb_fillrect(struct fb_info *info,
+			    const struct fb_fillrect *fb_rect)
+{
+	struct qxl_fbdev *qfbdev = info->par;
+	struct qxl_device *qdev = qfbdev->qdev;
+
+	if (!drm_can_sleep()) {
+		qxl_fb_delayed_fillrect(qfbdev, fb_rect);
+		schedule_work(&qdev->fb_work);
+		return;
+	}
+	/* make sure any previous work is done */
+	flush_work(&qdev->fb_work);
+	qxl_fb_fillrect_internal(info, fb_rect);
+}
+
+static void qxl_fb_copyarea_internal(struct fb_info *info,
+				     const struct fb_copyarea *region)
 {
 	struct qxl_fbdev *qfbdev = info->par;
 
@@ -223,37 +312,89 @@ static void qxl_fb_copyarea(struct fb_info *info,
 			  region->dx, region->dy);
 }
 
+static void qxl_fb_copyarea(struct fb_info *info,
+			    const struct fb_copyarea *region)
+{
+	struct qxl_fbdev *qfbdev = info->par;
+	struct qxl_device *qdev = qfbdev->qdev;
+
+	if (!drm_can_sleep()) {
+		qxl_fb_delayed_copyarea(qfbdev, region);
+		schedule_work(&qdev->fb_work);
+		return;
+	}
+	/* make sure any previous work is done */
+	flush_work(&qdev->fb_work);
+	qxl_fb_copyarea_internal(info, region);
+}
+
 static void qxl_fb_imageblit_safe(struct qxl_fb_image *qxl_fb_image)
 {
 	qxl_draw_opaque_fb(qxl_fb_image, 0);
 }
 
+static void qxl_fb_imageblit_internal(struct fb_info *info,
+				      const struct fb_image *image)
+{
+	struct qxl_fbdev *qfbdev = info->par;
+	struct qxl_fb_image qxl_fb_image;
+
+	/* ensure proper order  rendering operations - TODO: must do this
+	 * for everything. */
+	qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image);
+	qxl_fb_imageblit_safe(&qxl_fb_image);
+}
+
 static void qxl_fb_imageblit(struct fb_info *info,
 			     const struct fb_image *image)
 {
 	struct qxl_fbdev *qfbdev = info->par;
 	struct qxl_device *qdev = qfbdev->qdev;
-	struct qxl_fb_image qxl_fb_image;
 
 	if (!drm_can_sleep()) {
-		/* we cannot do any ttm_bo allocation since that will fail on
-		 * ioremap_wc..__get_vm_area_node, so queue the work item
-		 * instead This can happen from printk inside an interrupt
-		 * context, i.e.: smp_apic_timer_interrupt..check_cpu_stall */
-		qxl_io_log(qdev,
-			"%s: TODO use RCU, mysterious locks with spin_lock\n",
-			   __func__);
+		qxl_fb_delayed_imageblit(qfbdev, image);
+		schedule_work(&qdev->fb_work);
 		return;
 	}
+	/* make sure any previous work is done */
+	flush_work(&qdev->fb_work);
+	qxl_fb_imageblit_internal(info, image);
+}
 
-	/* ensure proper order of rendering operations - TODO: must do this
-	 * for everything. */
-	qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image);
-	qxl_fb_imageblit_safe(&qxl_fb_image);
+static void qxl_fb_work(struct work_struct *work)
+{
+	struct qxl_device *qdev = container_of(work, struct qxl_device, fb_work);
+	unsigned long flags;
+	struct qxl_fb_op *entry, *tmp;
+	struct qxl_fbdev *qfbdev = qdev->mode_info.qfbdev;
+
+	/* since the irq context just adds entries to the end of the
+	   list dropping the lock should be fine, as entry isn't modified
+	   in the operation code */
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_for_each_entry_safe(entry, tmp, &qfbdev->delayed_ops, head) {
+		spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+		switch (entry->op_type) {
+		case QXL_FB_OP_FILLRECT:
+			qxl_fb_fillrect_internal(qfbdev->helper.fbdev, &entry->op.fr);
+			break;
+		case QXL_FB_OP_COPYAREA:
+			qxl_fb_copyarea_internal(qfbdev->helper.fbdev, &entry->op.ca);
+			break;
+		case QXL_FB_OP_IMAGEBLIT:
+			qxl_fb_imageblit_internal(qfbdev->helper.fbdev, &entry->op.ib);
+			break;
+		}
+		spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+		list_del(&entry->head);
+		kfree(entry);
+	}
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
 }
 
 int qxl_fb_init(struct qxl_device *qdev)
 {
+	INIT_WORK(&qdev->fb_work, qxl_fb_work);
 	return 0;
 }
 
@@ -536,7 +677,8 @@ int qxl_fbdev_init(struct qxl_device *qdev)
 	qfbdev->qdev = qdev;
 	qdev->mode_info.qfbdev = qfbdev;
 	qfbdev->helper.funcs = &qxl_fb_helper_funcs;
-
+	spin_lock_init(&qfbdev->delayed_ops_lock);
+	INIT_LIST_HEAD(&qfbdev->delayed_ops);
 	ret = drm_fb_helper_init(qdev->ddev, &qfbdev->helper,
 				 qxl_num_crtc /* num_crtc - QXL supports just 1 */,
 				 QXLFB_CONN_LIMIT);

From 4f49ec92be64ad1d96cf5d26fc8276f9849202a3 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 23 Jul 2013 14:06:07 +1000
Subject: [PATCH 235/320] qxl: allow creation of pre-pinned objects and use for
 releases.

In order to fix an issue with reservations we need to create the releases
as pre-pinned objects, this changes the placement interface and bo creation
interface to allow creating pinned objects to save nested reservations later.

This is just a stepping stone to main fix which follows to actually fix how
qxl deals with reservations.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/qxl/qxl_cmd.c     |  2 +-
 drivers/gpu/drm/qxl/qxl_gem.c     |  2 +-
 drivers/gpu/drm/qxl/qxl_ioctl.c   |  2 +-
 drivers/gpu/drm/qxl/qxl_object.c  | 23 +++++++++++------------
 drivers/gpu/drm/qxl/qxl_object.h  |  4 ++--
 drivers/gpu/drm/qxl/qxl_release.c |  9 +++------
 drivers/gpu/drm/qxl/qxl_ttm.c     |  2 +-
 7 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 93c2f2cceb51..4e6a62716618 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -266,7 +266,7 @@ int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size,
 	int ret;
 
 	ret = qxl_bo_create(qdev, size, false /* not kernel - device */,
-			    QXL_GEM_DOMAIN_VRAM, NULL, &bo);
+			    false, QXL_GEM_DOMAIN_VRAM, NULL, &bo);
 	if (ret) {
 		DRM_ERROR("failed to allocate VRAM BO\n");
 		return ret;
diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c
index a235693aabba..25e1777fb0a2 100644
--- a/drivers/gpu/drm/qxl/qxl_gem.c
+++ b/drivers/gpu/drm/qxl/qxl_gem.c
@@ -55,7 +55,7 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size,
 	/* At least align on page size */
 	if (alignment < PAGE_SIZE)
 		alignment = PAGE_SIZE;
-	r = qxl_bo_create(qdev, size, kernel, initial_domain, surf, &qbo);
+	r = qxl_bo_create(qdev, size, kernel, false, initial_domain, surf, &qbo);
 	if (r) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR(
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 27f45e49250d..7448c5ea92e6 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -305,7 +305,7 @@ static int qxl_update_area_ioctl(struct drm_device *dev, void *data,
 		goto out;
 
 	if (!qobj->pin_count) {
-		qxl_ttm_placement_from_domain(qobj, qobj->type);
+		qxl_ttm_placement_from_domain(qobj, qobj->type, false);
 		ret = ttm_bo_validate(&qobj->tbo, &qobj->placement,
 				      true, false);
 		if (unlikely(ret))
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index 1191fe7788c9..50e7a6177167 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -51,20 +51,21 @@ bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain)
+void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned)
 {
 	u32 c = 0;
+	u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0;
 
 	qbo->placement.fpfn = 0;
 	qbo->placement.lpfn = 0;
 	qbo->placement.placement = qbo->placements;
 	qbo->placement.busy_placement = qbo->placements;
 	if (domain == QXL_GEM_DOMAIN_VRAM)
-		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM;
+		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag;
 	if (domain == QXL_GEM_DOMAIN_SURFACE)
-		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0;
+		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag;
 	if (domain == QXL_GEM_DOMAIN_CPU)
-		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag;
 	if (!c)
 		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	qbo->placement.num_placement = c;
@@ -73,7 +74,7 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain)
 
 
 int qxl_bo_create(struct qxl_device *qdev,
-		  unsigned long size, bool kernel, u32 domain,
+		  unsigned long size, bool kernel, bool pinned, u32 domain,
 		  struct qxl_surface *surf,
 		  struct qxl_bo **bo_ptr)
 {
@@ -99,7 +100,7 @@ int qxl_bo_create(struct qxl_device *qdev,
 	}
 	bo->gem_base.driver_private = NULL;
 	bo->type = domain;
-	bo->pin_count = 0;
+	bo->pin_count = pinned ? 1 : 0;
 	bo->surface_id = 0;
 	qxl_fence_init(qdev, &bo->fence);
 	INIT_LIST_HEAD(&bo->list);
@@ -107,7 +108,7 @@ int qxl_bo_create(struct qxl_device *qdev,
 	if (surf)
 		bo->surf = *surf;
 
-	qxl_ttm_placement_from_domain(bo, domain);
+	qxl_ttm_placement_from_domain(bo, domain, pinned);
 
 	r = ttm_bo_init(&qdev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, 0, !kernel, NULL, size,
@@ -228,7 +229,7 @@ struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo)
 int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr)
 {
 	struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
-	int r, i;
+	int r;
 
 	if (bo->pin_count) {
 		bo->pin_count++;
@@ -236,9 +237,7 @@ int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr)
 			*gpu_addr = qxl_bo_gpu_offset(bo);
 		return 0;
 	}
-	qxl_ttm_placement_from_domain(bo, domain);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+	qxl_ttm_placement_from_domain(bo, domain, true);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
@@ -350,7 +349,7 @@ int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo)
 		return ret;
 
 	if (!bo->pin_count) {
-		qxl_ttm_placement_from_domain(bo, bo->type);
+		qxl_ttm_placement_from_domain(bo, bo->type, false);
 		ret = ttm_bo_validate(&bo->tbo, &bo->placement,
 				      true, false);
 		if (ret)
diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h
index ee7ad79ce781..116637f09347 100644
--- a/drivers/gpu/drm/qxl/qxl_object.h
+++ b/drivers/gpu/drm/qxl/qxl_object.h
@@ -88,7 +88,7 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type,
 
 extern int qxl_bo_create(struct qxl_device *qdev,
 			 unsigned long size,
-			 bool kernel, u32 domain,
+			 bool kernel, bool pinned, u32 domain,
 			 struct qxl_surface *surf,
 			 struct qxl_bo **bo_ptr);
 extern int qxl_bo_kmap(struct qxl_bo *bo, void **ptr);
@@ -99,7 +99,7 @@ extern struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo);
 extern void qxl_bo_unref(struct qxl_bo **bo);
 extern int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr);
 extern int qxl_bo_unpin(struct qxl_bo *bo);
-extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain);
+extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned);
 extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo);
 
 extern int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo);
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index b443d6751d5f..b7f1271c6014 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -118,7 +118,9 @@ static int qxl_release_bo_alloc(struct qxl_device *qdev,
 				struct qxl_bo **bo)
 {
 	int ret;
-	ret = qxl_bo_create(qdev, PAGE_SIZE, false, QXL_GEM_DOMAIN_VRAM, NULL,
+	/* pin releases bo's they are too messy to evict */
+	ret = qxl_bo_create(qdev, PAGE_SIZE, false, true,
+			    QXL_GEM_DOMAIN_VRAM, NULL,
 			    bo);
 	return ret;
 }
@@ -216,11 +218,6 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 			mutex_unlock(&qdev->release_mutex);
 			return ret;
 		}
-
-		/* pin releases bo's they are too messy to evict */
-		ret = qxl_bo_reserve(qdev->current_release_bo[cur_idx], false);
-		qxl_bo_pin(qdev->current_release_bo[cur_idx], QXL_GEM_DOMAIN_VRAM, NULL);
-		qxl_bo_unreserve(qdev->current_release_bo[cur_idx]);
 	}
 
 	bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]);
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 489cb8cece4d..1dfd84cda2a1 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -206,7 +206,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo,
 		return;
 	}
 	qbo = container_of(bo, struct qxl_bo, tbo);
-	qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU);
+	qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU, false);
 	*placement = qbo->placement;
 }
 

From 8002db6336dd361fc13214e9515fe5d52ff294ee Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 23 Jul 2013 14:16:42 +1000
Subject: [PATCH 236/320] qxl: convert qxl driver to proper use for
 reservations

The recent addition of lockdep support to reservations and their subsequent
use by TTM showed up a number of potential problems with the way qxl was using
TTM objects.

a) it was allocating objects, and reserving them later without validating
underneath the reservation, which meant in extreme conditions the objects could
be evicted before the reservation ever used them.

b) it was reserving objects straight after allocating them, but with no
ability to back off should the reservations fail. It now allocates the necessary
objects then does a complete reservation pass on them to avoid deadlocks.

c) it had two lists per release tracking objects, unnecessary complicating
the reservation process.

This patch removes the dual object tracking, adds reservations ticket support
to the release and fence object handling. It then ports the internal fb
drawing code and the userspace facing ioctl to use the new interfaces properly,
along with cleanup up the error path handling in some codepaths.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/qxl/qxl_cmd.c     |  40 ++--
 drivers/gpu/drm/qxl/qxl_display.c |  70 ++++---
 drivers/gpu/drm/qxl/qxl_draw.c    | 269 +++++++++++++++++--------
 drivers/gpu/drm/qxl/qxl_drv.h     |  75 +++----
 drivers/gpu/drm/qxl/qxl_fence.c   |  10 +-
 drivers/gpu/drm/qxl/qxl_image.c   | 113 ++++++++---
 drivers/gpu/drm/qxl/qxl_ioctl.c   | 321 ++++++++++++++++--------------
 drivers/gpu/drm/qxl/qxl_object.c  |  49 +----
 drivers/gpu/drm/qxl/qxl_object.h  |   2 -
 drivers/gpu/drm/qxl/qxl_release.c | 203 ++++++++++++-------
 10 files changed, 682 insertions(+), 470 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 4e6a62716618..eb89653a7a17 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -179,9 +179,10 @@ qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea
 			      uint32_t type, bool interruptible)
 {
 	struct qxl_command cmd;
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
 
 	cmd.type = type;
-	cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset);
+	cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
 
 	return qxl_ring_push(qdev->command_ring, &cmd, interruptible);
 }
@@ -191,9 +192,10 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas
 			     uint32_t type, bool interruptible)
 {
 	struct qxl_command cmd;
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
 
 	cmd.type = type;
-	cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset);
+	cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
 
 	return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible);
 }
@@ -214,7 +216,6 @@ int qxl_garbage_collect(struct qxl_device *qdev)
 	struct qxl_release *release;
 	uint64_t id, next_id;
 	int i = 0;
-	int ret;
 	union qxl_release_info *info;
 
 	while (qxl_ring_pop(qdev->release_ring, &id)) {
@@ -224,17 +225,10 @@ int qxl_garbage_collect(struct qxl_device *qdev)
 			if (release == NULL)
 				break;
 
-			ret = qxl_release_reserve(qdev, release, false);
-			if (ret) {
-				qxl_io_log(qdev, "failed to reserve release on garbage collect %lld\n", id);
-				DRM_ERROR("failed to reserve release %lld\n", id);
-			}
-
 			info = qxl_release_map(qdev, release);
 			next_id = info->next;
 			qxl_release_unmap(qdev, release, info);
 
-			qxl_release_unreserve(qdev, release);
 			QXL_INFO(qdev, "popped %lld, next %lld\n", id,
 				next_id);
 
@@ -259,7 +253,9 @@ int qxl_garbage_collect(struct qxl_device *qdev)
 	return i;
 }
 
-int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size,
+int qxl_alloc_bo_reserved(struct qxl_device *qdev,
+			  struct qxl_release *release,
+			  unsigned long size,
 			  struct qxl_bo **_bo)
 {
 	struct qxl_bo *bo;
@@ -271,15 +267,15 @@ int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size,
 		DRM_ERROR("failed to allocate VRAM BO\n");
 		return ret;
 	}
-	ret = qxl_bo_reserve(bo, false);
-	if (unlikely(ret != 0))
+	ret = qxl_release_list_add(release, bo);
+	if (ret)
 		goto out_unref;
 
 	*_bo = bo;
 	return 0;
 out_unref:
 	qxl_bo_unref(&bo);
-	return 0;
+	return ret;
 }
 
 static int wait_for_io_cmd_user(struct qxl_device *qdev, uint8_t val, long port, bool intr)
@@ -503,6 +499,10 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev,
 	if (ret)
 		return ret;
 
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		return ret;
+
 	cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_SURFACE_CMD_CREATE;
 	cmd->u.surface_create.format = surf->surf.format;
@@ -524,14 +524,11 @@ int qxl_hw_surface_alloc(struct qxl_device *qdev,
 
 	surf->surf_create = release;
 
-	/* no need to add a release to the fence for this bo,
+	/* no need to add a release to the fence for this surface bo,
 	   since it is only released when we ask to destroy the surface
 	   and it would never signal otherwise */
-	qxl_fence_releaseable(qdev, release);
-
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false);
-
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
 
 	surf->hw_surf_alloc = true;
 	spin_lock(&qdev->surf_id_idr_lock);
@@ -573,12 +570,9 @@ int qxl_hw_surface_dealloc(struct qxl_device *qdev,
 	cmd->surface_id = id;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
-
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false);
 
-	qxl_release_unreserve(qdev, release);
-
+	qxl_release_fence_buffer_objects(release);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index f76f5dd7bfc4..835caba026d3 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -179,7 +179,7 @@ static void qxl_crtc_destroy(struct drm_crtc *crtc)
 	kfree(qxl_crtc);
 }
 
-static void
+static int
 qxl_hide_cursor(struct qxl_device *qdev)
 {
 	struct qxl_release *release;
@@ -188,14 +188,22 @@ qxl_hide_cursor(struct qxl_device *qdev)
 
 	ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD,
 					 &release, NULL);
+	if (ret)
+		return ret;
+
+	ret = qxl_release_reserve_list(release, true);
+	if (ret) {
+		qxl_release_free(qdev, release);
+		return ret;
+	}
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_HIDE;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+	return 0;
 }
 
 static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
@@ -216,10 +224,8 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 
 	int size = 64*64*4;
 	int ret = 0;
-	if (!handle) {
-		qxl_hide_cursor(qdev);
-		return 0;
-	}
+	if (!handle)
+		return qxl_hide_cursor(qdev);
 
 	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
 	if (!obj) {
@@ -234,8 +240,9 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 		goto out_unref;
 
 	ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL);
+	qxl_bo_unreserve(user_bo);
 	if (ret)
-		goto out_unreserve;
+		goto out_unref;
 
 	ret = qxl_bo_kmap(user_bo, &user_ptr);
 	if (ret)
@@ -246,14 +253,20 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 					 &release, NULL);
 	if (ret)
 		goto out_kunmap;
-	ret = qxl_alloc_bo_reserved(qdev, sizeof(struct qxl_cursor) + size,
-				    &cursor_bo);
+
+	ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_cursor) + size,
+			   &cursor_bo);
 	if (ret)
 		goto out_free_release;
-	ret = qxl_bo_kmap(cursor_bo, (void **)&cursor);
+
+	ret = qxl_release_reserve_list(release, false);
 	if (ret)
 		goto out_free_bo;
 
+	ret = qxl_bo_kmap(cursor_bo, (void **)&cursor);
+	if (ret)
+		goto out_backoff;
+
 	cursor->header.unique = 0;
 	cursor->header.type = SPICE_CURSOR_TYPE_ALPHA;
 	cursor->header.width = 64;
@@ -269,11 +282,7 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 
 	qxl_bo_kunmap(cursor_bo);
 
-	/* finish with the userspace bo */
 	qxl_bo_kunmap(user_bo);
-	qxl_bo_unpin(user_bo);
-	qxl_bo_unreserve(user_bo);
-	drm_gem_object_unreference_unlocked(obj);
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_SET;
@@ -281,30 +290,35 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 	cmd->u.set.position.y = qcrtc->cur_y;
 
 	cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0);
-	qxl_release_add_res(qdev, release, cursor_bo);
 
 	cmd->u.set.visible = 1;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+	/* finish with the userspace bo */
+	ret = qxl_bo_reserve(user_bo, false);
+	if (!ret) {
+		qxl_bo_unpin(user_bo);
+		qxl_bo_unreserve(user_bo);
+	}
+	drm_gem_object_unreference_unlocked(obj);
 
-	qxl_bo_unreserve(cursor_bo);
 	qxl_bo_unref(&cursor_bo);
 
 	return ret;
+
+out_backoff:
+	qxl_release_backoff_reserve_list(release);
 out_free_bo:
 	qxl_bo_unref(&cursor_bo);
 out_free_release:
-	qxl_release_unreserve(qdev, release);
 	qxl_release_free(qdev, release);
 out_kunmap:
 	qxl_bo_kunmap(user_bo);
 out_unpin:
 	qxl_bo_unpin(user_bo);
-out_unreserve:
-	qxl_bo_unreserve(user_bo);
 out_unref:
 	drm_gem_object_unreference_unlocked(obj);
 	return ret;
@@ -322,6 +336,14 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc,
 
 	ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD,
 				   &release, NULL);
+	if (ret)
+		return ret;
+
+	ret = qxl_release_reserve_list(release, true);
+	if (ret) {
+		qxl_release_free(qdev, release);
+		return ret;
+	}
 
 	qcrtc->cur_x = x;
 	qcrtc->cur_y = y;
@@ -332,9 +354,9 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc,
 	cmd->u.position.y = qcrtc->cur_y;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c
index 3c8c3dbf9378..56e1d633875e 100644
--- a/drivers/gpu/drm/qxl/qxl_draw.c
+++ b/drivers/gpu/drm/qxl/qxl_draw.c
@@ -23,25 +23,29 @@
 #include "qxl_drv.h"
 #include "qxl_object.h"
 
+static int alloc_clips(struct qxl_device *qdev,
+		       struct qxl_release *release,
+		       unsigned num_clips,
+		       struct qxl_bo **clips_bo)
+{
+	int size = sizeof(struct qxl_clip_rects) + sizeof(struct qxl_rect) * num_clips;
+
+	return qxl_alloc_bo_reserved(qdev, release, size, clips_bo);
+}
+
 /* returns a pointer to the already allocated qxl_rect array inside
  * the qxl_clip_rects. This is *not* the same as the memory allocated
  * on the device, it is offset to qxl_clip_rects.chunk.data */
 static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev,
 					      struct qxl_drawable *drawable,
 					      unsigned num_clips,
-					      struct qxl_bo **clips_bo,
-					      struct qxl_release *release)
+					      struct qxl_bo *clips_bo)
 {
 	struct qxl_clip_rects *dev_clips;
 	int ret;
-	int size = sizeof(*dev_clips) + sizeof(struct qxl_rect) * num_clips;
-	ret = qxl_alloc_bo_reserved(qdev, size, clips_bo);
-	if (ret)
-		return NULL;
 
-	ret = qxl_bo_kmap(*clips_bo, (void **)&dev_clips);
+	ret = qxl_bo_kmap(clips_bo, (void **)&dev_clips);
 	if (ret) {
-		qxl_bo_unref(clips_bo);
 		return NULL;
 	}
 	dev_clips->num_rects = num_clips;
@@ -52,20 +56,34 @@ static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev,
 }
 
 static int
-make_drawable(struct qxl_device *qdev, int surface, uint8_t type,
-	      const struct qxl_rect *rect,
-	      struct qxl_release **release)
+alloc_drawable(struct qxl_device *qdev, struct qxl_release **release)
 {
-	struct qxl_drawable *drawable;
-	int i, ret;
-
-	ret = qxl_alloc_release_reserved(qdev, sizeof(*drawable),
+	int ret;
+	ret = qxl_alloc_release_reserved(qdev, sizeof(struct qxl_drawable),
 					 QXL_RELEASE_DRAWABLE, release,
 					 NULL);
-	if (ret)
-		return ret;
+	return ret;
+}
+
+static void
+free_drawable(struct qxl_device *qdev, struct qxl_release *release)
+{
+	qxl_release_free(qdev, release);
+}
+
+/* release needs to be reserved at this point */
+static int
+make_drawable(struct qxl_device *qdev, int surface, uint8_t type,
+	      const struct qxl_rect *rect,
+	      struct qxl_release *release)
+{
+	struct qxl_drawable *drawable;
+	int i;
+
+	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
+	if (!drawable)
+		return -ENOMEM;
 
-	drawable = (struct qxl_drawable *)qxl_release_map(qdev, *release);
 	drawable->type = type;
 
 	drawable->surface_id = surface;		/* Only primary for now */
@@ -91,14 +109,23 @@ make_drawable(struct qxl_device *qdev, int surface, uint8_t type,
 		drawable->bbox = *rect;
 
 	drawable->mm_time = qdev->rom->mm_clock;
-	qxl_release_unmap(qdev, *release, &drawable->release_info);
+	qxl_release_unmap(qdev, release, &drawable->release_info);
 	return 0;
 }
 
-static int qxl_palette_create_1bit(struct qxl_bo **palette_bo,
+static int alloc_palette_object(struct qxl_device *qdev,
+				struct qxl_release *release,
+				struct qxl_bo **palette_bo)
+{
+	return qxl_alloc_bo_reserved(qdev, release,
+				     sizeof(struct qxl_palette) + sizeof(uint32_t) * 2,
+				     palette_bo);
+}
+
+static int qxl_palette_create_1bit(struct qxl_bo *palette_bo,
+				   struct qxl_release *release,
 				   const struct qxl_fb_image *qxl_fb_image)
 {
-	struct qxl_device *qdev = qxl_fb_image->qdev;
 	const struct fb_image *fb_image = &qxl_fb_image->fb_image;
 	uint32_t visual = qxl_fb_image->visual;
 	const uint32_t *pseudo_palette = qxl_fb_image->pseudo_palette;
@@ -108,12 +135,7 @@ static int qxl_palette_create_1bit(struct qxl_bo **palette_bo,
 	static uint64_t unique; /* we make no attempt to actually set this
 				 * correctly globaly, since that would require
 				 * tracking all of our palettes. */
-
-	ret = qxl_alloc_bo_reserved(qdev,
-				    sizeof(struct qxl_palette) + sizeof(uint32_t) * 2,
-				    palette_bo);
-
-	ret = qxl_bo_kmap(*palette_bo, (void **)&pal);
+	ret = qxl_bo_kmap(palette_bo, (void **)&pal);
 	pal->num_ents = 2;
 	pal->unique = unique++;
 	if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) {
@@ -126,7 +148,7 @@ static int qxl_palette_create_1bit(struct qxl_bo **palette_bo,
 	}
 	pal->ents[0] = bgcolor;
 	pal->ents[1] = fgcolor;
-	qxl_bo_kunmap(*palette_bo);
+	qxl_bo_kunmap(palette_bo);
 	return 0;
 }
 
@@ -144,44 +166,63 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image,
 	const char *src = fb_image->data;
 	int depth = fb_image->depth;
 	struct qxl_release *release;
-	struct qxl_bo *image_bo;
 	struct qxl_image *image;
 	int ret;
-
+	struct qxl_drm_image *dimage;
+	struct qxl_bo *palette_bo = NULL;
 	if (stride == 0)
 		stride = depth * width / 8;
 
+	ret = alloc_drawable(qdev, &release);
+	if (ret)
+		return;
+
+	ret = qxl_image_alloc_objects(qdev, release,
+				      &dimage,
+				      height, stride);
+	if (ret)
+		goto out_free_drawable;
+
+	if (depth == 1) {
+		ret = alloc_palette_object(qdev, release, &palette_bo);
+		if (ret)
+			goto out_free_image;
+	}
+
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_palette;
+
 	rect.left = x;
 	rect.right = x + width;
 	rect.top = y;
 	rect.bottom = y + height;
 
-	ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, &release);
-	if (ret)
-		return;
-
-	ret = qxl_image_create(qdev, release, &image_bo,
-			       (const uint8_t *)src, 0, 0,
-			       width, height, depth, stride);
+	ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, release);
 	if (ret) {
-		qxl_release_unreserve(qdev, release);
+		qxl_release_backoff_reserve_list(release);
+		goto out_free_palette;
+	}
+
+	ret = qxl_image_init(qdev, release, dimage,
+			     (const uint8_t *)src, 0, 0,
+			     width, height, depth, stride);
+	if (ret) {
+		qxl_release_backoff_reserve_list(release);
 		qxl_release_free(qdev, release);
 		return;
 	}
 
 	if (depth == 1) {
-		struct qxl_bo *palette_bo;
 		void *ptr;
-		ret = qxl_palette_create_1bit(&palette_bo, qxl_fb_image);
-		qxl_release_add_res(qdev, release, palette_bo);
+		ret = qxl_palette_create_1bit(palette_bo, release, qxl_fb_image);
 
-		ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0);
+		ptr = qxl_bo_kmap_atomic_page(qdev, dimage->bo, 0);
 		image = ptr;
 		image->u.bitmap.palette =
 			qxl_bo_physical_address(qdev, palette_bo, 0);
-		qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr);
-		qxl_bo_unreserve(palette_bo);
-		qxl_bo_unref(&palette_bo);
+		qxl_bo_kunmap_atomic_page(qdev, dimage->bo, ptr);
 	}
 
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
@@ -199,16 +240,20 @@ void qxl_draw_opaque_fb(const struct qxl_fb_image *qxl_fb_image,
 	drawable->u.copy.mask.bitmap = 0;
 
 	drawable->u.copy.src_bitmap =
-		qxl_bo_physical_address(qdev, image_bo, 0);
+		qxl_bo_physical_address(qdev, dimage->bo, 0);
 	qxl_release_unmap(qdev, release, &drawable->release_info);
 
-	qxl_release_add_res(qdev, release, image_bo);
-	qxl_bo_unreserve(image_bo);
-	qxl_bo_unref(&image_bo);
-
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+out_free_palette:
+	if (palette_bo)
+		qxl_bo_unref(&palette_bo);
+out_free_image:
+	qxl_image_free_objects(qdev, dimage);
+out_free_drawable:
+	if (ret)
+		free_drawable(qdev, release);
 }
 
 /* push a draw command using the given clipping rectangles as
@@ -243,10 +288,14 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev,
 	int depth = qxl_fb->base.bits_per_pixel;
 	uint8_t *surface_base;
 	struct qxl_release *release;
-	struct qxl_bo *image_bo;
 	struct qxl_bo *clips_bo;
+	struct qxl_drm_image *dimage;
 	int ret;
 
+	ret = alloc_drawable(qdev, &release);
+	if (ret)
+		return;
+
 	left = clips->x1;
 	right = clips->x2;
 	top = clips->y1;
@@ -263,36 +312,52 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev,
 
 	width = right - left;
 	height = bottom - top;
+
+	ret = alloc_clips(qdev, release, num_clips, &clips_bo);
+	if (ret)
+		goto out_free_drawable;
+
+	ret = qxl_image_alloc_objects(qdev, release,
+				      &dimage,
+				      height, stride);
+	if (ret)
+		goto out_free_clips;
+
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_image;
+
 	drawable_rect.left = left;
 	drawable_rect.right = right;
 	drawable_rect.top = top;
 	drawable_rect.bottom = bottom;
+
 	ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &drawable_rect,
-			    &release);
+			    release);
 	if (ret)
-		return;
+		goto out_release_backoff;
 
 	ret = qxl_bo_kmap(bo, (void **)&surface_base);
 	if (ret)
-		goto out_unref;
+		goto out_release_backoff;
 
-	ret = qxl_image_create(qdev, release, &image_bo, surface_base,
-			       left, top, width, height, depth, stride);
+
+	ret = qxl_image_init(qdev, release, dimage, surface_base,
+			     left, top, width, height, depth, stride);
 	qxl_bo_kunmap(bo);
 	if (ret)
-		goto out_unref;
+		goto out_release_backoff;
+
+	rects = drawable_set_clipping(qdev, drawable, num_clips, clips_bo);
+	if (!rects)
+		goto out_release_backoff;
 
-	rects = drawable_set_clipping(qdev, drawable, num_clips, &clips_bo, release);
-	if (!rects) {
-		qxl_bo_unref(&image_bo);
-		goto out_unref;
-	}
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
 
 	drawable->clip.type = SPICE_CLIP_TYPE_RECTS;
 	drawable->clip.data = qxl_bo_physical_address(qdev,
 						      clips_bo, 0);
-	qxl_release_add_res(qdev, release, clips_bo);
 
 	drawable->u.copy.src_area.top = 0;
 	drawable->u.copy.src_area.bottom = height;
@@ -306,11 +371,9 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev,
 	drawable->u.copy.mask.pos.y = 0;
 	drawable->u.copy.mask.bitmap = 0;
 
-	drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, image_bo, 0);
+	drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, dimage->bo, 0);
 	qxl_release_unmap(qdev, release, &drawable->release_info);
-	qxl_release_add_res(qdev, release, image_bo);
-	qxl_bo_unreserve(image_bo);
-	qxl_bo_unref(&image_bo);
+
 	clips_ptr = clips;
 	for (i = 0; i < num_clips; i++, clips_ptr += inc) {
 		rects[i].left   = clips_ptr->x1;
@@ -319,17 +382,22 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev,
 		rects[i].bottom = clips_ptr->y2;
 	}
 	qxl_bo_kunmap(clips_bo);
-	qxl_bo_unreserve(clips_bo);
-	qxl_bo_unref(&clips_bo);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
-	return;
+	qxl_release_fence_buffer_objects(release);
+
+out_release_backoff:
+	if (ret)
+		qxl_release_backoff_reserve_list(release);
+out_free_image:
+	qxl_image_free_objects(qdev, dimage);
+out_free_clips:
+	qxl_bo_unref(&clips_bo);
+out_free_drawable:
+	/* only free drawable on error */
+	if (ret)
+		free_drawable(qdev, release);
 
-out_unref:
-	qxl_release_unreserve(qdev, release);
-	qxl_release_free(qdev, release);
 }
 
 void qxl_draw_copyarea(struct qxl_device *qdev,
@@ -342,22 +410,36 @@ void qxl_draw_copyarea(struct qxl_device *qdev,
 	struct qxl_release *release;
 	int ret;
 
+	ret = alloc_drawable(qdev, &release);
+	if (ret)
+		return;
+
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_release;
+
 	rect.left = dx;
 	rect.top = dy;
 	rect.right = dx + width;
 	rect.bottom = dy + height;
-	ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, &release);
-	if (ret)
-		return;
+	ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, release);
+	if (ret) {
+		qxl_release_backoff_reserve_list(release);
+		goto out_free_release;
+	}
 
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
 	drawable->u.copy_bits.src_pos.x = sx;
 	drawable->u.copy_bits.src_pos.y = sy;
-
 	qxl_release_unmap(qdev, release, &drawable->release_info);
-	qxl_fence_releaseable(qdev, release);
+
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+out_free_release:
+	if (ret)
+		free_drawable(qdev, release);
 }
 
 void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec)
@@ -370,10 +452,21 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec)
 	struct qxl_release *release;
 	int ret;
 
-	ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, &release);
+	ret = alloc_drawable(qdev, &release);
 	if (ret)
 		return;
 
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_release;
+
+	ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, release);
+	if (ret) {
+		qxl_release_backoff_reserve_list(release);
+		goto out_free_release;
+	}
+
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
 	drawable->u.fill.brush.type = SPICE_BRUSH_TYPE_SOLID;
 	drawable->u.fill.brush.u.color = color;
@@ -384,7 +477,11 @@ void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec)
 	drawable->u.fill.mask.bitmap = 0;
 
 	qxl_release_unmap(qdev, release, &drawable->release_info);
-	qxl_fence_releaseable(qdev, release);
+
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+out_free_release:
+	if (ret)
+		free_drawable(qdev, release);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 6a4106fb4c21..7e96f4f11738 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -42,6 +42,9 @@
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 
+/* just for ttm_validate_buffer */
+#include <ttm/ttm_execbuf_util.h>
+
 #include <drm/qxl_drm.h>
 #include "qxl_dev.h"
 
@@ -118,9 +121,9 @@ struct qxl_bo {
 	uint32_t surface_id;
 	struct qxl_fence fence; /* per bo fence  - list of releases */
 	struct qxl_release *surf_create;
-	atomic_t reserve_count;
 };
 #define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base)
+#define to_qxl_bo(tobj) container_of((tobj), struct qxl_bo, tbo)
 
 struct qxl_gem {
 	struct mutex		mutex;
@@ -128,12 +131,7 @@ struct qxl_gem {
 };
 
 struct qxl_bo_list {
-	struct list_head lhead;
-	struct qxl_bo *bo;
-};
-
-struct qxl_reloc_list {
-	struct list_head bos;
+	struct ttm_validate_buffer tv;
 };
 
 struct qxl_crtc {
@@ -195,10 +193,20 @@ enum {
 struct qxl_release {
 	int id;
 	int type;
-	int bo_count;
 	uint32_t release_offset;
 	uint32_t surface_release_id;
-	struct qxl_bo *bos[QXL_MAX_RES];
+	struct ww_acquire_ctx ticket;
+	struct list_head bos;
+};
+
+struct qxl_drm_chunk {
+	struct list_head head;
+	struct qxl_bo *bo;
+};
+
+struct qxl_drm_image {
+	struct qxl_bo *bo;
+	struct list_head chunk_list;
 };
 
 struct qxl_fb_image {
@@ -434,12 +442,19 @@ int qxl_mmap(struct file *filp, struct vm_area_struct *vma);
 
 /* qxl image */
 
-int qxl_image_create(struct qxl_device *qdev,
-		     struct qxl_release *release,
-		     struct qxl_bo **image_bo,
-		     const uint8_t *data,
-		     int x, int y, int width, int height,
-		     int depth, int stride);
+int qxl_image_init(struct qxl_device *qdev,
+		   struct qxl_release *release,
+		   struct qxl_drm_image *dimage,
+		   const uint8_t *data,
+		   int x, int y, int width, int height,
+		   int depth, int stride);
+int
+qxl_image_alloc_objects(struct qxl_device *qdev,
+			struct qxl_release *release,
+			struct qxl_drm_image **image_ptr,
+			int height, int stride);
+void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage);
+
 void qxl_update_screen(struct qxl_device *qxl);
 
 /* qxl io operations (qxl_cmd.c) */
@@ -460,20 +475,15 @@ int qxl_ring_push(struct qxl_ring *ring, const void *new_elt, bool interruptible
 void qxl_io_flush_release(struct qxl_device *qdev);
 void qxl_io_flush_surfaces(struct qxl_device *qdev);
 
-int qxl_release_reserve(struct qxl_device *qdev,
-			struct qxl_release *release, bool no_wait);
-void qxl_release_unreserve(struct qxl_device *qdev,
-			   struct qxl_release *release);
 union qxl_release_info *qxl_release_map(struct qxl_device *qdev,
 					struct qxl_release *release);
 void qxl_release_unmap(struct qxl_device *qdev,
 		       struct qxl_release *release,
 		       union qxl_release_info *info);
-/*
- * qxl_bo_add_resource.
- *
- */
-void qxl_bo_add_resource(struct qxl_bo *main_bo, struct qxl_bo *resource);
+int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo);
+int qxl_release_reserve_list(struct qxl_release *release, bool no_intr);
+void qxl_release_backoff_reserve_list(struct qxl_release *release);
+void qxl_release_fence_buffer_objects(struct qxl_release *release);
 
 int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 				       enum qxl_surface_cmd_type surface_cmd_type,
@@ -482,15 +492,16 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 			       int type, struct qxl_release **release,
 			       struct qxl_bo **rbo);
-int qxl_fence_releaseable(struct qxl_device *qdev,
-			  struct qxl_release *release);
+
 int
 qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *release,
 			      uint32_t type, bool interruptible);
 int
 qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *release,
 			     uint32_t type, bool interruptible);
-int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size,
+int qxl_alloc_bo_reserved(struct qxl_device *qdev,
+			  struct qxl_release *release,
+			  unsigned long size,
 			  struct qxl_bo **_bo);
 /* qxl drawing commands */
 
@@ -511,15 +522,9 @@ void qxl_draw_copyarea(struct qxl_device *qdev,
 		       u32 sx, u32 sy,
 		       u32 dx, u32 dy);
 
-uint64_t
-qxl_release_alloc(struct qxl_device *qdev, int type,
-		  struct qxl_release **ret);
-
 void qxl_release_free(struct qxl_device *qdev,
 		      struct qxl_release *release);
-void qxl_release_add_res(struct qxl_device *qdev,
-			 struct qxl_release *release,
-			 struct qxl_bo *bo);
+
 /* used by qxl_debugfs_release */
 struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev,
 						   uint64_t id);
@@ -562,7 +567,7 @@ void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freein
 int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf);
 
 /* qxl_fence.c */
-int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id);
+void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id);
 int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id);
 int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence);
 void qxl_fence_fini(struct qxl_fence *qfence);
diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c
index 63c6715ad385..ae59e91cfb9a 100644
--- a/drivers/gpu/drm/qxl/qxl_fence.c
+++ b/drivers/gpu/drm/qxl/qxl_fence.c
@@ -49,17 +49,11 @@
 
    For some reason every so often qxl hw fails to release, things go wrong.
 */
-
-
-int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id)
+/* must be called with the fence lock held */
+void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id)
 {
-	struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
-
-	spin_lock(&bo->tbo.bdev->fence_lock);
 	radix_tree_insert(&qfence->tree, rel_id, qfence);
 	qfence->num_active_releases++;
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	return 0;
 }
 
 int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id)
diff --git a/drivers/gpu/drm/qxl/qxl_image.c b/drivers/gpu/drm/qxl/qxl_image.c
index cf856206996b..7fbcc35e8ad3 100644
--- a/drivers/gpu/drm/qxl/qxl_image.c
+++ b/drivers/gpu/drm/qxl/qxl_image.c
@@ -30,31 +30,100 @@
 #include "qxl_object.h"
 
 static int
-qxl_image_create_helper(struct qxl_device *qdev,
-			struct qxl_release *release,
-			struct qxl_bo **image_bo,
-			const uint8_t *data,
-			int width, int height,
-			int depth, unsigned int hash,
-			int stride)
+qxl_allocate_chunk(struct qxl_device *qdev,
+		   struct qxl_release *release,
+		   struct qxl_drm_image *image,
+		   unsigned int chunk_size)
 {
+	struct qxl_drm_chunk *chunk;
+	int ret;
+
+	chunk = kmalloc(sizeof(struct qxl_drm_chunk), GFP_KERNEL);
+	if (!chunk)
+		return -ENOMEM;
+
+	ret = qxl_alloc_bo_reserved(qdev, release, chunk_size, &chunk->bo);
+	if (ret) {
+		kfree(chunk);
+		return ret;
+	}
+
+	list_add_tail(&chunk->head, &image->chunk_list);
+	return 0;
+}
+
+int
+qxl_image_alloc_objects(struct qxl_device *qdev,
+			struct qxl_release *release,
+			struct qxl_drm_image **image_ptr,
+			int height, int stride)
+{
+	struct qxl_drm_image *image;
+	int ret;
+
+	image = kmalloc(sizeof(struct qxl_drm_image), GFP_KERNEL);
+	if (!image)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&image->chunk_list);
+
+	ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_image), &image->bo);
+	if (ret) {
+		kfree(image);
+		return ret;
+	}
+
+	ret = qxl_allocate_chunk(qdev, release, image, sizeof(struct qxl_data_chunk) + stride * height);
+	if (ret) {
+		qxl_bo_unref(&image->bo);
+		kfree(image);
+		return ret;
+	}
+	*image_ptr = image;
+	return 0;
+}
+
+void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage)
+{
+	struct qxl_drm_chunk *chunk, *tmp;
+
+	list_for_each_entry_safe(chunk, tmp, &dimage->chunk_list, head) {
+		qxl_bo_unref(&chunk->bo);
+		kfree(chunk);
+	}
+
+	qxl_bo_unref(&dimage->bo);
+	kfree(dimage);
+}
+
+static int
+qxl_image_init_helper(struct qxl_device *qdev,
+		      struct qxl_release *release,
+		      struct qxl_drm_image *dimage,
+		      const uint8_t *data,
+		      int width, int height,
+		      int depth, unsigned int hash,
+		      int stride)
+{
+	struct qxl_drm_chunk *drv_chunk;
 	struct qxl_image *image;
 	struct qxl_data_chunk *chunk;
 	int i;
 	int chunk_stride;
 	int linesize = width * depth / 8;
-	struct qxl_bo *chunk_bo;
-	int ret;
+	struct qxl_bo *chunk_bo, *image_bo;
 	void *ptr;
 	/* Chunk */
 	/* FIXME: Check integer overflow */
 	/* TODO: variable number of chunks */
+
+	drv_chunk = list_first_entry(&dimage->chunk_list, struct qxl_drm_chunk, head);
+
+	chunk_bo = drv_chunk->bo;
 	chunk_stride = stride; /* TODO: should use linesize, but it renders
 				  wrong (check the bitmaps are sent correctly
 				  first) */
-	ret = qxl_alloc_bo_reserved(qdev, sizeof(*chunk) + height * chunk_stride,
-				    &chunk_bo);
-	
+
 	ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, 0);
 	chunk = ptr;
 	chunk->data_size = height * chunk_stride;
@@ -102,7 +171,6 @@ qxl_image_create_helper(struct qxl_device *qdev,
 				while (remain > 0) {
 					page_base = out_offset & PAGE_MASK;
 					page_offset = offset_in_page(out_offset);
-					
 					size = min((int)(PAGE_SIZE - page_offset), remain);
 
 					ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page_base);
@@ -116,14 +184,10 @@ qxl_image_create_helper(struct qxl_device *qdev,
 			}
 		}
 	}
-
-
 	qxl_bo_kunmap(chunk_bo);
 
-	/* Image */
-	ret = qxl_alloc_bo_reserved(qdev, sizeof(*image), image_bo);
-
-	ptr = qxl_bo_kmap_atomic_page(qdev, *image_bo, 0);
+	image_bo = dimage->bo;
+	ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0);
 	image = ptr;
 
 	image->descriptor.id = 0;
@@ -154,23 +218,20 @@ qxl_image_create_helper(struct qxl_device *qdev,
 	image->u.bitmap.stride = chunk_stride;
 	image->u.bitmap.palette = 0;
 	image->u.bitmap.data = qxl_bo_physical_address(qdev, chunk_bo, 0);
-	qxl_release_add_res(qdev, release, chunk_bo);
-	qxl_bo_unreserve(chunk_bo);
-	qxl_bo_unref(&chunk_bo);
 
-	qxl_bo_kunmap_atomic_page(qdev, *image_bo, ptr);
+	qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr);
 
 	return 0;
 }
 
-int qxl_image_create(struct qxl_device *qdev,
+int qxl_image_init(struct qxl_device *qdev,
 		     struct qxl_release *release,
-		     struct qxl_bo **image_bo,
+		     struct qxl_drm_image *dimage,
 		     const uint8_t *data,
 		     int x, int y, int width, int height,
 		     int depth, int stride)
 {
 	data += y * stride + x * (depth / 8);
-	return qxl_image_create_helper(qdev, release, image_bo, data,
+	return qxl_image_init_helper(qdev, release, dimage, data,
 				       width, height, depth, 0, stride);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 7448c5ea92e6..6de33563d6f1 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -68,55 +68,60 @@ static int qxl_map_ioctl(struct drm_device *dev, void *data,
 				  &qxl_map->offset);
 }
 
+struct qxl_reloc_info {
+	int type;
+	struct qxl_bo *dst_bo;
+	uint32_t dst_offset;
+	struct qxl_bo *src_bo;
+	int src_offset;
+};
+
 /*
  * dst must be validated, i.e. whole bo on vram/surfacesram (right now all bo's
  * are on vram).
  * *(dst + dst_off) = qxl_bo_physical_address(src, src_off)
  */
 static void
-apply_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off,
-	    struct qxl_bo *src, uint64_t src_off)
+apply_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info)
 {
 	void *reloc_page;
-
-	reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK);
-	*(uint64_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = qxl_bo_physical_address(qdev,
-								     src, src_off);
-	qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page);
+	reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK);
+	*(uint64_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = qxl_bo_physical_address(qdev,
+											      info->src_bo,
+											      info->src_offset);
+	qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page);
 }
 
 static void
-apply_surf_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off,
-		 struct qxl_bo *src)
+apply_surf_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info)
 {
 	uint32_t id = 0;
 	void *reloc_page;
 
-	if (src && !src->is_primary)
-		id = src->surface_id;
+	if (info->src_bo && !info->src_bo->is_primary)
+		id = info->src_bo->surface_id;
 
-	reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK);
-	*(uint32_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = id;
-	qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page);
+	reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK);
+	*(uint32_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = id;
+	qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page);
 }
 
 /* return holding the reference to this object */
 static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev,
 					 struct drm_file *file_priv, uint64_t handle,
-					 struct qxl_reloc_list *reloc_list)
+					 struct qxl_release *release)
 {
 	struct drm_gem_object *gobj;
 	struct qxl_bo *qobj;
 	int ret;
 
 	gobj = drm_gem_object_lookup(qdev->ddev, file_priv, handle);
-	if (!gobj) {
-		DRM_ERROR("bad bo handle %lld\n", handle);
+	if (!gobj)
 		return NULL;
-	}
+
 	qobj = gem_to_qxl_bo(gobj);
 
-	ret = qxl_bo_list_add(reloc_list, qobj);
+	ret = qxl_release_list_add(release, qobj);
 	if (ret)
 		return NULL;
 
@@ -129,6 +134,155 @@ static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev,
  * However, the command as passed from user space must *not* contain the initial
  * QXLReleaseInfo struct (first XXX bytes)
  */
+static int qxl_process_single_command(struct qxl_device *qdev,
+				      struct drm_qxl_command *cmd,
+				      struct drm_file *file_priv)
+{
+	struct qxl_reloc_info *reloc_info;
+	int release_type;
+	struct qxl_release *release;
+	struct qxl_bo *cmd_bo;
+	void *fb_cmd;
+	int i, j, ret, num_relocs;
+	int unwritten;
+
+	switch (cmd->type) {
+	case QXL_CMD_DRAW:
+		release_type = QXL_RELEASE_DRAWABLE;
+		break;
+	case QXL_CMD_SURFACE:
+	case QXL_CMD_CURSOR:
+	default:
+		DRM_DEBUG("Only draw commands in execbuffers\n");
+		return -EINVAL;
+		break;
+	}
+
+	if (cmd->command_size > PAGE_SIZE - sizeof(union qxl_release_info))
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ,
+		       (void *)(unsigned long)cmd->command,
+		       cmd->command_size))
+		return -EFAULT;
+
+	reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL);
+	if (!reloc_info)
+		return -ENOMEM;
+
+	ret = qxl_alloc_release_reserved(qdev,
+					 sizeof(union qxl_release_info) +
+					 cmd->command_size,
+					 release_type,
+					 &release,
+					 &cmd_bo);
+	if (ret)
+		goto out_free_reloc;
+
+	/* TODO copy slow path code from i915 */
+	fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE));
+	unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)cmd->command, cmd->command_size);
+
+	{
+		struct qxl_drawable *draw = fb_cmd;
+		draw->mm_time = qdev->rom->mm_clock;
+	}
+
+	qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd);
+	if (unwritten) {
+		DRM_ERROR("got unwritten %d\n", unwritten);
+		ret = -EFAULT;
+		goto out_free_release;
+	}
+
+	/* fill out reloc info structs */
+	num_relocs = 0;
+	for (i = 0; i < cmd->relocs_num; ++i) {
+		struct drm_qxl_reloc reloc;
+
+		if (DRM_COPY_FROM_USER(&reloc,
+				       &((struct drm_qxl_reloc *)(uintptr_t)cmd->relocs)[i],
+				       sizeof(reloc))) {
+			ret = -EFAULT;
+			goto out_free_bos;
+		}
+
+		/* add the bos to the list of bos to validate -
+		   need to validate first then process relocs? */
+		if (reloc.reloc_type != QXL_RELOC_TYPE_BO && reloc.reloc_type != QXL_RELOC_TYPE_SURF) {
+			DRM_DEBUG("unknown reloc type %d\n", reloc_info[i].type);
+
+			ret = -EINVAL;
+			goto out_free_bos;
+		}
+		reloc_info[i].type = reloc.reloc_type;
+
+		if (reloc.dst_handle) {
+			reloc_info[i].dst_bo = qxlhw_handle_to_bo(qdev, file_priv,
+								  reloc.dst_handle, release);
+			if (!reloc_info[i].dst_bo) {
+				ret = -EINVAL;
+				reloc_info[i].src_bo = NULL;
+				goto out_free_bos;
+			}
+			reloc_info[i].dst_offset = reloc.dst_offset;
+		} else {
+			reloc_info[i].dst_bo = cmd_bo;
+			reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset;
+		}
+		num_relocs++;
+
+		/* reserve and validate the reloc dst bo */
+		if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) {
+			reloc_info[i].src_bo =
+				qxlhw_handle_to_bo(qdev, file_priv,
+						   reloc.src_handle, release);
+			if (!reloc_info[i].src_bo) {
+				if (reloc_info[i].dst_bo != cmd_bo)
+					drm_gem_object_unreference_unlocked(&reloc_info[i].dst_bo->gem_base);
+				ret = -EINVAL;
+				goto out_free_bos;
+			}
+			reloc_info[i].src_offset = reloc.src_offset;
+		} else {
+			reloc_info[i].src_bo = NULL;
+			reloc_info[i].src_offset = 0;
+		}
+	}
+
+	/* validate all buffers */
+	ret = qxl_release_reserve_list(release, false);
+	if (ret)
+		goto out_free_bos;
+
+	for (i = 0; i < cmd->relocs_num; ++i) {
+		if (reloc_info[i].type == QXL_RELOC_TYPE_BO)
+			apply_reloc(qdev, &reloc_info[i]);
+		else if (reloc_info[i].type == QXL_RELOC_TYPE_SURF)
+			apply_surf_reloc(qdev, &reloc_info[i]);
+	}
+
+	ret = qxl_push_command_ring_release(qdev, release, cmd->type, true);
+	if (ret)
+		qxl_release_backoff_reserve_list(release);
+	else
+		qxl_release_fence_buffer_objects(release);
+
+out_free_bos:
+	for (j = 0; j < num_relocs; j++) {
+		if (reloc_info[j].dst_bo != cmd_bo)
+			drm_gem_object_unreference_unlocked(&reloc_info[j].dst_bo->gem_base);
+		if (reloc_info[j].src_bo && reloc_info[j].src_bo != cmd_bo)
+			drm_gem_object_unreference_unlocked(&reloc_info[j].src_bo->gem_base);
+	}
+out_free_release:
+	if (ret)
+		qxl_release_free(qdev, release);
+out_free_reloc:
+	kfree(reloc_info);
+	return ret;
+}
+
 static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv)
 {
@@ -136,144 +290,21 @@ static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data,
 	struct drm_qxl_execbuffer *execbuffer = data;
 	struct drm_qxl_command user_cmd;
 	int cmd_num;
-	struct qxl_bo *reloc_src_bo;
-	struct qxl_bo *reloc_dst_bo;
-	struct drm_qxl_reloc reloc;
-	void *fb_cmd;
-	int i, ret;
-	struct qxl_reloc_list reloc_list;
-	int unwritten;
-	uint32_t reloc_dst_offset;
-	INIT_LIST_HEAD(&reloc_list.bos);
+	int ret;
 
 	for (cmd_num = 0; cmd_num < execbuffer->commands_num; ++cmd_num) {
-		struct qxl_release *release;
-		struct qxl_bo *cmd_bo;
-		int release_type;
+
 		struct drm_qxl_command *commands =
 			(struct drm_qxl_command *)(uintptr_t)execbuffer->commands;
 
 		if (DRM_COPY_FROM_USER(&user_cmd, &commands[cmd_num],
 				       sizeof(user_cmd)))
 			return -EFAULT;
-		switch (user_cmd.type) {
-		case QXL_CMD_DRAW:
-			release_type = QXL_RELEASE_DRAWABLE;
-			break;
-		case QXL_CMD_SURFACE:
-		case QXL_CMD_CURSOR:
-		default:
-			DRM_DEBUG("Only draw commands in execbuffers\n");
-			return -EINVAL;
-			break;
-		}
 
-		if (user_cmd.command_size > PAGE_SIZE - sizeof(union qxl_release_info))
-			return -EINVAL;
-
-		if (!access_ok(VERIFY_READ,
-			       (void *)(unsigned long)user_cmd.command,
-			       user_cmd.command_size))
-			return -EFAULT;
-
-		ret = qxl_alloc_release_reserved(qdev,
-						 sizeof(union qxl_release_info) +
-						 user_cmd.command_size,
-						 release_type,
-						 &release,
-						 &cmd_bo);
+		ret = qxl_process_single_command(qdev, &user_cmd, file_priv);
 		if (ret)
 			return ret;
-
-		/* TODO copy slow path code from i915 */
-		fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE));
-		unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)user_cmd.command, user_cmd.command_size);
-
-		{
-			struct qxl_drawable *draw = fb_cmd;
-
-			draw->mm_time = qdev->rom->mm_clock;
-		}
-		qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd);
-		if (unwritten) {
-			DRM_ERROR("got unwritten %d\n", unwritten);
-			qxl_release_unreserve(qdev, release);
-			qxl_release_free(qdev, release);
-			return -EFAULT;
-		}
-
-		for (i = 0 ; i < user_cmd.relocs_num; ++i) {
-			if (DRM_COPY_FROM_USER(&reloc,
-					       &((struct drm_qxl_reloc *)(uintptr_t)user_cmd.relocs)[i],
-					       sizeof(reloc))) {
-				qxl_bo_list_unreserve(&reloc_list, true);
-				qxl_release_unreserve(qdev, release);
-				qxl_release_free(qdev, release);
-				return -EFAULT;
-			}
-
-			/* add the bos to the list of bos to validate -
-			   need to validate first then process relocs? */
-			if (reloc.dst_handle) {
-				reloc_dst_bo = qxlhw_handle_to_bo(qdev, file_priv,
-								  reloc.dst_handle, &reloc_list);
-				if (!reloc_dst_bo) {
-					qxl_bo_list_unreserve(&reloc_list, true);
-					qxl_release_unreserve(qdev, release);
-					qxl_release_free(qdev, release);
-					return -EINVAL;
-				}
-				reloc_dst_offset = 0;
-			} else {
-				reloc_dst_bo = cmd_bo;
-				reloc_dst_offset = release->release_offset;
-			}
-
-			/* reserve and validate the reloc dst bo */
-			if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) {
-				reloc_src_bo =
-					qxlhw_handle_to_bo(qdev, file_priv,
-							   reloc.src_handle, &reloc_list);
-				if (!reloc_src_bo) {
-					if (reloc_dst_bo != cmd_bo)
-						drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base);
-					qxl_bo_list_unreserve(&reloc_list, true);
-					qxl_release_unreserve(qdev, release);
-					qxl_release_free(qdev, release);
-					return -EINVAL;
-				}
-			} else
-				reloc_src_bo = NULL;
-			if (reloc.reloc_type == QXL_RELOC_TYPE_BO) {
-				apply_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset,
-					    reloc_src_bo, reloc.src_offset);
-			} else if (reloc.reloc_type == QXL_RELOC_TYPE_SURF) {
-				apply_surf_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset, reloc_src_bo);
-			} else {
-				DRM_ERROR("unknown reloc type %d\n", reloc.reloc_type);
-				return -EINVAL;
-			}
-
-			if (reloc_src_bo && reloc_src_bo != cmd_bo) {
-				qxl_release_add_res(qdev, release, reloc_src_bo);
-				drm_gem_object_unreference_unlocked(&reloc_src_bo->gem_base);
-			}
-
-			if (reloc_dst_bo != cmd_bo)
-				drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base);
-		}
-		qxl_fence_releaseable(qdev, release);
-
-		ret = qxl_push_command_ring_release(qdev, release, user_cmd.type, true);
-		if (ret == -ERESTARTSYS) {
-			qxl_release_unreserve(qdev, release);
-			qxl_release_free(qdev, release);
-			qxl_bo_list_unreserve(&reloc_list, true);
-			return ret;
-		}
-		qxl_release_unreserve(qdev, release);
 	}
-	qxl_bo_list_unreserve(&reloc_list, 0);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index 50e7a6177167..aa161cddd87e 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -104,7 +104,7 @@ int qxl_bo_create(struct qxl_device *qdev,
 	bo->surface_id = 0;
 	qxl_fence_init(qdev, &bo->fence);
 	INIT_LIST_HEAD(&bo->list);
-	atomic_set(&bo->reserve_count, 0);
+
 	if (surf)
 		bo->surf = *surf;
 
@@ -316,53 +316,6 @@ int qxl_bo_check_id(struct qxl_device *qdev, struct qxl_bo *bo)
 	return 0;
 }
 
-void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed)
-{
-	struct qxl_bo_list *entry, *sf;
-
-	list_for_each_entry_safe(entry, sf, &reloc_list->bos, lhead) {
-		qxl_bo_unreserve(entry->bo);
-		list_del(&entry->lhead);
-		kfree(entry);
-	}
-}
-
-int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo)
-{
-	struct qxl_bo_list *entry;
-	int ret;
-
-	list_for_each_entry(entry, &reloc_list->bos, lhead) {
-		if (entry->bo == bo)
-			return 0;
-	}
-
-	entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	entry->bo = bo;
-	list_add(&entry->lhead, &reloc_list->bos);
-
-	ret = qxl_bo_reserve(bo, false);
-	if (ret)
-		return ret;
-
-	if (!bo->pin_count) {
-		qxl_ttm_placement_from_domain(bo, bo->type, false);
-		ret = ttm_bo_validate(&bo->tbo, &bo->placement,
-				      true, false);
-		if (ret)
-			return ret;
-	}
-
-	/* allocate a surface for reserved + validated buffers */
-	ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
-	if (ret)
-		return ret;
-	return 0;
-}
-
 int qxl_surf_evict(struct qxl_device *qdev)
 {
 	return ttm_bo_evict_mm(&qdev->mman.bdev, TTM_PL_PRIV0);
diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h
index 116637f09347..8cb6167038e5 100644
--- a/drivers/gpu/drm/qxl/qxl_object.h
+++ b/drivers/gpu/drm/qxl/qxl_object.h
@@ -102,6 +102,4 @@ extern int qxl_bo_unpin(struct qxl_bo *bo);
 extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned);
 extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo);
 
-extern int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo);
-extern void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed);
 #endif
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index b7f1271c6014..b61449e52cd5 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -38,7 +38,8 @@
 
 static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE };
 static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO };
-uint64_t
+
+static uint64_t
 qxl_release_alloc(struct qxl_device *qdev, int type,
 		  struct qxl_release **ret)
 {
@@ -53,9 +54,9 @@ qxl_release_alloc(struct qxl_device *qdev, int type,
 		return 0;
 	}
 	release->type = type;
-	release->bo_count = 0;
 	release->release_offset = 0;
 	release->surface_release_id = 0;
+	INIT_LIST_HEAD(&release->bos);
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&qdev->release_idr_lock);
@@ -77,20 +78,20 @@ void
 qxl_release_free(struct qxl_device *qdev,
 		 struct qxl_release *release)
 {
-	int i;
-
-	QXL_INFO(qdev, "release %d, type %d, %d bos\n", release->id,
-		 release->type, release->bo_count);
+	struct qxl_bo_list *entry, *tmp;
+	QXL_INFO(qdev, "release %d, type %d\n", release->id,
+		 release->type);
 
 	if (release->surface_release_id)
 		qxl_surface_id_dealloc(qdev, release->surface_release_id);
 
-	for (i = 0 ; i < release->bo_count; ++i) {
+	list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) {
+		struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
 		QXL_INFO(qdev, "release %llx\n",
-			release->bos[i]->tbo.addr_space_offset
+			entry->tv.bo->addr_space_offset
 						- DRM_FILE_OFFSET);
-		qxl_fence_remove_release(&release->bos[i]->fence, release->id);
-		qxl_bo_unref(&release->bos[i]);
+		qxl_fence_remove_release(&bo->fence, release->id);
+		qxl_bo_unref(&bo);
 	}
 	spin_lock(&qdev->release_idr_lock);
 	idr_remove(&qdev->release_idr, release->id);
@@ -98,22 +99,6 @@ qxl_release_free(struct qxl_device *qdev,
 	kfree(release);
 }
 
-void
-qxl_release_add_res(struct qxl_device *qdev, struct qxl_release *release,
-		    struct qxl_bo *bo)
-{
-	int i;
-	for (i = 0; i < release->bo_count; i++)
-		if (release->bos[i] == bo)
-			return;
-
-	if (release->bo_count >= QXL_MAX_RES) {
-		DRM_ERROR("exceeded max resource on a qxl_release item\n");
-		return;
-	}
-	release->bos[release->bo_count++] = qxl_bo_ref(bo);
-}
-
 static int qxl_release_bo_alloc(struct qxl_device *qdev,
 				struct qxl_bo **bo)
 {
@@ -125,58 +110,106 @@ static int qxl_release_bo_alloc(struct qxl_device *qdev,
 	return ret;
 }
 
-int qxl_release_reserve(struct qxl_device *qdev,
-			struct qxl_release *release, bool no_wait)
+int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo)
+{
+	struct qxl_bo_list *entry;
+
+	list_for_each_entry(entry, &release->bos, tv.head) {
+		if (entry->tv.bo == &bo->tbo)
+			return 0;
+	}
+
+	entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	qxl_bo_ref(bo);
+	entry->tv.bo = &bo->tbo;
+	list_add_tail(&entry->tv.head, &release->bos);
+	return 0;
+}
+
+static int qxl_release_validate_bo(struct qxl_bo *bo)
 {
 	int ret;
-	if (atomic_inc_return(&release->bos[0]->reserve_count) == 1) {
-		ret = qxl_bo_reserve(release->bos[0], no_wait);
+
+	if (!bo->pin_count) {
+		qxl_ttm_placement_from_domain(bo, bo->type, false);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement,
+				      true, false);
 		if (ret)
 			return ret;
 	}
+
+	/* allocate a surface for reserved + validated buffers */
+	ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
+{
+	int ret;
+	struct qxl_bo_list *entry;
+
+	/* if only one object on the release its the release itself
+	   since these objects are pinned no need to reserve */
+	if (list_is_singular(&release->bos))
+		return 0;
+
+	ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(entry, &release->bos, tv.head) {
+		struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
+
+		ret = qxl_release_validate_bo(bo);
+		if (ret) {
+			ttm_eu_backoff_reservation(&release->ticket, &release->bos);
+			return ret;
+		}
+	}
 	return 0;
 }
 
-void qxl_release_unreserve(struct qxl_device *qdev,
-			  struct qxl_release *release)
+void qxl_release_backoff_reserve_list(struct qxl_release *release)
 {
-	if (atomic_dec_and_test(&release->bos[0]->reserve_count))
-		qxl_bo_unreserve(release->bos[0]);
+	/* if only one object on the release its the release itself
+	   since these objects are pinned no need to reserve */
+	if (list_is_singular(&release->bos))
+		return;
+
+	ttm_eu_backoff_reservation(&release->ticket, &release->bos);
 }
 
+
 int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 				       enum qxl_surface_cmd_type surface_cmd_type,
 				       struct qxl_release *create_rel,
 				       struct qxl_release **release)
 {
-	int ret;
-
 	if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) {
 		int idr_ret;
+		struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head);
 		struct qxl_bo *bo;
 		union qxl_release_info *info;
 
 		/* stash the release after the create command */
 		idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
-		bo = qxl_bo_ref(create_rel->bos[0]);
+		bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo));
 
 		(*release)->release_offset = create_rel->release_offset + 64;
 
-		qxl_release_add_res(qdev, *release, bo);
+		qxl_release_list_add(*release, bo);
 
-		ret = qxl_release_reserve(qdev, *release, false);
-		if (ret) {
-			DRM_ERROR("release reserve failed\n");
-			goto out_unref;
-		}
 		info = qxl_release_map(qdev, *release);
 		info->id = idr_ret;
 		qxl_release_unmap(qdev, *release, info);
 
-
-out_unref:
 		qxl_bo_unref(&bo);
-		return ret;
+		return 0;
 	}
 
 	return qxl_alloc_release_reserved(qdev, sizeof(struct qxl_surface_cmd),
@@ -189,7 +222,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 {
 	struct qxl_bo *bo;
 	int idr_ret;
-	int ret;
+	int ret = 0;
 	union qxl_release_info *info;
 	int cur_idx;
 
@@ -228,36 +261,18 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 	if (rbo)
 		*rbo = bo;
 
-	qxl_release_add_res(qdev, *release, bo);
-
-	ret = qxl_release_reserve(qdev, *release, false);
 	mutex_unlock(&qdev->release_mutex);
-	if (ret)
-		goto out_unref;
+
+	qxl_release_list_add(*release, bo);
 
 	info = qxl_release_map(qdev, *release);
 	info->id = idr_ret;
 	qxl_release_unmap(qdev, *release, info);
 
-out_unref:
 	qxl_bo_unref(&bo);
 	return ret;
 }
 
-int qxl_fence_releaseable(struct qxl_device *qdev,
-			  struct qxl_release *release)
-{
-	int i, ret;
-	for (i = 0; i < release->bo_count; i++) {
-		if (!release->bos[i]->tbo.sync_obj)
-			release->bos[i]->tbo.sync_obj = &release->bos[i]->fence;
-		ret = qxl_fence_add_release(&release->bos[i]->fence, release->id);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
 struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev,
 						   uint64_t id)
 {
@@ -270,10 +285,7 @@ struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev,
 		DRM_ERROR("failed to find id in release_idr\n");
 		return NULL;
 	}
-	if (release->bo_count < 1) {
-		DRM_ERROR("read a released resource with 0 bos\n");
-		return NULL;
-	}
+
 	return release;
 }
 
@@ -282,9 +294,12 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev,
 {
 	void *ptr;
 	union qxl_release_info *info;
-	struct qxl_bo *bo = release->bos[0];
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
+	struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
 
 	ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE);
+	if (!ptr)
+		return NULL;
 	info = ptr + (release->release_offset & ~PAGE_SIZE);
 	return info;
 }
@@ -293,9 +308,51 @@ void qxl_release_unmap(struct qxl_device *qdev,
 		       struct qxl_release *release,
 		       union qxl_release_info *info)
 {
-	struct qxl_bo *bo = release->bos[0];
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
+	struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
 	void *ptr;
 
 	ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE);
 	qxl_bo_kunmap_atomic_page(qdev, bo, ptr);
 }
+
+void qxl_release_fence_buffer_objects(struct qxl_release *release)
+{
+	struct ttm_validate_buffer *entry;
+	struct ttm_buffer_object *bo;
+	struct ttm_bo_global *glob;
+	struct ttm_bo_device *bdev;
+	struct ttm_bo_driver *driver;
+	struct qxl_bo *qbo;
+
+	/* if only one object on the release its the release itself
+	   since these objects are pinned no need to reserve */
+	if (list_is_singular(&release->bos))
+		return;
+
+	bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo;
+	bdev = bo->bdev;
+	driver = bdev->driver;
+	glob = bo->glob;
+
+	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->fence_lock);
+
+	list_for_each_entry(entry, &release->bos, head) {
+		bo = entry->bo;
+		qbo = to_qxl_bo(bo);
+
+		if (!entry->bo->sync_obj)
+			entry->bo->sync_obj = &qbo->fence;
+
+		qxl_fence_add_release_locked(&qbo->fence, release->id);
+
+		ttm_bo_add_to_lru(bo);
+		ww_mutex_unlock(&bo->resv->lock);
+		entry->reserved = false;
+	}
+	spin_unlock(&bdev->fence_lock);
+	spin_unlock(&glob->lru_lock);
+	ww_acquire_fini(&release->ticket);
+}
+

From 9310f61a94ec779b2919864c7e66241ff80fe96d Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 17 Jul 2013 15:13:14 +0300
Subject: [PATCH 237/320] mei: hbm: fix typo in error message

writet -> write

Tested-by: Shuah Khan <shuah.kh@samsung.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/hbm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index f9296abcf02a..6127ab64bb39 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -167,7 +167,7 @@ int mei_hbm_start_req(struct mei_device *dev)
 
 	dev->hbm_state = MEI_HBM_IDLE;
 	if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) {
-		dev_err(&dev->pdev->dev, "version message writet failed\n");
+		dev_err(&dev->pdev->dev, "version message write failed\n");
 		dev->dev_state = MEI_DEV_RESETTING;
 		mei_reset(dev, 1);
 		return -ENODEV;

From 315a383ad7dbd484fafb93ef08038e3dbafbb7a8 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 17 Jul 2013 15:13:15 +0300
Subject: [PATCH 238/320] mei: me: fix reset state machine

ME HW ready bit is down after hw reset was asserted or on error.
Only on error we need to enter the reset flow, additional reset
need to be prevented when reset was triggered during
initialization , power up/down or a reset is already in progress

Tested-by: Shuah Khan <shuah.kh@samsung.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/hw-me.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index e4f8dec4dc3c..a0e19e61e7df 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -483,7 +483,9 @@ irqreturn_t mei_me_irq_thread_handler(int irq, void *dev_id)
 	/* check if ME wants a reset */
 	if (!mei_hw_is_ready(dev) &&
 	    dev->dev_state != MEI_DEV_RESETTING &&
-	    dev->dev_state != MEI_DEV_INITIALIZING) {
+	    dev->dev_state != MEI_DEV_INITIALIZING &&
+	    dev->dev_state != MEI_DEV_POWER_DOWN &&
+	    dev->dev_state != MEI_DEV_POWER_UP) {
 		dev_dbg(&dev->pdev->dev, "FW not ready.\n");
 		mei_reset(dev, 1);
 		mutex_unlock(&dev->device_lock);

From 99f22c4ef24cf87b0dae6aabe6b5e620b62961d9 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 17 Jul 2013 15:13:16 +0300
Subject: [PATCH 239/320] mei: don't have to clean the state on power up

When powering up, we don't have to clean up the device state
nothing is connected.

Tested-by: Shuah Khan <shuah.kh@samsung.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index ed1d75203af6..e6f16f83ecde 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -148,7 +148,8 @@ void mei_reset(struct mei_device *dev, int interrupts_enabled)
 
 	dev->hbm_state = MEI_HBM_IDLE;
 
-	if (dev->dev_state != MEI_DEV_INITIALIZING) {
+	if (dev->dev_state != MEI_DEV_INITIALIZING &&
+	    dev->dev_state != MEI_DEV_POWER_UP) {
 		if (dev->dev_state != MEI_DEV_DISABLED &&
 		    dev->dev_state != MEI_DEV_POWER_DOWN)
 			dev->dev_state = MEI_DEV_RESETTING;

From dab9bf41b23fe700c4a74133e41eb6a21706031e Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Wed, 17 Jul 2013 15:13:17 +0300
Subject: [PATCH 240/320] mei: me: fix waiting for hw ready

1. MEI_INTEROP_TIMEOUT is in seconds not in jiffies
so we use mei_secs_to_jiffies macro
While cold boot is fast this is relevant in resume
2. wait_event_interruptible_timeout can return with
-ERESTARTSYS so do not override it with -ETIMEDOUT
3.Adjust error message

Tested-by: Shuah Khan <shuah.kh@samsung.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/hw-me.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index a0e19e61e7df..b22c7e247225 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -239,14 +239,18 @@ static int mei_me_hw_ready_wait(struct mei_device *dev)
 	if (mei_me_hw_is_ready(dev))
 		return 0;
 
+	dev->recvd_hw_ready = false;
 	mutex_unlock(&dev->device_lock);
 	err = wait_event_interruptible_timeout(dev->wait_hw_ready,
-			dev->recvd_hw_ready, MEI_INTEROP_TIMEOUT);
+			dev->recvd_hw_ready,
+			mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT));
 	mutex_lock(&dev->device_lock);
 	if (!err && !dev->recvd_hw_ready) {
+		if (!err)
+			err = -ETIMEDOUT;
 		dev_err(&dev->pdev->dev,
-			"wait hw ready failed. status = 0x%x\n", err);
-		return -ETIMEDOUT;
+			"wait hw ready failed. status = %d\n", err);
+		return err;
 	}
 
 	dev->recvd_hw_ready = false;

From 33959f88fce9b8d3346d8000b3425814cbc6d6c0 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Thu, 18 Jul 2013 11:31:51 +1000
Subject: [PATCH 241/320] powerpc: Add second POWER8 PVR entry

POWER8 comes with two different PVRs.  This patch enables the additional
PVR in the cputable.

The existing entry (PVR=0x4b) is renamed to POWER8E and the new entry
(PVR=0x4d) is given POWER8.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/reg.h  |  3 ++-
 arch/powerpc/kernel/cputable.c  | 20 +++++++++++++++++++-
 arch/powerpc/kernel/prom_init.c |  5 +++--
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 5d7d9c2a5473..a6840e4e24f7 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1088,7 +1088,8 @@
 #define PVR_970MP	0x0044
 #define PVR_970GX	0x0045
 #define PVR_POWER7p	0x004A
-#define PVR_POWER8	0x004B
+#define PVR_POWER8E	0x004B
+#define PVR_POWER8	0x004D
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2a45d0f04385..22973a74df73 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -494,9 +494,27 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
-	{	/* Power8 */
+	{	/* Power8E */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x004b0000,
+		.cpu_name		= "POWER8E (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_type		= PPC_OPROFILE_INVALID,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.platform		= "power8",
+	},
+	{	/* Power8 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004d0000,
 		.cpu_name		= "POWER8 (raw)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5eccda9fd33f..607902424e73 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -644,7 +644,8 @@ unsigned char ibm_architecture_vec[] = {
 	W(0xfffe0000), W(0x003a0000),	/* POWER5/POWER5+ */
 	W(0xffff0000), W(0x003e0000),	/* POWER6 */
 	W(0xffff0000), W(0x003f0000),	/* POWER7 */
-	W(0xffff0000), W(0x004b0000),	/* POWER8 */
+	W(0xffff0000), W(0x004b0000),	/* POWER8E */
+	W(0xffff0000), W(0x004d0000),	/* POWER8 */
 	W(0xffffffff), W(0x0f000004),	/* all 2.07-compliant */
 	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
 	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
@@ -706,7 +707,7 @@ unsigned char ibm_architecture_vec[] = {
 	 * must match by the macro below. Update the definition if
 	 * the structure layout changes.
 	 */
-#define IBM_ARCH_VEC_NRCORES_OFFSET	117
+#define IBM_ARCH_VEC_NRCORES_OFFSET	125
 	W(NR_CPUS),			/* number of cores supported */
 	0,
 	0,

From 0e0ed6406e61434d3f38fb58aa8464ec4722b77e Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 15 Jul 2013 14:04:50 +1000
Subject: [PATCH 242/320] powerpc/modules: Module CRC relocation fix causes
 perf issues

Module CRCs are implemented as absolute symbols that get resolved by
a linker script. We build an intermediate .o that contains an
unresolved symbol for each CRC. genksysms parses this .o, calculates
the CRCs and writes a linker script that "resolves" the symbols to
the calculated CRC.

Unfortunately the ppc64 relocatable kernel sees these CRCs as symbols
that need relocating and relocates them at boot. Commit d4703aef
(module: handle ppc64 relocating kcrctabs when CONFIG_RELOCATABLE=y)
added a hook to reverse the bogus relocations. Part of this patch
created a symbol at 0x0:

# head -2 /proc/kallsyms
0000000000000000 T reloc_start
c000000000000000 T .__start

This reloc_start symbol is causing lots of confusion to perf. It
thinks reloc_start is a massive function that stretches from 0x0 to
0xc000000000000000 and we get various cryptic errors out of perf,
including:

problem incrementing symbol count, skipping event

This patch removes the  reloc_start linker script label and instead
defines it as PHYSICAL_START. We also need to wrap it with
CONFIG_PPC64 because the ppc32 kernel can set a non zero
PHYSICAL_START at compile time and we wouldn't want to subtract
it from the CRCs in that case.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: <stable@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/module.h | 5 ++---
 arch/powerpc/kernel/vmlinux.lds.S | 3 ---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index c1df590ec444..49fa55bfbac4 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -82,10 +82,9 @@ struct exception_table_entry;
 void sort_ex_table(struct exception_table_entry *start,
 		   struct exception_table_entry *finish);
 
-#ifdef CONFIG_MODVERSIONS
+#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
 #define ARCH_RELOCATES_KCRCTAB
-
-extern const unsigned long reloc_start[];
+#define reloc_start PHYSICAL_START
 #endif
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_MODULE_H */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 654e479802f2..f096e72262f4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -38,9 +38,6 @@ jiffies = jiffies_64 + 4;
 #endif
 SECTIONS
 {
-	. = 0;
-	reloc_start = .;
-
 	. = KERNELBASE;
 
 /*

From 0b88f772bdf2847461debf417b1ee96043a7cb1b Mon Sep 17 00:00:00 2001
From: Tiejun Chen <tiejun.chen@windriver.com>
Date: Mon, 15 Jul 2013 10:36:04 +0800
Subject: [PATCH 243/320] powerpc: Access local paca after hard irq disabled

In hard_irq_disable(), we accessed the PACA before we hard disabled
the interrupts, potentially causing a warning as get_paca() will
us debug_smp_processor_id().

Move that to after the disabling, and also use local_paca directly
rather than get_paca() to avoid several redundant and useless checks.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/hw_irq.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index ba713f166fa5..10be1dd01c6b 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -96,10 +96,11 @@ static inline bool arch_irqs_disabled(void)
 #endif
 
 #define hard_irq_disable()	do {			\
-	u8 _was_enabled = get_paca()->soft_enabled;	\
+	u8 _was_enabled;				\
 	__hard_irq_disable();				\
-	get_paca()->soft_enabled = 0;			\
-	get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;	\
+	_was_enabled = local_paca->soft_enabled;	\
+	local_paca->soft_enabled = 0;			\
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;	\
 	if (_was_enabled)				\
 		trace_hardirqs_off();			\
 } while(0)

From 97645154a9056106a957905c7836a016e5c89b3b Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 14 Jul 2013 13:02:15 +0200
Subject: [PATCH 244/320] powerpc/pseries: Drop "select HOTPLUG"

The Kconfig symbol HOTPLUG was removed with commit 40b313608a ("Finally
eradicate CONFIG_HOTPLUG"). But there's still one select statement for
that symbol. It seems that select statement was added after the patch to
remove CONFIG_HOTPLUG was submitted. Anyhow, it is useless and can be
dropped.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 1bd3399146ed..62b4f8025de0 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -19,7 +19,6 @@ config PPC_PSERIES
 	select ZLIB_DEFLATE
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
-	select HOTPLUG if SMP
 	select HOTPLUG_CPU if SMP
 	default y
 

From 5d7ead0039b0c9500825b46997896352810efb0b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Sat, 13 Jul 2013 12:53:40 +1000
Subject: [PATCH 245/320] powerpc/perf: Set PPC_FEATURE2_EBB when we register
 the power8 PMU

The presence or absence of EBB is advertised to userspace via the presence
or absence of PPC_FEATURE2_EBB in cpu_user_features2.

Because the kernel can be built without PMU support, we should only add
PPC_FEATURE2_EBB to cpu_user_features2 when we successfully register the
power8 PMU support.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/perf/power8-pmu.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 96a64d6a8bdf..09def196f5c4 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -621,10 +621,19 @@ static struct power_pmu power8_pmu = {
 
 static int __init init_power8_pmu(void)
 {
+	int rc;
+
 	if (!cur_cpu_spec->oprofile_cpu_type ||
 	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
 		return -ENODEV;
 
-	return register_power_pmu(&power8_pmu);
+	rc = register_power_pmu(&power8_pmu);
+	if (rc)
+		return rc;
+
+	/* Tell userspace that EBB is supported */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+	return 0;
 }
 early_initcall(init_power8_pmu);

From ee1dd1e3dc774cf257012215d996e8e7e370c162 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Wed, 10 Jul 2013 18:32:56 +0530
Subject: [PATCH 246/320] powerpc: Fix the corrupt r3 error during MCE
 handling.

During Machine Check interrupt on pseries platform, R3 generally points to
memory region inside RTAS (FWNMI) area. We see r3 corruption because when RTAS
delivers the machine check exception it passes the address inside FWNMI area
with the top most bit set. This patch fixes this issue by masking top two bit
in machine check exception handler.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/ras.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 7b3cbde8c783..721c0586b284 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -287,6 +287,9 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
 	unsigned long *savep;
 	struct rtas_error_log *h, *errhdr = NULL;
 
+	/* Mask top two bits */
+	regs->gpr[3] &= ~(0x3UL << 62);
+
 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
 		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 		return NULL;

From 0ba178888b05a4efdaca7da528c170bd09f9687b Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:51 +0800
Subject: [PATCH 247/320] powerpc/eeh: Remove reference to PCI device

We will rely on pcibios_release_device() to remove the EEH cache
and unbind EEH device for the specific PCI device. So we shouldn't
hold the reference to the PCI device from EEH cache and EEH device.
Otherwise, pcibios_release_device() won't be called as we expected.
The patch removes the reference to the PCI device in EEH core.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/eeh.c       |  4 ----
 arch/powerpc/kernel/eeh_cache.c | 18 +++++-------------
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 39954fe941b8..b5c425ea2974 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -499,8 +499,6 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 	}
 
 	eeh_dev_check_failure(edev);
-
-	pci_dev_put(eeh_dev_to_pci_dev(edev));
 	return val;
 }
 
@@ -904,7 +902,6 @@ static void eeh_add_device_late(struct pci_dev *dev)
 	}
 	WARN_ON(edev->pdev);
 
-	pci_dev_get(dev);
 	edev->pdev = dev;
 	dev->dev.archdata.edev = edev;
 
@@ -992,7 +989,6 @@ static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
 	}
 	edev->pdev = NULL;
 	dev->dev.archdata.edev = NULL;
-	pci_dev_put(dev);
 
 	eeh_rmv_from_parent_pe(edev, purge_pe);
 	eeh_addr_cache_rmv_dev(dev);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index f9ac1232a746..e8c9fd546a5c 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -68,16 +68,12 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
 		struct pci_io_addr_range *piar;
 		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
 
-		if (addr < piar->addr_lo) {
+		if (addr < piar->addr_lo)
 			n = n->rb_left;
-		} else {
-			if (addr > piar->addr_hi) {
-				n = n->rb_right;
-			} else {
-				pci_dev_get(piar->pcidev);
-				return piar->edev;
-			}
-		}
+		else if (addr > piar->addr_hi)
+			n = n->rb_right;
+		else
+			return piar->edev;
 	}
 
 	return NULL;
@@ -156,7 +152,6 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 	if (!piar)
 		return NULL;
 
-	pci_dev_get(dev);
 	piar->addr_lo = alo;
 	piar->addr_hi = ahi;
 	piar->edev = pci_dev_to_eeh_dev(dev);
@@ -250,7 +245,6 @@ static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
 
 		if (piar->pcidev == dev) {
 			rb_erase(n, &pci_io_addr_cache_root.rb_root);
-			pci_dev_put(piar->pcidev);
 			kfree(piar);
 			goto restart;
 		}
@@ -302,12 +296,10 @@ void eeh_addr_cache_build(void)
 		if (!edev)
 			continue;
 
-		pci_dev_get(dev);  /* matching put is in eeh_remove_device() */
 		dev->dev.archdata.edev = edev;
 		edev->pdev = dev;
 
 		eeh_addr_cache_insert_dev(dev);
-
 		eeh_sysfs_add_device(dev);
 	}
 

From f2856491d24044de08da9e53cf7068841a8b4e1c Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:52 +0800
Subject: [PATCH 248/320] powerpc/eeh: Export functions for hotplug

Make some functions public in order to support hotplug on either specific
PCI bus or PCI device in future.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h | 9 +++++++++
 arch/powerpc/kernel/eeh.c      | 6 +++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 09a8743143f3..d9d35c27de25 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -209,9 +209,12 @@ unsigned long eeh_check_failure(const volatile void __iomem *token,
 				unsigned long val);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
+void eeh_add_device_early(struct device_node *);
 void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
+void eeh_remove_device(struct pci_dev *, int);
 void eeh_remove_bus_device(struct pci_dev *, int);
 
 /**
@@ -252,12 +255,18 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token
 
 static inline void eeh_addr_cache_build(void) { }
 
+static inline void eeh_add_device_early(struct device_node *dn) { }
+
 static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 
+static inline void eeh_add_device_late(struct pci_dev *dev) { }
+
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
+static inline void eeh_remove_device(struct pci_dev *dev, int purge_pe) { }
+
 static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index b5c425ea2974..582ad1ef46a8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -836,7 +836,7 @@ core_initcall_sync(eeh_init);
  * on the CEC architecture, type of the device, on earlier boot
  * command-line arguments & etc.
  */
-static void eeh_add_device_early(struct device_node *dn)
+void eeh_add_device_early(struct device_node *dn)
 {
 	struct pci_controller *phb;
 
@@ -884,7 +884,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
  * This routine must be used to complete EEH initialization for PCI
  * devices that were added after system boot (e.g. hotplug, dlpar).
  */
-static void eeh_add_device_late(struct pci_dev *dev)
+void eeh_add_device_late(struct pci_dev *dev)
 {
 	struct device_node *dn;
 	struct eeh_dev *edev;
@@ -972,7 +972,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
  * this device will no longer be detected after this call; thus,
  * i/o errors affecting this slot may leave this device unusable.
  */
-static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
+void eeh_remove_device(struct pci_dev *dev, int purge_pe)
 {
 	struct eeh_dev *edev;
 

From 008a4938ea07db071f03adffaa3a78c2fbbcde6b Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:53 +0800
Subject: [PATCH 249/320] powerpc/pci: Override pcibios_release_device()

The patch overrides pcibios_release_device() to release EEH
resources (EEH cache, unbinding EEH device) for the indicated PCI
device.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/pci-hotplug.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 3f608800c06b..3dab2f2801b9 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -21,6 +21,17 @@
 #include <asm/firmware.h>
 #include <asm/eeh.h>
 
+/**
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
+ *
+ * The function is called before releasing the indicated PCI device.
+ */
+void pcibios_release_device(struct pci_dev *dev)
+{
+	eeh_remove_device(dev, 1);
+}
+
 /**
  * __pcibios_remove_pci_devices - remove all devices under this bus
  * @bus: the indicated PCI bus

From c7b51bce636e4990662bb100bc17e1d4d6c02d34 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:54 +0800
Subject: [PATCH 250/320] powerpc/pci/hotplug: Don't need to remove from EEH
 cache twice

Since pcibios_release_device() called by pci_stop_and_remove_bus_device()
has removed the device from the EEH cache, we needn't do that again.

Cc: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/pci/hotplug/rpadlpar_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index b29e20b7862f..bb7af78e4eed 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -388,7 +388,6 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn)
 	/* Remove the EADS bridge device itself */
 	BUG_ON(!bus->self);
 	pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self));
-	eeh_remove_bus_device(bus->self, true);
 	pci_stop_and_remove_bus_device(bus->self);
 
 	return 0;

From 807a827d4e7455a40e8f56ec2a67c57a91cab9f7 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:55 +0800
Subject: [PATCH 251/320] powerpc/eeh: Keep PE during hotplug

When we do normal hotplug, the PE (shadow EEH structure) shouldn't be
kept around.

However, we need to keep it if the hotplug an artifial one caused by
EEH errors recovery.

Since we remove EEH device through the PCI hook pcibios_release_device(),
the flag "purge_pe" passed to various functions is meaningless. So the patch
removes the meaningless flag and introduce new flag "EEH_PE_KEEP"
to save the PE while doing hotplug during EEH error recovery.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h        | 11 +++----
 arch/powerpc/include/asm/pci-bridge.h |  1 -
 arch/powerpc/kernel/eeh.c             | 28 ++--------------
 arch/powerpc/kernel/eeh_driver.c      |  7 ++--
 arch/powerpc/kernel/eeh_pe.c          |  7 ++--
 arch/powerpc/kernel/pci-hotplug.c     | 46 +++++++++------------------
 6 files changed, 30 insertions(+), 70 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d9d35c27de25..2ce22d7b71a0 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -55,6 +55,8 @@ struct device_node;
 #define EEH_PE_RECOVERING	(1 << 1)	/* Recovering PE	*/
 #define EEH_PE_PHB_DEAD		(1 << 2)	/* Dead PHB		*/
 
+#define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
+
 struct eeh_pe {
 	int type;			/* PE type: PHB/Bus/Device	*/
 	int state;			/* PE EEH dependent mode	*/
@@ -193,7 +195,7 @@ int eeh_phb_pe_create(struct pci_controller *phb);
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
@@ -214,8 +216,7 @@ void eeh_add_device_tree_early(struct device_node *);
 void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
-void eeh_remove_device(struct pci_dev *, int);
-void eeh_remove_bus_device(struct pci_dev *, int);
+void eeh_remove_device(struct pci_dev *);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -265,9 +266,7 @@ static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
-static inline void eeh_remove_device(struct pci_dev *dev, int purge_pe) { }
-
-static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
+static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 2c1d8cb9b265..32d0d2018faf 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -209,7 +209,6 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
 extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
 
 /** Remove all of the PCI devices under this bus */
-extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe);
 extern void pcibios_remove_pci_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 582ad1ef46a8..ce81477316be 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -964,7 +964,6 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
 /**
  * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
- * @purge_pe: remove the PE or not
  *
  * This routine should be called when a device is removed from
  * a running system (e.g. by hotplug or dlpar).  It unregisters
@@ -972,7 +971,7 @@ EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
  * this device will no longer be detected after this call; thus,
  * i/o errors affecting this slot may leave this device unusable.
  */
-void eeh_remove_device(struct pci_dev *dev, int purge_pe)
+void eeh_remove_device(struct pci_dev *dev)
 {
 	struct eeh_dev *edev;
 
@@ -990,34 +989,11 @@ void eeh_remove_device(struct pci_dev *dev, int purge_pe)
 	edev->pdev = NULL;
 	dev->dev.archdata.edev = NULL;
 
-	eeh_rmv_from_parent_pe(edev, purge_pe);
+	eeh_rmv_from_parent_pe(edev);
 	eeh_addr_cache_rmv_dev(dev);
 	eeh_sysfs_remove_device(dev);
 }
 
-/**
- * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
- * @dev: PCI device
- * @purge_pe: remove the corresponding PE or not
- *
- * This routine must be called when a device is removed from the
- * running system through hotplug or dlpar. The corresponding
- * PCI address cache will be removed.
- */
-void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
-{
-	struct pci_bus *bus = dev->subordinate;
-	struct pci_dev *child, *tmp;
-
-	eeh_remove_device(dev, purge_pe);
-
-	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
-			 eeh_remove_bus_device(child, purge_pe);
-	}
-}
-EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
-
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (0 == eeh_subsystem_enabled) {
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 2b1ce17cae50..9ef3bbb8580a 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -362,8 +362,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	 * devices are expected to be attached soon when calling
 	 * into pcibios_add_pci_devices().
 	 */
-	if (bus)
-		__pcibios_remove_pci_devices(bus, 0);
+	if (bus) {
+		eeh_pe_state_mark(pe, EEH_PE_KEEP);
+		pcibios_remove_pci_devices(bus);
+	}
 
 	/* Reset the pci controller. (Asserts RST#; resets config space).
 	 * Reconfigure bridges and devices. Don't try to bring the system
@@ -386,6 +388,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	if (bus) {
 		ssleep(5);
 		pcibios_add_pci_devices(bus);
+		eeh_pe_state_clear(pe, EEH_PE_KEEP);
 	}
 
 	pe->tstamp = tstamp;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 016588a6f5ed..32ef40940bad 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 		while (parent) {
 			if (!(parent->type & EEH_PE_INVALID))
 				break;
-			parent->type &= ~EEH_PE_INVALID;
+			parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
 			parent = parent->parent;
 		}
 		pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
@@ -397,14 +397,13 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
 /**
  * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
  * @edev: EEH device
- * @purge_pe: remove PE or not
  *
  * The PE hierarchy tree might be changed when doing PCI hotplug.
  * Also, the PCI devices or buses could be removed from the system
  * during EEH recovery. So we have to call the function remove the
  * corresponding PE accordingly if necessary.
  */
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent, *child;
 	int cnt;
@@ -431,7 +430,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
 		if (pe->type & EEH_PE_PHB)
 			break;
 
-		if (purge_pe) {
+		if (!(pe->state & EEH_PE_KEEP)) {
 			if (list_empty(&pe->edevs) &&
 			    list_empty(&pe->child_list)) {
 				list_del(&pe->child);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 3dab2f2801b9..fc0831d4971f 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -29,36 +29,7 @@
  */
 void pcibios_release_device(struct pci_dev *dev)
 {
-	eeh_remove_device(dev, 1);
-}
-
-/**
- * __pcibios_remove_pci_devices - remove all devices under this bus
- * @bus: the indicated PCI bus
- * @purge_pe: destroy the PE on removal of PCI devices
- *
- * Remove all of the PCI devices under this bus both from the
- * linux pci device tree, and from the powerpc EEH address cache.
- * By default, the corresponding PE will be destroied during the
- * normal PCI hotplug path. For PCI hotplug during EEH recovery,
- * the corresponding PE won't be destroied and deallocated.
- */
-void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
-{
-	struct pci_dev *dev, *tmp;
-	struct pci_bus *child_bus;
-
-	/* First go down child busses */
-	list_for_each_entry(child_bus, &bus->children, node)
-		__pcibios_remove_pci_devices(child_bus, purge_pe);
-
-	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
-		 pci_domain_nr(bus),  bus->number);
-	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
-		pr_debug("     * Removing %s...\n", pci_name(dev));
-		eeh_remove_bus_device(dev, purge_pe);
-		pci_stop_and_remove_bus_device(dev);
-	}
+	eeh_remove_device(dev);
 }
 
 /**
@@ -70,8 +41,21 @@ void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
  */
 void pcibios_remove_pci_devices(struct pci_bus *bus)
 {
-	__pcibios_remove_pci_devices(bus, 1);
+	struct pci_dev *dev, *tmp;
+	struct pci_bus *child_bus;
+
+	/* First go down child busses */
+	list_for_each_entry(child_bus, &bus->children, node)
+		pcibios_remove_pci_devices(child_bus);
+
+	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
+		 pci_domain_nr(bus),  bus->number);
+	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+		pr_debug("   Removing %s...\n", pci_name(dev));
+		pci_stop_and_remove_bus_device(dev);
+	}
 }
+
 EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
 
 /**

From 9feed42e93d2625db86423cedf8b4b2bed00779e Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:56 +0800
Subject: [PATCH 252/320] powerpc/eeh: Use safe list traversal when walking EEH
 devices

Currently, we're trasversing the EEH devices list using list_for_each_entry().
That's not safe enough because the EEH devices might be removed from
its parent PE while doing iteration. The patch replaces that with
list_for_each_entry_safe().

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h |  4 ++--
 arch/powerpc/kernel/eeh.c      |  4 ++--
 arch/powerpc/kernel/eeh_pe.c   | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 2ce22d7b71a0..e8c411b63caf 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -74,8 +74,8 @@ struct eeh_pe {
 	struct list_head child;		/* Child PEs			*/
 };
 
-#define eeh_pe_for_each_dev(pe, edev) \
-		list_for_each_entry(edev, &pe->edevs, list)
+#define eeh_pe_for_each_dev(pe, edev, tmp) \
+		list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
 /*
  * The struct is used to trace EEH state for the associated
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index ce81477316be..56bd4584f61f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -231,7 +231,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
 	size_t loglen = 0;
-	struct eeh_dev *edev;
+	struct eeh_dev *edev, *tmp;
 	bool valid_cfg_log = true;
 
 	/*
@@ -251,7 +251,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 		eeh_pe_restore_bars(pe);
 
 		pci_regs_buf[0] = 0;
-		eeh_pe_for_each_dev(pe, edev) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
 			loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
 						      EEH_PCI_REGS_LOG_LEN - loglen);
 		}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 32ef40940bad..c8b815e45c8f 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -176,7 +176,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
-	struct eeh_dev *edev;
+	struct eeh_dev *edev, *tmp;
 	void *ret;
 
 	if (!root) {
@@ -186,7 +186,7 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root,
 
 	/* Traverse root PE */
 	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
-		eeh_pe_for_each_dev(pe, edev) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
 			ret = fn(edev, flag);
 			if (ret)
 				return ret;
@@ -501,7 +501,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
-	struct eeh_dev *tmp;
+	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
 
 	/*
@@ -511,8 +511,8 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
 	 * the PCI device driver.
 	 */
 	pe->state |= state;
-	eeh_pe_for_each_dev(pe, tmp) {
-		pdev = eeh_dev_to_pci_dev(tmp);
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
 		if (pdev)
 			pdev->error_state = pci_channel_io_frozen;
 	}

From ab444ec97e8bd65fff9d489b9a409fc03979268b Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:57 +0800
Subject: [PATCH 253/320] powerpc/pci: Partial tree hotplug support

When EEH error happens to one specific PE, the device drivers
of its attached EEH devices (PCI devices) are checked to see
the further action: reset with complete hotplug, or reset without
hotplug. However, that's not enough for those PCI devices whose
drivers can't support EEH, or those PCI devices without driver.
So we need do so-called "partial hotplug" on basis of PCI devices.
In the situation, part of PCI devices of the specific PE are
unplugged and plugged again after PE reset.

The patch changes pcibios_add_pci_devices() so that it can support
full hotplug and so-called "partial" hotplug based on device-tree
or real hardware. It's notable that pci_of_scan.c has been changed
for a bit in order to support the "partial" hotplug based on dev-tree.

Most of the generic code already supports that, we just need to
plumb it properly on our side.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/pci-common.c  |  2 ++
 arch/powerpc/kernel/pci-hotplug.c | 14 +++++---
 arch/powerpc/kernel/pci_of_scan.c | 56 ++++++++++++++++++++++---------
 3 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f46914a0f33e..7d22a675fe1a 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1462,6 +1462,8 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
 	/* Allocate bus and devices resources */
 	pcibios_allocate_bus_resources(bus);
 	pcibios_claim_one_bus(bus);
+	if (!pci_has_flag(PCI_PROBE_ONLY))
+		pci_assign_unassigned_bus_resources(bus);
 
 	/* Fixup EEH */
 	eeh_add_device_tree_late(bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index fc0831d4971f..c1e17ae68a08 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
  */
 void pcibios_add_pci_devices(struct pci_bus * bus)
 {
-	int slotno, num, mode, pass, max;
+	int slotno, mode, pass, max;
 	struct pci_dev *dev;
 	struct device_node *dn = pci_bus_to_OF_node(bus);
 
@@ -85,11 +85,15 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
 		/* use ofdt-based probe */
 		of_rescan_bus(dn, bus);
 	} else if (mode == PCI_PROBE_NORMAL) {
-		/* use legacy probe */
+		/*
+		 * Use legacy probe. In the partial hotplug case, we
+		 * probably have grandchildren devices unplugged. So
+		 * we don't check the return value from pci_scan_slot() in
+		 * order for fully rescan all the way down to pick them up.
+		 * They can have been removed during partial hotplug.
+		 */
 		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
-		num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
-		if (!num)
-			return;
+		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
 		pcibios_setup_bus_devices(bus);
 		max = bus->busn_res.start;
 		for (pass = 0; pass < 2; pass++) {
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 6b0ba5854d99..15d9105323bf 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -230,11 +230,14 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 		return;
 	}
 
-	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+	bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]);
 	if (!bus) {
-		printk(KERN_ERR "Failed to create pci bus for %s\n",
-		       node->full_name);
-		return;
+		bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+		if (!bus) {
+			printk(KERN_ERR "Failed to create pci bus for %s\n",
+			       node->full_name);
+			return;
+		}
 	}
 
 	bus->primary = dev->bus->number;
@@ -292,6 +295,38 @@ void of_scan_pci_bridge(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(of_scan_pci_bridge);
 
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+			    struct device_node *dn)
+{
+	struct pci_dev *dev = NULL;
+	const u32 *reg;
+	int reglen, devfn;
+
+	pr_debug("  * %s\n", dn->full_name);
+	if (!of_device_is_available(dn))
+		return NULL;
+
+	reg = of_get_property(dn, "reg", &reglen);
+	if (reg == NULL || reglen < 20)
+		return NULL;
+	devfn = (reg[0] >> 8) & 0xff;
+
+	/* Check if the PCI device is already there */
+	dev = pci_get_slot(bus, devfn);
+	if (dev) {
+		pci_dev_put(dev);
+		return dev;
+	}
+
+	/* create a new pci_dev for this device */
+	dev = of_create_pci_dev(dn, bus, devfn);
+	if (!dev)
+		return NULL;
+
+	pr_debug("  dev header type: %x\n", dev->hdr_type);
+	return dev;
+}
+
 /**
  * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
  * @node: device tree node for the PCI bus
@@ -302,8 +337,6 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 			  int rescan_existing)
 {
 	struct device_node *child;
-	const u32 *reg;
-	int reglen, devfn;
 	struct pci_dev *dev;
 
 	pr_debug("of_scan_bus(%s) bus no %d...\n",
@@ -311,16 +344,7 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 
 	/* Scan direct children */
 	for_each_child_of_node(node, child) {
-		pr_debug("  * %s\n", child->full_name);
-		if (!of_device_is_available(child))
-			continue;
-		reg = of_get_property(child, "reg", &reglen);
-		if (reg == NULL || reglen < 20)
-			continue;
-		devfn = (reg[0] >> 8) & 0xff;
-
-		/* create a new pci_dev for this device */
-		dev = of_create_pci_dev(child, bus, devfn);
+		dev = of_scan_pci_dev(bus, child);
 		if (!dev)
 			continue;
 		pr_debug("    dev header type: %x\n", dev->hdr_type);

From f5c57710dd62dd06f176934a8b4b8accbf00f9f8 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:58 +0800
Subject: [PATCH 254/320] powerpc/eeh: Use partial hotplug for EEH unaware
 drivers

When EEH error happens to one specific PE, some devices with drivers
supporting EEH won't except hotplug on the device. However, there
might have other deivces without driver, or with driver without EEH
support. For the case, we need do partial hotplug in order to make
sure that the PE becomes absolutely quite during reset. Otherise,
the PE reset might fail and leads to failure of error recovery.

The current code doesn't handle that 'mixed' case properly, it either
uses the error callbacks to the drivers, or tries hotplug, but doesn't
handle a PE (EEH domain) composed of a combination of the two.

The patch intends to support so-called "partial" hotplug for EEH:
Before we do reset, we stop and remove those PCI devices without
EEH sensitive driver. The corresponding EEH devices are not detached
from its PE, but with special flag. After the reset is done, those
EEH devices with the special flag will be scanned one by one.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h               |  6 +-
 arch/powerpc/kernel/eeh.c                    | 30 +++++++-
 arch/powerpc/kernel/eeh_driver.c             | 74 ++++++++++++++++++--
 arch/powerpc/kernel/eeh_pe.c                 | 20 ++----
 arch/powerpc/kernel/eeh_sysfs.c              |  7 ++
 arch/powerpc/platforms/powernv/eeh-powernv.c |  2 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |  2 +-
 7 files changed, 117 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index e8c411b63caf..f54a60131de5 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -84,7 +84,8 @@ struct eeh_pe {
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_DEV_IRQ_DISABLED	(1<<0)	/* Interrupt disabled		*/
+#define EEH_DEV_IRQ_DISABLED	(1 << 0)	/* Interrupt disabled	*/
+#define EEH_DEV_DISCONNECTED	(1 << 1)	/* Removing from PE	*/
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
@@ -97,6 +98,7 @@ struct eeh_dev {
 	struct pci_controller *phb;	/* Associated PHB		*/
 	struct device_node *dn;		/* Associated device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
+	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
@@ -197,6 +199,8 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
 int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
+void *eeh_pe_traverse(struct eeh_pe *root,
+		eeh_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 56bd4584f61f..a5783f1a7a96 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -900,7 +900,21 @@ void eeh_add_device_late(struct pci_dev *dev)
 		pr_debug("EEH: Already referenced !\n");
 		return;
 	}
-	WARN_ON(edev->pdev);
+
+	/*
+	 * The EEH cache might not be removed correctly because of
+	 * unbalanced kref to the device during unplug time, which
+	 * relies on pcibios_release_device(). So we have to remove
+	 * that here explicitly.
+	 */
+	if (edev->pdev) {
+		eeh_rmv_from_parent_pe(edev);
+		eeh_addr_cache_rmv_dev(edev->pdev);
+		eeh_sysfs_remove_device(edev->pdev);
+
+		edev->pdev = NULL;
+		dev->dev.archdata.edev = NULL;
+	}
 
 	edev->pdev = dev;
 	dev->dev.archdata.edev = edev;
@@ -982,14 +996,24 @@ void eeh_remove_device(struct pci_dev *dev)
 	/* Unregister the device with the EEH/PCI address search system */
 	pr_debug("EEH: Removing device %s\n", pci_name(dev));
 
-	if (!edev || !edev->pdev) {
+	if (!edev || !edev->pdev || !edev->pe) {
 		pr_debug("EEH: Not referenced !\n");
 		return;
 	}
+
+	/*
+	 * During the hotplug for EEH error recovery, we need the EEH
+	 * device attached to the parent PE in order for BAR restore
+	 * a bit later. So we keep it for BAR restore and remove it
+	 * from the parent PE during the BAR resotre.
+	 */
 	edev->pdev = NULL;
 	dev->dev.archdata.edev = NULL;
+	if (!(edev->pe->state & EEH_PE_KEEP))
+		eeh_rmv_from_parent_pe(edev);
+	else
+		edev->mode |= EEH_DEV_DISCONNECTED;
 
-	eeh_rmv_from_parent_pe(edev);
 	eeh_addr_cache_rmv_dev(dev);
 	eeh_sysfs_remove_device(dev);
 }
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 9ef3bbb8580a..9fda75d1f5aa 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -338,6 +338,54 @@ static void *eeh_report_failure(void *data, void *userdata)
 	return NULL;
 }
 
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+	struct pci_driver *driver;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	int *removed = (int *)userdata;
+
+	/*
+	 * Actually, we should remove the PCI bridges as well.
+	 * However, that's lots of complexity to do that,
+	 * particularly some of devices under the bridge might
+	 * support EEH. So we just care about PCI devices for
+	 * simplicity here.
+	 */
+	if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+		return NULL;
+	driver = eeh_pcid_get(dev);
+	if (driver && driver->err_handler)
+		return NULL;
+
+	/* Remove it from PCI subsystem */
+	pr_debug("EEH: Removing %s without EEH sensitive driver\n",
+		 pci_name(dev));
+	edev->bus = dev->bus;
+	edev->mode |= EEH_DEV_DISCONNECTED;
+	(*removed)++;
+
+	pci_stop_and_remove_bus_device(dev);
+
+	return NULL;
+}
+
+static void *eeh_pe_detach_dev(void *data, void *userdata)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		if (!(edev->mode & EEH_DEV_DISCONNECTED))
+			continue;
+
+		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+		eeh_rmv_from_parent_pe(edev);
+	}
+
+	return NULL;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -349,8 +397,9 @@ static void *eeh_report_failure(void *data, void *userdata)
  */
 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 {
+	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
 	struct timeval tstamp;
-	int cnt, rc;
+	int cnt, rc, removed = 0;
 
 	/* pcibios will clear the counter; save the value */
 	cnt = pe->freeze_count;
@@ -362,10 +411,11 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	 * devices are expected to be attached soon when calling
 	 * into pcibios_add_pci_devices().
 	 */
-	if (bus) {
-		eeh_pe_state_mark(pe, EEH_PE_KEEP);
+	eeh_pe_state_mark(pe, EEH_PE_KEEP);
+	if (bus)
 		pcibios_remove_pci_devices(bus);
-	}
+	else if (frozen_bus)
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
 
 	/* Reset the pci controller. (Asserts RST#; resets config space).
 	 * Reconfigure bridges and devices. Don't try to bring the system
@@ -386,10 +436,24 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	 * potentially weird things happen.
 	 */
 	if (bus) {
+		pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
 		ssleep(5);
+
+		/*
+		 * The EEH device is still connected with its parent
+		 * PE. We should disconnect it so the binding can be
+		 * rebuilt when adding PCI devices.
+		 */
+		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
 		pcibios_add_pci_devices(bus);
-		eeh_pe_state_clear(pe, EEH_PE_KEEP);
+	} else if (frozen_bus && removed) {
+		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+		ssleep(5);
+
+		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+		pcibios_add_pci_devices(frozen_bus);
 	}
+	eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
 	pe->tstamp = tstamp;
 	pe->freeze_count = cnt;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index c8b815e45c8f..2aa955ae01a1 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -149,8 +149,8 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
  * callback returns something other than NULL, or no more PEs
  * to be traversed.
  */
-static void *eeh_pe_traverse(struct eeh_pe *root,
-			eeh_traverse_func fn, void *flag)
+void *eeh_pe_traverse(struct eeh_pe *root,
+		      eeh_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
 	void *ret;
@@ -409,8 +409,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 	int cnt;
 
 	if (!edev->pe) {
-		pr_warning("%s: No PE found for EEH device %s\n",
-			__func__, edev->dn->full_name);
+		pr_debug("%s: No PE found for EEH device %s\n",
+			 __func__, edev->dn->full_name);
 		return -EEXIST;
 	}
 
@@ -728,18 +728,12 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
  */
 static void *eeh_restore_one_device_bars(void *data, void *flag)
 {
-	struct pci_dev *pdev = NULL;
 	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct device_node *dn = eeh_dev_to_of_node(edev);
 
-	/* Trace the PCI bridge */
-	if (eeh_probe_mode_dev()) {
-		pdev = eeh_dev_to_pci_dev(edev);
-		if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
-                        pdev = NULL;
-        }
-
-	if (pdev)
+	/* Do special restore for bridges */
+	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
 		eeh_restore_bridge_bars(pdev, edev, dn);
 	else
 		eeh_restore_device_bars(edev, dn);
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index e7ae3484918c..61e2a1452131 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -68,6 +68,13 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
 
 void eeh_sysfs_remove_device(struct pci_dev *pdev)
 {
+	/*
+	 * The parent directory might have been removed. We needn't
+	 * continue for that case.
+	 */
+	if (!pdev->dev.kobj.sd)
+		return;
+
 	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 969cce73055a..a380428cf9ce 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -114,7 +114,7 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	 * the root bridge. So it's not reasonable to continue
 	 * the probing.
 	 */
-	if (!dn || !edev)
+	if (!dn || !edev || edev->pe)
 		return 0;
 
 	/* Skip for PCI-ISA bridge */
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b456b157d33d..0f44f9fe49ac 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -153,7 +153,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 
 	/* Retrieve OF node and eeh device */
 	edev = of_node_to_eeh_dev(dn);
-	if (!of_device_is_available(dn))
+	if (edev->pe || !of_device_is_available(dn))
 		return NULL;
 
 	/* Retrieve class/vendor/device IDs */

From 4b83bd452f17582c8d1c916568fd28a237a2eb46 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:24:59 +0800
Subject: [PATCH 255/320] powerpc/eeh: Don't use pci_dev during BAR restore

While restoring BARs for one specific PCI device, the pci_dev
instance should have been released. So it's not reliable to use
the pci_dev instance on restoring BARs. However, we still need
some information (e.g. PCIe capability position, header type) from
the pci_dev instance. So we have to store those information to
EEH device in advance.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h               |  8 ++-
 arch/powerpc/kernel/eeh_pe.c                 | 25 ++++----
 arch/powerpc/platforms/powernv/eeh-powernv.c | 11 ++++
 arch/powerpc/platforms/pseries/eeh_pseries.c | 63 +++++++++++++++++++-
 4 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f54a60131de5..4199d9943277 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -84,8 +84,11 @@ struct eeh_pe {
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_DEV_IRQ_DISABLED	(1 << 0)	/* Interrupt disabled	*/
-#define EEH_DEV_DISCONNECTED	(1 << 1)	/* Removing from PE	*/
+#define EEH_DEV_BRIDGE		(1 << 0)	/* PCI bridge		*/
+#define EEH_DEV_ROOT_PORT	(1 << 1)	/* PCIe root port	*/
+#define EEH_DEV_DS_PORT		(1 << 2)	/* Downstream port	*/
+#define EEH_DEV_IRQ_DISABLED	(1 << 3)	/* Interrupt disabled	*/
+#define EEH_DEV_DISCONNECTED	(1 << 4)	/* Removing from PE	*/
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
@@ -93,6 +96,7 @@ struct eeh_dev {
 	int config_addr;		/* Config address		*/
 	int pe_config_addr;		/* PE config address		*/
 	u32 config_space[16];		/* Saved PCI config space	*/
+	u8 pcie_cap;			/* Saved PCIe capability	*/
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
 	struct pci_controller *phb;	/* Associated PHB		*/
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 2aa955ae01a1..f9450537e335 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -578,7 +578,7 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
  * blocked on normal path during the stage. So we need utilize
  * eeh operations, which is always permitted.
  */
-static void eeh_bridge_check_link(struct pci_dev *pdev,
+static void eeh_bridge_check_link(struct eeh_dev *edev,
 				  struct device_node *dn)
 {
 	int cap;
@@ -589,16 +589,17 @@ static void eeh_bridge_check_link(struct pci_dev *pdev,
 	 * We only check root port and downstream ports of
 	 * PCIe switches
 	 */
-	if (!pci_is_pcie(pdev) ||
-	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT &&
-	     pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
+	if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
 		return;
 
-	pr_debug("%s: Check PCIe link for %s ...\n",
-		 __func__, pci_name(pdev));
+	pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
+		 __func__, edev->phb->global_number,
+		 edev->config_addr >> 8,
+		 PCI_SLOT(edev->config_addr & 0xFF),
+		 PCI_FUNC(edev->config_addr & 0xFF));
 
 	/* Check slot status */
-	cap = pdev->pcie_cap;
+	cap = edev->pcie_cap;
 	eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
 	if (!(val & PCI_EXP_SLTSTA_PDS)) {
 		pr_debug("  No card in the slot (0x%04x) !\n", val);
@@ -652,8 +653,7 @@ static void eeh_bridge_check_link(struct pci_dev *pdev,
 #define BYTE_SWAP(OFF)	(8*((OFF)/4)+3-(OFF))
 #define SAVED_BYTE(OFF)	(((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
 
-static void eeh_restore_bridge_bars(struct pci_dev *pdev,
-				    struct eeh_dev *edev,
+static void eeh_restore_bridge_bars(struct eeh_dev *edev,
 				    struct device_node *dn)
 {
 	int i;
@@ -679,7 +679,7 @@ static void eeh_restore_bridge_bars(struct pci_dev *pdev,
 	eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
 
 	/* Check the PCIe link is ready */
-	eeh_bridge_check_link(pdev, dn);
+	eeh_bridge_check_link(edev, dn);
 }
 
 static void eeh_restore_device_bars(struct eeh_dev *edev,
@@ -729,12 +729,11 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
 static void *eeh_restore_one_device_bars(void *data, void *flag)
 {
 	struct eeh_dev *edev = (struct eeh_dev *)data;
-	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct device_node *dn = eeh_dev_to_of_node(edev);
 
 	/* Do special restore for bridges */
-	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
-		eeh_restore_bridge_bars(pdev, edev, dn);
+	if (edev->mode & EEH_DEV_BRIDGE)
+		eeh_restore_bridge_bars(edev, dn);
 	else
 		eeh_restore_device_bars(edev, dn);
 
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a380428cf9ce..4361a5c7f3ab 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -124,6 +124,17 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 	/* Initialize eeh device */
 	edev->class_code	= dev->class;
 	edev->mode		= 0;
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		edev->mode |= EEH_DEV_BRIDGE;
+	if (pci_is_pcie(dev)) {
+		edev->pcie_cap = pci_pcie_cap(dev);
+
+		if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
+			edev->mode |= EEH_DEV_ROOT_PORT;
+		else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
+			edev->mode |= EEH_DEV_DS_PORT;
+	}
+
 	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
 	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 0f44f9fe49ac..9e80f0af311f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -133,6 +133,48 @@ static int pseries_eeh_init(void)
 	return 0;
 }
 
+static int pseries_eeh_cap_start(struct device_node *dn)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	u32 status;
+
+	if (!pdn)
+		return 0;
+
+	rtas_read_config(pdn, PCI_STATUS, 2, &status);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	return PCI_CAPABILITY_LIST;
+}
+
+
+static int pseries_eeh_find_cap(struct device_node *dn, int cap)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	int pos = pseries_eeh_cap_start(dn);
+	int cnt = 48;	/* Maximal number of capabilities */
+	u32 id;
+
+	if (!pos)
+		return 0;
+
+        while (cnt--) {
+		rtas_read_config(pdn, pos, 1, &pos);
+		if (pos < 0x40)
+			break;
+		pos &= ~3;
+		rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+		pos += PCI_CAP_LIST_NEXT;
+	}
+
+	return 0;
+}
+
 /**
  * pseries_eeh_of_probe - EEH probe on the given device
  * @dn: OF node
@@ -146,8 +188,10 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 {
 	struct eeh_dev *edev;
 	struct eeh_pe pe;
+	struct pci_dn *pdn = PCI_DN(dn);
 	const u32 *class_code, *vendor_id, *device_id;
 	const u32 *regs;
+	u32 pcie_flags;
 	int enable = 0;
 	int ret;
 
@@ -167,9 +211,26 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 	if (dn->type && !strcmp(dn->type, "isa"))
 		return NULL;
 
-	/* Update class code and mode of eeh device */
+	/*
+	 * Update class code and mode of eeh device. We need
+	 * correctly reflects that current device is root port
+	 * or PCIe switch downstream port.
+	 */
 	edev->class_code = *class_code;
+	edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
 	edev->mode = 0;
+	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		edev->mode |= EEH_DEV_BRIDGE;
+		if (edev->pcie_cap) {
+			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
+	}
 
 	/* Retrieve the device address */
 	regs = of_get_property(dn, "reg", NULL);

From 91150af3adf67463c4ca7d72d4fe1a84da37792c Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:25:00 +0800
Subject: [PATCH 256/320] powerpc/eeh: Fix unbalanced enable for IRQ

The patch fixes following issue:

Unbalanced enable for IRQ 23
------------[ cut here ]------------
WARNING: at kernel/irq/manage.c:437
:
NIP [c00000000016de8c] .__enable_irq+0x11c/0x140
LR [c00000000016de88] .__enable_irq+0x118/0x140
Call Trace:
[c000003ea1f23880] [c00000000016de88] .__enable_irq+0x118/0x140 (unreliable)
[c000003ea1f23910] [c00000000016df08] .enable_irq+0x58/0xa0
[c000003ea1f239a0] [c0000000000388b4] .eeh_enable_irq+0xc4/0xe0
[c000003ea1f23a30] [c000000000038a28] .eeh_report_reset+0x78/0x130
[c000003ea1f23ac0] [c000000000037508] .eeh_pe_dev_traverse+0x98/0x170
[c000003ea1f23b60] [c0000000000391ac] .eeh_handle_normal_event+0x2fc/0x3d0
[c000003ea1f23bf0] [c000000000039538] .eeh_handle_event+0x2b8/0x2c0
[c000003ea1f23c90] [c000000000039600] .eeh_event_handler+0xc0/0x170
[c000003ea1f23d30] [c0000000000da9a0] .kthread+0xf0/0x100
[c000003ea1f23e30] [c00000000000a1dc] .ret_from_kernel_thread+0x5c/0x80

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/eeh_driver.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 9fda75d1f5aa..36bed5a12750 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -143,10 +143,14 @@ static void eeh_disable_irq(struct pci_dev *dev)
 static void eeh_enable_irq(struct pci_dev *dev)
 {
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+	struct irq_desc *desc;
 
 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
-		enable_irq(dev->irq);
+
+		desc = irq_to_desc(dev->irq);
+		if (desc && desc->depth > 0)
+			enable_irq(dev->irq);
 	}
 }
 

From ab55d2187da27414f78056810713c92f9a4350c2 Mon Sep 17 00:00:00 2001
From: Gavin Shan <shangw@linux.vnet.ibm.com>
Date: Wed, 24 Jul 2013 10:25:01 +0800
Subject: [PATCH 257/320] powerpc/eeh: Introdce flag to protect sysfs

The patch introduces flag EEH_DEV_SYSFS to keep track that the sysfs
entries for the corresponding EEH device (then PCI device) has been
added or removed, in order to avoid race condition.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/eeh.h               |  2 ++
 arch/powerpc/kernel/eeh.c                    |  2 ++
 arch/powerpc/kernel/eeh_sysfs.c              | 16 +++++++++++++++-
 arch/powerpc/platforms/powernv/eeh-powernv.c |  4 ++--
 arch/powerpc/platforms/pseries/eeh_pseries.c |  2 +-
 5 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 4199d9943277..d3e5e9bc8f94 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -90,6 +90,8 @@ struct eeh_pe {
 #define EEH_DEV_IRQ_DISABLED	(1 << 3)	/* Interrupt disabled	*/
 #define EEH_DEV_DISCONNECTED	(1 << 4)	/* Removing from PE	*/
 
+#define EEH_DEV_SYSFS		(1 << 8)	/* Sysfs created        */
+
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
 	int class_code;			/* Class code of the device	*/
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index a5783f1a7a96..ea9414c8088d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -911,6 +911,7 @@ void eeh_add_device_late(struct pci_dev *dev)
 		eeh_rmv_from_parent_pe(edev);
 		eeh_addr_cache_rmv_dev(edev->pdev);
 		eeh_sysfs_remove_device(edev->pdev);
+		edev->mode &= ~EEH_DEV_SYSFS;
 
 		edev->pdev = NULL;
 		dev->dev.archdata.edev = NULL;
@@ -1016,6 +1017,7 @@ void eeh_remove_device(struct pci_dev *dev)
 
 	eeh_addr_cache_rmv_dev(dev);
 	eeh_sysfs_remove_device(dev);
+	edev->mode &= ~EEH_DEV_SYSFS;
 }
 
 static int proc_eeh_show(struct seq_file *m, void *v)
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 61e2a1452131..5d753d4f2c75 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -56,26 +56,40 @@ EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
 
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 	int rc=0;
 
+	if (edev && (edev->mode & EEH_DEV_SYSFS))
+		return;
+
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
 
 	if (rc)
 		printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+	else if (edev)
+		edev->mode |= EEH_DEV_SYSFS;
 }
 
 void eeh_sysfs_remove_device(struct pci_dev *pdev)
 {
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
 	/*
 	 * The parent directory might have been removed. We needn't
 	 * continue for that case.
 	 */
-	if (!pdev->dev.kobj.sd)
+	if (!pdev->dev.kobj.sd) {
+		if (edev)
+			edev->mode &= ~EEH_DEV_SYSFS;
 		return;
+	}
 
 	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+	if (edev)
+		edev->mode &= ~EEH_DEV_SYSFS;
 }
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4361a5c7f3ab..79663d26e6ea 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -122,8 +122,8 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
 		return 0;
 
 	/* Initialize eeh device */
-	edev->class_code	= dev->class;
-	edev->mode		= 0;
+	edev->class_code = dev->class;
+	edev->mode	&= 0xFFFFFF00;
 	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
 		edev->mode |= EEH_DEV_BRIDGE;
 	if (pci_is_pcie(dev)) {
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 9e80f0af311f..7fbc25b1813f 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -218,7 +218,7 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag)
 	 */
 	edev->class_code = *class_code;
 	edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
-	edev->mode = 0;
+	edev->mode &= 0xFFFFFF00;
 	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
 		if (edev->pcie_cap) {

From d817468c4b2892b9468e2a0c92116e38a3a61370 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
Date: Wed, 24 Jul 2013 13:23:51 +0900
Subject: [PATCH 258/320] ARM: S3C24XX: Add missing clkdev entries for s3c2440
 UART

This patch restores serial port operation which has been broken since
commit 60e93575476f ("serial: samsung: enable clock before clearing
pending interrupts during init")

That commit only uncovered the real issue which was missing clkdev
entries for the "uart" clocks on S3C2440. It went unnoticed so far
because return value of clk API calls were not being checked at all
in the samsung serial port driver.

This patch should be backported to at least 3.10 stable kernel, since
the serial port has not been working on s3c2440 since 3.10-rc5.

Cc: Chander Kashyap <chander.kashyap@linaro.org>
Signed-off-by: Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
[on S3C2440 SoC based Mini2440 board]
Tested-by: Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
Reviewed-by: Tomasz Figa <t.figa@samsung.com>
Tested-by: Juergen Beisert <jbe@pengutronix.de>
Cc: <stable@vger.kernel.org>	[3.10]
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-s3c24xx/clock-s3c2410.c      | 161 +++++++++++++--------
 arch/arm/mach-s3c24xx/clock-s3c2440.c      |   3 +
 arch/arm/plat-samsung/include/plat/clock.h |   5 +
 3 files changed, 106 insertions(+), 63 deletions(-)

diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c
index 34fffdf6fc1d..564553694b54 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2410.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c
@@ -119,66 +119,101 @@ static struct clk init_clocks_off[] = {
 	}
 };
 
-static struct clk init_clocks[] = {
-	{
-		.name		= "lcd",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_LCDC,
-	}, {
-		.name		= "gpio",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_GPIO,
-	}, {
-		.name		= "usb-host",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBH,
-	}, {
-		.name		= "usb-device",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBD,
-	}, {
-		.name		= "timers",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_PWMT,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.0",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART0,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.1",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART1,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.2",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART2,
-	}, {
-		.name		= "rtc",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_RTC,
-	}, {
-		.name		= "watchdog",
-		.parent		= &clk_p,
-		.ctrlbit	= 0,
-	}, {
-		.name		= "usb-bus-host",
-		.parent		= &clk_usb_bus,
-	}, {
-		.name		= "usb-bus-gadget",
-		.parent		= &clk_usb_bus,
-	},
+static struct clk clk_lcd = {
+	.name		= "lcd",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_LCDC,
+};
+
+static struct clk clk_gpio = {
+	.name		= "gpio",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_GPIO,
+};
+
+static struct clk clk_usb_host = {
+	.name		= "usb-host",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBH,
+};
+
+static struct clk clk_usb_device = {
+	.name		= "usb-device",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBD,
+};
+
+static struct clk clk_timers = {
+	.name		= "timers",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_PWMT,
+};
+
+struct clk s3c24xx_clk_uart0 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.0",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART0,
+};
+
+struct clk s3c24xx_clk_uart1 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.1",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART1,
+};
+
+struct clk s3c24xx_clk_uart2 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.2",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART2,
+};
+
+static struct clk clk_rtc = {
+	.name		= "rtc",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_RTC,
+};
+
+static struct clk clk_watchdog = {
+	.name		= "watchdog",
+	.parent		= &clk_p,
+	.ctrlbit	= 0,
+};
+
+static struct clk clk_usb_bus_host = {
+	.name		= "usb-bus-host",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk clk_usb_bus_gadget = {
+	.name		= "usb-bus-gadget",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk *init_clocks[] = {
+	&clk_lcd,
+	&clk_gpio,
+	&clk_usb_host,
+	&clk_usb_device,
+	&clk_timers,
+	&s3c24xx_clk_uart0,
+	&s3c24xx_clk_uart1,
+	&s3c24xx_clk_uart2,
+	&clk_rtc,
+	&clk_watchdog,
+	&clk_usb_bus_host,
+	&clk_usb_bus_gadget,
 };
 
 /* s3c2410_baseclk_add()
@@ -195,7 +230,6 @@ int __init s3c2410_baseclk_add(void)
 {
 	unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW);
 	unsigned long clkcon  = __raw_readl(S3C2410_CLKCON);
-	struct clk *clkp;
 	struct clk *xtal;
 	int ret;
 	int ptr;
@@ -207,8 +241,9 @@ int __init s3c2410_baseclk_add(void)
 
 	/* register clocks from clock array */
 
-	clkp = init_clocks;
-	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) {
+	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) {
+		struct clk *clkp = init_clocks[ptr];
+
 		/* ensure that we note the clock state */
 
 		clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0;
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c
index 1069b5680826..aaf006d1d6dc 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2440.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c
@@ -166,6 +166,9 @@ static struct clk_lookup s3c2440_clk_lookup[] = {
 	CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk),
 	CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p),
 	CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n),
+	CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0),
+	CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1),
+	CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2),
 	CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll),
 };
 
diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h
index a62753dc15ba..df45d6edc98d 100644
--- a/arch/arm/plat-samsung/include/plat/clock.h
+++ b/arch/arm/plat-samsung/include/plat/clock.h
@@ -83,6 +83,11 @@ extern struct clk clk_ext;
 extern struct clksrc_clk clk_epllref;
 extern struct clksrc_clk clk_esysclk;
 
+/* S3C24XX UART clocks */
+extern struct clk s3c24xx_clk_uart0;
+extern struct clk s3c24xx_clk_uart1;
+extern struct clk s3c24xx_clk_uart2;
+
 /* S3C64XX specific clocks */
 extern struct clk clk_h2;
 extern struct clk clk_27m;

From ad92c615975a57c4b206c5c99f77a0bf22b373c4 Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <kda@linux-powerpc.org>
Date: Tue, 23 Jul 2013 15:28:03 +0400
Subject: [PATCH 259/320] powerpc/pseries: Fix a typo in
 pSeries_lpar_hpte_insert()

Commit 801eb73f45371accc78ca9d6d22d647eeb722c11 introduced
a bug while checking PTE flags. We have to drop the _PAGE_COHERENT flag
when __PAGE_NO_CACHE is set and the cache update policy is not write-through
(i.e. _PAGE_WRITETHRU is not set)

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
CC:  Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/pseries/lpar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 02d6e21619bb..8bad880bd177 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -146,7 +146,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	flags = 0;
 
 	/* Make pHyp happy */
-	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
+	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
 		hpte_r &= ~_PAGE_COHERENT;
 	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
 		flags |= H_COALESCE_CAND;

From 83383b73ad5fbf52ea4b77aed900a927875f2529 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 3 Jul 2013 13:50:03 +0530
Subject: [PATCH 260/320] powerpc/mm: Fix fallthrough bug in hpte_decode

We should not fallthrough different case statements in hpte_decode. Add
break statement to break out of the switch. The regression is introduced by
dcda287a9b26309ae43a091d0ecde16f8f61b4c0 "powerpc/mm: Simplify hpte_decode"

Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/hash_native_64.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3f0c30ae4791..0530ff78c023 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -554,6 +554,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			seg_off |= vpi << shift;
 		}
 		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		break;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -563,6 +564,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
 			seg_off |= vpi << shift;
 		}
 		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		break;
 	default:
 		*vpn = size = 0;
 	}

From de640959b63ad437c21f2a217f25332c4ea863cb Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Thu, 4 Jul 2013 10:34:45 +0530
Subject: [PATCH 261/320] powerpc/mm: Use the correct SLB(LLP) encoding in
 tlbie instruction

The sllp value is stored in mmu_psize_defs in such a way that we can easily OR
the value to get the operand for slbmte instruction. ie, the L and LP bits are
not contiguous. Decode the bits and use them correctly in tlbie.
regression is introduced by 1f6aaaccb1b3af8613fe45781c1aefee2ae8c6b3
"powerpc: Update tlbie/tlbiel as per ISA doc"

Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/mm/hash_native_64.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 0530ff78c023..c33d939120c9 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -43,6 +43,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 {
 	unsigned long va;
 	unsigned int penc;
+	unsigned long sllp;
 
 	/*
 	 * We need 14 to 65 bits of va for a tlibe of 4K page
@@ -64,7 +65,9 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
 		/* clear out bits after (52) [0....52.....63] */
 		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
-		va |= mmu_psize_defs[apsize].sllp << 6;
+		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
+			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+		va |= sllp << 5;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
@@ -98,6 +101,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 {
 	unsigned long va;
 	unsigned int penc;
+	unsigned long sllp;
 
 	/* VPN_SHIFT can be atmost 12 */
 	va = vpn << VPN_SHIFT;
@@ -113,7 +117,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 		/* clear out bits after(52) [0....52.....63] */
 		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
-		va |= mmu_psize_defs[apsize].sllp << 6;
+		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
+			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+		va |= sllp << 5;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
 		break;

From 679750054ae8129a536734bccf526ff6da35376a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 2 Jul 2013 12:20:03 -0600
Subject: [PATCH 262/320] powerpc/powernv: Mark pnv_pci_init_ioda2_phb() as
 __init

Mark pnv_pci_init_ioda2_phb() as __init.  It is called only from an
init function (pnv_pci_init()), and it calls an init function
(pnv_pci_init_ioda_phb()):

    pnv_pci_init                # init
      pnv_pci_init_ioda2_phb    # non-init
	pnv_pci_init_ioda_phb   # init

This should fix a section mismatch warning.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 49b57b9f835d..d8140b125e62 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1266,7 +1266,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
 }
 
-void pnv_pci_init_ioda2_phb(struct device_node *np)
+void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 {
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }

From 7689bdcab1dca061c4c91f0e1703cef1b7b67e71 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Mon, 10 Jun 2013 11:23:28 +0530
Subject: [PATCH 263/320] powerpc/perf: Ignore separate BHRB privilege state
 filter request

Completely ignore BHRB privilege state filter request as we are
already configuring that with privilege state filtering attribute
for the accompanying PMU event. This would help achieve cleaner
user space interaction for BHRB.

This patch fixes a situation like this

Before patch:-
------------
./perf record -j any -e branch-misses:k ls
Error:
The sys_perf_event_open() syscall returned with 95 (Operation not
supported) for event (branch-misses:k).
/bin/dmesg may provide additional information.
No CONFIG_PERF_EVENTS=y kernel support configured?

Here 'perf record' actually copies over ':k' filter request into BHRB
privilege state filter config and our previous check in kernel would
fail that.

After patch:-
-------------
./perf record -j any -e branch-misses:k ls
perf  perf.data  perf.data.old  test-mmap-ring
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~102 samples)]

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/perf/power8-pmu.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 09def196f5c4..7466374d2787 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -561,18 +561,13 @@ static int power8_generic_events[] = {
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
 	u64 pmu_bhrb_filter = 0;
-	u64 br_privilege = branch_sample_type & ONLY_PLM;
 
-	/* BHRB and regular PMU events share the same prvillege state
+	/* BHRB and regular PMU events share the same privilege state
 	 * filter configuration. BHRB is always recorded along with a
-	 * regular PMU event. So privilege state filter criteria for BHRB
-	 * and the companion PMU events has to be the same. As a default
-	 * "perf record" tool sets all privillege bits ON when no filter
-	 * criteria is provided in the command line. So as along as all
-	 * privillege bits are ON or they are OFF, we are good to go.
+	 * regular PMU event. As the privilege state filter is handled
+	 * in the basic PMC configuration of the accompanying regular
+	 * PMU event, we ignore any separate BHRB specific request.
 	 */
-	if ((br_privilege != 7) && (br_privilege != 0))
-		return -1;
 
 	/* No branch filter requested */
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)

From ff3d79dc12c2ed38483f6c1e0f26fde430f27c9d Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Date: Mon, 10 Jun 2013 11:23:29 +0530
Subject: [PATCH 264/320] powerpc/perf: BHRB filter configuration should follow
 the task

When the task moves around the system, the corresponding cpuhw
per cpu strcuture should be popullated with the BHRB filter
request value so that PMU could be configured appropriately with
that during the next call into power_pmu_enable().

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Acked-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/perf/core-book3s.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a3985aee77fe..24a45f91c65f 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1252,8 +1252,11 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 
 	ret = 0;
  out:
-	if (has_branch_stack(event))
+	if (has_branch_stack(event)) {
 		power_pmu_bhrb_enable(event);
+		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+					event->attr.branch_sample_type);
+	}
 
 	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);

From 89693016e372983d720c186c0d00b29414b58804 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.daniel@samsung.com>
Date: Wed, 24 Jul 2013 14:06:13 +0900
Subject: [PATCH 265/320] ARM: SAMSUNG: Add SAMSUNG_PM config option to select
 pm

This patch enables the selection of samsung pm related stuffs
when SAMSUNG_PM config is enabled and not just when generic PM
config is enabled. Power management for s3c64XX and s3c24XX
is enabled by default and for other platform depends on S5P_PM.
This patch also fixes the following compilation error's when compiling
a platform like exynos5440 which does not select pm stuffs.

arch/arm/mach-exynos/built-in.o: In function '__virt_to_phys':
linux/arch/arm/include/asm/memory.h:175: undefined reference to 's3c_cpu_resume'
linux/arch/arm/include/asm/memory.h:175: undefined reference to 's3c_cpu_resume'
linux/arch/arm/include/asm/memory.h:175: undefined reference to 's3c_cpu_resume'
linux/arch/arm/include/asm/memory.h:175: undefined reference to 's3c_cpu_resume'
arch/arm/mach-exynos/built-in.o: In function 'exynos5_init_irq':
linux/arch/arm/mach-exynos/common.c:492: undefined reference to 's3c_irq_wake'
linux/arch/arm/mach-exynos/common.c:492: undefined reference to 's3c_irq_wake'
arch/arm/mach-exynos/built-in.o: In function 'exynos4_init_irq':
linux/arch/arm/mach-exynos/common.c:476: undefined reference to 's3c_irq_wake'
linux/arch/arm/mach-exynos/common.c:476: undefined reference to 's3c_irq_wake'
arch/arm/plat-samsung/built-in.o: In function 's3c_irqext_wake':
linux/arch/arm/plat-samsung/pm.c:144: undefined reference to 's3c_irqwake_eintallow'
linux/arch/arm/plat-samsung/pm.c:144: undefined reference to 's3c_irqwake_eintallow'
arch/arm/plat-samsung/built-in.o: In function 's3c_pm_enter':
linux/arch/arm/plat-samsung/pm.c:263: undefined reference to 's3c_irqwake_intallow'
linux/arch/arm/plat-samsung/pm.c:263: undefined reference to 's3c_irqwake_intallow'
linux/arch/arm/plat-samsung/pm.c:264: undefined reference to 's3c_irqwake_eintallow'
linux/arch/arm/plat-samsung/pm.c:264: undefined reference to 's3c_irqwake_eintallow'
linux/arch/arm/plat-samsung/pm.c:275: undefined reference to 's3c_pm_save_core'
linux/arch/arm/plat-samsung/pm.c:279: undefined reference to 's3c_pm_configure_extint'
linux/arch/arm/plat-samsung/pm.c:310: undefined reference to 's3c_pm_restore_core'
make: *** [vmlinux] Error 1

Signed-off-by: Amit Daniel Kachhap <amit.daniel@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/Makefile           | 2 +-
 arch/arm/mach-exynos/common.h           | 1 -
 arch/arm/mach-exynos/cpuidle.c          | 1 +
 arch/arm/plat-samsung/Kconfig           | 7 +++++++
 arch/arm/plat-samsung/Makefile          | 2 +-
 arch/arm/plat-samsung/include/plat/pm.h | 8 ++++----
 6 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
index e970a7a4e278..53696154aead 100644
--- a/arch/arm/mach-exynos/Makefile
+++ b/arch/arm/mach-exynos/Makefile
@@ -14,7 +14,7 @@ obj-				:=
 
 obj-$(CONFIG_ARCH_EXYNOS)	+= common.o
 
-obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_S5P_PM)		+= pm.o
 obj-$(CONFIG_PM_GENERIC_DOMAINS) += pm_domains.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 3e156bcddcb4..972490fc09d6 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -97,6 +97,5 @@ struct exynos_pmu_conf {
 };
 
 extern void exynos_sys_powerdown_conf(enum sys_powerdown mode);
-extern void s3c_cpu_resume(void);
 
 #endif /* __ARCH_ARM_MACH_EXYNOS_COMMON_H */
diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
index 17a18ff3d71e..225ee8431c72 100644
--- a/arch/arm/mach-exynos/cpuidle.c
+++ b/arch/arm/mach-exynos/cpuidle.c
@@ -25,6 +25,7 @@
 #include <mach/regs-pmu.h>
 
 #include <plat/cpu.h>
+#include <plat/pm.h>
 
 #include "common.h"
 
diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig
index 3dc5cbea86cc..a5b5ff6e68d2 100644
--- a/arch/arm/plat-samsung/Kconfig
+++ b/arch/arm/plat-samsung/Kconfig
@@ -29,6 +29,13 @@ config PLAT_S5P
 	help
 	  Base platform code for Samsung's S5P series SoC.
 
+config SAMSUNG_PM
+	bool
+	depends on PM && (PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5P64X0 || S5P_PM)
+	default y
+	help
+	  Base platform power management code for samsung code
+
 if PLAT_SAMSUNG
 
 # boot configurations
diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile
index 98d07d8fc7a7..199bbe304d02 100644
--- a/arch/arm/plat-samsung/Makefile
+++ b/arch/arm/plat-samsung/Makefile
@@ -51,7 +51,7 @@ obj-$(CONFIG_SAMSUNG_DMADEV)	+= dma-ops.o
 
 # PM support
 
-obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_SAMSUNG_PM)	+= pm.o
 obj-$(CONFIG_SAMSUNG_PM_GPIO)	+= pm-gpio.o
 obj-$(CONFIG_SAMSUNG_PM_CHECK)	+= pm-check.o
 
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index 5d47ca35cabd..6bc1a8f471e3 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -19,7 +19,7 @@
 
 struct device;
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_SAMSUNG_PM
 
 extern __init int s3c_pm_init(void);
 extern __init int s3c64xx_pm_init(void);
@@ -58,8 +58,6 @@ extern unsigned char pm_uart_udivslot;  /* true to save UART UDIVSLOT */
 
 /* from sleep.S */
 
-extern void s3c_cpu_resume(void);
-
 extern int s3c2410_cpu_suspend(unsigned long);
 
 /* sleep save info */
@@ -106,12 +104,14 @@ extern void s3c_pm_do_save(struct sleep_save *ptr, int count);
 extern void s3c_pm_do_restore(struct sleep_save *ptr, int count);
 extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count);
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_SAMSUNG_PM
 extern int s3c_irq_wake(struct irq_data *data, unsigned int state);
 extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
+extern void s3c_cpu_resume(void);
 #else
 #define s3c_irq_wake NULL
 #define s3c_irqext_wake NULL
+#define s3c_cpu_resume NULL
 #endif
 
 /* PM debug functions */

From e70308ec0e4bff344fcfdf160de40e1150552c5f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 24 Jul 2013 17:04:16 +1000
Subject: [PATCH 266/320] Revert "crypto: crct10dif - Wrap crc_t10dif function
 all to use crypto transform framework"

This reverts commits
    67822649d7305caf3dd50ed46c27b99c94eff996
    39761214eefc6b070f29402aa1165f24d789b3f7
    0b95a7f85718adcbba36407ef88bba0a7379ed03
    31d939625a9a20b1badd2d4e6bf6fd39fa523405
    2d31e518a42828df7877bca23a958627d60408bc

Unfortunately this change broke boot on some systems that used an
initrd which does not include the newly created crct10dif modules.
As these modules are required by sd_mod under certain configurations
this is a serious problem.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile                |   2 -
 arch/x86/crypto/crct10dif-pcl-asm_64.S  | 643 ------------------------
 arch/x86/crypto/crct10dif-pclmul_glue.c | 151 ------
 crypto/Kconfig                          |  19 -
 crypto/Makefile                         |   1 -
 crypto/crct10dif.c                      | 178 -------
 crypto/tcrypt.c                         |   8 -
 crypto/testmgr.c                        |  10 -
 crypto/testmgr.h                        |  33 --
 include/linux/crc-t10dif.h              |   4 -
 lib/Kconfig                             |   2 -
 lib/crc-t10dif.c                        |  73 +--
 12 files changed, 43 insertions(+), 1081 deletions(-)
 delete mode 100644 arch/x86/crypto/crct10dif-pcl-asm_64.S
 delete mode 100644 arch/x86/crypto/crct10dif-pclmul_glue.c
 delete mode 100644 crypto/crct10dif.c

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 7d6ba9db1be9..6c63c358a7e6 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,7 +27,6 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -82,4 +81,3 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
-crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
deleted file mode 100644
index 35e97569d05f..000000000000
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ /dev/null
@@ -1,643 +0,0 @@
-########################################################################
-# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-#
-# Copyright (c) 2013, Intel Corporation
-#
-# Authors:
-#     Erdinc Ozturk <erdinc.ozturk@intel.com>
-#     Vinodh Gopal <vinodh.gopal@intel.com>
-#     James Guilford <james.guilford@intel.com>
-#     Tim Chen <tim.c.chen@linux.intel.com>
-#
-# This software is available to you under a choice of one of two
-# licenses.  You may choose to be licensed under the terms of the GNU
-# General Public License (GPL) Version 2, available from the file
-# COPYING in the main directory of this source tree, or the
-# OpenIB.org BSD license below:
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the
-#   distribution.
-#
-# * Neither the name of the Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-#
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-#       Function API:
-#       UINT16 crc_t10dif_pcl(
-#               UINT16 init_crc, //initial CRC value, 16 bits
-#               const unsigned char *buf, //buffer pointer to calculate CRC on
-#               UINT64 len //buffer length in bytes (64-bit data)
-#       );
-#
-#       Reference paper titled "Fast CRC Computation for Generic
-#	Polynomials Using PCLMULQDQ Instruction"
-#       URL: http://www.intel.com/content/dam/www/public/us/en/documents
-#  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-#
-#
-
-#include <linux/linkage.h>
-
-.text
-
-#define        arg1 %rdi
-#define        arg2 %rsi
-#define        arg3 %rdx
-
-#define        arg1_low32 %edi
-
-ENTRY(crc_t10dif_pcl)
-.align 16
-
-	# adjust the 16-bit initial_crc value, scale it to 32 bits
-	shl	$16, arg1_low32
-
-	# Allocate Stack Space
-	mov     %rsp, %rcx
-	sub	$16*2, %rsp
-	# align stack to 16 byte boundary
-	and     $~(0x10 - 1), %rsp
-
-	# check if smaller than 256
-	cmp	$256, arg3
-
-	# for sizes less than 128, we can't fold 64B at a time...
-	jl	_less_than_128
-
-
-	# load the initial crc value
-	movd	arg1_low32, %xmm10	# initial crc
-
-	# crc value does not need to be byte-reflected, but it needs
-	# to be moved to the high part of the register.
-	# because data will be byte-reflected and will align with
-	# initial crc at correct place.
-	pslldq	$12, %xmm10
-
-	movdqa  SHUF_MASK(%rip), %xmm11
-	# receive the initial 64B data, xor the initial crc value
-	movdqu	16*0(arg2), %xmm0
-	movdqu	16*1(arg2), %xmm1
-	movdqu	16*2(arg2), %xmm2
-	movdqu	16*3(arg2), %xmm3
-	movdqu	16*4(arg2), %xmm4
-	movdqu	16*5(arg2), %xmm5
-	movdqu	16*6(arg2), %xmm6
-	movdqu	16*7(arg2), %xmm7
-
-	pshufb	%xmm11, %xmm0
-	# XOR the initial_crc value
-	pxor	%xmm10, %xmm0
-	pshufb	%xmm11, %xmm1
-	pshufb	%xmm11, %xmm2
-	pshufb	%xmm11, %xmm3
-	pshufb	%xmm11, %xmm4
-	pshufb	%xmm11, %xmm5
-	pshufb	%xmm11, %xmm6
-	pshufb	%xmm11, %xmm7
-
-	movdqa	rk3(%rip), %xmm10	#xmm10 has rk3 and rk4
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-
-	#################################################################
-	# we subtract 256 instead of 128 to save one instruction from the loop
-	sub	$256, arg3
-
-	# at this section of the code, there is 64*x+y (0<=y<64) bytes of
-	# buffer. The _fold_64_B_loop will fold 64B at a time
-	# until we have 64+y Bytes of buffer
-
-
-	# fold 64B at a time. This section of the code folds 4 xmm
-	# registers in parallel
-_fold_64_B_loop:
-
-	# update the buffer pointer
-	add	$128, arg2		#    buf += 64#
-
-	movdqu	16*0(arg2), %xmm9
-	movdqu	16*1(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm0, %xmm8
-	movdqa	%xmm1, %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm0
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm1
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm0
-	xorps	%xmm8 , %xmm0
-	pxor	%xmm12, %xmm1
-	xorps	%xmm13, %xmm1
-
-	movdqu	16*2(arg2), %xmm9
-	movdqu	16*3(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm2, %xmm8
-	movdqa	%xmm3, %xmm13
-	pclmulqdq	$0x0, %xmm10, %xmm2
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0, %xmm10, %xmm3
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm2
-	xorps	%xmm8 , %xmm2
-	pxor	%xmm12, %xmm3
-	xorps	%xmm13, %xmm3
-
-	movdqu	16*4(arg2), %xmm9
-	movdqu	16*5(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm4, %xmm8
-	movdqa	%xmm5, %xmm13
-	pclmulqdq	$0x0,  %xmm10, %xmm4
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0,  %xmm10, %xmm5
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 ,  %xmm4
-	xorps	%xmm8 ,  %xmm4
-	pxor	%xmm12,  %xmm5
-	xorps	%xmm13,  %xmm5
-
-	movdqu	16*6(arg2), %xmm9
-	movdqu	16*7(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm6 , %xmm8
-	movdqa	%xmm7 , %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm6
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm6
-	xorps	%xmm8 , %xmm6
-	pxor	%xmm12, %xmm7
-	xorps	%xmm13, %xmm7
-
-	sub	$128, arg3
-
-	# check if there is another 64B in the buffer to be able to fold
-	jge	_fold_64_B_loop
-	##################################################################
-
-
-	add	$128, arg2
-	# at this point, the buffer pointer is pointing at the last y Bytes
-	# of the buffer the 64B of folded data is in 4 of the xmm
-	# registers: xmm0, xmm1, xmm2, xmm3
-
-
-	# fold the 8 xmm registers to 1 xmm register with different constants
-
-	movdqa	rk9(%rip), %xmm10
-	movdqa	%xmm0, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm0
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm0, %xmm7
-
-	movdqa	rk11(%rip), %xmm10
-	movdqa	%xmm1, %xmm8
-	pclmulqdq	 $0x11, %xmm10, %xmm1
-	pclmulqdq	 $0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm1, %xmm7
-
-	movdqa	rk13(%rip), %xmm10
-	movdqa	%xmm2, %xmm8
-	pclmulqdq	 $0x11, %xmm10, %xmm2
-	pclmulqdq	 $0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-	movdqa	rk15(%rip), %xmm10
-	movdqa	%xmm3, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm3
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm3, %xmm7
-
-	movdqa	rk17(%rip), %xmm10
-	movdqa	%xmm4, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm4
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm4, %xmm7
-
-	movdqa	rk19(%rip), %xmm10
-	movdqa	%xmm5, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm5
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm5, %xmm7
-
-	movdqa	rk1(%rip), %xmm10	#xmm10 has rk1 and rk2
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-	movdqa	%xmm6, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm6
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm6, %xmm7
-
-
-	# instead of 64, we add 48 to the loop counter to save 1 instruction
-	# from the loop instead of a cmp instruction, we use the negative
-	# flag with the jl instruction
-	add	$128-16, arg3
-	jl	_final_reduction_for_128
-
-	# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
-	# and the rest is in memory. We can fold 16 bytes at a time if y>=16
-	# continue folding 16B at a time
-
-_16B_reduction_loop:
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	movdqu	(arg2), %xmm0
-	pshufb	%xmm11, %xmm0
-	pxor	%xmm0 , %xmm7
-	add	$16, arg2
-	sub	$16, arg3
-	# instead of a cmp instruction, we utilize the flags with the
-	# jge instruction equivalent of: cmp arg3, 16-16
-	# check if there is any more 16B in the buffer to be able to fold
-	jge	_16B_reduction_loop
-
-	#now we have 16+z bytes left to reduce, where 0<= z < 16.
-	#first, we reduce the data in the xmm7 register
-
-
-_final_reduction_for_128:
-	# check if any more data to fold. If not, compute the CRC of
-	# the final 128 bits
-	add	$16, arg3
-	je	_128_done
-
-	# here we are getting data that is less than 16 bytes.
-	# since we know that there was data before the pointer, we can
-	# offset the input pointer before the actual point, to receive
-	# exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
-	movdqa	%xmm7, %xmm2
-
-	movdqu	-16(arg2, arg3), %xmm1
-	pshufb	%xmm11, %xmm1
-
-	# get rid of the extra data that was loaded before
-	# load the shift constant
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	arg3, %rax
-	movdqu	(%rax), %xmm0
-
-	# shift xmm2 to the left by arg3 bytes
-	pshufb	%xmm0, %xmm2
-
-	# shift xmm7 to the right by 16-arg3 bytes
-	pxor	mask1(%rip), %xmm0
-	pshufb	%xmm0, %xmm7
-	pblendvb	%xmm2, %xmm1	#xmm0 is implicit
-
-	# fold 16 Bytes
-	movdqa	%xmm1, %xmm2
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-_128_done:
-	# compute crc of a 128-bit value
-	movdqa	rk5(%rip), %xmm10	# rk5 and rk6 in xmm10
-	movdqa	%xmm7, %xmm0
-
-	#64b fold
-	pclmulqdq	$0x1, %xmm10, %xmm7
-	pslldq	$8   ,  %xmm0
-	pxor	%xmm0,  %xmm7
-
-	#32b fold
-	movdqa	%xmm7, %xmm0
-
-	pand	mask2(%rip), %xmm0
-
-	psrldq	$12, %xmm7
-	pclmulqdq	$0x10, %xmm10, %xmm7
-	pxor	%xmm0, %xmm7
-
-	#barrett reduction
-_barrett:
-	movdqa	rk7(%rip), %xmm10	# rk7 and rk8 in xmm10
-	movdqa	%xmm7, %xmm0
-	pclmulqdq	$0x01, %xmm10, %xmm7
-	pslldq	$4, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm7
-
-	pslldq	$4, %xmm7
-	pxor	%xmm0, %xmm7
-	pextrd	$1, %xmm7, %eax
-
-_cleanup:
-	# scale the result back to 16 bits
-	shr	$16, %eax
-	mov     %rcx, %rsp
-	ret
-
-########################################################################
-
-.align 16
-_less_than_128:
-
-	# check if there is enough buffer to be able to fold 16B at a time
-	cmp	$32, arg3
-	jl	_less_than_32
-	movdqa  SHUF_MASK(%rip), %xmm11
-
-	# now if there is, load the constants
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0, %xmm7
-
-
-	# update the buffer pointer
-	add	$16, arg2
-
-	# update the counter. subtract 32 instead of 16 to save one
-	# instruction from the loop
-	sub	$32, arg3
-
-	jmp	_16B_reduction_loop
-
-
-.align 16
-_less_than_32:
-	# mov initial crc to the return value. this is necessary for
-	# zero-length buffers.
-	mov	arg1_low32, %eax
-	test	arg3, arg3
-	je	_cleanup
-
-	movdqa  SHUF_MASK(%rip), %xmm11
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-
-	cmp	$16, arg3
-	je	_exact_16_left
-	jl	_less_than_16_left
-
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-	add	$16, arg2
-	sub	$16, arg3
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-	jmp	_get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
-	# use stack space to load data less than 16 bytes, zero-out
-	# the 16B in memory first.
-
-	pxor	%xmm1, %xmm1
-	mov	%rsp, %r11
-	movdqa	%xmm1, (%r11)
-
-	cmp	$4, arg3
-	jl	_only_less_than_4
-
-	# backup the counter value
-	mov	arg3, %r9
-	cmp	$8, arg3
-	jl	_less_than_8_left
-
-	# load 8 Bytes
-	mov	(arg2), %rax
-	mov	%rax, (%r11)
-	add	$8, %r11
-	sub	$8, arg3
-	add	$8, arg2
-_less_than_8_left:
-
-	cmp	$4, arg3
-	jl	_less_than_4_left
-
-	# load 4 Bytes
-	mov	(arg2), %eax
-	mov	%eax, (%r11)
-	add	$4, %r11
-	sub	$4, arg3
-	add	$4, arg2
-_less_than_4_left:
-
-	cmp	$2, arg3
-	jl	_less_than_2_left
-
-	# load 2 Bytes
-	mov	(arg2), %ax
-	mov	%ax, (%r11)
-	add	$2, %r11
-	sub	$2, arg3
-	add	$2, arg2
-_less_than_2_left:
-	cmp     $1, arg3
-        jl      _zero_left
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-_zero_left:
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	# shl r9, 4
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	%r9, %rax
-	movdqu	(%rax), %xmm0
-	pxor	mask1(%rip), %xmm0
-
-	pshufb	%xmm0, %xmm7
-	jmp	_128_done
-
-.align 16
-_exact_16_left:
-	movdqu	(arg2), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	jmp	_128_done
-
-_only_less_than_4:
-	cmp	$3, arg3
-	jl	_only_less_than_3
-
-	# load 3 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	mov	2(arg2), %al
-	mov	%al, 2(%r11)
-
-	movdqa	 (%rsp), %xmm7
-	pshufb	 %xmm11, %xmm7
-	pxor	 %xmm0 , %xmm7  # xor the initial crc value
-
-	psrldq	$5, %xmm7
-
-	jmp	_barrett
-_only_less_than_3:
-	cmp	$2, arg3
-	jl	_only_less_than_2
-
-	# load 2 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	psrldq	$6, %xmm7
-
-	jmp	_barrett
-_only_less_than_2:
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	psrldq	$7, %xmm7
-
-	jmp	_barrett
-
-ENDPROC(crc_t10dif_pcl)
-
-.data
-
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-.align 16
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-.quad 0x2d56000000000000
-rk2:
-.quad 0x06df000000000000
-rk3:
-.quad 0x9d9d000000000000
-rk4:
-.quad 0x7cf5000000000000
-rk5:
-.quad 0x2d56000000000000
-rk6:
-.quad 0x1368000000000000
-rk7:
-.quad 0x00000001f65a57f8
-rk8:
-.quad 0x000000018bb70000
-
-rk9:
-.quad 0xceae000000000000
-rk10:
-.quad 0xbfd6000000000000
-rk11:
-.quad 0x1e16000000000000
-rk12:
-.quad 0x713c000000000000
-rk13:
-.quad 0xf7f9000000000000
-rk14:
-.quad 0x80a6000000000000
-rk15:
-.quad 0x044c000000000000
-rk16:
-.quad 0xe658000000000000
-rk17:
-.quad 0xad18000000000000
-rk18:
-.quad 0xa497000000000000
-rk19:
-.quad 0x6ee3000000000000
-rk20:
-.quad 0xe7b5000000000000
-
-
-
-mask1:
-.octa 0x80808080808080808080808080808080
-mask2:
-.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
-
-SHUF_MASK:
-.octa 0x000102030405060708090A0B0C0D0E0F
-
-pshufb_shf_table:
-# use these values for shift constants for the pshufb instruction
-# different alignments result in values as shown:
-#	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-#	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-#	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-#	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-#	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-#	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-#	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
-#	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
-#	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
-#	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
-#	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
-#	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
-#	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
-#	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
-#	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
-.octa 0x8f8e8d8c8b8a89888786858483828100
-.octa 0x000e0d0c0b0a09080706050403020100
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
deleted file mode 100644
index 7845d7fd54c0..000000000000
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
- *
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <asm/i387.h>
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
-				size_t len);
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
-		kernel_fpu_end();
-	} else
-		ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
-		kernel_fpu_end();
-	} else
-		*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crct10dif",
-		.cra_driver_name	=	"crct10dif-pclmul",
-		.cra_priority		=	200,
-		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
-	if (!x86_match_cpu(crct10dif_cpu_id))
-		return -ENODEV;
-
-	return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 904ffe838567..2754f2bf5d91 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -376,25 +376,6 @@ config CRYPTO_CRC32_PCLMUL
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
-config CRYPTO_CRCT10DIF
-	tristate "CRCT10DIF algorithm"
-	select CRYPTO_HASH
-	help
-	  CRC T10 Data Integrity Field computation is being cast as
-	  a crypto transform.  This allows for faster crc t10 diff
-	  transforms to be used if they are available.
-
-config CRYPTO_CRCT10DIF_PCLMUL
-	tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
-	depends on X86 && 64BIT && CRC_T10DIF
-	select CRYPTO_HASH
-	help
-	  For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
-	  CRC T10 DIF PCLMULQDQ computation can be hardware
-	  accelerated PCLMULQDQ instruction. This option will create
-	  'crct10dif-plcmul' module, which is faster when computing the
-	  crct10dif checksum as compared with the generic table implementation.
-
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL
diff --git a/crypto/Makefile b/crypto/Makefile
index 62af87df8729..a8e9b0fefbe9 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -83,7 +83,6 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c
deleted file mode 100644
index 92aca96d6b98..000000000000
--- a/crypto/crct10dif.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform
- *
- * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
- * Written by Martin K. Petersen <martin.petersen@oracle.com>
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/* Table generated using the following polynomium:
- * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
- * gt: 0x8bb7
- */
-static const __u16 t10_dif_crc_table[256] = {
-	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
-{
-	unsigned int i;
-
-	for (i = 0 ; i < len ; i++)
-		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
-
-	return crc;
-}
-EXPORT_SYMBOL(crc_t10dif_generic);
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crct10dif",
-		.cra_driver_name	=	"crct10dif-generic",
-		.cra_priority		=	100,
-		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static int __init crct10dif_mod_init(void)
-{
-	int ret;
-
-	ret = crypto_register_shash(&alg);
-	return ret;
-}
-
-static void __exit crct10dif_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_mod_init);
-module_exit(crct10dif_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation.");
-MODULE_LICENSE("GPL");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 25a5934f0e50..66d254ce0d11 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1174,10 +1174,6 @@ static int do_test(int m)
 		ret += tcrypt_test("ghash");
 		break;
 
-	case 47:
-		ret += tcrypt_test("crct10dif");
-		break;
-
 	case 100:
 		ret += tcrypt_test("hmac(md5)");
 		break;
@@ -1502,10 +1498,6 @@ static int do_test(int m)
 		test_hash_speed("crc32c", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
-	case 320:
-		test_hash_speed("crct10dif", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-
 	case 399:
 		break;
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 2f00607039e2..ecddf921a9db 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2045,16 +2045,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 				.count = CRC32C_TEST_VECTORS
 			}
 		}
-	}, {
-		.alg = "crct10dif",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = {
-				.vecs = crct10dif_tv_template,
-				.count = CRCT10DIF_TEST_VECTORS
-			}
-		}
 	}, {
 		.alg = "cryptd(__driver-cbc-aes-aesni)",
 		.test = alg_test_null,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 7d44aa3d6b44..1e701bc075b9 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -450,39 +450,6 @@ static struct hash_testvec rmd320_tv_template[] = {
 	}
 };
 
-#define CRCT10DIF_TEST_VECTORS	3
-static struct hash_testvec crct10dif_tv_template[] = {
-	{
-		.plaintext = "abc",
-		.psize  = 3,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\x3b\x44",
-#else
-		.digest = "\x44\x3b",
-#endif
-	}, {
-		.plaintext = "1234567890123456789012345678901234567890"
-			     "123456789012345678901234567890123456789",
-		.psize	= 79,
-#ifdef __LITTLE_ENDIAN
-		.digest	= "\x70\x4b",
-#else
-		.digest	= "\x4b\x70",
-#endif
-	}, {
-		.plaintext =
-		"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-		.psize  = 56,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\xe3\x9c",
-#else
-		.digest = "\x9c\xe3",
-#endif
-		.np     = 2,
-		.tap    = { 28, 28 }
-	}
-};
-
 /*
  * SHA1 test vectors  from from FIPS PUB 180-1
  * Long vector from CAVS 5.0
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index b3cb71f0d3b0..a9c96d865ee7 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -3,10 +3,6 @@
 
 #include <linux/types.h>
 
-#define CRC_T10DIF_DIGEST_SIZE 2
-#define CRC_T10DIF_BLOCK_SIZE 1
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
 __u16 crc_t10dif(unsigned char const *, size_t);
 
 #endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 339d5a4292f8..fe01d418b09a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -63,8 +63,6 @@ config CRC16
 
 config CRC_T10DIF
 	tristate "CRC calculation for the T10 Data Integrity Field"
-	select CRYPTO
-	select CRYPTO_CRCT10DIF
 	help
 	  This option is only needed if a module that's not in the
 	  kernel tree needs to calculate CRC checks for use with the
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index fe3428c07b47..fbbd66ed86cd 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -11,44 +11,57 @@
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/crc-t10dif.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <crypto/hash.h>
 
-static struct crypto_shash *crct10dif_tfm;
+/* Table generated using the following polynomium:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * gt: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
 
 __u16 crc_t10dif(const unsigned char *buffer, size_t len)
 {
-	struct {
-		struct shash_desc shash;
-		char ctx[2];
-	} desc;
-	int err;
+	__u16 crc = 0;
+	unsigned int i;
 
-	desc.shash.tfm = crct10dif_tfm;
-	desc.shash.flags = 0;
-	*(__u16 *)desc.ctx = 0;
+	for (i = 0 ; i < len ; i++)
+		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
 
-	err = crypto_shash_update(&desc.shash, buffer, len);
-	BUG_ON(err);
-
-	return *(__u16 *)desc.ctx;
+	return crc;
 }
 EXPORT_SYMBOL(crc_t10dif);
 
-static int __init crc_t10dif_mod_init(void)
-{
-	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
-	return PTR_RET(crct10dif_tfm);
-}
-
-static void __exit crc_t10dif_mod_fini(void)
-{
-	crypto_free_shash(crct10dif_tfm);
-}
-
-module_init(crc_t10dif_mod_init);
-module_exit(crc_t10dif_mod_fini);
-
 MODULE_DESCRIPTION("T10 DIF CRC calculation");
 MODULE_LICENSE("GPL");

From 7bdc84fb82627f34d9e13c37926f70fcde38e4c4 Mon Sep 17 00:00:00 2001
From: Yadwinder Singh Brar <yadi.brar@samsung.com>
Date: Wed, 24 Jul 2013 17:05:07 +0900
Subject: [PATCH 267/320] ARM: SAMSUNG: Save/restore only selected uart's
 registers

Basically this code gets executed only during debugging i.e when
DEBUG_LL & SAMSUNG_PM_DEBUG is on, so required only for UART used
for debugging. Since we are removing static iodesc entries for UARTs,
so now only the selected (CONFIG_DEBUG_S3C_UART) UART will be
ioremapped by the debug_ll_io_init() for DEBUG_LL, so save/restore
uart registers only for selected uart.

Signed-off-by: Yadwinder Singh Brar <yadi.brar@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/plat-samsung/pm.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c
index ea3613642451..d0c23010b693 100644
--- a/arch/arm/plat-samsung/pm.c
+++ b/arch/arm/plat-samsung/pm.c
@@ -80,7 +80,7 @@ unsigned char pm_uart_udivslot;
 
 #ifdef CONFIG_SAMSUNG_PM_DEBUG
 
-static struct pm_uart_save uart_save[CONFIG_SERIAL_SAMSUNG_UARTS];
+static struct pm_uart_save uart_save;
 
 static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save)
 {
@@ -101,11 +101,7 @@ static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save)
 
 static void s3c_pm_save_uarts(void)
 {
-	struct pm_uart_save *save = uart_save;
-	unsigned int uart;
-
-	for (uart = 0; uart < CONFIG_SERIAL_SAMSUNG_UARTS; uart++, save++)
-		s3c_pm_save_uart(uart, save);
+	s3c_pm_save_uart(CONFIG_DEBUG_S3C_UART, &uart_save);
 }
 
 static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save)
@@ -126,11 +122,7 @@ static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save)
 
 static void s3c_pm_restore_uarts(void)
 {
-	struct pm_uart_save *save = uart_save;
-	unsigned int uart;
-
-	for (uart = 0; uart < CONFIG_SERIAL_SAMSUNG_UARTS; uart++, save++)
-		s3c_pm_restore_uart(uart, save);
+	s3c_pm_restore_uart(CONFIG_DEBUG_S3C_UART, &uart_save);
 }
 #else
 static void s3c_pm_save_uarts(void) { }

From 7ed76e089a645a12c53bc35936574f710aa11549 Mon Sep 17 00:00:00 2001
From: Yadwinder Singh Brar <yadi.brar@samsung.com>
Date: Wed, 24 Jul 2013 17:05:32 +0900
Subject: [PATCH 268/320] ARM: EXYNOS: Fix low level debug support

Presently, using exynos_defconfig with CONFIG_DEBUG_LL and CONFIG_EARLY_PRIN
on, kernel is not booting, we are getting following:

[    0.000000] ------------[ cut here ]------------
[    0.000000] kernel BUG at mm/vmalloc.c:1134!
[    0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 3.11.0-rc1 #633
[    0.000000] task: c052ec48 ti: c0524000 task.ti: c0524000
[    0.000000] PC is at vm_area_add_early+0x54/0x94
[    0.000000] LR is at add_static_vm_early+0xc/0x60

Its because exynos[4/5]_map_io() function ioremaps a single 512KB memory
size for all the four uart ports which envelopes the mapping created by
debug_ll_io_init(), called earlier in exynos_init_io().

This patch removes iodesc entries for UART controller for all Samsung SoC's,
since now the Samsung uart driver does a ioremap during probe and any needed
iomapping for earlyprintk will be handled by debug_ll_io_init().

Tested on smdk4412 and smdk5250.

Signed-off-by: Yadwinder Singh Brar <yadi.brar@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/mach-exynos/common.c | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c
index 164685bd25c8..ba95e5db2501 100644
--- a/arch/arm/mach-exynos/common.c
+++ b/arch/arm/mach-exynos/common.c
@@ -58,7 +58,6 @@ static const char name_exynos5440[] = "EXYNOS5440";
 
 static void exynos4_map_io(void);
 static void exynos5_map_io(void);
-static void exynos5440_map_io(void);
 static int exynos_init(void);
 
 static struct cpu_table cpu_ids[] __initdata = {
@@ -95,7 +94,6 @@ static struct cpu_table cpu_ids[] __initdata = {
 	}, {
 		.idcode		= EXYNOS5440_SOC_ID,
 		.idmask		= EXYNOS5_SOC_MASK,
-		.map_io		= exynos5440_map_io,
 		.init		= exynos_init,
 		.name		= name_exynos5440,
 	},
@@ -149,11 +147,6 @@ static struct map_desc exynos4_iodesc[] __initdata = {
 		.pfn		= __phys_to_pfn(EXYNOS4_PA_GIC_DIST),
 		.length		= SZ_64K,
 		.type		= MT_DEVICE,
-	}, {
-		.virtual	= (unsigned long)S3C_VA_UART,
-		.pfn		= __phys_to_pfn(EXYNOS4_PA_UART),
-		.length		= SZ_512K,
-		.type		= MT_DEVICE,
 	}, {
 		.virtual	= (unsigned long)S5P_VA_CMU,
 		.pfn		= __phys_to_pfn(EXYNOS4_PA_CMU),
@@ -268,20 +261,6 @@ static struct map_desc exynos5_iodesc[] __initdata = {
 		.pfn		= __phys_to_pfn(EXYNOS5_PA_PMU),
 		.length		= SZ_64K,
 		.type		= MT_DEVICE,
-	}, {
-		.virtual	= (unsigned long)S3C_VA_UART,
-		.pfn		= __phys_to_pfn(EXYNOS5_PA_UART),
-		.length		= SZ_512K,
-		.type		= MT_DEVICE,
-	},
-};
-
-static struct map_desc exynos5440_iodesc0[] __initdata = {
-	{
-		.virtual	= (unsigned long)S3C_VA_UART,
-		.pfn		= __phys_to_pfn(EXYNOS5440_PA_UART0),
-		.length		= SZ_512K,
-		.type		= MT_DEVICE,
 	},
 };
 
@@ -388,11 +367,6 @@ static void __init exynos5_map_io(void)
 		iotable_init(exynos5250_iodesc, ARRAY_SIZE(exynos5250_iodesc));
 }
 
-static void __init exynos5440_map_io(void)
-{
-	iotable_init(exynos5440_iodesc0, ARRAY_SIZE(exynos5440_iodesc0));
-}
-
 void __init exynos_init_time(void)
 {
 	of_clk_init(NULL);

From 53302bf60d2c95ee42118008499a5f5de544fcc1 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Wed, 24 Jul 2013 18:55:27 +0900
Subject: [PATCH 269/320] ARM: EXYNOS: Update CONFIG_ARCH_NR_GPIO for Exynos

With the recent cleanup in Exynos platform code notably commits
17859bec ("ARM: EXYNOS: Do not select legacy Kconfig symbols any
more") and b9222210 ("ARM: EXYNOS: Remove mach/gpio.h"), the definition
of ARCH_NR_GPIOS got removed. This started causing problems on SoCs like
Exynos4412 which have more than the default number of GPIOs. Thus define
this number in KConfig file which takes care of current SoC requirements
and provides scope for GPIO expanders. Without this patch we get the
following errors during boot:

gpiochip_add: gpios 251..258 (gpv0) failed to register
samsung-pinctrl 106e0000.pinctrl: failed to register gpio_chip gpv0, error code: -22
samsung-pinctrl: probe of 106e0000.pinctrl failed with error -22

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Cc: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
---
 arch/arm/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ba412e02ec0c..37c0f4e978d4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1600,8 +1600,7 @@ config LOCAL_TIMERS
 config ARCH_NR_GPIO
 	int
 	default 1024 if ARCH_SHMOBILE || ARCH_TEGRA
-	default 512 if SOC_OMAP5
-	default 512 if ARCH_KEYSTONE
+	default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5
 	default 392 if ARCH_U8500
 	default 352 if ARCH_VT8500
 	default 288 if ARCH_SUNXI

From 56a678344273fd63f8ade26876283a2586a9bf3a Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 24 Jul 2013 15:27:35 +0200
Subject: [PATCH 270/320] ASoC: dapm: Fix return value of
 snd_soc_dapm_put_{volsw,enum_virt}()

The ALSA core expect the put callback of a control to return 1 if the value of
the control changed and 0 if it did not. Both snd_soc_dapm_put_volsw() and
snd_soc_dapm_put_enum_virt() currently always returns 0. For both functions we
already have a 'change' variable which either contains 1 or 0 depending on
whether the value has changed or not, so just return that.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 sound/soc/soc-dapm.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index b94190820e8c..bd16010441cc 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2733,7 +2733,7 @@ int snd_soc_dapm_put_volsw(struct snd_kcontrol *kcontrol,
 	}
 
 	mutex_unlock(&card->dapm_mutex);
-	return 0;
+	return change;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_put_volsw);
 
@@ -2861,7 +2861,6 @@ int snd_soc_dapm_put_enum_virt(struct snd_kcontrol *kcontrol,
 	struct soc_enum *e =
 		(struct soc_enum *)kcontrol->private_value;
 	int change;
-	int ret = 0;
 	int wi;
 
 	if (ucontrol->value.enumerated.item[0] >= e->max)
@@ -2881,7 +2880,7 @@ int snd_soc_dapm_put_enum_virt(struct snd_kcontrol *kcontrol,
 	}
 
 	mutex_unlock(&card->dapm_mutex);
-	return ret;
+	return change;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_virt);
 

From 649e9c70da6bfbeb563193a35d3424a5aa7c0d38 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:25:54 +0200
Subject: [PATCH 271/320] tracing: Introduce trace_create_cpu_file() and
 tracing_get_cpu()

Every "file_operations" used by tracing_init_debugfs_percpu is buggy.
f_op->open/etc does:

	1. struct trace_cpu *tc = inode->i_private;
	   struct trace_array *tr = tc->tr;

	2. trace_array_get(tr) or fail;

	3. do_something(tc);

But tc (and tr) can be already freed before trace_array_get() is called.
And it doesn't matter whether this file is per-cpu or it was created by
init_tracer_debugfs(), free_percpu() or kfree() are equally bad.

Note that even 1. is not safe, the freed memory can be unmapped. But even
if it was safe trace_array_get() can wrongly succeed if we also race with
the next new_instance_create() which can re-allocate the same tr, or tc
was overwritten and ->tr points to the valid tr. In this case 3. uses the
freed/reused memory.

Add the new trivial helper, trace_create_cpu_file() which simply calls
trace_create_file() and encodes "cpu" in "struct inode". Another helper,
tracing_get_cpu() will be used to read cpu_nr-or-RING_BUFFER_ALL_CPUS.

The patch abuses ->i_cdev to encode the number, it is never used unless
the file is S_ISCHR(). But we could use something else, say, i_bytes or
even ->d_fsdata. In any case this hack is hidden inside these 2 helpers,
it would be trivial to change them if needed.

This patch only changes tracing_init_debugfs_percpu() to use the new
trace_create_cpu_file(), the next patches will change file_operations.

Note: tracing_get_cpu(inode) is always safe but you can't trust the
result unless trace_array_get() was called, without trace_types_lock
which acts as a barrier it can wrongly return RING_BUFFER_ALL_CPUS.

Link: http://lkml.kernel.org/r/20130723152554.GA23710@redhat.com

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 50 +++++++++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3f2477713aca..cfff63c2148a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2843,6 +2843,17 @@ static int s_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+/*
+ * Should be used after trace_array_get(), trace_types_lock
+ * ensures that i_cdev was already initialized.
+ */
+static inline int tracing_get_cpu(struct inode *inode)
+{
+	if (inode->i_cdev) /* See trace_create_cpu_file() */
+		return (long)inode->i_cdev - 1;
+	return RING_BUFFER_ALL_CPUS;
+}
+
 static const struct seq_operations tracer_seq_ops = {
 	.start		= s_start,
 	.next		= s_next,
@@ -5529,6 +5540,17 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
 	return tr->percpu_dir;
 }
 
+static struct dentry *
+trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
+		      void *data, long cpu, const struct file_operations *fops)
+{
+	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
+
+	if (ret) /* See tracing_get_cpu() */
+		ret->d_inode->i_cdev = (void *)(cpu + 1);
+	return ret;
+}
+
 static void
 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 {
@@ -5548,28 +5570,28 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 	}
 
 	/* per cpu trace_pipe */
-	trace_create_file("trace_pipe", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_pipe_fops);
+	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
+				&data->trace_cpu, cpu, &tracing_pipe_fops);
 
 	/* per cpu trace */
-	trace_create_file("trace", 0644, d_cpu,
-			(void *)&data->trace_cpu, &tracing_fops);
+	trace_create_cpu_file("trace", 0644, d_cpu,
+				&data->trace_cpu, cpu, &tracing_fops);
 
-	trace_create_file("trace_pipe_raw", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_buffers_fops);
+	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
+				&data->trace_cpu, cpu, &tracing_buffers_fops);
 
-	trace_create_file("stats", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_stats_fops);
+	trace_create_cpu_file("stats", 0444, d_cpu,
+				&data->trace_cpu, cpu, &tracing_stats_fops);
 
-	trace_create_file("buffer_size_kb", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_entries_fops);
+	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
+				&data->trace_cpu, cpu, &tracing_entries_fops);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-	trace_create_file("snapshot", 0644, d_cpu,
-			  (void *)&data->trace_cpu, &snapshot_fops);
+	trace_create_cpu_file("snapshot", 0644, d_cpu,
+				&data->trace_cpu, cpu, &snapshot_fops);
 
-	trace_create_file("snapshot_raw", 0444, d_cpu,
-			(void *)&data->trace_cpu, &snapshot_raw_fops);
+	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
+				&data->trace_cpu, cpu, &snapshot_raw_fops);
 #endif
 }
 

From 15544209cb0b5312e5220a9337a1fe61d1a1f2d9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:25:57 +0200
Subject: [PATCH 272/320] tracing: Change tracing_pipe_fops() to rely on
 tracing_get_cpu()

tracing_open_pipe() is racy, the memory inode->i_private points to
can be already freed.

Change debugfs_create_file("trace_pipe", data) callers to to pass
"data = tr", tracing_open_pipe() can use tracing_get_cpu().

Link: http://lkml.kernel.org/r/20130723152557.GA23717@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cfff63c2148a..51a99ef2a6e5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3959,8 +3959,7 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
 
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	int ret = 0;
 
@@ -4006,9 +4005,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 	if (trace_clocks[tr->clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	iter->cpu_file = tc->cpu;
-	iter->tr = tc->tr;
-	iter->trace_buffer = &tc->tr->trace_buffer;
+	iter->tr = tr;
+	iter->trace_buffer = &tr->trace_buffer;
+	iter->cpu_file = tracing_get_cpu(inode);
 	mutex_init(&iter->mutex);
 	filp->private_data = iter;
 
@@ -4031,8 +4030,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 
 	mutex_lock(&trace_types_lock);
 
@@ -5571,7 +5569,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 
 	/* per cpu trace_pipe */
 	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
-				&data->trace_cpu, cpu, &tracing_pipe_fops);
+				tr, cpu, &tracing_pipe_fops);
 
 	/* per cpu trace */
 	trace_create_cpu_file("trace", 0644, d_cpu,
@@ -6157,7 +6155,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 			(void *)&tr->trace_cpu, &tracing_fops);
 
 	trace_create_file("trace_pipe", 0444, d_tracer,
-			(void *)&tr->trace_cpu, &tracing_pipe_fops);
+			  tr, &tracing_pipe_fops);
 
 	trace_create_file("buffer_size_kb", 0644, d_tracer,
 			(void *)&tr->trace_cpu, &tracing_entries_fops);

From 46ef2be0d1d5ccea0c41bb606143586daadd537c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:26:00 +0200
Subject: [PATCH 273/320] tracing: Change tracing_buffers_fops to rely on
 tracing_get_cpu()

tracing_buffers_open() is racy, the memory inode->i_private points
to can be already freed.

Change debugfs_create_file("trace_pipe_raw", data) caller to pass
"data = tr", tracing_buffers_open() can use tracing_get_cpu().

Change debugfs_create_file("snapshot_raw_fops", data) caller too,
this file uses tracing_buffers_open/release.

Link: http://lkml.kernel.org/r/20130723152600.GA23720@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 51a99ef2a6e5..30c058a56ffb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4949,8 +4949,7 @@ static const struct file_operations snapshot_raw_fops = {
 
 static int tracing_buffers_open(struct inode *inode, struct file *filp)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct ftrace_buffer_info *info;
 	int ret;
 
@@ -4969,7 +4968,7 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
 	mutex_lock(&trace_types_lock);
 
 	info->iter.tr		= tr;
-	info->iter.cpu_file	= tc->cpu;
+	info->iter.cpu_file	= tracing_get_cpu(inode);
 	info->iter.trace	= tr->current_trace;
 	info->iter.trace_buffer = &tr->trace_buffer;
 	info->spare		= NULL;
@@ -5576,7 +5575,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 				&data->trace_cpu, cpu, &tracing_fops);
 
 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
-				&data->trace_cpu, cpu, &tracing_buffers_fops);
+				tr, cpu, &tracing_buffers_fops);
 
 	trace_create_cpu_file("stats", 0444, d_cpu,
 				&data->trace_cpu, cpu, &tracing_stats_fops);
@@ -5589,7 +5588,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 				&data->trace_cpu, cpu, &snapshot_fops);
 
 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
-				&data->trace_cpu, cpu, &snapshot_raw_fops);
+				tr, cpu, &snapshot_raw_fops);
 #endif
 }
 

From 4d3435b8a4c3357695e09c5e7a3bf73a19fca5b0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:26:03 +0200
Subject: [PATCH 274/320] tracing: Change tracing_stats_fops to rely on
 tracing_get_cpu()

tracing_open_generic_tc() is racy, the memory inode->i_private
points to can be already freed.

1. Change one of its users, tracing_stats_fops, to use
   tracing_*_generic_tr() instead.

2. Change trace_create_cpu_file("stats", data) to pass "data = tr".

3. Change tracing_stats_read() to use tracing_get_cpu().

Link: http://lkml.kernel.org/r/20130723152603.GA23727@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 30c058a56ffb..e29dc8f69aac 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2982,7 +2982,6 @@ static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
 	filp->private_data = inode->i_private;
 
 	return 0;
-	
 }
 
 static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
@@ -5285,14 +5284,14 @@ static ssize_t
 tracing_stats_read(struct file *filp, char __user *ubuf,
 		   size_t count, loff_t *ppos)
 {
-	struct trace_cpu *tc = filp->private_data;
-	struct trace_array *tr = tc->tr;
+	struct inode *inode = file_inode(filp);
+	struct trace_array *tr = inode->i_private;
 	struct trace_buffer *trace_buf = &tr->trace_buffer;
+	int cpu = tracing_get_cpu(inode);
 	struct trace_seq *s;
 	unsigned long cnt;
 	unsigned long long t;
 	unsigned long usec_rem;
-	int cpu = tc->cpu;
 
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
@@ -5345,10 +5344,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
 }
 
 static const struct file_operations tracing_stats_fops = {
-	.open		= tracing_open_generic_tc,
+	.open		= tracing_open_generic_tr,
 	.read		= tracing_stats_read,
 	.llseek		= generic_file_llseek,
-	.release	= tracing_release_generic_tc,
+	.release	= tracing_release_generic_tr,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -5578,7 +5577,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 				tr, cpu, &tracing_buffers_fops);
 
 	trace_create_cpu_file("stats", 0444, d_cpu,
-				&data->trace_cpu, cpu, &tracing_stats_fops);
+				tr, cpu, &tracing_stats_fops);
 
 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
 				&data->trace_cpu, cpu, &tracing_entries_fops);

From 0bc392ee46d0fd8e6b678457ef71f074f19a03c5 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:26:06 +0200
Subject: [PATCH 275/320] tracing: Change tracing_entries_fops to rely on
 tracing_get_cpu()

tracing_open_generic_tc() is racy, the memory inode->i_private
points to can be already freed.

1. Change its last user, tracing_entries_fops, to use
   tracing_*_generic_tr() instead.

2. Change debugfs_create_file("buffer_size_kb", data) callers
   to pass "data = tr".

3. Change tracing_entries_read() and tracing_entries_write() to
   use tracing_get_cpu().

4. Kill the no longer used tracing_open_generic_tc() and
   tracing_release_generic_tc().

Link: http://lkml.kernel.org/r/20130723152606.GA23730@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 49 +++++++++++---------------------------------
 1 file changed, 12 insertions(+), 37 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e29dc8f69aac..68b46851666f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2984,23 +2984,6 @@ static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
-{
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
-
-	if (tracing_disabled)
-		return -ENODEV;
-
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
-
-	filp->private_data = inode->i_private;
-
-	return 0;
-	
-}
-
 static int tracing_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
@@ -3054,15 +3037,6 @@ static int tracing_release_generic_tr(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int tracing_release_generic_tc(struct inode *inode, struct file *file)
-{
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
-
-	trace_array_put(tr);
-	return 0;
-}
-
 static int tracing_single_release_tr(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
@@ -4382,15 +4356,16 @@ static ssize_t
 tracing_entries_read(struct file *filp, char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
-	struct trace_cpu *tc = filp->private_data;
-	struct trace_array *tr = tc->tr;
+	struct inode *inode = file_inode(filp);
+	struct trace_array *tr = inode->i_private;
+	int cpu = tracing_get_cpu(inode);
 	char buf[64];
 	int r = 0;
 	ssize_t ret;
 
 	mutex_lock(&trace_types_lock);
 
-	if (tc->cpu == RING_BUFFER_ALL_CPUS) {
+	if (cpu == RING_BUFFER_ALL_CPUS) {
 		int cpu, buf_size_same;
 		unsigned long size;
 
@@ -4417,7 +4392,7 @@ tracing_entries_read(struct file *filp, char __user *ubuf,
 		} else
 			r = sprintf(buf, "X\n");
 	} else
-		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
+		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
 
 	mutex_unlock(&trace_types_lock);
 
@@ -4429,7 +4404,8 @@ static ssize_t
 tracing_entries_write(struct file *filp, const char __user *ubuf,
 		      size_t cnt, loff_t *ppos)
 {
-	struct trace_cpu *tc = filp->private_data;
+	struct inode *inode = file_inode(filp);
+	struct trace_array *tr = inode->i_private;
 	unsigned long val;
 	int ret;
 
@@ -4443,8 +4419,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
 
 	/* value is in KB */
 	val <<= 10;
-
-	ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
+	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
 	if (ret < 0)
 		return ret;
 
@@ -4892,11 +4867,11 @@ static const struct file_operations tracing_pipe_fops = {
 };
 
 static const struct file_operations tracing_entries_fops = {
-	.open		= tracing_open_generic_tc,
+	.open		= tracing_open_generic_tr,
 	.read		= tracing_entries_read,
 	.write		= tracing_entries_write,
 	.llseek		= generic_file_llseek,
-	.release	= tracing_release_generic_tc,
+	.release	= tracing_release_generic_tr,
 };
 
 static const struct file_operations tracing_total_entries_fops = {
@@ -5580,7 +5555,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 				tr, cpu, &tracing_stats_fops);
 
 	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
-				&data->trace_cpu, cpu, &tracing_entries_fops);
+				tr, cpu, &tracing_entries_fops);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 	trace_create_cpu_file("snapshot", 0644, d_cpu,
@@ -6156,7 +6131,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 			  tr, &tracing_pipe_fops);
 
 	trace_create_file("buffer_size_kb", 0644, d_tracer,
-			(void *)&tr->trace_cpu, &tracing_entries_fops);
+			  tr, &tracing_entries_fops);
 
 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
 			  tr, &tracing_total_entries_fops);

From 6484c71cbc170634fa131b6d022d86d61686b88b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:26:10 +0200
Subject: [PATCH 276/320] tracing: Change tracing_fops/snapshot_fops to rely on
 tracing_get_cpu()

tracing_open() and tracing_snapshot_open() are racy, the memory
inode->i_private points to can be already freed.

Convert these last users of "inode->i_private == trace_cpu" to
use "i_private = trace_array" and rely on tracing_get_cpu().

v2: incorporate the fix from Steven, tracing_release() must not
    blindly dereference file->private_data unless we know that
    the file was opened for reading.

Link: http://lkml.kernel.org/r/20130723152610.GA23737@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 50 +++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 68b46851666f..dd7780ddde08 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2862,9 +2862,9 @@ static const struct seq_operations tracer_seq_ops = {
 };
 
 static struct trace_iterator *
-__tracing_open(struct trace_array *tr, struct trace_cpu *tc,
-	       struct inode *inode, struct file *file, bool snapshot)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	int cpu;
 
@@ -2905,8 +2905,8 @@ __tracing_open(struct trace_array *tr, struct trace_cpu *tc,
 		iter->trace_buffer = &tr->trace_buffer;
 	iter->snapshot = snapshot;
 	iter->pos = -1;
+	iter->cpu_file = tracing_get_cpu(inode);
 	mutex_init(&iter->mutex);
-	iter->cpu_file = tc->cpu;
 
 	/* Notify the tracer early; before we stop tracing. */
 	if (iter->trace && iter->trace->open)
@@ -2986,22 +2986,18 @@ static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
 
 static int tracing_release(struct inode *inode, struct file *file)
 {
+	struct trace_array *tr = inode->i_private;
 	struct seq_file *m = file->private_data;
 	struct trace_iterator *iter;
-	struct trace_array *tr;
 	int cpu;
 
-	/* Writes do not use seq_file, need to grab tr from inode */
 	if (!(file->f_mode & FMODE_READ)) {
-		struct trace_cpu *tc = inode->i_private;
-
-		trace_array_put(tc->tr);
+		trace_array_put(tr);
 		return 0;
 	}
 
+	/* Writes do not use seq_file */
 	iter = m->private;
-	tr = iter->tr;
-
 	mutex_lock(&trace_types_lock);
 
 	for_each_tracing_cpu(cpu) {
@@ -3048,8 +3044,7 @@ static int tracing_single_release_tr(struct inode *inode, struct file *file)
 
 static int tracing_open(struct inode *inode, struct file *file)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	int ret = 0;
 
@@ -3057,16 +3052,17 @@ static int tracing_open(struct inode *inode, struct file *file)
 		return -ENODEV;
 
 	/* If this file was open for write, then erase contents */
-	if ((file->f_mode & FMODE_WRITE) &&
-	    (file->f_flags & O_TRUNC)) {
-		if (tc->cpu == RING_BUFFER_ALL_CPUS)
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		int cpu = tracing_get_cpu(inode);
+
+		if (cpu == RING_BUFFER_ALL_CPUS)
 			tracing_reset_online_cpus(&tr->trace_buffer);
 		else
-			tracing_reset(&tr->trace_buffer, tc->cpu);
+			tracing_reset(&tr->trace_buffer, cpu);
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(tr, tc, inode, file, false);
+		iter = __tracing_open(inode, file, false);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -4680,8 +4676,7 @@ struct ftrace_buffer_info {
 #ifdef CONFIG_TRACER_SNAPSHOT
 static int tracing_snapshot_open(struct inode *inode, struct file *file)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	struct seq_file *m;
 	int ret = 0;
@@ -4690,7 +4685,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
 		return -ENODEV;
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(tr, tc, inode, file, true);
+		iter = __tracing_open(inode, file, true);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 	} else {
@@ -4707,8 +4702,8 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
 		ret = 0;
 
 		iter->tr = tr;
-		iter->trace_buffer = &tc->tr->max_buffer;
-		iter->cpu_file = tc->cpu;
+		iter->trace_buffer = &tr->max_buffer;
+		iter->cpu_file = tracing_get_cpu(inode);
 		m->private = iter;
 		file->private_data = m;
 	}
@@ -5525,7 +5520,6 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
 static void
 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 {
-	struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
 	struct dentry *d_cpu;
 	char cpu_dir[30]; /* 30 characters should be more than enough */
@@ -5546,7 +5540,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 
 	/* per cpu trace */
 	trace_create_cpu_file("trace", 0644, d_cpu,
-				&data->trace_cpu, cpu, &tracing_fops);
+				tr, cpu, &tracing_fops);
 
 	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
 				tr, cpu, &tracing_buffers_fops);
@@ -5559,7 +5553,7 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 	trace_create_cpu_file("snapshot", 0644, d_cpu,
-				&data->trace_cpu, cpu, &snapshot_fops);
+				tr, cpu, &snapshot_fops);
 
 	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
 				tr, cpu, &snapshot_raw_fops);
@@ -6125,7 +6119,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 			  tr, &tracing_iter_fops);
 
 	trace_create_file("trace", 0644, d_tracer,
-			(void *)&tr->trace_cpu, &tracing_fops);
+			  tr, &tracing_fops);
 
 	trace_create_file("trace_pipe", 0444, d_tracer,
 			  tr, &tracing_pipe_fops);
@@ -6146,11 +6140,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 			  &trace_clock_fops);
 
 	trace_create_file("tracing_on", 0644, d_tracer,
-			    tr, &rb_simple_fops);
+			  tr, &rb_simple_fops);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 	trace_create_file("snapshot", 0644, d_tracer,
-			  (void *)&tr->trace_cpu, &snapshot_fops);
+			  tr, &snapshot_fops);
 #endif
 
 	for_each_tracing_cpu(cpu)

From 9c01fe4593db123c5a72dc36f0400f776e92c954 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 23 Jul 2013 17:26:13 +0200
Subject: [PATCH 277/320] tracing: Kill trace_cpu struct/members

After the previous changes trace_array_cpu->trace_cpu and
trace_array->trace_cpu becomes write-only. Remove these members
and kill "struct trace_cpu" as well.

As a side effect this also removes memset(per_cpu_memory, 0).
It was not needed, alloc_percpu() returns zero-filled memory.

Link: http://lkml.kernel.org/r/20130723152613.GA23741@redhat.com

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 21 ---------------------
 kernel/trace/trace.h |  8 --------
 2 files changed, 29 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dd7780ddde08..69cba470ea96 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5865,17 +5865,6 @@ struct dentry *trace_instance_dir;
 static void
 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
 
-static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
-{
-	int cpu;
-
-	for_each_tracing_cpu(cpu) {
-		memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
-		per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
-		per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
-	}
-}
-
 static int
 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
 {
@@ -5893,8 +5882,6 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
 		return -ENOMEM;
 	}
 
-	init_trace_buffers(tr, buf);
-
 	/* Allocate the first page for all buffers */
 	set_buffer_entries(&tr->trace_buffer,
 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
@@ -5961,10 +5948,6 @@ static int new_instance_create(const char *name)
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
 
-	/* Holder for file callbacks */
-	tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
-	tr->trace_cpu.tr = tr;
-
 	tr->dir = debugfs_create_dir(name, trace_instance_dir);
 	if (!tr->dir)
 		goto out_free_tr;
@@ -6438,10 +6421,6 @@ __init static int tracer_alloc_buffers(void)
 
 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
 
-	/* Holder for file callbacks */
-	global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
-	global_trace.trace_cpu.tr = &global_trace;
-
 	INIT_LIST_HEAD(&global_trace.systems);
 	INIT_LIST_HEAD(&global_trace.events);
 	list_add(&global_trace.list, &ftrace_trace_arrays);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e7d643b8a907..afaae41b0a02 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -130,19 +130,12 @@ enum trace_flag_type {
 
 struct trace_array;
 
-struct trace_cpu {
-	struct trace_array	*tr;
-	struct dentry		*dir;
-	int			cpu;
-};
-
 /*
  * The CPU trace array - it consists of thousands of trace entries
  * plus some other descriptor data: (for example which task started
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	struct trace_cpu	trace_cpu;
 	atomic_t		disabled;
 	void			*buffer_page;	/* ring buffer spare */
 
@@ -196,7 +189,6 @@ struct trace_array {
 	bool			allocated_snapshot;
 #endif
 	int			buffer_disabled;
-	struct trace_cpu	trace_cpu;	/* place holder */
 #ifdef CONFIG_FTRACE_SYSCALLS
 	int			sys_refcount_enter;
 	int			sys_refcount_exit;

From 195a8afc7ac962f8da795549fe38e825f1372b0d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 23 Jul 2013 22:06:15 -0400
Subject: [PATCH 278/320] ftrace: Add check for NULL regs if ops has SAVE_REGS
 set

If a ftrace ops is registered with the SAVE_REGS flag set, and there's
already a ops registered to one of its functions but without the
SAVE_REGS flag, there's a small race window where the SAVE_REGS ops gets
added to the list of callbacks to call for that function before the
callback trampoline gets set to save the regs.

The problem is, the function is not currently saving regs, which opens
a small race window where the ops that is expecting regs to be passed
to it, wont. This can cause a crash if the callback were to reference
the regs, as the SAVE_REGS guarantees that regs will be set.

To fix this, we add a check in the loop case where it checks if the ops
has the SAVE_REGS flag set, and if so, it will ignore it if regs is
not set.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 67708f46baae..8ce9eefc5bb4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1441,12 +1441,22 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
  * the hashes are freed with call_rcu_sched().
  */
 static int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 {
 	struct ftrace_hash *filter_hash;
 	struct ftrace_hash *notrace_hash;
 	int ret;
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	/*
+	 * There's a small race when adding ops that the ftrace handler
+	 * that wants regs, may be called without them. We can not
+	 * allow that handler to be called if regs is NULL.
+	 */
+	if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS))
+		return 0;
+#endif
+
 	filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
 	notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
 
@@ -4218,7 +4228,7 @@ static inline void ftrace_startup_enable(int command) { }
 # define ftrace_shutdown_sysctl()	do { } while (0)
 
 static inline int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 {
 	return 1;
 }
@@ -4241,7 +4251,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	do_for_each_ftrace_op(op, ftrace_control_list) {
 		if (!(op->flags & FTRACE_OPS_FL_STUB) &&
 		    !ftrace_function_local_disabled(op) &&
-		    ftrace_ops_test(op, ip))
+		    ftrace_ops_test(op, ip, regs))
 			op->func(ip, parent_ip, op, regs);
 	} while_for_each_ftrace_op(op);
 	trace_recursion_clear(TRACE_CONTROL_BIT);
@@ -4274,7 +4284,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (ftrace_ops_test(op, ip))
+		if (ftrace_ops_test(op, ip, regs))
 			op->func(ip, parent_ip, op, regs);
 	} while_for_each_ftrace_op(op);
 	preempt_enable_notrace();

From 6a6c21ef487be47b300a0b24cd6afeb69d8b9a1a Mon Sep 17 00:00:00 2001
From: Richard Zhu <r65037@freescale.com>
Date: Wed, 24 Jul 2013 14:15:28 +0800
Subject: [PATCH 279/320] ARM: imx6q: update the sata bits definitions of gpr13

Replace the SATA_PHY_# by the more readable definitons.

tj: Being routed through libata branch to enable implementation of
    ahci_imx.

Signed-off-by: Richard Zhu <r65037@freescale.com>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 121 ++++++++++++++------
 1 file changed, 84 insertions(+), 37 deletions(-)

diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index dab34a1deb2c..e235251f1ba9 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -279,41 +279,88 @@
 #define IMX6Q_GPR13_CAN2_STOP_REQ		BIT(29)
 #define IMX6Q_GPR13_CAN1_STOP_REQ		BIT(28)
 #define IMX6Q_GPR13_ENET_STOP_REQ		BIT(27)
-#define IMX6Q_GPR13_SATA_PHY_8_MASK		(0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_0_5_DB		(0x0 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_0_DB		(0x1 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_5_DB		(0x2 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_0_DB		(0x3 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_5_DB		(0x4 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_0_DB		(0x5 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_5_DB		(0x6 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_4_0_DB		(0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_7_MASK		(0x1f << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1I		(0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1M		(0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1X		(0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2I		(0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2M		(0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2X		(0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_6_MASK		(0x7 << 16)
-#define IMX6Q_GPR13_SATA_SPEED_MASK		BIT(15)
-#define IMX6Q_GPR13_SATA_SPEED_1P5G		0x0
-#define IMX6Q_GPR13_SATA_SPEED_3P0G		BIT(15)
-#define IMX6Q_GPR13_SATA_PHY_5			BIT(14)
-#define IMX6Q_GPR13_SATA_PHY_4_MASK		(0x7 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_16_16		(0x0 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_14_16		(0x1 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_12_16		(0x2 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_10_16		(0x3 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_9_16		(0x4 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_8_16		(0x5 << 11)
-#define IMX6Q_GPR13_SATA_PHY_3_MASK		(0xf << 7)
-#define IMX6Q_GPR13_SATA_PHY_3_OFF		0x7
-#define IMX6Q_GPR13_SATA_PHY_2_MASK		(0x1f << 2)
-#define IMX6Q_GPR13_SATA_PHY_2_OFF		0x2
-#define IMX6Q_GPR13_SATA_PHY_1_MASK		(0x3 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_FAST		(0x0 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_MED		(0x1 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_SLOW		(0x2 << 0)
-
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK		(0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_0_5_DB	(0x0 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_0_DB	(0x1 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_5_DB	(0x2 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_0_DB	(0x3 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_5_DB	(0x4 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB	(0x5 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_5_DB	(0x6 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_4_0_DB	(0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK	(0x1f << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1I	(0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1M	(0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1X	(0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2I	(0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M	(0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2X	(0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK	(0x7 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_1F	(0x0 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_2F	(0x1 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_4F	(0x2 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F	(0x3 << 16)
+#define IMX6Q_GPR13_SATA_SPD_MODE_MASK		BIT(15)
+#define IMX6Q_GPR13_SATA_SPD_MODE_1P5G		0x0
+#define IMX6Q_GPR13_SATA_SPD_MODE_3P0G		BIT(15)
+#define IMX6Q_GPR13_SATA_MPLL_SS_EN		BIT(14)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_MASK		(0x7 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_16_16		(0x0 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_14_16		(0x1 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_12_16		(0x2 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_10_16		(0x3 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_9_16		(0x4 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_8_16		(0x5 << 11)
+#define IMX6Q_GPR13_SATA_TX_BOOST_MASK		(0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_00_DB	(0x0 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_37_DB	(0x1 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_74_DB	(0x2 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_11_DB	(0x3 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_48_DB	(0x4 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_85_DB	(0x5 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_22_DB	(0x6 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_59_DB	(0x7 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_96_DB	(0x8 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB	(0x9 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_70_DB	(0xa << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_07_DB	(0xb << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_44_DB	(0xc << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_81_DB	(0xd << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_28_DB	(0xe << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_75_DB	(0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_LVL_MASK		(0x1f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_937_V		(0x00 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_947_V		(0x01 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_957_V		(0x02 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_966_V		(0x03 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_976_V		(0x04 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_986_V		(0x05 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_996_V		(0x06 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_005_V		(0x07 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_015_V		(0x08 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_025_V		(0x09 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_035_V		(0x0a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_045_V		(0x0b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_054_V		(0x0c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_064_V		(0x0d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_074_V		(0x0e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_084_V		(0x0f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_094_V		(0x10 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_104_V		(0x11 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_113_V		(0x12 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_123_V		(0x13 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_133_V		(0x14 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_143_V		(0x15 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_152_V		(0x16 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_162_V		(0x17 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_172_V		(0x18 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_182_V		(0x19 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_191_V		(0x1a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_201_V		(0x1b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_211_V		(0x1c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_221_V		(0x1d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_230_V		(0x1e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_240_V		(0x1f << 2)
+#define IMX6Q_GPR13_SATA_MPLL_CLK_EN		BIT(1)
+#define IMX6Q_GPR13_SATA_TX_EDGE_RATE		BIT(0)
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */

From 9e54eae23bc9cca0d8a955018c35b1250e09a73a Mon Sep 17 00:00:00 2001
From: Richard Zhu <r65037@freescale.com>
Date: Wed, 24 Jul 2013 14:15:29 +0800
Subject: [PATCH 280/320] ahci_imx: add ahci sata support on imx platforms

imx6q contains one Synopsys AHCI SATA controller, But it can't share
ahci_platform driver with other controllers because there are some
misalignments of the generic AHCI controller - the bits definitions of
the HBA registers, the Vendor Specific registers, the AHCI PHY clock
and the AHCI signals adjustment window(GPR13 register).

 - CAP_SSS(bit20) of the HOST_CAP is writable, default value is '0',
   should be configured to be '1'

 - bit0 (only one AHCI SATA port on imx6q) of the HOST_PORTS_IMPL
   should be set to be '1'.(default 0)

 - One Vendor Specific register HOST_TIMER1MS(offset:0xe0) should be
   configured regarding to the frequency of AHB bus clock.

 - Configurations of the AHCI PHY clock, and the signal parameters of
   the GPR13

Setup its own ahci sata driver, contained the imx6q specific
initialized codes, re-use the generic ahci_platform driver, and keep
the generic ahci_platform driver clean as much as possible.

tj: patch description reformatted

Signed-off-by: Richard Zhu <r65037@freescale.com>
Reviewed-by: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/Kconfig    |   9 ++
 drivers/ata/Makefile   |   1 +
 drivers/ata/ahci_imx.c | 236 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 246 insertions(+)
 create mode 100644 drivers/ata/ahci_imx.c

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 5cddaf86cd0a..a4af0a2a089c 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -97,6 +97,15 @@ config SATA_AHCI_PLATFORM
 
 	  If unsure, say N.
 
+config AHCI_IMX
+	tristate "Freescale i.MX AHCI SATA support"
+	depends on SATA_AHCI_PLATFORM
+	help
+	  This option enables support for the Freescale i.MX SoC's
+	  onboard AHCI SATA.
+
+	  If unsure, say N.
+
 config SATA_FSL
 	tristate "Freescale 3.0Gbps SATA support"
 	depends on FSL_SOC
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index c04d0fd038a3..46518c622460 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_SATA_INIC162X)	+= sata_inic162x.o
 obj-$(CONFIG_SATA_SIL24)	+= sata_sil24.o
 obj-$(CONFIG_SATA_DWC)		+= sata_dwc_460ex.o
 obj-$(CONFIG_SATA_HIGHBANK)	+= sata_highbank.o libahci.o
+obj-$(CONFIG_AHCI_IMX)		+= ahci_imx.o
 
 # SFF w/ custom DMA
 obj-$(CONFIG_PDC_ADMA)		+= pdc_adma.o
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
new file mode 100644
index 000000000000..58debb0acc3a
--- /dev/null
+++ b/drivers/ata/ahci_imx.c
@@ -0,0 +1,236 @@
+/*
+ * Freescale IMX AHCI SATA platform driver
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/ahci_platform.h>
+#include <linux/of_device.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include "ahci.h"
+
+enum {
+	HOST_TIMER1MS = 0xe0, /* Timer 1-ms */
+};
+
+struct imx_ahci_priv {
+	struct platform_device *ahci_pdev;
+	struct clk *sata_ref_clk;
+	struct clk *ahb_clk;
+	struct regmap *gpr;
+};
+
+static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
+{
+	int ret = 0;
+	unsigned int reg_val;
+	struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
+
+	imxpriv->gpr =
+		syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+	if (IS_ERR(imxpriv->gpr)) {
+		dev_err(dev, "failed to find fsl,imx6q-iomux-gpr regmap\n");
+		return PTR_ERR(imxpriv->gpr);
+	}
+
+	ret = clk_prepare_enable(imxpriv->sata_ref_clk);
+	if (ret < 0) {
+		dev_err(dev, "prepare-enable sata_ref clock err:%d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * set PHY Paremeters, two steps to configure the GPR13,
+	 * one write for rest of parameters, mask of first write
+	 * is 0x07fffffd, and the other one write for setting
+	 * the mpll_clk_en.
+	 */
+	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK
+			| IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK
+			| IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK
+			| IMX6Q_GPR13_SATA_SPD_MODE_MASK
+			| IMX6Q_GPR13_SATA_MPLL_SS_EN
+			| IMX6Q_GPR13_SATA_TX_ATTEN_MASK
+			| IMX6Q_GPR13_SATA_TX_BOOST_MASK
+			| IMX6Q_GPR13_SATA_TX_LVL_MASK
+			| IMX6Q_GPR13_SATA_TX_EDGE_RATE
+			, IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB
+			| IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M
+			| IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F
+			| IMX6Q_GPR13_SATA_SPD_MODE_3P0G
+			| IMX6Q_GPR13_SATA_MPLL_SS_EN
+			| IMX6Q_GPR13_SATA_TX_ATTEN_9_16
+			| IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB
+			| IMX6Q_GPR13_SATA_TX_LVL_1_025_V);
+	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+			IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+	usleep_range(100, 200);
+
+	/*
+	 * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL,
+	 * and IP vendor specific register HOST_TIMER1MS.
+	 * Configure CAP_SSS (support stagered spin up).
+	 * Implement the port0.
+	 * Get the ahb clock rate, and configure the TIMER1MS register.
+	 */
+	reg_val = readl(mmio + HOST_CAP);
+	if (!(reg_val & HOST_CAP_SSS)) {
+		reg_val |= HOST_CAP_SSS;
+		writel(reg_val, mmio + HOST_CAP);
+	}
+	reg_val = readl(mmio + HOST_PORTS_IMPL);
+	if (!(reg_val & 0x1)) {
+		reg_val |= 0x1;
+		writel(reg_val, mmio + HOST_PORTS_IMPL);
+	}
+
+	reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000;
+	writel(reg_val, mmio + HOST_TIMER1MS);
+
+	return 0;
+}
+
+static void imx6q_sata_exit(struct device *dev)
+{
+	struct imx_ahci_priv *imxpriv =  dev_get_drvdata(dev->parent);
+
+	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+			!IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+	clk_disable_unprepare(imxpriv->sata_ref_clk);
+}
+
+static struct ahci_platform_data imx6q_sata_pdata = {
+	.init = imx6q_sata_init,
+	.exit = imx6q_sata_exit,
+};
+
+static const struct of_device_id imx_ahci_of_match[] = {
+	{ .compatible = "fsl,imx6q-ahci", .data = &imx6q_sata_pdata},
+	{},
+};
+MODULE_DEVICE_TABLE(of, imx_ahci_of_match);
+
+static int imx_ahci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem, *irq, res[2];
+	const struct of_device_id *of_id;
+	const struct ahci_platform_data *pdata = NULL;
+	struct imx_ahci_priv *imxpriv;
+	struct device *ahci_dev;
+	struct platform_device *ahci_pdev;
+	int ret;
+
+	imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL);
+	if (!imxpriv) {
+		dev_err(dev, "can't alloc ahci_host_priv\n");
+		return -ENOMEM;
+	}
+
+	ahci_pdev = platform_device_alloc("ahci", -1);
+	if (!ahci_pdev)
+		return -ENODEV;
+
+	ahci_dev = &ahci_pdev->dev;
+	ahci_dev->parent = dev;
+
+	imxpriv->ahb_clk = devm_clk_get(dev, "ahb");
+	if (IS_ERR(imxpriv->ahb_clk)) {
+		dev_err(dev, "can't get ahb clock.\n");
+		ret = PTR_ERR(imxpriv->ahb_clk);
+		goto err_out;
+	}
+
+	imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref");
+	if (IS_ERR(imxpriv->sata_ref_clk)) {
+		dev_err(dev, "can't get sata_ref clock.\n");
+		ret = PTR_ERR(imxpriv->sata_ref_clk);
+		goto err_out;
+	}
+
+	imxpriv->ahci_pdev = ahci_pdev;
+	platform_set_drvdata(pdev, imxpriv);
+
+	of_id = of_match_device(imx_ahci_of_match, dev);
+	if (of_id) {
+		pdata = of_id->data;
+	} else {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!mem || !irq) {
+		dev_err(dev, "no mmio/irq resource\n");
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
+	res[0] = *mem;
+	res[1] = *irq;
+
+	ahci_dev->coherent_dma_mask = DMA_BIT_MASK(32);
+	ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask;
+	ahci_dev->of_node = dev->of_node;
+
+	ret = platform_device_add_resources(ahci_pdev, res, 2);
+	if (ret)
+		goto err_out;
+
+	ret = platform_device_add_data(ahci_pdev, pdata, sizeof(*pdata));
+	if (ret)
+		goto err_out;
+
+	ret = platform_device_add(ahci_pdev);
+	if (ret) {
+err_out:
+		platform_device_put(ahci_pdev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int imx_ahci_remove(struct platform_device *pdev)
+{
+	struct imx_ahci_priv *imxpriv = platform_get_drvdata(pdev);
+	struct platform_device *ahci_pdev = imxpriv->ahci_pdev;
+
+	platform_device_unregister(ahci_pdev);
+	return 0;
+}
+
+static struct platform_driver imx_ahci_driver = {
+	.probe = imx_ahci_probe,
+	.remove = imx_ahci_remove,
+	.driver = {
+		.name = "ahci-imx",
+		.owner = THIS_MODULE,
+		.of_match_table = imx_ahci_of_match,
+	},
+};
+module_platform_driver(imx_ahci_driver);
+
+MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver");
+MODULE_AUTHOR("Richard Zhu <Hong-Xing.Zhu@freescale.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ahci:imx");

From 932ef3685f287799fa844862d607a6b596ba5b9e Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 24 Jul 2013 14:34:08 +0900
Subject: [PATCH 281/320] staging: tidspbridge: replace strict_strtol() with
 kstrtos32()

The usage of strict_strtol() is not preferred, because
strict_strtol() is obsolete. Thus, kstrtos32() should be
used in order to convert a string to s32. Also, error handling
is added to get rid of a __must_check warning.

This fixes a memory corruption bug as well.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/tidspbridge/pmgr/dbll.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/tidspbridge/pmgr/dbll.c b/drivers/staging/tidspbridge/pmgr/dbll.c
index c191ae203565..41e88abe47af 100644
--- a/drivers/staging/tidspbridge/pmgr/dbll.c
+++ b/drivers/staging/tidspbridge/pmgr/dbll.c
@@ -1120,8 +1120,11 @@ static int dbll_rmm_alloc(struct dynamic_loader_allocate *this,
 	   or DYN_EXTERNAL, then mem granularity information is present
 	   within the section name - only process if there are at least three
 	   tokens within the section name (just a minor optimization) */
-	if (count >= 3)
-		strict_strtol(sz_last_token, 10, (long *)&req);
+	if (count >= 3) {
+		status = kstrtos32(sz_last_token, 10, &req);
+		if (status)
+			goto func_cont;
+	}
 
 	if ((req == 0) || (req == 1)) {
 		if (strcmp(sz_sec_last_token, "DYN_DARAM") == 0) {

From 81b884c9dfe6753a78df0cbf4e96f095d718bd95 Mon Sep 17 00:00:00 2001
From: Lidza Louina <lidza.louina@gmail.com>
Date: Wed, 24 Jul 2013 11:29:33 -0400
Subject: [PATCH 282/320] MAINTAINERS: Update the list of maintainers for
 staging/comedi driver.

This patch updates the list of maintainers for the staging/comedi
driver.

Signed-off-by: Lidza Louina <lidza.louina@gmail.com>
Acked-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Acked-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS                 | 2 +-
 drivers/staging/comedi/TODO | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bf61e04291ab..6ddb33d5c653 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7812,7 +7812,7 @@ F:	drivers/staging/asus_oled/
 
 STAGING - COMEDI
 M:	Ian Abbott <abbotti@mev.co.uk>
-M:	Mori Hess <fmhess@users.sourceforge.net>
+M:	H Hartley Sweeten <hsweeten@visionengravers.com>
 S:	Odd Fixes
 F:	drivers/staging/comedi/
 
diff --git a/drivers/staging/comedi/TODO b/drivers/staging/comedi/TODO
index b10f739b7e3e..fa8da9aada30 100644
--- a/drivers/staging/comedi/TODO
+++ b/drivers/staging/comedi/TODO
@@ -9,4 +9,4 @@ TODO:
 Please send patches to Greg Kroah-Hartman <greg@kroah.com> and
 copy:
 	Ian Abbott <abbotti@mev.co.uk>
-	Frank Mori Hess <fmhess@users.sourceforge.net>
+	H Hartley Sweeten <hsweeten@visionengravers.com>

From e1be09808e030d57bb743618eb41e2bc31dd464c Mon Sep 17 00:00:00 2001
From: James Bottomley <JBottomley@Parallels.com>
Date: Wed, 24 Jul 2013 12:43:18 -0700
Subject: [PATCH 283/320] [SCSI] isci: fix breakage caused by >16byte CDB patch

Oops, apparently no-one I cc'd at intel actually bothered to check this
patch for the isci driver:

commit e73823f7a2c921dcf068d34ea03bd682498d9e42
Author: James Bottomley <JBottomley@Parallels.com>
Date:   Tue May 7 15:38:18 2013 -0700

    [SCSI] libsas: implement > 16 byte CDB support

sci_swab32_cpy needs multiples of four, so for commands that aren't that, it's
rounding the wrong way.  fix by doing (len+3)/4 instead of len/4.

Reported-by: Tony Luck <tony.luck@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
---
 drivers/scsi/isci/request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index 7b082157eb79..99d2930b18c8 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -185,7 +185,7 @@ static void sci_io_request_build_ssp_command_iu(struct isci_request *ireq)
 	cmd_iu->_r_c = 0;
 
 	sci_swab32_cpy(&cmd_iu->cdb, task->ssp_task.cmd->cmnd,
-		       task->ssp_task.cmd->cmd_len / sizeof(u32));
+		       (task->ssp_task.cmd->cmd_len+3) / sizeof(u32));
 }
 
 static void sci_task_request_build_ssp_task_iu(struct isci_request *ireq)

From 2652c2163d781676284b2af494a97e64d65a5ee6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 19 Jul 2013 08:53:13 +0300
Subject: [PATCH 284/320] TTY: snyclinkmp: calculating wrong addresses

This is a static checker fix and I don't have a way to test it.  But
from the context it looks like this is a typo where SCABUFSIZE was
intended instead of sizeof(SCABUFSIZE).  SCABUFSIZE is 1024 and
sizeof(int) is 4.  I would suspect this is a bad bug.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/synclinkmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index ff171384ea52..dc6e96996ead 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -3478,7 +3478,7 @@ static int alloc_buf_list(SLMP_INFO *info)
 	for ( i = 0; i < info->rx_buf_count; i++ ) {
 		/* calculate and store physical address of this buffer entry */
 		info->rx_buf_list_ex[i].phys_entry =
-			info->buffer_list_phys + (i * sizeof(SCABUFSIZE));
+			info->buffer_list_phys + (i * SCABUFSIZE);
 
 		/* calculate and store physical address of */
 		/* next entry in cirular list of entries */

From 258a9fd17b92552637bc74776c11737e0472dc86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Nordstr=C3=B6m?= <henrik@henriknordstrom.net>
Date: Tue, 23 Jul 2013 21:06:07 +0200
Subject: [PATCH 285/320] tty/8250_early: Don't truncate last character of
 options

The earlier change to use strlcpy uncovered a bug in the options
argument length calculation causing last character to be truncated.
This makes the actual console to be configured with incorrect
baudrate when specifying the console using console=uart,... syntax.

Bug symptom seen in kernel log output:

Kernel command line: console=uart,mmio,0x90000000,115200
Early serial console at MMIO 0x90000000 (options '11520')

which then results in a invalid baud rate 11520 instead of the
expected 115200 when the console is switched to ttyS0 later
in the boot process.

Signed-off-by: Henrik Nordstrom <henrik@henriknordstrom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_early.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 721904f8efa9..946ddd2b3a54 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -193,7 +193,8 @@ static int __init parse_options(struct early_serial8250_device *device,
 	if (options) {
 		options++;
 		device->baud = simple_strtoul(options, NULL, 0);
-		length = min(strcspn(options, " "), sizeof(device->options));
+		length = min(strcspn(options, " ") + 1,
+			     sizeof(device->options));
 		strlcpy(device->options, options, length);
 	} else {
 		device->baud = probe_baud(port);

From 3bf5d350586d98eb28ab7f86ffbd66518ffd95d8 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Sun, 21 Jul 2013 10:43:26 +0800
Subject: [PATCH 286/320] serial: tegra: correct help message in Kconfig from
 'ttyHS' to 'ttyTHS'

ttyTHS is consistent with the name used in driver.

Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Acked-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 5e3d68917ffe..1456673bcca0 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -277,7 +277,7 @@ config SERIAL_TEGRA
 	select SERIAL_CORE
 	help
 	  Support for the on-chip UARTs on the NVIDIA Tegra series SOCs
-	  providing /dev/ttyHS0, 1, 2, 3 and 4 (note, some machines may not
+	  providing /dev/ttyTHS0, 1, 2, 3 and 4 (note, some machines may not
 	  provide all of these ports, depending on how the serial port
 	  are enabled). This driver uses the APB DMA to achieve higher baudrate
 	  and better performance.

From 1fad56424f5ad3ce4973505a357212b2e2282b3f Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Fri, 28 Jun 2013 12:24:26 +0200
Subject: [PATCH 287/320] USB: ti_usb_3410_5052: fix dynamic-id matching

The driver failed to take the dynamic ids into account when determining
the device type and therefore all devices were detected as 2-port
devices when using the dynamic-id interface.

Match on the usb-serial-driver field instead of doing redundant id-table
searches.

Reported-by: Anders Hammarquist <iko@iko.pp.se>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ti_usb_3410_5052.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 7182bb774b79..375b5a400b6f 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -371,7 +371,7 @@ static int ti_startup(struct usb_serial *serial)
 	usb_set_serial_data(serial, tdev);
 
 	/* determine device type */
-	if (usb_match_id(serial->interface, ti_id_table_3410))
+	if (serial->type == &ti_1port_device)
 		tdev->td_is_3410 = 1;
 	dev_dbg(&dev->dev, "%s - device type is %s\n", __func__,
 		tdev->td_is_3410 ? "3410" : "5052");

From f82a3133c0c5625d54f3dd1621815b1cf77c8463 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 24 Jul 2013 16:55:23 -0700
Subject: [PATCH 288/320] ARM: omap5: Only select errata 798181 if SMP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoids the following warning when SMP is off:

warning: (ARCH_KEYSTONE && SOC_OMAP5) selects ARM_ERRATA_798181 which
has unmet direct dependencies (CPU_V7 && SMP)

Reported-by: Emilio López <emilio@elopez.com.ar>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-omap2/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 627fa7e41fba..3eed0006d189 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -62,7 +62,7 @@ config SOC_OMAP5
 	select HAVE_SMP
 	select COMMON_CLK
 	select HAVE_ARM_ARCH_TIMER
-	select ARM_ERRATA_798181
+	select ARM_ERRATA_798181 if SMP
 
 config SOC_AM33XX
 	bool "AM33XX support"

From 63b5df963f52ccbab6fabedf05b7ac6b465789a4 Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Thu, 25 Jul 2013 02:01:39 +0200
Subject: [PATCH 289/320] usb: serial: option: Add ONYX 3G device support

This patch adds support for the ONYX 3G device (version 1) from ALFA
NETWORK.

Signed-off-by: Enrico Mioso <mrkiko.rs@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index bd02f257de13..43093850045b 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -783,6 +783,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) },

From 9b0ee8cf70457520c144e944bcf8df2324bad880 Mon Sep 17 00:00:00 2001
From: Tsugikazu Shibata <tshibata@ab.jp.nec.com>
Date: Thu, 25 Jul 2013 09:17:23 +0900
Subject: [PATCH 290/320] HOWTO ja_JP sync

Attached is Documentation/ja_JP/HOWTO sync patch for 3.10.
This patch was reviewed by Japanese translation community called JF.

Signed-off-by: Tsugikazu Shibata <tshibata@ab.jp.nec.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ja_JP/HOWTO | 44 +++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index 050d37fe6d40..8148a47fc70e 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -11,14 +11,14 @@ for non English (read: Japanese) speakers and is not intended as a
 fork. So if you have any comments or updates for this file, please try
 to update the original English file first.
 
-Last Updated: 2011/03/31
+Last Updated: 2013/07/19
 ==================================
 これは、
-linux-2.6.38/Documentation/HOWTO
+linux-3.10/Documentation/HOWTO
 の和訳です。
 
-翻訳団体： JF プロジェクト < http://www.linux.or.jp/JF/ >
-翻訳日： 2011/3/28
+翻訳団体： JF プロジェクト < http://linuxjf.sourceforge.jp/ >
+翻訳日： 2013/7/19
 翻訳者： Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
 校正者： 松倉さん <nbh--mats at nifty dot com>
          小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
@@ -245,7 +245,7 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、
 自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
 できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
 ます-
-	http://sosdg.org/~qiyong/lxr/
+	http://lxr.linux.no/+trees
 
 開発プロセス
 -----------------------
@@ -253,24 +253,24 @@ Linux カーネルソースツリーの中に含まれる、きれいにし、
 Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
 チ」と多数のサブシステム毎のカーネルブランチから構成されます。
 これらのブランチとは-
-  - メインの 2.6.x カーネルツリー
-  - 2.6.x.y -stable カーネルツリー
-  - 2.6.x -git カーネルパッチ
+  - メインの 3.x カーネルツリー
+  - 3.x.y -stable カーネルツリー
+  - 3.x -git カーネルパッチ
   - サブシステム毎のカーネルツリーとパッチ
-  - 統合テストのための 2.6.x -next カーネルツリー
+  - 統合テストのための 3.x -next カーネルツリー
 
-2.6.x カーネルツリー
+3.x カーネルツリー
 -----------------
 
-2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
-の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは
+3.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
+の pub/linux/kernel/v3.x/ ディレクトリに存在します。この開発プロセスは
 以下のとおり-
 
   - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
     この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
     このような差分は通常 -next カーネルに数週間含まれてきたパッチです。
     大きな変更は git(カーネルのソース管理ツール、詳細は
-    http://git-scm.com/  参照) を使って送るのが好ましいやり方ですが、パッ
+    http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ
     チファイルの形式のまま送るのでも十分です。
 
   - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
@@ -302,20 +302,20 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー
   実に認識されたバグの状況によりリリースされるのであり、前もって決めら
   れた計画によってリリースされるものではないからです。」
 
-2.6.x.y -stable カーネルツリー
+3.x.y -stable カーネルツリー
 ---------------------------
 
-バージョン番号が4つの数字に分かれているカーネルは -stable カーネルです。
-これには、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
+バージョン番号が3つの数字に分かれているカーネルは -stable カーネルです。
+これには、3.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
 する比較的小さい重要な修正が含まれます。
 
 これは、開発/実験的バージョンのテストに協力することに興味が無く、
 最新の安定したカーネルを使いたいユーザに推奨するブランチです。
 
-もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x が
+もし、3.x.y カーネルが存在しない場合には、番号が一番大きい 3.x が
 最新の安定版カーネルです。
 
-2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
+3.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
 要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差し迫っ
 た問題がなければもう少し長くなることもあります。セキュリティ関連の問題
 の場合はこれに対してだいたいの場合、すぐにリリースがされます。
@@ -324,7 +324,7 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー
 イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
 リースプロセスがどう動くかが記述されています。
 
-2.6.x -git パッチ
+3.x -git パッチ
 ------------------
 
 git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
@@ -358,14 +358,14 @@ quilt シリーズとして公開されているパッチキューも使われ
 をつけることができます。大部分のこれらの patchwork のサイトは
 http://patchwork.kernel.org/ でリストされています。
 
-統合テストのための 2.6.x -next カーネルツリー
+統合テストのための 3.x -next カーネルツリー
 ---------------------------------------------
 
-サブシステムツリーの更新内容がメインラインの 2.6.x ツリーにマージされ
+サブシステムツリーの更新内容がメインラインの 3.x ツリーにマージされ
 る前に、それらは統合テストされる必要があります。この目的のため、実質的
 に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ
 ポジトリが存在します-
-       http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+       http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
        http://linux.f-seidel.de/linux-next/pmwiki/
 
 このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン

From 0eb25bb027a100f5a9df8991f2f628e7d851bc1e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 24 Jul 2013 15:37:42 +1000
Subject: [PATCH 291/320] md/raid10: remove use-after-free bug.

We always need to be careful when calling generic_make_request, as it
can start a chain of events which might free something that we are
using.

Here is one place I wasn't careful enough.  If the wbio2 is not in
use, then it might get freed at the first generic_make_request call.
So perform all necessary tests first.

This bug was introduced in 3.3-rc3 (24afd80d99) and can cause an
oops, so fix is suitable for any -stable since then.

Cc: stable@vger.kernel.org (3.3+)
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid10.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 957a719e8c2f..df7b0a06b0ea 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2290,12 +2290,18 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 	d = r10_bio->devs[1].devnum;
 	wbio = r10_bio->devs[1].bio;
 	wbio2 = r10_bio->devs[1].repl_bio;
+	/* Need to test wbio2->bi_end_io before we call
+	 * generic_make_request as if the former is NULL,
+	 * the latter is free to free wbio2.
+	 */
+	if (wbio2 && !wbio2->bi_end_io)
+		wbio2 = NULL;
 	if (wbio->bi_end_io) {
 		atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 		md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
 		generic_make_request(wbio);
 	}
-	if (wbio2 && wbio2->bi_end_io) {
+	if (wbio2) {
 		atomic_inc(&conf->mirrors[d].replacement->nr_pending);
 		md_sync_acct(conf->mirrors[d].replacement->bdev,
 			     bio_sectors(wbio2));

From f94c0b6658c7edea8bc19d13be321e3860a3fa54 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 22 Jul 2013 12:57:21 +1000
Subject: [PATCH 292/320] md/raid5: fix interaction of 'replace' and
 'recovery'.

If a device in a RAID4/5/6 is being replaced while another is being
recovered, then the writes to the replacement device currently don't
happen, resulting in corruption when the replacement completes and the
new drive takes over.

This is because the replacement writes are only triggered when
's.replacing' is set and not when the similar 's.sync' is set (which
is the case during resync and recovery - it means all devices need to
be read).

So schedule those writes when s.replacing is set as well.

In this case we cannot use "STRIPE_INSYNC" to record that the
replacement has happened as that is needed for recording that any
parity calculation is complete.  So introduce STRIPE_REPLACED to
record if the replacement has happened.

For safety we should also check that STRIPE_COMPUTE_RUN is not set.
This has a similar effect to the "s.locked == 0" test.  The latter
ensure that now IO has been flagged but not started.  The former
checks if any parity calculation has been flagged by not started.
We must wait for both of these to complete before triggering the
'replace'.

Add a similar test to the subsequent check for "are we finished yet".
This possibly isn't needed (is subsumed in the STRIPE_INSYNC test),
but it makes it more obvious that the REPLACE will happen before we
think we are finished.

Finally if a NeedReplace device is not UPTODATE then that is an
error.  We really must trigger a warning.

This bug was introduced in commit 9a3e1101b827a59ac9036a672f5fa8d5279d0fe2
(md/raid5:  detect and handle replacements during recovery.)
which introduced replacement for raid5.
That was in 3.3-rc3, so any stable kernel since then would benefit
from this fix.

Cc: stable@vger.kernel.org (3.3+)
Reported-by: qindehua <13691222965@163.com>
Tested-by: qindehua <qindehua@163.com>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/md/raid5.c | 15 ++++++++++-----
 drivers/md/raid5.h |  1 +
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2bf094a587cb..78ea44336e75 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3462,6 +3462,7 @@ static void handle_stripe(struct stripe_head *sh)
 		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
 			set_bit(STRIPE_SYNCING, &sh->state);
 			clear_bit(STRIPE_INSYNC, &sh->state);
+			clear_bit(STRIPE_REPLACED, &sh->state);
 		}
 		spin_unlock(&sh->stripe_lock);
 	}
@@ -3607,19 +3608,23 @@ static void handle_stripe(struct stripe_head *sh)
 			handle_parity_checks5(conf, sh, &s, disks);
 	}
 
-	if (s.replacing && s.locked == 0
-	    && !test_bit(STRIPE_INSYNC, &sh->state)) {
+	if ((s.replacing || s.syncing) && s.locked == 0
+	    && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)
+	    && !test_bit(STRIPE_REPLACED, &sh->state)) {
 		/* Write out to replacement devices where possible */
 		for (i = 0; i < conf->raid_disks; i++)
-			if (test_bit(R5_UPTODATE, &sh->dev[i].flags) &&
-			    test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+			if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+				WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags));
 				set_bit(R5_WantReplace, &sh->dev[i].flags);
 				set_bit(R5_LOCKED, &sh->dev[i].flags);
 				s.locked++;
 			}
-		set_bit(STRIPE_INSYNC, &sh->state);
+		if (s.replacing)
+			set_bit(STRIPE_INSYNC, &sh->state);
+		set_bit(STRIPE_REPLACED, &sh->state);
 	}
 	if ((s.syncing || s.replacing) && s.locked == 0 &&
+	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index b0b663b119a8..70c49329ca9a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -306,6 +306,7 @@ enum {
 	STRIPE_SYNC_REQUESTED,
 	STRIPE_SYNCING,
 	STRIPE_INSYNC,
+	STRIPE_REPLACED,
 	STRIPE_PREREAD_ACTIVE,
 	STRIPE_DELAYED,
 	STRIPE_DEGRADED,

From 14c5cec5d0cd73e7e9d4fbea2bbfeea8f3ade871 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@linux.intel.com>
Date: Thu, 25 Jul 2013 12:44:34 +0300
Subject: [PATCH 293/320] drm/i915: initialize gt_lock early with other spin
 locks

commit 181d1b9e31c668259d3798c521672afb8edd355c
Author: Daniel Vetter <daniel.vetter@ffwll.ch>
Date:   Sun Jul 21 13:16:24 2013 +0200

    drm/i915: fix up gt init sequence fallout

moved dev_priv->gt_lock initialization after use. Do the initialization
much earlier with other spin lock initializations.

Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Cc: stable@vger.kernel.org (since the regressing patch is also cc: stable)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c | 1 +
 drivers/gpu/drm/i915/intel_pm.c | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 67ec54f67afe..b1520682cb23 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1498,6 +1498,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->gpu_error.lock);
 	spin_lock_init(&dev_priv->rps.lock);
+	spin_lock_init(&dev_priv->gt_lock);
 	spin_lock_init(&dev_priv->backlight.lock);
 	mutex_init(&dev_priv->dpio_lock);
 	mutex_init(&dev_priv->rps.hw_lock);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 6a347f54d39f..51a2a60f5bfc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5497,8 +5497,6 @@ void intel_gt_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	spin_lock_init(&dev_priv->gt_lock);
-
 	if (IS_VALLEYVIEW(dev)) {
 		dev_priv->gt.force_wake_get = vlv_force_wake_get;
 		dev_priv->gt.force_wake_put = vlv_force_wake_put;

From 203a86613fb3bf2767335659513fa98563a3eb71 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Wed, 24 Jul 2013 10:27:13 -0700
Subject: [PATCH 294/320] xhci: Avoid NULL pointer deref when host dies.

When the host controller fails to respond to an Enable Slot command, and
the host fails to respond to the register write to abort the command
ring, the xHCI driver will assume the host is dead, and call
usb_hc_died().

The USB device's slot_id is still set to zero, and the pointer stored at
xhci->devs[0] will always be NULL.  The call to xhci_check_args in
xhci_free_dev should have caught the NULL virt_dev pointer.

However, xhci_free_dev is designed to free the xhci_virt_device
structures, even if the host is dead, so that we don't leak kernel
memory.  xhci_free_dev checks the return value from the generic
xhci_check_args function.  If the return value is -ENODEV, it carries on
trying to free the virtual device.

The issue is that xhci_check_args looks at the host controller state
before it looks at the xhci_virt_device pointer.  It will return -ENIVAL
because the host is dead, and xhci_free_dev will ignore the return
value, and happily dereference the NULL xhci_virt_device pointer.

The fix is to make sure that xhci_check_args checks the xhci_virt_device
pointer before it checks the host state.

See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1203453 for
further details.  This patch doesn't solve the underlying issue, but
will ensure we don't see any more NULL pointer dereferences because of
the issue.

This patch should be backported to kernels as old as 3.1, that
contain the commit 7bd89b4017f46a9b92853940fd9771319acb578a "xhci: Don't
submit commands or URBs to halted hosts."

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Reported-by: Vincent Thiele <vincentthiele@gmail.com>
Cc: stable@vger.kernel.org
---
 drivers/usb/host/xhci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2c49f00260ca..98aac742a2c1 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1181,9 +1181,6 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
 	}
 
 	xhci = hcd_to_xhci(hcd);
-	if (xhci->xhc_state & XHCI_STATE_HALTED)
-		return -ENODEV;
-
 	if (check_virt_dev) {
 		if (!udev->slot_id || !xhci->devs[udev->slot_id]) {
 			printk(KERN_DEBUG "xHCI %s called with unaddressed "
@@ -1199,6 +1196,9 @@ static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
 		}
 	}
 
+	if (xhci->xhc_state & XHCI_STATE_HALTED)
+		return -ENODEV;
+
 	return 1;
 }
 

From d5c82feb5cf8026cd4af048330fdcd46e861c686 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Tue, 23 Jul 2013 11:58:20 -0700
Subject: [PATCH 295/320] usb: xhci: Mark two functions __maybe_unused

Resolves the following build warnings:
drivers/usb/host/xhci.c:332:13: warning: 'xhci_msix_sync_irqs' defined but not used [-Wunused-function]
drivers/usb/host/xhci.c:3901:12: warning: 'xhci_change_max_exit_latency' defined but not used [-Wunused-function]

These functions are not always used, and since they're marked static
they will produce build warnings:
- xhci_msix_sync_irqs is only used with CONFIG_PCI.
- xhci_change_max_exit_latency is a little more complicated with
  dependencies on CONFIG_PM and CONFIG_PM_RUNTIME.

Instead of building a bigger maze of ifdefs in this code, I've just
marked both with __maybe_unused.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
---
 drivers/usb/host/xhci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 98aac742a2c1..6bbf511645eb 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -329,7 +329,7 @@ static void xhci_cleanup_msix(struct xhci_hcd *xhci)
 	return;
 }
 
-static void xhci_msix_sync_irqs(struct xhci_hcd *xhci)
+static void __maybe_unused xhci_msix_sync_irqs(struct xhci_hcd *xhci)
 {
 	int i;
 
@@ -3898,7 +3898,7 @@ int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1)
  * Issue an Evaluate Context command to change the Maximum Exit Latency in the
  * slot context.  If that succeeds, store the new MEL in the xhci_virt_device.
  */
-static int xhci_change_max_exit_latency(struct xhci_hcd *xhci,
+static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci,
 			struct usb_device *udev, u16 max_exit_latency)
 {
 	struct xhci_virt_device *virt_dev;

From c4d949b76f2c714e0da20c53252e586e8127ac79 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 23 Jul 2013 15:22:59 -0700
Subject: [PATCH 296/320] usb: fix build warning in pci-quirks.h when
 CONFIG_PCI is not enabled

Fix warning when CONFIG_PCI is not enabled
(from commit 296365781903226a3fb8758901eaeec09d2798e4).

drivers/usb/host/pci-quirks.h: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Moiz Sonasath <m-sonasath@ti.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
---
 drivers/usb/host/pci-quirks.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 4b8a2092432f..978c849f9c9a 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -13,6 +13,7 @@ void usb_enable_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
 #else
+struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
 static inline void usb_amd_quirk_pll_enable(void) {}
 static inline void usb_amd_dev_put(void) {}

From 07f3cb7c28bf3f4dd80bfb136cf45810c46ac474 Mon Sep 17 00:00:00 2001
From: George Cherian <george.cherian@ti.com>
Date: Mon, 1 Jul 2013 10:59:12 +0530
Subject: [PATCH 297/320] usb: host: xhci: Enable XHCI_SPURIOUS_SUCCESS for all
 controllers with xhci 1.0

Xhci controllers with hci_version > 0.96 gives spurious success
events on short packet completion. During webcam capture the
"ERROR Transfer event TRB DMA ptr not part of current TD" was observed.
The same application works fine with synopsis controllers hci_version 0.96.
The same issue is seen with Intel Pantherpoint xhci controller. So enabling
this quirk in xhci_gen_setup if controller verion is greater than 0.96.
For xhci-pci move the quirk to much generic place xhci_gen_setup.

Note from Sarah:

The xHCI 1.0 spec changed how hardware handles short packets.  The HW
will notify SW of the TRB where the short packet occurred, and it will
also give a successful status for the last TRB in a TD (the one with the
IOC flag set).  On the second successful status, that warning will be
triggered in the driver.

Software is now supposed to not assume the TD is not completed until it
gets that last successful status.  That means we have a slight race
condition, although it should have little practical impact.  This patch
papers over that issue.

It's on my long-term to-do list to fix this race condition, but it is a
much more involved patch that will probably be too big for stable.  This
patch is needed for stable to avoid serious log spam.

This patch should be backported to kernels as old as 3.0, that
contain the commit ad808333d8201d53075a11bc8dd83b81f3d68f0b "Intel xhci:
Ignore spurious successful event."

The patch will have to be modified for kernels older than 3.2, since
that kernel added the xhci_gen_setup function for xhci platform devices.
The correct conflict resolution for kernels older than 3.2 is to set
XHCI_SPURIOUS_SUCCESS in xhci_pci_quirks for all xHCI 1.0 hosts.

Signed-off-by: George Cherian <george.cherian@ti.com>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@vger.kernel.org
---
 drivers/usb/host/xhci-pci.c | 1 -
 drivers/usb/host/xhci.c     | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index cc24e39b97d5..f00cb203faea 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -93,7 +93,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
 			pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) {
-		xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
 		xhci->quirks |= XHCI_EP_LIMIT_QUIRK;
 		xhci->limit_active_eps = 64;
 		xhci->quirks |= XHCI_SW_BW_CHECKING;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6bbf511645eb..41eb4fc33453 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4892,6 +4892,13 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
 
 	get_quirks(dev, xhci);
 
+	/* In xhci controllers which follow xhci 1.0 spec gives a spurious
+	 * success event after a short transfer. This quirk will ignore such
+	 * spurious event.
+	 */
+	if (xhci->hci_version > 0x96)
+		xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
+
 	/* Make sure the HC is halted. */
 	retval = xhci_halt(xhci);
 	if (retval)

From d66eaf9f89502971fddcb0de550b01fa6f409d83 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <linux@rempel-privat.de>
Date: Sun, 21 Jul 2013 15:36:19 +0200
Subject: [PATCH 298/320] xhci: fix null pointer dereference on
 ring_doorbell_for_active_rings

in some cases where device is attched to xhci port and do not responding,
for example ath9k_htc with stalled firmware, kernel will
crash on ring_doorbell_for_active_rings.
This patch check if pointer exist before it is used.

This patch should be backported to kernels as old as 2.6.35, that
contain the commit e9df17eb1408cfafa3d1844bfc7f22c7237b31b8 "USB: xhci:
Correct assumptions about number of rings per endpoint"

Signed-off-by: Oleksij Rempel <linux@rempel-privat.de>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Cc: stable@vger.kernel.org
---
 drivers/usb/host/xhci-ring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1e57eafa6910..5b08cd85f8e7 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -434,7 +434,7 @@ static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci,
 
 	/* A ring has pending URBs if its TD list is not empty */
 	if (!(ep->ep_state & EP_HAS_STREAMS)) {
-		if (!(list_empty(&ep->ring->td_list)))
+		if (ep->ring && !(list_empty(&ep->ring->td_list)))
 			xhci_ring_ep_doorbell(xhci, slot_id, ep_index, 0);
 		return;
 	}

From db6c2c69c27f2c4fd1e2a1c6b0b1119b1e885f8a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 23 Jul 2013 01:43:20 +0200
Subject: [PATCH 299/320] pinctrl: fix a memleak when freeing maps

We forgot to free the node itself when free:ing a map.

Reported-by: xulinuxkernel <xulinuxkernel@gmail.com>
Reviewed-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 5b272bfd261d..2a00239661b3 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1193,6 +1193,7 @@ void pinctrl_unregister_map(struct pinctrl_map const *map)
 	list_for_each_entry(maps_node, &pinctrl_maps, node) {
 		if (maps_node->maps == map) {
 			list_del(&maps_node->node);
+			kfree(maps_node);
 			mutex_unlock(&pinctrl_maps_mutex);
 			return;
 		}

From e1b4271ac261b290fdab51446996fb13e68a57be Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 24 Jul 2013 15:47:30 +1000
Subject: [PATCH 300/320] xfs: di_flushiter considered harmful

When we made all inode updates transactional, we no longer needed
the log recovery detection for inodes being newer on disk than the
transaction being replayed - it was redundant as replay of the log
would always result in the latest version of the inode would be on
disk. It was redundant, but left in place because it wasn't
considered to be a problem.

However, with the new "don't read inodes on create" optimisation,
flushiter has come back to bite us. Essentially, the optimisation
made always initialises flushiter to zero in the create transaction,
and so if we then crash and run recovery and the inode already on
disk has a non-zero flushiter it will skip recovery of that inode.
As a result, log recovery does the wrong thing and we end up with a
corrupt filesystem.

Because we have to support old kernel to new kernel upgrades, we
can't just get rid of the flushiter support in log recovery as we
might be upgrading from a kernel that doesn't have fully transactional
inode updates.  Unfortunately, for v4 superblocks there is no way to
guarantee that log recovery knows about this fact.

We cannot add a new inode format flag to say it's a "special inode
create" because it won't be understood by older kernels and so
recovery could do the wrong thing on downgrade. We cannot specially
detect the combination of zero mode/non-zero flushiter on disk to
non-zero mode, zero flushiter in the log item during recovery
because wrapping of the flushiter can result in false detection.

Hence that makes this "don't use flushiter" optimisation limited to
a disk format that guarantees that we don't need it. And that means
the only fix here is to limit the "no read IO on create"
optimisation to version 5 superblocks....

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>

(cherry picked from commit e60896d8f2b81412421953e14d3feb14177edb56)
---
 fs/xfs/xfs_dinode.h      |  3 +++
 fs/xfs/xfs_inode.c       | 31 ++++++++++++++++++++++---------
 fs/xfs/xfs_log_recover.c | 13 +++++++++++--
 3 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 07d735a80a0f..e5869b50dc41 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
  * There is a very similar struct icdinode in xfs_inode which matches the
  * layout of the first 96 bytes of this structure, but is kept in native
  * format instead of big endian.
+ *
+ * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
+ * padding field for v3 inodes.
  */
 typedef struct xfs_dinode {
 	__be16		di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b78481f99d9d..bb262c25c8de 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-	to->di_flushiter = cpu_to_be16(from->di_flushiter);
 	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
 	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
 	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
 		to->di_lsn = cpu_to_be64(from->di_lsn);
 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &from->di_uuid);
+		to->di_flushiter = 0;
+	} else {
+		to->di_flushiter = cpu_to_be16(from->di_flushiter);
 	}
 }
 
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
 /*
  * Read the disk inode attributes into the in-core inode structure.
  *
- * If we are initialising a new inode and we are not utilising the
- * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
- * with a random generation number. If we are keeping inodes around, we need to
- * read the inode cluster to get the existing generation number off disk.
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
  */
 int
 xfs_iread(
@@ -1054,6 +1060,7 @@ xfs_iread(
 
 	/* shortcut IO on inode allocation if possible */
 	if ((iget_flags & XFS_IGET_CREATE) &&
+	    xfs_sb_version_hascrc(&mp->m_sb) &&
 	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
 		/* initialise the on-disk inode core */
 		memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@ xfs_iflush_int(
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
 		goto corrupt_out;
 	}
+
 	/*
-	 * bump the flush iteration count, used to detect flushes which
-	 * postdate a log record during recovery. This is redundant as we now
-	 * log every change and hence this can't happen. Still, it doesn't hurt.
+	 * Inode item log recovery for v1/v2 inodes are dependent on the
+	 * di_flushiter count for correct sequencing. We bump the flush
+	 * iteration count so we can detect flushes which postdate a log record
+	 * during recovery. This is redundant as we now log every change and
+	 * hence this can't happen but we need to still do it to ensure
+	 * backwards compatibility with old kernels that predate logging all
+	 * inode changes.
 	 */
-	ip->i_d.di_flushiter++;
+	if (ip->i_d.di_version < 3)
+		ip->i_d.di_flushiter++;
 
 	/*
 	 * Copy the dirty parts of the inode into the on-disk
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a50b9..7681b19aa5dc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
 		goto error;
 	}
 
-	/* Skip replay when the on disk inode is newer than the log one */
-	if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+	/*
+	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+	 * are transactional and if ordering is necessary we can determine that
+	 * more accurately by the LSN field in the V3 inode core. Don't trust
+	 * the inode versions we might be changing them here - use the
+	 * superblock flag to determine whether we need to look at di_flushiter
+	 * to skip replay when the on disk inode is newer than the log one
+	 */
+	if (!xfs_sb_version_hascrc(&mp->m_sb) &&
+	    dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
 		/*
 		 * Deal with the wrap case, DI_MAX_FLUSH is less
 		 * than smaller numbers
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
 			goto error;
 		}
 	}
+
 	/* Take the opportunity to reset the flush iteration count */
 	dicp->di_flushiter = 0;
 

From 3d1a69e726406ab662ab88fa30a3a05ed404334d Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Sat, 29 Jun 2013 15:33:35 +0200
Subject: [PATCH 301/320] usb: serial: option: blacklist ONDA MT689DC QMI
 interface

Prevent the option driver from binding itself to the QMI/WWAN interface, making
it unusable by the proper driver.

Signed-off-by: enrico Mioso <mrkiko.rs@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 43093850045b..085da5f210ec 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -819,7 +819,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff),
 		.driver_info = (kernel_ulong_t)&net_intf3_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) },
-	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff),
+		.driver_info = (kernel_ulong_t)&net_intf3_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff),
 		.driver_info = (kernel_ulong_t)&net_intf4_blacklist },

From 4cf76df06ecc852633ed927d91e01c83c33bc331 Mon Sep 17 00:00:00 2001
From: Dan Williams <dcbw@redhat.com>
Date: Wed, 10 Jul 2013 12:25:02 -0500
Subject: [PATCH 302/320] usb: serial: option: add Olivetti Olicard 200

Speaks AT on interfaces 5 (command & PPP) and 3 (secondary), other
interface protocols are unknown.

Signed-off-by: Dan Williams <dcbw@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 085da5f210ec..083b3e902894 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -341,6 +341,7 @@ static void option_instat_callback(struct urb *urb);
 #define OLIVETTI_VENDOR_ID			0x0b3c
 #define OLIVETTI_PRODUCT_OLICARD100		0xc000
 #define OLIVETTI_PRODUCT_OLICARD145		0xc003
+#define OLIVETTI_PRODUCT_OLICARD200		0xc005
 
 /* Celot products */
 #define CELOT_VENDOR_ID				0x211f
@@ -1259,6 +1260,7 @@ static const struct usb_device_id option_ids[] = {
 
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) },
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) },
+	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) },
 	{ USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
 	{ USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
 	{ USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/

From 878c69aae986ae97084458c0183a8c0a059865b1 Mon Sep 17 00:00:00 2001
From: Enrico Mioso <mrkiko.rs@gmail.com>
Date: Sat, 13 Jul 2013 18:54:14 +0200
Subject: [PATCH 303/320] usb: serial: option.c: remove ONDA MT825UP product ID
 fromdriver

Some (very few) early devices like mine, where not exposting a proper CDC
descriptor. This was fixed with an immediate firmware update from the vendor,
and pre-installed on newer devices.
So actual devices can be driven by cdc_acm.c + cdc_ether.c.

Signed-off-by: Enrico Mioso <mrkiko.rs@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 083b3e902894..1cf6f125f5f0 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -347,12 +347,6 @@ static void option_instat_callback(struct urb *urb);
 #define CELOT_VENDOR_ID				0x211f
 #define CELOT_PRODUCT_CT680M			0x6801
 
-/* ONDA Communication vendor id */
-#define ONDA_VENDOR_ID       0x1ee8
-
-/* ONDA MT825UP HSDPA 14.2 modem */
-#define ONDA_MT825UP         0x000b
-
 /* Samsung products */
 #define SAMSUNG_VENDOR_ID                       0x04e8
 #define SAMSUNG_PRODUCT_GT_B3730                0x6889
@@ -1262,7 +1256,6 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) },
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) },
 	{ USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
-	{ USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
 	{ USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/
 	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) },
 	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) },

From 5f8a2e68b679b41cc8e9b642f2f5aa45dd678641 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Mon, 1 Jul 2013 14:03:33 +0200
Subject: [PATCH 304/320] USB: mos7840: fix memory leak in open

Allocated urbs and buffers were never freed on errors in open.

Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/mos7840.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 0a818b238508..603fb70dde80 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -905,20 +905,20 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
 	status = mos7840_get_reg_sync(port, mos7840_port->SpRegOffset, &Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Reading Spreg failed\n");
-		return -1;
+		goto err;
 	}
 	Data |= 0x80;
 	status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "writing Spreg failed\n");
-		return -1;
+		goto err;
 	}
 
 	Data &= ~0x80;
 	status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "writing Spreg failed\n");
-		return -1;
+		goto err;
 	}
 	/* End of block to be checked */
 
@@ -927,7 +927,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
 									&Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Reading Controlreg failed\n");
-		return -1;
+		goto err;
 	}
 	Data |= 0x08;		/* Driver done bit */
 	Data |= 0x20;		/* rx_disable */
@@ -935,7 +935,7 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
 				mos7840_port->ControlRegOffset, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "writing Controlreg failed\n");
-		return -1;
+		goto err;
 	}
 	/* do register settings here */
 	/* Set all regs to the device default values. */
@@ -946,21 +946,21 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
 	status = mos7840_set_uart_reg(port, INTERRUPT_ENABLE_REGISTER, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "disabling interrupts failed\n");
-		return -1;
+		goto err;
 	}
 	/* Set FIFO_CONTROL_REGISTER to the default value */
 	Data = 0x00;
 	status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER  failed\n");
-		return -1;
+		goto err;
 	}
 
 	Data = 0xcf;
 	status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER  failed\n");
-		return -1;
+		goto err;
 	}
 
 	Data = 0x03;
@@ -1103,6 +1103,15 @@ static int mos7840_open(struct tty_struct *tty, struct usb_serial_port *port)
 	/* mos7840_change_port_settings(mos7840_port,old_termios); */
 
 	return 0;
+err:
+	for (j = 0; j < NUM_URBS; ++j) {
+		urb = mos7840_port->write_urb_pool[j];
+		if (!urb)
+			continue;
+		kfree(urb->transfer_buffer);
+		usb_free_urb(urb);
+	}
+	return status;
 }
 
 /*****************************************************************************

From 172d934c92089e9183165f7024eba9508cb9ebce Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 25 Jul 2013 09:13:18 +0200
Subject: [PATCH 305/320] usb/gadget: free opts struct on error recovery

Fix memory leaks introduced in commits:

40d133d7f542616cf9538508a372306e626a16e9
usb: gadget: f_ncm: convert to new function interface with backward compatibility

fee562a6450b7806f1fbbe1469a67b5395b5c10a
usb: gadget: f_ecm: convert to new function interface with backward compatibility

fcbdf12ebef73a6069e2a1aada1e546fb578a4aa
usb: gadget: f_phonet: convert to new function interface with backward compatibility

b29002a157940752dfed2c488b2011f63f007d71
usb: gadget: f_eem: convert to new function interface with backward compatibility

8cedba7c73af1369599b1111639cfeb66fe13aaa
usb: gadget: f_subset: convert to new function interface with backward compatibility

f466c6353819326873fa48a02c6f2d7c903240d6
usb: gadget: f_rndis: convert to new function interface with backward compatibility

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/f_ecm.c    | 7 +++++--
 drivers/usb/gadget/f_eem.c    | 7 +++++--
 drivers/usb/gadget/f_ncm.c    | 7 +++++--
 drivers/usb/gadget/f_phonet.c | 7 +++++--
 drivers/usb/gadget/f_rndis.c  | 7 +++++--
 drivers/usb/gadget/f_subset.c | 7 +++++--
 6 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/gadget/f_ecm.c b/drivers/usb/gadget/f_ecm.c
index 5d3561ea1c15..edab45da3741 100644
--- a/drivers/usb/gadget/f_ecm.c
+++ b/drivers/usb/gadget/f_ecm.c
@@ -959,8 +959,11 @@ static struct usb_function_instance *ecm_alloc_inst(void)
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = ecm_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_PTR(PTR_ERR(opts->net));
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "", &ecm_func_type);
 
diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c
index 90ee8022e8d8..d00392d879db 100644
--- a/drivers/usb/gadget/f_eem.c
+++ b/drivers/usb/gadget/f_eem.c
@@ -593,8 +593,11 @@ static struct usb_function_instance *eem_alloc_inst(void)
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = eem_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_CAST(opts->net);
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "", &eem_func_type);
 
diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c
index 952177f7eb9b..1c28fe13328a 100644
--- a/drivers/usb/gadget/f_ncm.c
+++ b/drivers/usb/gadget/f_ncm.c
@@ -1350,8 +1350,11 @@ static struct usb_function_instance *ncm_alloc_inst(void)
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = ncm_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_PTR(PTR_ERR(opts->net));
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "", &ncm_func_type);
 
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 7944fb0efe3b..1bf26e9f38cd 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -656,8 +656,11 @@ static struct usb_function_instance *phonet_alloc_inst(void)
 
 	opts->func_inst.free_func_inst = phonet_free_inst;
 	opts->net = gphonet_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_PTR(PTR_ERR(opts->net));
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 			&phonet_func_type);
diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 191df35ae69d..717ed7f95639 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -963,8 +963,11 @@ static struct usb_function_instance *rndis_alloc_inst(void)
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = rndis_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_CAST(opts->net);
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &rndis_func_type);
diff --git a/drivers/usb/gadget/f_subset.c b/drivers/usb/gadget/f_subset.c
index 5601e1d96c4f..7c8674fa7e80 100644
--- a/drivers/usb/gadget/f_subset.c
+++ b/drivers/usb/gadget/f_subset.c
@@ -505,8 +505,11 @@ static struct usb_function_instance *geth_alloc_inst(void)
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = geth_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_CAST(opts->net);
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &gether_func_type);

From 2c7b871b9102c497ba8f972aa5d38532f05b654d Mon Sep 17 00:00:00 2001
From: William Gulland <wgulland@google.com>
Date: Thu, 27 Jun 2013 16:10:20 -0700
Subject: [PATCH 306/320] usb: Clear both buffers when clearing a control
 transfer TT buffer.

Control transfers have both IN and OUT (or SETUP) packets, so when
clearing TT buffers for a control transfer it's necessary to send
two HUB_CLEAR_TT_BUFFER requests to the hub.

Signed-off-by: William Gulland <wgulland@google.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index c857471519e3..4a8a1d68002c 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -668,6 +668,15 @@ static void hub_irq(struct urb *urb)
 static inline int
 hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt)
 {
+	/* Need to clear both directions for control ep */
+	if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) ==
+			USB_ENDPOINT_XFER_CONTROL) {
+		int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0),
+				HUB_CLEAR_TT_BUFFER, USB_RT_PORT,
+				devinfo ^ 0x8000, tt, NULL, 0, 1000);
+		if (status)
+			return status;
+	}
 	return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0),
 			       HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo,
 			       tt, NULL, 0, 1000);

From e3c736fe47289dc6f577c8b20cc146bd7e69aff0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 25 Jul 2013 18:27:45 -0400
Subject: [PATCH 307/320] drm/radeon/dpm: fix a typo in the rv6xx mclk setup

Need to set high for the last two entries.  Looks
like a copy and paste typo.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/rv6xx_dpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index 65e33f387341..b1c2a62df950 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -819,7 +819,7 @@ static void rv6xx_program_memory_timing_parameters(struct radeon_device *rdev)
 		 POWERMODE1(calculate_memory_refresh_rate(rdev,
 							  pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) |
 		 POWERMODE2(calculate_memory_refresh_rate(rdev,
-							  pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) |
+							  pi->hw.sclks[R600_POWER_LEVEL_HIGH])) |
 		 POWERMODE3(calculate_memory_refresh_rate(rdev,
 							  pi->hw.sclks[R600_POWER_LEVEL_HIGH])));
 	WREG32(ARB_RFSH_RATE, arb_refresh_rate);

From 2333a003a83ae8b257ac4bc1bb297c897c1ebb90 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 25 Jul 2013 18:29:14 -0400
Subject: [PATCH 308/320] drm/radeon/dpm: fix displaygap programming on rv6xx

Need to use the driver state rather than the register
state since the displays may not be enabled when the
power state is programmed.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/rv6xx_dpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index b1c2a62df950..dde402340f22 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -1182,10 +1182,10 @@ static void rv6xx_program_display_gap(struct radeon_device *rdev)
 	u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL);
 
 	tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK);
-	if (RREG32(AVIVO_D1CRTC_CONTROL) & AVIVO_CRTC_EN) {
+	if (rdev->pm.dpm.new_active_crtcs & 1) {
 		tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK);
 		tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE);
-	} else if (RREG32(AVIVO_D2CRTC_CONTROL) & AVIVO_CRTC_EN) {
+	} else if (rdev->pm.dpm.new_active_crtcs & 2) {
 		tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE);
 		tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK);
 	} else {

From 515c0967205f2e6d0ca1602ce0de65f9aec1d215 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 24 Jul 2013 08:42:27 -0700
Subject: [PATCH 309/320] mfd: max8925: fix dt code for backlight

The device-tree enablement for max8925 has several problems, but besides
the bindings being wrong (and not having seen review) there's also some
bad coding practices on how to fill in the platform_data from device tree.

I came across this since it causes a warning when compiling
mmp2_defconfig, and instead of doing the minimal fix to silence the
warning, I restructured the code a bit.

This silences the warning:
drivers/video/backlight/max8925_bl.c: In function 'max8925_backlight_probe':
drivers/video/backlight/max8925_bl.c:177:3: warning: statement with no effect [-Wunused-value]

Note that the bindings themselves need to be revisited too, but that will
affect more than just the backlight driver and is best done separately;
this just fixes the bad code for the backlight driver.

Acked-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 drivers/video/backlight/max8925_bl.c | 41 ++++++++++++++--------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c
index 5ca11b066b7e..886e797f75f9 100644
--- a/drivers/video/backlight/max8925_bl.c
+++ b/drivers/video/backlight/max8925_bl.c
@@ -101,33 +101,37 @@ static const struct backlight_ops max8925_backlight_ops = {
 	.get_brightness	= max8925_backlight_get_brightness,
 };
 
-#ifdef CONFIG_OF
-static int max8925_backlight_dt_init(struct platform_device *pdev,
-			      struct max8925_backlight_pdata *pdata)
+static void max8925_backlight_dt_init(struct platform_device *pdev)
 {
 	struct device_node *nproot = pdev->dev.parent->of_node, *np;
-	int dual_string;
+	struct max8925_backlight_pdata *pdata;
+	u32 val;
+
+	if (!nproot || !IS_ENABLED(CONFIG_OF))
+		return;
+
+	pdata = devm_kzalloc(&pdev->dev,
+			     sizeof(struct max8925_backlight_pdata),
+			     GFP_KERNEL);
+	if (!pdata)
+		return;
 
-	if (!nproot)
-		return -ENODEV;
 	np = of_find_node_by_name(nproot, "backlight");
 	if (!np) {
 		dev_err(&pdev->dev, "failed to find backlight node\n");
-		return -ENODEV;
+		return;
 	}
 
-	of_property_read_u32(np, "maxim,max8925-dual-string", &dual_string);
-	pdata->dual_string = dual_string;
-	return 0;
+	if (!of_property_read_u32(np, "maxim,max8925-dual-string", &val))
+		pdata->dual_string = val;
+
+	pdev->dev.platform_data = pdata;
 }
-#else
-#define max8925_backlight_dt_init(x, y)	(-1)
-#endif
 
 static int max8925_backlight_probe(struct platform_device *pdev)
 {
 	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
-	struct max8925_backlight_pdata *pdata = pdev->dev.platform_data;
+	struct max8925_backlight_pdata *pdata;
 	struct max8925_backlight_data *data;
 	struct backlight_device *bl;
 	struct backlight_properties props;
@@ -170,13 +174,10 @@ static int max8925_backlight_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, bl);
 
 	value = 0;
-	if (pdev->dev.parent->of_node && !pdata) {
-		pdata = devm_kzalloc(&pdev->dev,
-				     sizeof(struct max8925_backlight_pdata),
-				     GFP_KERNEL);
-		max8925_backlight_dt_init(pdev, pdata);
-	}
+	if (!pdev->dev.platform_data)
+		max8925_backlight_dt_init(pdev);
 
+	pdata = pdev->dev.platform_data;
 	if (pdata) {
 		if (pdata->lxw_scl)
 			value |= (1 << 7);

From f4f85a8c94752300ad2c4f7a3475a3d3b8ef72ca Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 25 Jul 2013 20:07:25 -0400
Subject: [PATCH 310/320] drm/radeon/dpm: implement force performance levels
 for rv6xx

Allows you to limit the selected power levels via sysfs.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_asic.c |  1 +
 drivers/gpu/drm/radeon/radeon_asic.h |  2 ++
 drivers/gpu/drm/radeon/rv6xx_dpm.c   | 35 ++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 78bec1a58ed1..f8f8b3113ddd 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1161,6 +1161,7 @@ static struct radeon_asic rv6xx_asic = {
 		.get_mclk = &rv6xx_dpm_get_mclk,
 		.print_power_state = &rv6xx_dpm_print_power_state,
 		.debugfs_print_current_performance_level = &rv6xx_dpm_debugfs_print_current_performance_level,
+		.force_performance_level = &rv6xx_dpm_force_performance_level,
 	},
 	.pflip = {
 		.pre_page_flip = &rs600_pre_page_flip,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index ca1895709908..902479fa737f 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -421,6 +421,8 @@ void rv6xx_dpm_print_power_state(struct radeon_device *rdev,
 				 struct radeon_ps *ps);
 void rv6xx_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
 						       struct seq_file *m);
+int rv6xx_dpm_force_performance_level(struct radeon_device *rdev,
+				      enum radeon_dpm_forced_level level);
 /* rs780 dpm */
 int rs780_dpm_init(struct radeon_device *rdev);
 int rs780_dpm_enable(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index dde402340f22..363018c60412 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -1670,6 +1670,8 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev)
 	struct radeon_ps *old_ps = rdev->pm.dpm.current_ps;
 	int ret;
 
+	pi->restricted_levels = 0;
+
 	rv6xx_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps);
 
 	rv6xx_clear_vc(rdev);
@@ -1756,6 +1758,8 @@ int rv6xx_dpm_set_power_state(struct radeon_device *rdev)
 
 	rv6xx_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps);
 
+	rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO;
+
 	return 0;
 }
 
@@ -2085,3 +2089,34 @@ u32 rv6xx_dpm_get_mclk(struct radeon_device *rdev, bool low)
 	else
 		return requested_state->high.mclk;
 }
+
+int rv6xx_dpm_force_performance_level(struct radeon_device *rdev,
+				      enum radeon_dpm_forced_level level)
+{
+	struct rv6xx_power_info *pi = rv6xx_get_pi(rdev);
+
+	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
+		pi->restricted_levels = 3;
+	} else if (level == RADEON_DPM_FORCED_LEVEL_LOW) {
+		pi->restricted_levels = 2;
+	} else {
+		pi->restricted_levels = 0;
+	}
+
+	rv6xx_clear_vc(rdev);
+	r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, true);
+	r600_set_at(rdev, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
+	r600_wait_for_power_level(rdev, R600_POWER_LEVEL_LOW);
+	r600_power_level_enable(rdev, R600_POWER_LEVEL_HIGH, false);
+	r600_power_level_enable(rdev, R600_POWER_LEVEL_MEDIUM, false);
+	rv6xx_enable_medium(rdev);
+	rv6xx_enable_high(rdev);
+	if (pi->restricted_levels == 3)
+		r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, false);
+	rv6xx_program_vc(rdev);
+	rv6xx_program_at(rdev);
+
+	rdev->pm.dpm.forced_level = level;
+
+	return 0;
+}

From f5d9b7f0f93c6a7d40750b8b5528a1e0f0c678fb Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 25 Jul 2013 21:46:21 -0400
Subject: [PATCH 311/320] drm/radeon/dpm: fix r600_enable_sclk_control()

Actually program the correct register to enable
engine clock scaling control.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r600_dpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index b88f54b134ab..e5c860f4ccbe 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -278,9 +278,9 @@ bool r600_dynamicpm_enabled(struct radeon_device *rdev)
 void r600_enable_sclk_control(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
-		WREG32_P(GENERAL_PWRMGT, 0, ~SCLK_PWRMGT_OFF);
+		WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF);
 	else
-		WREG32_P(GENERAL_PWRMGT, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF);
+		WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF);
 }
 
 void r600_enable_mclk_control(struct radeon_device *rdev, bool enable)

From 845ad05ec31e0f3872a321e10dbeaf872022632c Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@apm.com>
Date: Tue, 23 Jul 2013 18:52:31 +0100
Subject: [PATCH 312/320] arm64: Change kernel stack size to 16K

Written by Catalin Marinas, tested by APM on storm platform. This is needed
because of the failures encountered when running SpecWeb benchmark test.

Signed-off-by: Feng Kan <fkan@apm.com>
Acked-by: Kumar Sankaran <ksankaran@apm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/thread_info.h | 4 ++--
 arch/arm64/kernel/entry.S            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 3659e460071d..23a3c4791d86 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -24,10 +24,10 @@
 #include <linux/compiler.h>
 
 #ifndef CONFIG_ARM64_64K_PAGES
-#define THREAD_SIZE_ORDER	1
+#define THREAD_SIZE_ORDER	2
 #endif
 
-#define THREAD_SIZE		8192
+#define THREAD_SIZE		16384
 #define THREAD_START_SP		(THREAD_SIZE - 16)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1d1314280a03..6ad781b21c08 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -121,7 +121,7 @@
 
 	.macro	get_thread_info, rd
 	mov	\rd, sp
-	and	\rd, \rd, #~((1 << 13) - 1)	// top of 8K stack
+	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
 	.endm
 
 /*

From 09d8091c024ec88d1541d93eb8ddb2bd5cf10c39 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 23 Jul 2013 22:21:59 -0400
Subject: [PATCH 313/320] tracing: Remove locking trace_types_lock from
 tracing_reset_all_online_cpus()

Commit a82274151af "tracing: Protect ftrace_trace_arrays list in trace_events.c"
added taking the trace_types_lock mutex in trace_events.c as there were
several locations that needed it for protection. Unfortunately, it also
encapsulated a call to tracing_reset_all_online_cpus() which also takes
the trace_types_lock, causing a deadlock.

This happens when a module has tracepoints and has been traced. When the
module is removed, the trace events module notifier will grab the
trace_types_lock, do a bunch of clean ups, and also clears the buffer
by calling tracing_reset_all_online_cpus. This doesn't happen often
which explains why it wasn't caught right away.

Commit a82274151af was marked for stable, which means this must be
sent to stable too.

Link: http://lkml.kernel.org/r/51EEC646.7070306@broadcom.com

Reported-by: Arend van Spril <arend@broadcom.com>
Tested-by: Arend van Spriel <arend@broadcom.com>
Cc: Alexander Z Lam <azl@google.com>
Cc: Vaibhav Nagarnaik <vnagarnaik@google.com>
Cc: David Sharp <dhsharp@google.com>
Cc: stable@vger.kernel.org # 3.10
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 69cba470ea96..882ec1dd1515 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1224,18 +1224,17 @@ void tracing_reset_current(int cpu)
 	tracing_reset(&global_trace.trace_buffer, cpu);
 }
 
+/* Must have trace_types_lock held */
 void tracing_reset_all_online_cpus(void)
 {
 	struct trace_array *tr;
 
-	mutex_lock(&trace_types_lock);
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 		tracing_reset_online_cpus(&tr->trace_buffer);
 #ifdef CONFIG_TRACER_MAX_TRACE
 		tracing_reset_online_cpus(&tr->max_buffer);
 #endif
 	}
-	mutex_unlock(&trace_types_lock);
 }
 
 #define SAVED_CMDLINES 128

From 867974fc09f93bdd7f98d46ac3733934486bbf4a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 26 Jul 2013 08:57:56 -0400
Subject: [PATCH 314/320] ahci_imx: depend on CONFIG_MFD_SYSCON

ahci_imx makes use of regmap but the dependency wasn't specified in
Kconfig leading build failures if CONFIG_AHCI_IMX is enabled but
CONFIG_MFD_SYSCON is not.  Add the Kconfig dependency.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 drivers/ata/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index a4af0a2a089c..4e737728aee2 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -99,7 +99,7 @@ config SATA_AHCI_PLATFORM
 
 config AHCI_IMX
 	tristate "Freescale i.MX AHCI SATA support"
-	depends on SATA_AHCI_PLATFORM
+	depends on SATA_AHCI_PLATFORM && MFD_SYSCON
 	help
 	  This option enables support for the Freescale i.MX SoC's
 	  onboard AHCI SATA.

From 8e5c2b776ae4c35f54547c017e0a943429f5748a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 25 Jul 2013 21:43:39 +0200
Subject: [PATCH 315/320] Revert "ACPI / video / i915: No ACPI backlight if
 firmware expects Windows 8"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We attempted to address a regression introduced by commit a57f7f9
(ACPICA: Add Windows8/Server2012 string for _OSI method.) after which
ACPI video backlight support doesn't work on a number of systems,
because the relevant AML methods in the ACPI tables in their BIOSes
become useless after the BIOS has been told that the OS is compatible
with Windows 8.  That problem is tracked by the bug entry at:

https://bugzilla.kernel.org/show_bug.cgi?id=51231

Commit 8c5bd7a (ACPI / video / i915: No ACPI backlight if firmware
expects Windows 8) introduced for this purpose essentially prevented
the ACPI backlight support from being used if the BIOS had been told
that the OS was compatible with Windows 8 and the i915 driver was
loaded, in which case the backlight would always be handled by i915.
Unfortunately, however, that turned out to cause problems with
backlight to appear on multiple systems with symptoms indicating that
i915 was unable to control the backlight on those systems as
expected.

For this reason, revert commit 8c5bd7a, but leave the function
acpi_video_backlight_quirks() introduced by it, because another
commit on top of it uses that function.

References: https://lkml.org/lkml/2013/7/21/119
References: https://lkml.org/lkml/2013/7/22/261
References: https://lkml.org/lkml/2013/7/23/429
References: https://lkml.org/lkml/2013/7/23/459
References: https://lkml.org/lkml/2013/7/23/81
References: https://lkml.org/lkml/2013/7/24/27
Reported-and-tested-by: James Hogan <james@albanarts.com>
Reported-and-tested-by: Kamal Mostafa <kamal@canonical.com>
Reported-and-tested-by: Jörg Otte <jrg.otte@gmail.com>
Reported-and-tested-by: Steven Newbury <steve@snewbury.org.uk>
Reported-by: Martin Steigerwald <Martin@lichtvoll.de>
Reported-by: Kalle Valo <kvalo@adurom.com>
Tested-by: Joerg Platte <jplatte@naasa.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h         |  2 -
 drivers/acpi/video.c            | 67 ++++-----------------------------
 drivers/acpi/video_detect.c     | 15 +-------
 drivers/gpu/drm/i915/i915_dma.c |  2 +-
 include/acpi/video.h            | 11 +-----
 include/linux/acpi.h            |  1 -
 6 files changed, 11 insertions(+), 87 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 227aca77ee1e..5da44e81dd4d 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -169,10 +169,8 @@ int acpi_create_platform_device(struct acpi_device *adev,
   -------------------------------------------------------------------------- */
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 bool acpi_video_backlight_quirks(void);
-bool acpi_video_verify_backlight_support(void);
 #else
 static inline bool acpi_video_backlight_quirks(void) { return false; }
-static inline bool acpi_video_verify_backlight_support(void) { return false; }
 #endif
 
 #endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 6dd237e79b4f..0ec434d2586d 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -911,7 +911,7 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 	if (acpi_video_init_brightness(device))
 		return;
 
-	if (acpi_video_verify_backlight_support()) {
+	if (acpi_video_backlight_support()) {
 		struct backlight_properties props;
 		struct pci_dev *pdev;
 		acpi_handle acpi_parent;
@@ -1366,8 +1366,8 @@ acpi_video_switch_brightness(struct acpi_video_device *device, int event)
 	unsigned long long level_current, level_next;
 	int result = -EINVAL;
 
-	/* no warning message if acpi_backlight=vendor or a quirk is used */
-	if (!acpi_video_verify_backlight_support())
+	/* no warning message if acpi_backlight=vendor is used */
+	if (!acpi_video_backlight_support())
 		return 0;
 
 	if (!device->brightness)
@@ -1875,46 +1875,6 @@ static int acpi_video_bus_remove(struct acpi_device *device)
 	return 0;
 }
 
-static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl,
-					      void *context, void **rv)
-{
-	struct acpi_device *acpi_dev;
-	struct acpi_video_bus *video;
-	struct acpi_video_device *dev, *next;
-
-	if (acpi_bus_get_device(handle, &acpi_dev))
-		return AE_OK;
-
-	if (acpi_match_device_ids(acpi_dev, video_device_ids))
-		return AE_OK;
-
-	video = acpi_driver_data(acpi_dev);
-	if (!video)
-		return AE_OK;
-
-	acpi_video_bus_stop_devices(video);
-	mutex_lock(&video->device_list_lock);
-	list_for_each_entry_safe(dev, next, &video->video_device_list, entry) {
-		if (dev->backlight) {
-			backlight_device_unregister(dev->backlight);
-			dev->backlight = NULL;
-			kfree(dev->brightness->levels);
-			kfree(dev->brightness);
-		}
-		if (dev->cooling_dev) {
-			sysfs_remove_link(&dev->dev->dev.kobj,
-					  "thermal_cooling");
-			sysfs_remove_link(&dev->cooling_dev->device.kobj,
-					  "device");
-			thermal_cooling_device_unregister(dev->cooling_dev);
-			dev->cooling_dev = NULL;
-		}
-	}
-	mutex_unlock(&video->device_list_lock);
-	acpi_video_bus_start_devices(video);
-	return AE_OK;
-}
-
 static int __init is_i740(struct pci_dev *dev)
 {
 	if (dev->device == 0x00D1)
@@ -1946,25 +1906,14 @@ static int __init intel_opregion_present(void)
 	return opregion;
 }
 
-int __acpi_video_register(bool backlight_quirks)
+int acpi_video_register(void)
 {
-	bool no_backlight;
-	int result;
-
-	no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false;
-
+	int result = 0;
 	if (register_count) {
 		/*
-		 * If acpi_video_register() has been called already, don't try
-		 * to register acpi_video_bus, but unregister backlight devices
-		 * if no backlight support is requested.
+		 * if the function of acpi_video_register is already called,
+		 * don't register the acpi_vide_bus again and return no error.
 		 */
-		if (no_backlight)
-			acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-					    ACPI_UINT32_MAX,
-					    video_unregister_backlight,
-					    NULL, NULL, NULL);
-
 		return 0;
 	}
 
@@ -1980,7 +1929,7 @@ int __acpi_video_register(bool backlight_quirks)
 
 	return 0;
 }
-EXPORT_SYMBOL(__acpi_video_register);
+EXPORT_SYMBOL(acpi_video_register);
 
 void acpi_video_unregister(void)
 {
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 826e52def080..c3397748ba46 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -238,12 +238,7 @@ static void acpi_video_caps_check(void)
 
 bool acpi_video_backlight_quirks(void)
 {
-	if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) {
-		acpi_video_caps_check();
-		acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT;
-		return true;
-	}
-	return false;
+	return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
 }
 EXPORT_SYMBOL(acpi_video_backlight_quirks);
 
@@ -291,14 +286,6 @@ int acpi_video_backlight_support(void)
 }
 EXPORT_SYMBOL(acpi_video_backlight_support);
 
-/* For the ACPI video driver use only. */
-bool acpi_video_verify_backlight_support(void)
-{
-	return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ?
-		false : acpi_video_backlight_support();
-}
-EXPORT_SYMBOL(acpi_video_verify_backlight_support);
-
 /*
  * Use acpi_backlight=vendor/video to force that backlight switching
  * is processed by vendor specific acpi drivers or video.ko driver.
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index cf188ab7051a..adb319b53ecd 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1648,7 +1648,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (INTEL_INFO(dev)->num_pipes) {
 		/* Must be done after probing outputs */
 		intel_opregion_init(dev);
-		acpi_video_register_with_quirks();
+		acpi_video_register();
 	}
 
 	if (IS_GEN5(dev))
diff --git a/include/acpi/video.h b/include/acpi/video.h
index b26dc4fb7ba8..61109f2609fc 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -17,21 +17,12 @@ struct acpi_device;
 #define ACPI_VIDEO_DISPLAY_LEGACY_TV      0x0200
 
 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
-extern int __acpi_video_register(bool backlight_quirks);
-static inline int acpi_video_register(void)
-{
-	return __acpi_video_register(false);
-}
-static inline int acpi_video_register_with_quirks(void)
-{
-	return __acpi_video_register(true);
-}
+extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern int acpi_video_get_edid(struct acpi_device *device, int type,
 			       int device_id, void **edid);
 #else
 static inline int acpi_video_register(void) { return 0; }
-static inline int acpi_video_register_with_quirks(void) { return 0; }
 static inline void acpi_video_unregister(void) { return; }
 static inline int acpi_video_get_edid(struct acpi_device *device, int type,
 				      int device_id, void **edid)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6ad72f92469c..353ba256f368 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -191,7 +191,6 @@ extern bool wmi_has_guid(const char *guid);
 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO			0x0200
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR		0x0400
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO		0x0800
-#define ACPI_VIDEO_SKIP_BACKLIGHT			0x1000
 
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 

From 5ae90d8e467e625e447000cb4335c4db973b1095 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 28 Jul 2013 20:53:33 -0700
Subject: [PATCH 316/320] Linux 3.11-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a35f72a420c0..95a8e55feceb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Linux for Workgroups
 
 # *DOCUMENTATION*

From 1deb57042fe2bd14cd7d4687f3c9418d26862053 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@linaro.org>
Date: Mon, 29 Jul 2013 08:50:28 +0100
Subject: [PATCH 317/320] ASoC: bfin-ac97: Fix prototype error following AC'97
 refactoring

As part of the multiplatform refactoring for AC'97 the AC'97 bus ops were
staticised meaning that the prototype (which was never needed) conflicts
with the declaration causing build failures.

Signed-off-by: Mark Brown <broonie@linaro.org>
Acked-by: Lars-Peter Clausen <lars@metafoo.de>
---
 sound/soc/blackfin/bf5xx-ac97.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/soc/blackfin/bf5xx-ac97.h b/sound/soc/blackfin/bf5xx-ac97.h
index 15c635e33f4d..0c3e22d90a8d 100644
--- a/sound/soc/blackfin/bf5xx-ac97.h
+++ b/sound/soc/blackfin/bf5xx-ac97.h
@@ -9,7 +9,6 @@
 #ifndef _BF5XX_AC97_H
 #define _BF5XX_AC97_H
 
-extern struct snd_ac97_bus_ops bf5xx_ac97_ops;
 extern struct snd_ac97 *ac97;
 /* Frame format in memory, only support stereo currently */
 struct ac97_frame {

From 610d80eaa987e7b1a2d07ee800c9722e227a3b47 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 30 Jul 2013 13:34:09 +0200
Subject: [PATCH 318/320] ASoC: bf5xx-ac97: Fix compile error with
 SND_BF5XX_HAVE_COLD_RESET
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_SND_BF5XX_HAVE_COLD_RESET is enabled building the blackfin ac97 driver
fails with the following compile error:
	sound/soc/blackfin/bf5xx-ac97.c: In function ‘asoc_bfin_ac97_probe’:
	sound/soc/blackfin/bf5xx-ac97.c:297: error: expected ‘;’ before ‘{’ token
	sound/soc/blackfin/bf5xx-ac97.c:302: error: label ‘gpio_err’ used but not defined

The issue was introduced in commit 6dab2fd7 ("ASoC: bf5xx-ac97: Convert to
devm_gpio_request_one()").

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 sound/soc/blackfin/bf5xx-ac97.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index efb1daecd0dd..e82eb373a731 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -294,11 +294,12 @@ static int asoc_bfin_ac97_probe(struct platform_device *pdev)
 	/* Request PB3 as reset pin */
 	ret = devm_gpio_request_one(&pdev->dev,
 				    CONFIG_SND_BF5XX_RESET_GPIO_NUM,
-				    GPIOF_OUT_INIT_HIGH, "SND_AD198x RESET") {
+				    GPIOF_OUT_INIT_HIGH, "SND_AD198x RESET");
+	if (ret) {
 		dev_err(&pdev->dev,
 			"Failed to request GPIO_%d for reset: %d\n",
 			CONFIG_SND_BF5XX_RESET_GPIO_NUM, ret);
-		goto gpio_err;
+		return ret;
 	}
 #endif
 

From d2ee88d0aaacac664aff6ca5fc0bd7705d8f2414 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 31 Jul 2013 10:15:19 +0200
Subject: [PATCH 319/320] ASoC: au1x: Fix build
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

d8b51c11ff5a70244753ba60abfd47088cf4dcd4 [ASoC: ac97c: Use
module_platform_driver()] broke the build:

 CC      sound/soc/au1x/ac97c.o
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:344:1: error: expected identifier or ‘(’ before ‘&’ token
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:344:1: error: pasting "__initcall_" and "&" does not give a valid preprocessing token
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:344:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘&’ token
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:344:1: error: expected identifier or ‘(’ before ‘&’ token
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:344:1: error: pasting "__exitcall_" and "&" does not give a valid preprocessing token
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:344:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘&’ token
/home/ralf/src/linux/upstream-sfr/sound/soc/au1x/ac97c.c:334:31: warning: ‘au1xac97c_driver’ defined but not used [-Wunused-variable]
make[5]: *** [sound/soc/au1x/ac97c.o] Error 1
make[4]: *** [sound/soc/au1x] Error 2
make[3]: *** [sound/soc] Error 2

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 sound/soc/au1x/ac97c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/au1x/ac97c.c b/sound/soc/au1x/ac97c.c
index d6f7694fcad4..c8a2de103c5f 100644
--- a/sound/soc/au1x/ac97c.c
+++ b/sound/soc/au1x/ac97c.c
@@ -341,7 +341,7 @@ static struct platform_driver au1xac97c_driver = {
 	.remove		= au1xac97c_drvremove,
 };
 
-module_platform_driver(&au1xac97c_driver);
+module_platform_driver(au1xac97c_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Au1000/1500/1100 AC97C ASoC driver");

From 4f8b19143d74e1c3360b21640065765a12bafb1b Mon Sep 17 00:00:00 2001
From: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
Date: Wed, 31 Jul 2013 13:28:52 +0100
Subject: [PATCH 320/320] ASoC: wm0010: Fix resource leak

If kzalloc() fails for `img' then we are going to leak the memory
for `out'.  We are freeing the memory of all the tx/rx transfers
but the tx/rx buf pointers will be NULL if we drop out earlier.

Signed-off-by: Dimitris Papastamos <dp@opensource.wolfsonmicro.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 sound/soc/codecs/wm0010.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c
index f5e835662cdc..10adc4145d46 100644
--- a/sound/soc/codecs/wm0010.c
+++ b/sound/soc/codecs/wm0010.c
@@ -410,6 +410,16 @@ static int wm0010_firmware_load(const char *name, struct snd_soc_codec *codec)
 			rec->command, rec->length);
 		len = rec->length + 8;
 
+		xfer = kzalloc(sizeof(*xfer), GFP_KERNEL);
+		if (!xfer) {
+			dev_err(codec->dev, "Failed to allocate xfer\n");
+			ret = -ENOMEM;
+			goto abort;
+		}
+
+		xfer->codec = codec;
+		list_add_tail(&xfer->list, &xfer_list);
+
 		out = kzalloc(len, GFP_KERNEL);
 		if (!out) {
 			dev_err(codec->dev,
@@ -417,6 +427,7 @@ static int wm0010_firmware_load(const char *name, struct snd_soc_codec *codec)
 			ret = -ENOMEM;
 			goto abort1;
 		}
+		xfer->t.rx_buf = out;
 
 		img = kzalloc(len, GFP_KERNEL);
 		if (!img) {
@@ -425,24 +436,13 @@ static int wm0010_firmware_load(const char *name, struct snd_soc_codec *codec)
 			ret = -ENOMEM;
 			goto abort1;
 		}
+		xfer->t.tx_buf = img;
 
 		byte_swap_64((u64 *)&rec->command, img, len);
 
-		xfer = kzalloc(sizeof(*xfer), GFP_KERNEL);
-		if (!xfer) {
-			dev_err(codec->dev, "Failed to allocate xfer\n");
-			ret = -ENOMEM;
-			goto abort1;
-		}
-
-		xfer->codec = codec;
-		list_add_tail(&xfer->list, &xfer_list);
-
 		spi_message_init(&xfer->m);
 		xfer->m.complete = wm0010_boot_xfer_complete;
 		xfer->m.context = xfer;
-		xfer->t.tx_buf = img;
-		xfer->t.rx_buf = out;
 		xfer->t.len = len;
 		xfer->t.bits_per_word = 8;