From e4bc6522d53b7b8eb02cfac35fd18275fd86269d Mon Sep 17 00:00:00 2001 From: Li Wang Date: Tue, 30 Oct 2012 19:52:40 +0800 Subject: [PATCH 001/464] eCryptfs: Avoid unnecessary disk read and data decryption during writing ecryptfs_write_begin grabs a page from page cache for writing. If the page contains invalid data, or data older than the counterpart on the disk, eCryptfs will read out the corresponing data from the disk into the page, decrypt them, then perform writing. However, for this page, if the length of the data to be written into is equal to page size, that means the whole page of data will be overwritten, in which case, it does not matter whatever the data were before, it is beneficial to perform writing directly rather than bothering to read and decrypt first. With this optimization, according to our test on a machine with Intel Core 2 Duo processor, iozone 'write' operation on an existing file with write size being multiple of page size will enjoy a steady 3x speedup. Signed-off-by: Li Wang Signed-off-by: Yunchuan Wen Signed-off-by: Tyler Hicks --- fs/ecryptfs/mmap.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index bd1d57f98f74..564a1fa34b99 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -338,7 +338,8 @@ static int ecryptfs_write_begin(struct file *file, if (prev_page_end_size >= i_size_read(page->mapping->host)) { zero_user(page, 0, PAGE_CACHE_SIZE); - } else { + SetPageUptodate(page); + } else if (len < PAGE_CACHE_SIZE) { rc = ecryptfs_decrypt_page(page); if (rc) { printk(KERN_ERR "%s: Error decrypting " @@ -348,8 +349,8 @@ static int ecryptfs_write_begin(struct file *file, ClearPageUptodate(page); goto out; } + SetPageUptodate(page); } - SetPageUptodate(page); } } /* If creating a page or more of holes, zero them out via truncate. @@ -499,6 +500,13 @@ static int ecryptfs_write_end(struct file *file, } goto out; } + if (!PageUptodate(page)) { + if (copied < PAGE_CACHE_SIZE) { + rc = 0; + goto out; + } + SetPageUptodate(page); + } /* Fills in zeros if 'to' goes beyond inode size */ rc = fill_zeros_to_end_of_page(page, to); if (rc) { From c6f408996c625cb950cad024f90e50519f94713c Mon Sep 17 00:00:00 2001 From: Mukund Jampala Date: Sun, 16 Dec 2012 19:25:58 +0100 Subject: [PATCH 002/464] netfilter: ip[6]t_REJECT: fix wrong transport header pointer in TCP reset The problem occurs when iptables constructs the tcp reset packet. It doesn't initialize the pointer to the tcp header within the skb. When the skb is passed to the ixgbe driver for transmit, the ixgbe driver attempts to access the tcp header and crashes. Currently, other drivers (such as our 1G e1000e or igb drivers) don't access the tcp header on transmit unless the TSO option is turned on. <1>BUG: unable to handle kernel NULL pointer dereference at 0000000d <1>IP: [] ixgbe_xmit_frame_ring+0x8cc/0x2260 [ixgbe] <4>*pdpt = 0000000085e5d001 *pde = 0000000000000000 <0>Oops: 0000 [#1] SMP [...] <4>Pid: 0, comm: swapper Tainted: P 2.6.35.12 #1 Greencity/Thurley <4>EIP: 0060:[] EFLAGS: 00010246 CPU: 16 <4>EIP is at ixgbe_xmit_frame_ring+0x8cc/0x2260 [ixgbe] <4>EAX: c7628820 EBX: 00000007 ECX: 00000000 EDX: 00000000 <4>ESI: 00000008 EDI: c6882180 EBP: dfc6b000 ESP: ced95c48 <4> DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 <0>Process swapper (pid: 0, ti=ced94000 task=ced73bd0 task.ti=ced94000) <0>Stack: <4> cbec7418 c779e0d8 c77cc888 c77cc8a8 0903010a 00000000 c77c0008 00000002 <4><0> cd4997c0 00000010 dfc6b000 00000000 d0d176c9 c77cc8d8 c6882180 cbec7318 <4><0> 00000004 00000004 cbec7230 cbec7110 00000000 cbec70c0 c779e000 00000002 <0>Call Trace: <4> [] ? 0xd0d176c9 <4> [] ? 0xd0d18a4d <4> [<411e243e>] ? dev_hard_start_xmit+0x218/0x2d7 <4> [<411f03d7>] ? sch_direct_xmit+0x4b/0x114 <4> [<411f056a>] ? __qdisc_run+0xca/0xe0 <4> [<411e28b0>] ? dev_queue_xmit+0x2d1/0x3d0 <4> [<411e8120>] ? neigh_resolve_output+0x1c5/0x20f <4> [<411e94a1>] ? neigh_update+0x29c/0x330 <4> [<4121cf29>] ? arp_process+0x49c/0x4cd <4> [<411f80c9>] ? nf_hook_slow+0x3f/0xac <4> [<4121ca8d>] ? arp_process+0x0/0x4cd <4> [<4121ca8d>] ? arp_process+0x0/0x4cd <4> [<4121c6d5>] ? T.901+0x38/0x3b <4> [<4121c918>] ? arp_rcv+0xa3/0xb4 <4> [<4121ca8d>] ? arp_process+0x0/0x4cd <4> [<411e1173>] ? __netif_receive_skb+0x32b/0x346 <4> [<411e19e1>] ? netif_receive_skb+0x5a/0x5f <4> [<411e1ea9>] ? napi_skb_finish+0x1b/0x30 <4> [] ? ixgbe_xmit_frame_ring+0x1564/0x2260 [ixgbe] <4> [<41013468>] ? lapic_next_event+0x13/0x16 <4> [<410429b2>] ? clockevents_program_event+0xd2/0xe4 <4> [<411e1b03>] ? net_rx_action+0x55/0x127 <4> [<4102da1a>] ? __do_softirq+0x77/0xeb <4> [<4102dab1>] ? do_softirq+0x23/0x27 <4> [<41003a67>] ? do_IRQ+0x7d/0x8e <4> [<41002a69>] ? common_interrupt+0x29/0x30 <4> [<41007bcf>] ? mwait_idle+0x48/0x4d <4> [<4100193b>] ? cpu_idle+0x37/0x4c <0>Code: df 09 d7 0f 94 c2 0f b6 d2 e9 e7 fb ff ff 31 db 31 c0 e9 38 ff ff ff 80 78 06 06 0f 85 3e fb ff ff 8b 7c 24 38 8b 8f b8 00 00 00 <0f> b6 51 0d f6 c2 01 0f 85 27 fb ff ff 80 e2 02 75 0d 8b 6c 24 <0>EIP: [] ixgbe_xmit_frame_ring+0x8cc/0x2260 [ixgbe] SS:ESP Signed-off-by: Mukund Jampala Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8ec724..04b18c1ac345 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); memset(tcph, 0, sizeof(*tcph)); tcph->source = oth->dest; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fd4fb34c51c7..029623dbd411 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -132,6 +132,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; From c65ef8dc7b1c16379b9fc29e925716a10804af43 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Wed, 12 Dec 2012 14:23:49 +0000 Subject: [PATCH 003/464] netfilter: nf_nat: Also handle non-ESTABLISHED routing changes in MASQUERADE Since (a0ecb85 netfilter: nf_nat: Handle routing changes in MASQUERADE target), the MASQUERADE target handles routing changes which affect the output interface of a connection, but only for ESTABLISHED connections. It is also possible for NEW connections which already have a conntrack entry to be affected by routing changes. This adds a check to drop entries in the NEW+conntrack state when the oif has changed. Signed-off-by: Andrew Collins Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/iptable_nat.c | 15 ++++++++++----- net/ipv6/netfilter/ip6table_nat.c | 15 ++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index da2c8a368f68..eeaff7e4acb5 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -124,23 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 6c8ae24b85eb..e0e788d25b14 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -127,23 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int From d7a769ff0e8f908ae8113fe523e7806b6d0b5fc7 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Fri, 14 Dec 2012 00:53:33 +0000 Subject: [PATCH 004/464] netfilter: nf_conntrack_ipv6: fix comment for packets without data Remove ambiguity of double negation. Signed-off-by: Florent Fourcot Acked-by: Rick Jones Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 00ee17c3e893..137e245860ab 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -81,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* - * (protoff == skb->len) mean that the packet doesn't have no data - * except of IPv6 & ext headers. but it's tracked anyway. - YK + * (protoff == skb->len) means the packet has not data, just + * IPv6 and possibly extensions headers, but it is tracked anyway */ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); From 97cf00e93cc24898493e7a058105e3215257ee04 Mon Sep 17 00:00:00 2001 From: Haibo Xi Date: Thu, 6 Dec 2012 23:42:17 +0000 Subject: [PATCH 005/464] netfilter: nf_ct_reasm: fix conntrack reassembly expire code Commit b836c99fd6c9 (ipv6: unify conntrack reassembly expire code with standard one) use the standard IPv6 reassembly code(ip6_expire_frag_queue) to handle conntrack reassembly expire. In ip6_expire_frag_queue, it invoke dev_get_by_index_rcu to get which device received this expired packet.so we must save ifindex when NF_conntrack get this packet. With this patch applied, I can see ICMP Time Exceeded sent from the receiver when the sender sent out 1/2 fragmented IPv6 packet. Signed-off-by: Haibo Xi Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 22c8ea951185..3dacecc99065 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -311,7 +311,10 @@ found: else fq->q.fragments = skb; - skb->dev = NULL; + if (skb->dev) { + fq->iif = skb->dev->ifindex; + skb->dev = NULL; + } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; if (payload_len > fq->q.max_size) From 0c36b48b36dc84d4215dc9d1cde1bda829214ba6 Mon Sep 17 00:00:00 2001 From: Bob Hockney Date: Sun, 16 Dec 2012 19:34:11 +0100 Subject: [PATCH 006/464] netfilter: nfnetlink_log: fix mac address for 6in4 tunnels For tunnelled ipv6in4 packets, the LOG target (xt_LOG.c) adjusts the start of the mac field to start at the ethernet header instead of the ipv4 header for the tunnel. This patch conforms what is passed by the NFLOG target through nfnetlink to what the LOG target does. Code borrowed from xt_LOG.c. Signed-off-by: Bob Hockney Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9f199f2e31fa..4e210e0fd367 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -384,6 +384,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; + const unsigned char *hwhdrp; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -485,9 +486,17 @@ __build_packet_message(struct nfulnl_instance *inst, if (indev && skb_mac_header_was_set(skb)) { if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, - htons(skb->dev->hard_header_len)) || - nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, - skb_mac_header(skb))) + htons(skb->dev->hard_header_len))) + goto nla_put_failure; + + hwhdrp = skb_mac_header(skb); + + if (skb->dev->type == ARPHRD_SIT) + hwhdrp -= ETH_HLEN; + + if (hwhdrp >= skb->head && + nla_put(inst->skb, NFULA_HWHEADER, + skb->dev->hard_header_len, hwhdrp)) goto nla_put_failure; } From 252b3e8c1bc0c2b20348ae87d67efcd0a8209f72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 11 Dec 2012 04:07:42 +0000 Subject: [PATCH 007/464] netfilter: xt_CT: fix crash while destroy ct templates In (d871bef netfilter: ctnetlink: dump entries from the dying and unconfirmed lists), we assume that all conntrack objects are inserted in any of the existing lists. However, template conntrack objects were not. This results in hitting BUG_ON in the destroy_conntrack path while removing a rule that uses the CT target. This patch fixes the situation by adding the template lists, which is where template conntrack objects reside now. Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 1 + net/netfilter/nf_conntrack_core.c | 2 ++ net/netfilter/xt_CT.c | 8 ++++++++ 3 files changed, 11 insertions(+) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a1d83cc8bf85..923cb20051ed 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -71,6 +71,7 @@ struct netns_ct { struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 08cdc71d8e87..016d95ead930 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1526,6 +1526,7 @@ err_extend: */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) #define DYING_NULLS_VAL ((1<<30)+1) +#define TEMPLATE_NULLS_VAL ((1<<30)+2) static int nf_conntrack_init_net(struct net *net) { @@ -1534,6 +1535,7 @@ static int nf_conntrack_init_net(struct net *net) atomic_set(&net->ct.count, 0); INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index ae7f5daeee43..1668f41acc6e 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -149,6 +149,10 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; @@ -289,6 +293,10 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; From e035edd16ee83498cccc9beedfc215e15cab3a07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Dec 2012 01:12:00 +0100 Subject: [PATCH 008/464] netfilter: nfnetlink_log: fix possible compilation issue due to missing include In (0c36b48 netfilter: nfnetlink_log: fix mac address for 6in4 tunnels) the include file that defines ARPD_SIT was missing. This passed unnoticed during my tests (I did not hit this problem here). net/netfilter/nfnetlink_log.c: In function '__build_packet_message': net/netfilter/nfnetlink_log.c:494:25: error: 'ARPHRD_SIT' undeclared (first use in this function) net/netfilter/nfnetlink_log.c:494:25: note: each undeclared identifier is reported only once for +each function it appears in Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 4e210e0fd367..92fd8eca0d31 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include From c43d0188d7be6e45eb52a27ed9df714ca2e04e0a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Dec 2012 12:01:42 +0000 Subject: [PATCH 009/464] drm/i915: Fixup cursor latency used for IVB lp3 watermarks It operates at twice the declared latency, so adjust the computation to avoid potential flicker at low power. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50248 Signed-off-by: Chris Wilson CC: Jesse Barnes Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 117 ++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 42839fc73499..9f1ee07ec4ae 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1811,8 +1811,110 @@ static void sandybridge_update_wm(struct drm_device *dev) enabled |= 2; } - if ((dev_priv->num_pipe == 3) && - g4x_compute_wm0(dev, 2, + /* + * Calculate and update the self-refresh watermark only when one + * display plane is used. + * + * SNB support 3 levels of watermark. + * + * WM1/WM2/WM2 watermarks have to be enabled in the ascending order, + * and disabled in the descending order + * + */ + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); + + if (!single_plane_enabled(enabled) || + dev_priv->sprite_scaling_enabled) + return; + enabled = ffs(enabled) - 1; + + /* WM1 */ + if (!ironlake_compute_srwm(dev, 1, enabled, + SNB_READ_WM1_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM1_LP_ILK, + WM1_LP_SR_EN | + (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM2 */ + if (!ironlake_compute_srwm(dev, 2, enabled, + SNB_READ_WM2_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM2_LP_ILK, + WM2_LP_EN | + (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM3 */ + if (!ironlake_compute_srwm(dev, 3, enabled, + SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM3_LP_ILK, + WM3_LP_EN | + (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); +} + +static void ivybridge_update_wm(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int latency = SNB_READ_WM0_LATENCY() * 100; /* In unit 0.1us */ + u32 val; + int fbc_wm, plane_wm, cursor_wm; + int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm; + unsigned int enabled; + + enabled = 0; + if (g4x_compute_wm0(dev, 0, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEA_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEA_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe A -" + " plane %d, " "cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 1; + } + + if (g4x_compute_wm0(dev, 1, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEB_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEB_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe B -" + " plane %d, cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 2; + } + + if (g4x_compute_wm0(dev, 2, &sandybridge_display_wm_info, latency, &sandybridge_cursor_wm_info, latency, &plane_wm, &cursor_wm)) { @@ -1875,12 +1977,17 @@ static void sandybridge_update_wm(struct drm_device *dev) (plane_wm << WM1_LP_SR_SHIFT) | cursor_wm); - /* WM3 */ + /* WM3, note we have to correct the cursor latency */ if (!ironlake_compute_srwm(dev, 3, enabled, SNB_READ_WM3_LATENCY() * 500, &sandybridge_display_srwm_info, &sandybridge_cursor_srwm_info, - &fbc_wm, &plane_wm, &cursor_wm)) + &fbc_wm, &plane_wm, &ignore_cursor_wm) || + !ironlake_compute_srwm(dev, 3, enabled, + 2 * SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm)) return; I915_WRITE(WM3_LP_ILK, @@ -4005,7 +4112,7 @@ void intel_init_pm(struct drm_device *dev) } else if (IS_IVYBRIDGE(dev)) { /* FIXME: detect B0+ stepping and use auto training */ if (SNB_READ_WM0_LATENCY()) { - dev_priv->display.update_wm = sandybridge_update_wm; + dev_priv->display.update_wm = ivybridge_update_wm; dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm; } else { DRM_DEBUG_KMS("Failed to read display plane latency. " From af6c4575afada1a9c411e42439b5d40908865b11 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Dec 2012 12:01:43 +0000 Subject: [PATCH 010/464] drm/i915: Double the cursor self-refresh latency on Valleyview It operates at twice the declared latency, so double the latency value used for the cursor watermark calculation. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50248 Signed-off-by: Chris Wilson CC: Jesse Barnes Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9f1ee07ec4ae..cdd70e654af5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1286,6 +1286,7 @@ static void valleyview_update_wm(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int planea_wm, planeb_wm, cursora_wm, cursorb_wm; int plane_sr, cursor_sr; + int ignore_plane_sr, ignore_cursor_sr; unsigned int enabled = 0; vlv_update_drain_latency(dev); @@ -1308,7 +1309,12 @@ static void valleyview_update_wm(struct drm_device *dev) sr_latency_ns, &valleyview_wm_info, &valleyview_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &ignore_cursor_sr) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + 2*sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &ignore_plane_sr, &cursor_sr)) I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN); else I915_WRITE(FW_BLC_SELF_VLV, From 52bd02d8e3fd83201a1a81bdb4ec6fc0b54d24a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 10:43:24 +0000 Subject: [PATCH 011/464] drm/i915: Clear self-refresh watermarks when disabled If we elect to disable self-refresh as they require too many FIFO entries, clear the values prior to writing them into the registers. If they are too large they may occupy more bits than available and so corrupt neighbouring WM values. Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cdd70e654af5..bfc46529f6df 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1303,7 +1303,6 @@ static void valleyview_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, @@ -1314,11 +1313,13 @@ static void valleyview_update_wm(struct drm_device *dev) 2*sr_latency_ns, &valleyview_wm_info, &valleyview_cursor_wm_info, - &ignore_plane_sr, &cursor_sr)) + &ignore_plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN); - else + } else { I915_WRITE(FW_BLC_SELF_VLV, I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, @@ -1358,17 +1359,18 @@ static void g4x_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, &g4x_wm_info, &g4x_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN); - else + } else { I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, From f20e0b08b8b2a8432e6abf3683960099f0ab2958 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 10:43:25 +0000 Subject: [PATCH 012/464] drm/i915: Prefer CRTC 'active' rather than 'enabled' during WM computations Only the intel_crtc->active is accurate at the point where we wish to perform WM computations, so use it instead of crtc->enabled. Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bfc46529f6df..5835277d9632 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -405,7 +405,7 @@ void intel_update_fbc(struct drm_device *dev) * - going to an unsupported config (interlace, pixel multiply, etc.) */ list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) { - if (tmp_crtc->enabled && + if (to_intel_crtc(tmp_crtc)->active && !to_intel_crtc(tmp_crtc)->primary_disabled && tmp_crtc->fb) { if (crtc) { @@ -992,7 +992,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) struct drm_crtc *crtc, *enabled = NULL; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { if (enabled) return NULL; enabled = crtc; @@ -1086,7 +1086,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; return false; @@ -1215,7 +1215,7 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, int entries; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) return false; clock = crtc->mode.clock; /* VESA DOT Clock */ @@ -1476,7 +1476,7 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 0); crtc = intel_get_crtc_for_plane(dev, 0); - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { int cpp = crtc->fb->bits_per_pixel / 8; if (IS_GEN2(dev)) cpp = 4; @@ -1490,7 +1490,7 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 1); crtc = intel_get_crtc_for_plane(dev, 1); - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { int cpp = crtc->fb->bits_per_pixel / 8; if (IS_GEN2(dev)) cpp = 4; @@ -2044,7 +2044,7 @@ sandybridge_compute_sprite_wm(struct drm_device *dev, int plane, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) { *sprite_wm = display->guard_size; return false; } From 4283908ef7f11a72c3b80dd4cf026f1a86429f82 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Dec 2012 23:38:28 +0100 Subject: [PATCH 013/464] drm/i915: Implement WaDisableHiZPlanesWhenMSAAEnabled Quoting from Bspec, 3D_CHICKEN1, bit 10 This bit needs to be set always to "1", Project: DevSNB " Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f75cfaf1c3f..e0019378f8b1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -517,6 +517,7 @@ * the enables for writing to the corresponding low bit. */ #define _3D_CHICKEN 0x02084 +#define _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB (1 << 10) #define _3D_CHICKEN2 0x0208c /* Disables pipelining of read flushes past the SF-WIZ interface. * Required on all Ironlake steppings according to the B-Spec, but the diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5835277d9632..3b85660ce4e4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3592,6 +3592,10 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); + /* WaDisableHiZPlanesWhenMSAAEnabled */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); From c36575e663e302dbaa4d16b9c72d2c9a913a9aef Mon Sep 17 00:00:00 2001 From: Forrest Liu Date: Mon, 17 Dec 2012 09:55:39 -0500 Subject: [PATCH 014/464] ext4: fix extent tree corruption caused by hole punch When depth of extent tree is greater than 1, logical start value of interior node is not correctly updated in ext4_ext_rm_idx. Signed-off-by: Forrest Liu Signed-off-by: "Theodore Ts'o" Reviewed-by: Ashish Sangwan Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 26af22832a84..5ae1674ec12f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2226,13 +2226,14 @@ errout: * removes index from the index block. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) + struct ext4_ext_path *path, int depth) { int err; ext4_fsblk_t leaf; /* free index block */ - path--; + depth--; + path = path + depth; leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); @@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); + + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; + path->p_idx->ei_block = (path+1)->p_idx->ei_block; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } return err; } @@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* if this leaf is free, then we should * remove it from index block above */ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(handle, inode, path + depth); + err = ext4_ext_rm_idx(handle, inode, path, depth); out: return err; @@ -2802,7 +2816,7 @@ again: /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ - err = ext4_ext_rm_idx(handle, inode, path + i); + err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); From 6547fbdbfff62c99e4f7b4f985ff8b3454f33b0f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Dec 2012 23:38:29 +0100 Subject: [PATCH 015/464] drm/i915: Implement WaSetupGtModeTdRowDispatch I'm not really sure, since the w/a entry is as thin on details as ever, and Bspec doesn't say anything about it. But I've figured only dispatching to rows 0&1 instead of all four should be the right thing for GT1. Reviewed-by: Rodrigo Vivi [danvet: Add the missing snb server GT1 to the check, spotted by Chris Wilson.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_reg.h | 3 ++- drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 557843dd4b2e..062a60b381b7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1166,6 +1166,9 @@ struct drm_i915_file_private { #define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \ (dev)->pci_device == 0x0152 || \ (dev)->pci_device == 0x015a) +#define IS_SNB_GT1(dev) ((dev)->pci_device == 0x0102 || \ + (dev)->pci_device == 0x0106 || \ + (dev)->pci_device == 0x010A) #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e0019378f8b1..186ee5c85b51 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -533,7 +533,8 @@ # define MI_FLUSH_ENABLE (1 << 12) #define GEN6_GT_MODE 0x20d0 -#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) #define GFX_MODE 0x02520 #define GFX_MODE_GEN7 0x0229c diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3b85660ce4e4..55f7a896a121 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3596,6 +3596,11 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN, _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + /* WaSetupGtModeTdRowDispatch */ + if (IS_SNB_GT1(dev)) + I915_WRITE(GEN6_GT_MODE, + _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); + I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); From b45305fce5bb1abec263fcff9d81ebecd6306ede Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 17 Dec 2012 16:21:27 +0100 Subject: [PATCH 016/464] drm/i915: Implement workaround for broken CS tlb on i830/845 Now that Chris Wilson demonstrated that the key for stability on early gen 2 is to simple _never_ exchange the physical backing storage of batch buffers I've tried a stab at a kernel solution. Doesn't look too nefarious imho, now that I don't try to be too clever for my own good any more. v2: After discussing the various techniques, we've decided to always blit batches on the suspect devices, but allow userspace to opt out of the kernel workaround assume full responsibility for providing coherent batches. The principal reason is that avoiding the blit does improve performance in a few key microbenchmarks and also in cairo-trace replays. Signed-Off-by: Daniel Vetter Signed-off-by: Chris Wilson [danvet: - Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring wrap w/a. Suggested by Chris Wilson. - Also add the ACTHD check from Chris Wilson for the error state dumping, so that we still catch batches when userspace opts out of the w/a.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 + drivers/gpu/drm/i915/i915_irq.c | 12 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 76 +++++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + include/uapi/drm/i915_drm.h | 10 +++ 7 files changed, 100 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8f63cd5de4b4..99daa896105d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); break; + case I915_PARAM_HAS_PINNED_BATCHES: + value = 1; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 062a60b381b7..1a4c3a1c111f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1100,6 +1100,7 @@ struct drm_i915_gem_object { */ atomic_t pending_flip; }; +#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base) #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) @@ -1199,6 +1200,9 @@ struct drm_i915_file_private { #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) +/* Early gen2 have a totally busted CS tlb and require pinned batches. */ +#define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev)) + /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index ee8f97f0539e..d6a994a07393 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -808,6 +808,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, flags |= I915_DISPATCH_SECURE; } + if (args->flags & I915_EXEC_IS_PINNED) + flags |= I915_DISPATCH_PINNED; switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a4dc97f8b9f0..2220dec3e5d9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1087,6 +1087,18 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (!ring->get_seqno) return NULL; + if (HAS_BROKEN_CS_TLB(dev_priv->dev)) { + u32 acthd = I915_READ(ACTHD); + + if (WARN_ON(ring->id != RCS)) + return NULL; + + obj = ring->private; + if (acthd >= obj->gtt_offset && + acthd < obj->gtt_offset + obj->base.size) + return i915_error_object_create(dev_priv, obj); + } + seqno = ring->get_seqno(ring, false); list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { if (obj->ring != ring) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2346b920bd86..ae253e04c391 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring) static void render_ring_cleanup(struct intel_ring_buffer *ring) { + struct drm_device *dev = ring->dev; + if (!ring->private) return; + if (HAS_BROKEN_CS_TLB(dev)) + drm_gem_object_unreference(to_gem_object(ring->private)); + cleanup_pipe_control(ring); } @@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, return 0; } +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT (256*1024) static int i830_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 len, @@ -976,15 +983,47 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring, { int ret; - ret = intel_ring_begin(ring, 4); - if (ret) - return ret; + if (flags & I915_DISPATCH_PINNED) { + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; - intel_ring_emit(ring, MI_BATCH_BUFFER); - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); - intel_ring_emit(ring, offset + len - 8); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, offset + len - 8); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } else { + struct drm_i915_gem_object *obj = ring->private; + u32 cs_offset = obj->gtt_offset; + + if (len > I830_BATCH_LIMIT) + return -ENOSPC; + + ret = intel_ring_begin(ring, 9+3); + if (ret) + return ret; + /* Blit the batch (which has now all relocs applied) to the stable batch + * scratch bo area (so that the CS never stumbles over its tlb + * invalidation bug) ... */ + intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 4096); + intel_ring_emit(ring, offset); + intel_ring_emit(ring, MI_FLUSH); + + /* ... and execute it. */ + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, cs_offset + len - 8); + intel_ring_advance(ring); + } return 0; } @@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->init = init_render_ring; ring->cleanup = render_ring_cleanup; + /* Workaround batchbuffer to combat CS tlb bug. */ + if (HAS_BROKEN_CS_TLB(dev)) { + struct drm_i915_gem_object *obj; + int ret; + + obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); + if (obj == NULL) { + DRM_ERROR("Failed to allocate batch bo\n"); + return -ENOMEM; + } + + ret = i915_gem_object_pin(obj, 0, true, false); + if (ret != 0) { + drm_gem_object_unreference(&obj->base); + DRM_ERROR("Failed to ping batch bo\n"); + return ret; + } + + ring->private = obj; + } + return intel_init_ring_buffer(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 526182ed0c6d..6af87cd05725 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -94,6 +94,7 @@ struct intel_ring_buffer { u32 offset, u32 length, unsigned flags); #define I915_DISPATCH_SECURE 0x1 +#define I915_DISPATCH_PINNED 0x2 void (*cleanup)(struct intel_ring_buffer *ring); int (*sync_to)(struct intel_ring_buffer *ring, struct intel_ring_buffer *to, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b746a3cf5fa9..c4d2e9c74002 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 typedef struct drm_i915_getparam { int param; @@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_SECURE (1<<9) +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1<<10) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK From 50f29fbd39835e3fd21c60a798f03f2692146ccb Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 16 Dec 2012 11:29:59 -0800 Subject: [PATCH 017/464] MAINTAINERS: Add an entry for omap related .dts files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All your omap .dts files are belong to us. Benoît has been doing a good job picking up most of the omap .dts files so far. Let's make sure we both get cc:ed for the related patches. The .dts patches need to be queued by us as separate patches from drivers and other code changes to avoid pointless merge conflists like we saw with v3.8 networking changes. Cc: Benoît Cousson Signed-off-by: Tony Lindgren --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f71d2f901a69..de4f723ebb84 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5377,6 +5377,15 @@ F: arch/arm/*omap*/ F: drivers/i2c/busses/i2c-omap.c F: include/linux/i2c-omap.h +OMAP DEVICE TREE SUPPORT +M: Benoît Cousson +M: Tony Lindgren +L: linux-omap@vger.kernel.org +L: devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/boot/dts/*omap* +F: arch/arm/boot/dts/*am3* + OMAP CLOCK FRAMEWORK SUPPORT M: Paul Walmsley L: linux-omap@vger.kernel.org From 16d60bb86e45aa7513007b4788f16fa2b968f9dd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 16 Dec 2012 11:39:40 -0800 Subject: [PATCH 018/464] ARM: OMAP: Split fb.c to remove last remaining cpu_is_omap usage Looks like we missed plat-omap/fb.c for cpu_is_omap usage mach-omap2. This is the last user of cpu_is_omap, so let's quickly fix it up so we can finally remove plat/cpu.h for omap2lus. We want to limit cpu_is_omap macro usage to mach-omap2 only so we can make plat/cpu.h private. After this we can finally drop plat/cpu.h for omap2+. Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/Makefile | 2 +- arch/arm/mach-omap1/fb.c | 80 +++++++++++++++++++++++++ arch/arm/mach-omap2/Makefile | 2 +- arch/arm/{plat-omap => mach-omap2}/fb.c | 50 +--------------- arch/arm/plat-omap/Makefile | 2 +- 5 files changed, 85 insertions(+), 51 deletions(-) create mode 100644 arch/arm/mach-omap1/fb.c rename arch/arm/{plat-omap => mach-omap2}/fb.c (76%) diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile index f0e69cbc5baa..222d58c0ae76 100644 --- a/arch/arm/mach-omap1/Makefile +++ b/arch/arm/mach-omap1/Makefile @@ -4,7 +4,7 @@ # Common support obj-y := io.o id.o sram-init.o sram.o time.o irq.o mux.o flash.o \ - serial.o devices.o dma.o + serial.o devices.o dma.o fb.o obj-y += clock.o clock_data.o opp_data.o reset.o pm_bus.o timer.o ifneq ($(CONFIG_SND_OMAP_SOC_MCBSP),) diff --git a/arch/arm/mach-omap1/fb.c b/arch/arm/mach-omap1/fb.c new file mode 100644 index 000000000000..c770d45c7226 --- /dev/null +++ b/arch/arm/mach-omap1/fb.c @@ -0,0 +1,80 @@ +/* + * File: arch/arm/plat-omap/fb.c + * + * Framebuffer device registration for TI OMAP platforms + * + * Copyright (C) 2006 Nokia Corporation + * Author: Imre Deak + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if defined(CONFIG_FB_OMAP) || defined(CONFIG_FB_OMAP_MODULE) + +static bool omapfb_lcd_configured; +static struct omapfb_platform_data omapfb_config; + +static u64 omap_fb_dma_mask = ~(u32)0; + +static struct platform_device omap_fb_device = { + .name = "omapfb", + .id = -1, + .dev = { + .dma_mask = &omap_fb_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = &omapfb_config, + }, + .num_resources = 0, +}; + +void __init omapfb_set_lcd_config(const struct omap_lcd_config *config) +{ + omapfb_config.lcd = *config; + omapfb_lcd_configured = true; +} + +static int __init omap_init_fb(void) +{ + /* + * If the board file has not set the lcd config with + * omapfb_set_lcd_config(), don't bother registering the omapfb device + */ + if (!omapfb_lcd_configured) + return 0; + + return platform_device_register(&omap_fb_device); +} + +arch_initcall(omap_init_fb); + +#else + +void __init omapfb_set_lcd_config(const struct omap_lcd_config *config) +{ +} + +#endif diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index a8004f33b7e2..947cafe65aef 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -3,7 +3,7 @@ # # Common support -obj-y := id.o io.o control.o mux.o devices.o serial.o gpmc.o timer.o pm.o \ +obj-y := id.o io.o control.o mux.o devices.o fb.o serial.o gpmc.o timer.o pm.o \ common.o gpio.o dma.o wd_timer.o display.o i2c.o hdq1w.o omap_hwmod.o \ omap_device.o sram.o diff --git a/arch/arm/plat-omap/fb.c b/arch/arm/mach-omap2/fb.c similarity index 76% rename from arch/arm/plat-omap/fb.c rename to arch/arm/mach-omap2/fb.c index a3367b783fc7..d9bd965f6d07 100644 --- a/arch/arm/plat-omap/fb.c +++ b/arch/arm/mach-omap2/fb.c @@ -1,6 +1,4 @@ /* - * File: arch/arm/plat-omap/fb.c - * * Framebuffer device registration for TI OMAP platforms * * Copyright (C) 2006 Nokia Corporation @@ -33,7 +31,7 @@ #include -#include +#include "soc.h" #ifdef CONFIG_OMAP2_VRFB @@ -94,45 +92,7 @@ static int __init omap_init_vrfb(void) arch_initcall(omap_init_vrfb); #endif -#if defined(CONFIG_FB_OMAP) || defined(CONFIG_FB_OMAP_MODULE) - -static bool omapfb_lcd_configured; -static struct omapfb_platform_data omapfb_config; - -static u64 omap_fb_dma_mask = ~(u32)0; - -static struct platform_device omap_fb_device = { - .name = "omapfb", - .id = -1, - .dev = { - .dma_mask = &omap_fb_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &omapfb_config, - }, - .num_resources = 0, -}; - -void __init omapfb_set_lcd_config(const struct omap_lcd_config *config) -{ - omapfb_config.lcd = *config; - omapfb_lcd_configured = true; -} - -static int __init omap_init_fb(void) -{ - /* - * If the board file has not set the lcd config with - * omapfb_set_lcd_config(), don't bother registering the omapfb device - */ - if (!omapfb_lcd_configured) - return 0; - - return platform_device_register(&omap_fb_device); -} - -arch_initcall(omap_init_fb); - -#elif defined(CONFIG_FB_OMAP2) || defined(CONFIG_FB_OMAP2_MODULE) +#if defined(CONFIG_FB_OMAP2) || defined(CONFIG_FB_OMAP2_MODULE) static u64 omap_fb_dma_mask = ~(u32)0; static struct omapfb_platform_data omapfb_config; @@ -155,10 +115,4 @@ static int __init omap_init_fb(void) arch_initcall(omap_init_fb); -#else - -void __init omapfb_set_lcd_config(const struct omap_lcd_config *config) -{ -} - #endif diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile index 8d885848600a..97fe69abdf5a 100644 --- a/arch/arm/plat-omap/Makefile +++ b/arch/arm/plat-omap/Makefile @@ -3,7 +3,7 @@ # # Common support -obj-y := sram.o dma.o fb.o counter_32k.o +obj-y := sram.o dma.o counter_32k.o obj-m := obj-n := obj- := From e8d3d47a98cd7184a86d58febc00b4ac47485332 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 16 Dec 2012 11:29:58 -0800 Subject: [PATCH 019/464] ARM: OMAP2+: Drop plat/cpu.h for omap2plus The cpu_is_omap macros are now local to arch/arm/mach-omap2 in soc.h and plat/cpu.h can finally be dropped for omap2+. Thanks everybody for help with fixing the drivers. Note that we can now also remove the unused plat/cpu.h from smartreflex.c and isp.c as they will cause compile errors with ARCH_MULTIPLATFORM enabled. Cc: Kevin Hilman Acked-by: Jean Pihet Acked-by: Laurent Pinchart Acked-by: Mauro Carvalho Chehab Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/drm.c | 1 - arch/arm/mach-omap2/dss-common.c | 3 +-- arch/arm/mach-omap2/prm2xxx.c | 3 +-- arch/arm/mach-omap2/prm3xxx.c | 3 +-- arch/arm/plat-omap/include/plat/cpu.h | 4 ---- drivers/media/platform/omap3isp/isp.c | 2 -- drivers/power/avs/smartreflex.c | 2 -- 7 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-omap2/drm.c b/arch/arm/mach-omap2/drm.c index fce5aa3fff49..4c7566c7e24a 100644 --- a/arch/arm/mach-omap2/drm.c +++ b/arch/arm/mach-omap2/drm.c @@ -27,7 +27,6 @@ #include "omap_device.h" #include "omap_hwmod.h" -#include #if defined(CONFIG_DRM_OMAP) || (CONFIG_DRM_OMAP_MODULE) diff --git a/arch/arm/mach-omap2/dss-common.c b/arch/arm/mach-omap2/dss-common.c index 679a0478644f..4be5cfc81ab8 100644 --- a/arch/arm/mach-omap2/dss-common.c +++ b/arch/arm/mach-omap2/dss-common.c @@ -31,8 +31,7 @@ #include