#include <linux/selinux.h>
#include <linux/mutex.h>
+#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>
struct netlink_callback *cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
- void (*data_ready)(struct sock *sk, int bytes);
+ void (*netlink_rcv)(struct sk_buff *skb);
struct module *module;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
- return (struct netlink_sock *)sk;
+ return container_of(sk, struct netlink_sock, sk);
+}
+
+static inline int netlink_is_kernel(struct sock *sk)
+{
+ return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}
struct nl_pid_hash {
static int netlink_dump(struct sock *sk);
static void netlink_destroy_callback(struct netlink_callback *cb);
-static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);
wake_up(&nl_table_wait);
}
-static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
+static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
{
struct nl_pid_hash *hash = &nl_table[protocol].hash;
struct hlist_head *head;
read_lock(&nl_table_lock);
head = nl_pid_hashfn(hash, pid);
sk_for_each(sk, node, head) {
- if (nlk_sk(sk)->pid == pid) {
+ if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) {
sock_hold(sk);
goto found;
}
* makes sure updates are visible before bind or setsockopt return. */
}
-static int netlink_insert(struct sock *sk, u32 pid)
+static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
struct hlist_head *head;
head = nl_pid_hashfn(hash, pid);
len = 0;
sk_for_each(osk, node, head) {
- if (nlk_sk(osk)->pid == pid)
+ if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid))
break;
len++;
}
.obj_size = sizeof(struct netlink_sock),
};
-static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
- int protocol)
+static int __netlink_create(struct net *net, struct socket *sock,
+ struct mutex *cb_mutex, int protocol)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
- sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
+ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
if (!sk)
return -ENOMEM;
return 0;
}
-static int netlink_create(struct socket *sock, int protocol)
+static int netlink_create(struct net *net, struct socket *sock, int protocol)
{
struct module *module = NULL;
struct mutex *cb_mutex;
cb_mutex = nl_table[protocol].cb_mutex;
netlink_unlock_table();
- if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
+ if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0)
goto out_module;
nlk = nlk_sk(sock->sk);
if (nlk->pid && !nlk->subscriptions) {
struct netlink_notify n = {
+ .net = sk->sk_net,
.protocol = sk->sk_protocol,
.pid = nlk->pid,
};
module_put(nlk->module);
netlink_table_grab();
- if (nlk->flags & NETLINK_KERNEL_SOCKET) {
+ if (netlink_is_kernel(sk)) {
kfree(nl_table[sk->sk_protocol].listeners);
nl_table[sk->sk_protocol].module = NULL;
nl_table[sk->sk_protocol].registered = 0;
static int netlink_autobind(struct socket *sock)
{
struct sock *sk = sock->sk;
+ struct net *net = sk->sk_net;
struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
struct hlist_head *head;
struct sock *osk;
netlink_table_grab();
head = nl_pid_hashfn(hash, pid);
sk_for_each(osk, node, head) {
+ if ((osk->sk_net != net))
+ continue;
if (nlk_sk(osk)->pid == pid) {
/* Bind collision, search negative pid values. */
pid = rover--;
}
netlink_table_ungrab();
- err = netlink_insert(sk, pid);
+ err = netlink_insert(sk, net, pid);
if (err == -EADDRINUSE)
goto retry;
static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sock *sk = sock->sk;
+ struct net *net = sk->sk_net;
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err;
return -EINVAL;
} else {
err = nladdr->nl_pid ?
- netlink_insert(sk, nladdr->nl_pid) :
+ netlink_insert(sk, net, nladdr->nl_pid) :
netlink_autobind(sock);
if (err)
return err;
static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
- int protocol = ssk->sk_protocol;
struct sock *sock;
struct netlink_sock *nlk;
- sock = netlink_lookup(protocol, pid);
+ sock = netlink_lookup(ssk->sk_net, ssk->sk_protocol, pid);
if (!sock)
return ERR_PTR(-ECONNREFUSED);
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
- if ((nlk->pid == 0 && !nlk->data_ready) ||
- (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_pid != nlk_sk(ssk)->pid)) {
+ if (sock->sk_state == NETLINK_CONNECTED &&
+ nlk->dst_pid != nlk_sk(ssk)->pid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
* 1: repeat lookup - reference dropped while waiting for socket memory.
*/
int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
- long timeo, struct sock *ssk)
+ long *timeo, struct sock *ssk)
{
struct netlink_sock *nlk;
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
test_bit(0, &nlk->state)) {
DECLARE_WAITQUEUE(wait, current);
- if (!timeo) {
- if (!ssk || nlk_sk(ssk)->pid == 0)
+ if (!*timeo) {
+ if (!ssk || netlink_is_kernel(ssk))
netlink_overrun(sk);
sock_put(sk);
kfree_skb(skb);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
test_bit(0, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
- timeo = schedule_timeout(timeo);
+ *timeo = schedule_timeout(*timeo);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&nlk->wait, &wait);
if (signal_pending(current)) {
kfree_skb(skb);
- return sock_intr_errno(timeo);
+ return sock_intr_errno(*timeo);
}
return 1;
}
return 0;
}
-int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
+int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
int len = skb->len;
return skb;
}
-int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
+static inline void netlink_rcv_wake(struct sock *sk)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ clear_bit(0, &nlk->state);
+ if (!test_bit(0, &nlk->state))
+ wake_up_interruptible(&nlk->wait);
+}
+
+static inline int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+ struct netlink_sock *nlk = nlk_sk(sk);
+
+ ret = -ECONNREFUSED;
+ if (nlk->netlink_rcv != NULL) {
+ ret = skb->len;
+ skb_set_owner_r(skb, sk);
+ nlk->netlink_rcv(skb);
+ }
+ kfree_skb(skb);
+ sock_put(sk);
+ return ret;
+}
+
+int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
+ u32 pid, int nonblock)
{
struct sock *sk;
int err;
kfree_skb(skb);
return PTR_ERR(sk);
}
- err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
+ if (netlink_is_kernel(sk))
+ return netlink_unicast_kernel(sk, skb);
+
+ err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk);
if (err == 1)
goto retry;
if (err)
return err;
- return netlink_sendskb(sk, skb, ssk->sk_protocol);
+ return netlink_sendskb(sk, skb);
}
int netlink_has_listeners(struct sock *sk, unsigned int group)
int res = 0;
unsigned long *listeners;
- BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET));
+ BUG_ON(!netlink_is_kernel(sk));
rcu_read_lock();
listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
struct netlink_broadcast_data {
struct sock *exclude_sk;
+ struct net *net;
u32 pid;
u32 group;
int failure;
!test_bit(p->group - 1, nlk->groups))
goto out;
+ if ((sk->sk_net != p->net))
+ goto out;
+
if (p->failure) {
netlink_overrun(sk);
goto out;
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
u32 group, gfp_t allocation)
{
+ struct net *net = ssk->sk_net;
struct netlink_broadcast_data info;
struct hlist_node *node;
struct sock *sk;
skb = netlink_trim(skb, allocation);
info.exclude_sk = ssk;
+ info.net = net;
info.pid = pid;
info.group = group;
info.failure = 0;
if (sk == p->exclude_sk)
goto out;
+ if (sk->sk_net != p->exclude_sk->sk_net)
+ goto out;
+
if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
!test_bit(p->group - 1, nlk->groups))
goto out;
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
-static inline void netlink_rcv_wake(struct sock *sk)
-{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(0, &nlk->state);
- if (!test_bit(0, &nlk->state))
- wake_up_interruptible(&nlk->wait);
-}
-
static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct msghdr *msg, size_t len)
{
static void netlink_data_ready(struct sock *sk, int len)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->data_ready)
- nlk->data_ready(sk, len);
- netlink_rcv_wake(sk);
+ BUG();
}
/*
*/
struct sock *
-netlink_kernel_create(int unit, unsigned int groups,
- void (*input)(struct sock *sk, int len),
+netlink_kernel_create(struct net *net, int unit, unsigned int groups,
+ void (*input)(struct sk_buff *skb),
struct mutex *cb_mutex, struct module *module)
{
struct socket *sock;
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- if (__netlink_create(sock, cb_mutex, unit) < 0)
+ if (__netlink_create(net, sock, cb_mutex, unit) < 0)
goto out_sock_release;
if (groups < 32)
sk = sock->sk;
sk->sk_data_ready = netlink_data_ready;
if (input)
- nlk_sk(sk)->data_ready = input;
+ nlk_sk(sk)->netlink_rcv = input;
- if (netlink_insert(sk, 0))
+ if (netlink_insert(sk, net, 0))
goto out_sock_release;
nlk = nlk_sk(sk);
nlk->flags |= NETLINK_KERNEL_SOCKET;
netlink_table_grab();
- nl_table[unit].groups = groups;
- nl_table[unit].listeners = listeners;
- nl_table[unit].cb_mutex = cb_mutex;
- nl_table[unit].module = module;
- nl_table[unit].registered = 1;
+ if (!nl_table[unit].registered) {
+ nl_table[unit].groups = groups;
+ nl_table[unit].listeners = listeners;
+ nl_table[unit].cb_mutex = cb_mutex;
+ nl_table[unit].module = module;
+ nl_table[unit].registered = 1;
+ } else {
+ kfree(listeners);
+ }
netlink_table_ungrab();
return sk;
atomic_inc(&skb->users);
cb->skb = skb;
- sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
+ sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid);
if (sk == NULL) {
netlink_destroy_callback(cb);
return -ECONNREFUSED;
sock_put(sk);
/* We successfully started a dump, by returning -EINTR we
- * signal the queue mangement to interrupt processing of
- * any netlink messages so userspace gets a chance to read
- * the results. */
+ * signal not to send ACK even if it was requested.
+ */
return -EINTR;
}
if (!skb) {
struct sock *sk;
- sk = netlink_lookup(in_skb->sk->sk_protocol,
+ sk = netlink_lookup(in_skb->sk->sk_net,
+ in_skb->sk->sk_protocol,
NETLINK_CB(in_skb).pid);
if (sk) {
sk->sk_err = ENOBUFS;
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
-static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
+int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
struct nlmsghdr *))
{
struct nlmsghdr *nlh;
int err;
while (skb->len >= nlmsg_total_size(0)) {
+ int msglen;
+
nlh = nlmsg_hdr(skb);
err = 0;
/* Only requests are handled by the kernel */
if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
- goto skip;
+ goto ack;
/* Skip control messages */
if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
- goto skip;
+ goto ack;
err = cb(skb, nlh);
- if (err == -EINTR) {
- /* Not an error, but we interrupt processing */
- netlink_queue_skip(nlh, skb);
- return err;
- }
-skip:
+ if (err == -EINTR)
+ goto skip;
+
+ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err)
netlink_ack(skb, nlh, err);
- netlink_queue_skip(nlh, skb);
+skip:
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+ skb_pull(skb, msglen);
}
return 0;
}
-/**
- * nelink_run_queue - Process netlink receive queue.
- * @sk: Netlink socket containing the queue
- * @qlen: Place to store queue length upon entry
- * @cb: Callback function invoked for each netlink message found
- *
- * Processes as much as there was in the queue upon entry and invokes
- * a callback function for each netlink message found. The callback
- * function may refuse a message by returning a negative error code
- * but setting the error pointer to 0 in which case this function
- * returns with a qlen != 0.
- *
- * qlen must be initialized to 0 before the initial entry, afterwards
- * the function may be called repeatedly until qlen reaches 0.
- *
- * The callback function may return -EINTR to signal that processing
- * of netlink messages shall be interrupted. In this case the message
- * currently being processed will NOT be requeued onto the receive
- * queue.
- */
-void netlink_run_queue(struct sock *sk, unsigned int *qlen,
- int (*cb)(struct sk_buff *, struct nlmsghdr *))
-{
- struct sk_buff *skb;
-
- if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
- *qlen = skb_queue_len(&sk->sk_receive_queue);
-
- for (; *qlen; (*qlen)--) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (netlink_rcv_skb(skb, cb)) {
- if (skb->len)
- skb_queue_head(&sk->sk_receive_queue, skb);
- else {
- kfree_skb(skb);
- (*qlen)--;
- }
- break;
- }
-
- kfree_skb(skb);
- }
-}
-
-/**
- * netlink_queue_skip - Skip netlink message while processing queue.
- * @nlh: Netlink message to be skipped
- * @skb: Socket buffer containing the netlink messages.
- *
- * Pulls the given netlink message off the socket buffer so the next
- * call to netlink_queue_run() will not reconsider the message.
- */
-static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
-{
- int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
-
- if (msglen > skb->len)
- msglen = skb->len;
-
- skb_pull(skb, msglen);
-}
-
/**
* nlmsg_notify - send a notification netlink message
* @sk: netlink socket to use
#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
+ struct net *net;
int link;
int hash_idx;
};
for (j = 0; j <= hash->mask; j++) {
sk_for_each(s, node, &hash->table[j]) {
+ if (iter->net != s->sk_net)
+ continue;
if (off == pos) {
iter->link = i;
iter->hash_idx = j;
if (v == SEQ_START_TOKEN)
return netlink_seq_socket_idx(seq, 0);
- s = sk_next(v);
+ iter = seq->private;
+ s = v;
+ do {
+ s = sk_next(s);
+ } while (s && (iter->net != s->sk_net));
if (s)
return s;
- iter = seq->private;
i = iter->link;
j = iter->hash_idx + 1;
for (; j <= hash->mask; j++) {
s = sk_head(&hash->table[j]);
+ while (s && (iter->net != s->sk_net))
+ s = sk_next(s);
if (s) {
iter->link = i;
iter->hash_idx = j;
static int netlink_seq_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nl_seq_iter *iter;
- int err;
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ iter = __seq_open_private(file, &netlink_seq_ops, sizeof(*iter));
if (!iter)
return -ENOMEM;
- err = seq_open(file, &netlink_seq_ops);
- if (err) {
- kfree(iter);
- return err;
+ iter->net = get_proc_net(inode);
+ if (!iter->net) {
+ seq_release_private(inode, file);
+ return -ENXIO;
}
- seq = file->private_data;
- seq->private = iter;
return 0;
}
+static int netlink_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct nl_seq_iter *iter = seq->private;
+ put_net(iter->net);
+ return seq_release_private(inode, file);
+}
+
static const struct file_operations netlink_seq_fops = {
.owner = THIS_MODULE,
.open = netlink_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = netlink_seq_release,
};
#endif
.owner = THIS_MODULE, /* for consistency 8) */
};
+static int __net_init netlink_net_init(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
+ return -ENOMEM;
+#endif
+ return 0;
+}
+
+static void __net_exit netlink_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "netlink");
+#endif
+}
+
+static struct pernet_operations netlink_net_ops = {
+ .init = netlink_net_init,
+ .exit = netlink_net_exit,
+};
+
static int __init netlink_proto_init(void)
{
struct sk_buff *dummy_skb;
int i;
- unsigned long max;
+ unsigned long limit;
unsigned int order;
int err = proto_register(&netlink_proto, 0);
goto panic;
if (num_physpages >= (128 * 1024))
- max = num_physpages >> (21 - PAGE_SHIFT);
+ limit = num_physpages >> (21 - PAGE_SHIFT);
else
- max = num_physpages >> (23 - PAGE_SHIFT);
+ limit = num_physpages >> (23 - PAGE_SHIFT);
- order = get_bitmask_order(max) - 1 + PAGE_SHIFT;
- max = (1UL << order) / sizeof(struct hlist_head);
- order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1;
+ order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
+ limit = (1UL << order) / sizeof(struct hlist_head);
+ order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
for (i = 0; i < MAX_LINKS; i++) {
struct nl_pid_hash *hash = &nl_table[i].hash;
}
sock_register(&netlink_family_ops);
-#ifdef CONFIG_PROC_FS
- proc_net_fops_create("netlink", 0, &netlink_seq_fops);
-#endif
+ register_pernet_subsys(&netlink_net_ops);
/* The netlink device handler may be needed early. */
rtnetlink_init();
out:
core_initcall(netlink_proto_init);
EXPORT_SYMBOL(netlink_ack);
-EXPORT_SYMBOL(netlink_run_queue);
+EXPORT_SYMBOL(netlink_rcv_skb);
EXPORT_SYMBOL(netlink_broadcast);
EXPORT_SYMBOL(netlink_dump_start);
EXPORT_SYMBOL(netlink_kernel_create);