source: src/linux/universal/linux-3.18/virt/kvm/eventfd.c @ 31885

Last change on this file since 31885 was 31885, checked in by brainslayer, 3 months ago (update)

/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *      Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <trace/events/kvm.h>

#ifdef __KVM_HAVE_IOAPIC
#include "ioapic.h"
#endif
#include "iodev.h"

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * --------------------------------------------------------------------
 * irqfd: Allows an fd to be used to inject an interrupt to the guest
 *
 * Credit goes to Avi Kivity for the original idea.
 * --------------------------------------------------------------------
 */

/*
 * Resampling irqfds are a special variety of irqfds used to emulate
 * level triggered interrupts.  The interrupt is asserted on eventfd
 * trigger.  On acknowledgement through the irq ack notifier, the
 * interrupt is de-asserted and userspace is notified through the
 * resamplefd.  All resamplers on the same gsi are de-asserted
 * together, so we don't need to track the state of each individual
 * user.  We can also therefore share the same irq source ID.
 */
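
/*
 * For illustration only: a minimal sketch of how userspace would wire up
 * a resampling irqfd through the KVM_IRQFD ioctl (error handling omitted;
 * the vm_fd variable and gsi 10 are assumptions of the example, not part
 * of this file):
 *
 *        struct kvm_irqfd args = {
 *                .fd         = eventfd(0, EFD_CLOEXEC),
 *                .resamplefd = eventfd(0, EFD_CLOEXEC),
 *                .gsi        = 10,
 *                .flags      = KVM_IRQFD_FLAG_RESAMPLE,
 *        };
 *        ioctl(vm_fd, KVM_IRQFD, &args);
 *
 * A write to args.fd asserts gsi 10; once the guest acknowledges the
 * interrupt, the line is de-asserted and args.resamplefd is signaled.
 */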
struct _irqfd_resampler {
        struct kvm *kvm;
        /*
         * List of resampling struct _irqfd objects sharing this gsi.
         * RCU list modified under kvm->irqfds.resampler_lock
         */
        struct list_head list;
        struct kvm_irq_ack_notifier notifier;
        /*
         * Entry in kvm->irqfds.resampler_list.  Used for sharing
         * resamplers among irqfds on the same gsi.
         * Accessed and modified under kvm->irqfds.resampler_lock
         */
        struct list_head link;
};

struct _irqfd {
        /* Used for MSI fast-path */
        struct kvm *kvm;
        wait_queue_t wait;
        /* Update side is protected by irqfds.lock */
        struct kvm_kernel_irq_routing_entry irq_entry;
        seqcount_t irq_entry_sc;
        /* Used for level IRQ fast-path */
        int gsi;
        struct work_struct inject;
        /* The resampler used by this irqfd (resampler-only) */
        struct _irqfd_resampler *resampler;
        /* Eventfd notified on resample (resampler-only) */
        struct eventfd_ctx *resamplefd;
        /* Entry in list of irqfds for a resampler (resampler-only) */
        struct list_head resampler_link;
        /* Used for setup/shutdown */
        struct eventfd_ctx *eventfd;
        struct list_head list;
        poll_table pt;
        struct work_struct shutdown;
};

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
        struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
        struct kvm *kvm = irqfd->kvm;

        if (!irqfd->resampler) {
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
                                false);
                kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
                                false);
        } else
                kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                            irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
        struct _irqfd_resampler *resampler;
        struct kvm *kvm;
        struct _irqfd *irqfd;
        int idx;

        resampler = container_of(kian, struct _irqfd_resampler, notifier);
        kvm = resampler->kvm;

        kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                    resampler->notifier.gsi, 0, false);

        idx = srcu_read_lock(&kvm->irq_srcu);

        list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
                eventfd_signal(irqfd->resamplefd, 1);

        srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct _irqfd *irqfd)
{
        struct _irqfd_resampler *resampler = irqfd->resampler;
        struct kvm *kvm = resampler->kvm;

        mutex_lock(&kvm->irqfds.resampler_lock);

        list_del_rcu(&irqfd->resampler_link);
        synchronize_srcu(&kvm->irq_srcu);

        if (list_empty(&resampler->list)) {
                list_del(&resampler->link);
                kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
                kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                            resampler->notifier.gsi, 0, false);
                kfree(resampler);
        }

        mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
        struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
        u64 cnt;

        /*
         * Synchronize with the wait-queue and unhook ourselves to prevent
         * further events.
         */
        eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

        /*
         * We know no new events will be scheduled at this point, so block
         * until all previously outstanding events have completed
         */
        flush_work(&irqfd->inject);

        if (irqfd->resampler) {
                irqfd_resampler_shutdown(irqfd);
                eventfd_ctx_put(irqfd->resamplefd);
        }

        /*
         * It is now safe to release the object's resources
         */
        eventfd_ctx_put(irqfd->eventfd);
        kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct _irqfd *irqfd)
{
        return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct _irqfd *irqfd)
{
        BUG_ON(!irqfd_is_active(irqfd));

        list_del_init(&irqfd->list);

        queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
        struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
        unsigned long flags = (unsigned long)key;
        struct kvm_kernel_irq_routing_entry irq;
        struct kvm *kvm = irqfd->kvm;
        unsigned seq;
        int idx;

        if (flags & POLLIN) {
                idx = srcu_read_lock(&kvm->irq_srcu);
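                /*
                 * Lockless read of the routing entry: retry the snapshot
                 * if it races with the seqcount write side in
                 * irqfd_update().
                 */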
                do {
                        seq = read_seqcount_begin(&irqfd->irq_entry_sc);
                        irq = irqfd->irq_entry;
                } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
                /* An event has been signaled, inject an interrupt */
                if (irq.type == KVM_IRQ_ROUTING_MSI)
                        kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
                                        false);
                else
                        schedule_work(&irqfd->inject);
                srcu_read_unlock(&kvm->irq_srcu, idx);
        }

        if (flags & POLLHUP) {
                /* The eventfd is closing, detach from KVM */
                unsigned long flags;

                spin_lock_irqsave(&kvm->irqfds.lock, flags);

                /*
                 * We must check if someone deactivated the irqfd before
                 * we could acquire the irqfds.lock since the item is
                 * deactivated from the KVM side before it is unhooked from
                 * the wait-queue.  If it is already deactivated, we can
                 * simply return knowing the other side will cleanup for us.
                 * We cannot race against the irqfd going away since the
                 * other side is required to acquire wqh->lock, which we hold
                 */
                if (irqfd_is_active(irqfd))
                        irqfd_deactivate(irqfd);

                spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
        }

        return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
                        poll_table *pt)
{
        struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
        add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
{
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
        int i, n_entries;

        n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

        write_seqcount_begin(&irqfd->irq_entry_sc);

        irqfd->irq_entry.type = 0;

        e = entries;
        for (i = 0; i < n_entries; ++i, ++e) {
                /* Only fast-path MSI. */
                if (e->type == KVM_IRQ_ROUTING_MSI)
                        irqfd->irq_entry = *e;
        }

        write_seqcount_end(&irqfd->irq_entry_sc);
}

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
        struct _irqfd *irqfd, *tmp;
        struct fd f;
        struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
        int ret;
        unsigned int events;
        int idx;

        irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
        if (!irqfd)
                return -ENOMEM;

        irqfd->kvm = kvm;
        irqfd->gsi = args->gsi;
        INIT_LIST_HEAD(&irqfd->list);
        INIT_WORK(&irqfd->inject, irqfd_inject);
        INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
        seqcount_init(&irqfd->irq_entry_sc);

        f = fdget(args->fd);
        if (!f.file) {
                ret = -EBADF;
                goto out;
        }

        eventfd = eventfd_ctx_fileget(f.file);
        if (IS_ERR(eventfd)) {
                ret = PTR_ERR(eventfd);
                goto fail;
        }

        irqfd->eventfd = eventfd;

        if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
                struct _irqfd_resampler *resampler;

                resamplefd = eventfd_ctx_fdget(args->resamplefd);
                if (IS_ERR(resamplefd)) {
                        ret = PTR_ERR(resamplefd);
                        goto fail;
                }

                irqfd->resamplefd = resamplefd;
                INIT_LIST_HEAD(&irqfd->resampler_link);

                mutex_lock(&kvm->irqfds.resampler_lock);

                list_for_each_entry(resampler,
                                    &kvm->irqfds.resampler_list, link) {
                        if (resampler->notifier.gsi == irqfd->gsi) {
                                irqfd->resampler = resampler;
                                break;
                        }
                }

                if (!irqfd->resampler) {
                        resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
                        if (!resampler) {
                                ret = -ENOMEM;
                                mutex_unlock(&kvm->irqfds.resampler_lock);
                                goto fail;
                        }

                        resampler->kvm = kvm;
                        INIT_LIST_HEAD(&resampler->list);
                        resampler->notifier.gsi = irqfd->gsi;
                        resampler->notifier.irq_acked = irqfd_resampler_ack;
                        INIT_LIST_HEAD(&resampler->link);

                        list_add(&resampler->link, &kvm->irqfds.resampler_list);
                        kvm_register_irq_ack_notifier(kvm,
                                                      &resampler->notifier);
                        irqfd->resampler = resampler;
                }

                list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
                synchronize_srcu(&kvm->irq_srcu);

                mutex_unlock(&kvm->irqfds.resampler_lock);
        }

        /*
         * Install our own custom wake-up handling so we are notified via
         * a callback whenever someone signals the underlying eventfd
         */
        init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
        init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

        spin_lock_irq(&kvm->irqfds.lock);

        ret = 0;
        list_for_each_entry(tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd != tmp->eventfd)
                        continue;
                /* This fd is used for another irq already. */
                ret = -EBUSY;
                spin_unlock_irq(&kvm->irqfds.lock);
                goto fail;
        }

        idx = srcu_read_lock(&kvm->irq_srcu);
        irqfd_update(kvm, irqfd);
        srcu_read_unlock(&kvm->irq_srcu, idx);

        list_add_tail(&irqfd->list, &kvm->irqfds.items);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Check if there was an event already pending on the eventfd
         * before we registered, and trigger it as if we didn't miss it.
         */
        events = f.file->f_op->poll(f.file, &irqfd->pt);

        if (events & POLLIN)
                schedule_work(&irqfd->inject);

        /*
         * Do not drop the file until the irqfd is fully initialized;
         * otherwise we might race against the POLLHUP.
         */
        fdput(f);

        return 0;

fail:
        if (irqfd->resampler)
                irqfd_resampler_shutdown(irqfd);

        if (resamplefd && !IS_ERR(resamplefd))
                eventfd_ctx_put(resamplefd);

        if (eventfd && !IS_ERR(eventfd))
                eventfd_ctx_put(eventfd);

        fdput(f);

out:
        kfree(irqfd);
        return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
        int gsi, idx;

        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
                hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
                                         link)
                        if (kian->gsi == gsi) {
                                srcu_read_unlock(&kvm->irq_srcu, idx);
                                return true;
                        }

        srcu_read_unlock(&kvm->irq_srcu, idx);

        return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
        struct kvm_irq_ack_notifier *kian;
        int gsi, idx;

        trace_kvm_ack_irq(irqchip, pin);

        idx = srcu_read_lock(&kvm->irq_srcu);
        gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
        if (gsi != -1)
                hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
                                         link)
                        if (kian->gsi == gsi)
                                kian->irq_acked(kian);
        srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
        mutex_unlock(&kvm->irq_lock);
#ifdef __KVM_HAVE_IOAPIC
        kvm_vcpu_request_scan_ioapic(kvm);
#endif
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                                    struct kvm_irq_ack_notifier *kian)
{
        mutex_lock(&kvm->irq_lock);
        hlist_del_init_rcu(&kian->link);
        mutex_unlock(&kvm->irq_lock);
        synchronize_srcu(&kvm->irq_srcu);
#ifdef __KVM_HAVE_IOAPIC
        kvm_vcpu_request_scan_ioapic(kvm);
#endif
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
        spin_lock_init(&kvm->irqfds.lock);
        INIT_LIST_HEAD(&kvm->irqfds.items);
        INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
        mutex_init(&kvm->irqfds.resampler_lock);
#endif
        INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
        struct _irqfd *irqfd, *tmp;
        struct eventfd_ctx *eventfd;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
                if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
                        /*
                         * This clearing of irq_entry.type is needed for when
                         * another thread calls kvm_irq_routing_update before
                         * we flush workqueue below (we synchronize with
                         * kvm_irq_routing_update using irqfds.lock).
                         */
                        write_seqcount_begin(&irqfd->irq_entry_sc);
                        irqfd->irq_entry.type = 0;
                        write_seqcount_end(&irqfd->irq_entry_sc);
                        irqfd_deactivate(irqfd);
                }
        }

        spin_unlock_irq(&kvm->irqfds.lock);
        eventfd_ctx_put(eventfd);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * so that we guarantee there will not be any more interrupts on this
         * gsi once this deassign function returns.
         */
        flush_workqueue(irqfd_cleanup_wq);

        return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
        if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
                return -EINVAL;

        if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
                return kvm_irqfd_deassign(kvm, args);

        return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released.  Shut down
 * all irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
        struct _irqfd *irqfd, *tmp;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
                irqfd_deactivate(irqfd);

        spin_unlock_irq(&kvm->irqfds.lock);

        /*
         * Block until we know all outstanding shutdown jobs have completed
         * since we do not take a kvm* reference.
         */
        flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
        struct _irqfd *irqfd;

        spin_lock_irq(&kvm->irqfds.lock);

        list_for_each_entry(irqfd, &kvm->irqfds.items, list)
                irqfd_update(kvm, irqfd);

        spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated single-thread
 * queue to prevent deadlock against flushing the normal work-queue.
 */
int kvm_irqfd_init(void)
{
        irqfd_cleanup_wq = create_singlethread_workqueue("kvm-irqfd-cleanup");
        if (!irqfd_cleanup_wq)
                return -ENOMEM;

        return 0;
}

void kvm_irqfd_exit(void)
{
        destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
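
/*
 * For illustration only: a minimal sketch of the matching userspace
 * registration through the KVM_IOEVENTFD ioctl (error handling omitted;
 * the vm_fd variable and the doorbell address are assumptions of the
 * example, not part of this file):
 *
 *        struct kvm_ioeventfd args = {
 *                .addr      = 0xd0000000,  (hypothetical MMIO doorbell)
 *                .len       = 4,
 *                .fd        = eventfd(0, EFD_CLOEXEC),
 *                .datamatch = 1,
 *                .flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *        };
 *        ioctl(vm_fd, KVM_IOEVENTFD, &args);
 *
 * A 4-byte guest write of 1 to that address then signals args.fd via
 * ioeventfd_write() below, without a heavyweight exit to userspace.
 */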

struct _ioeventfd {
        struct list_head     list;
        u64                  addr;
        int                  length;
        struct eventfd_ctx  *eventfd;
        u64                  datamatch;
        struct kvm_io_device dev;
        u8                   bus_idx;
        bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
        return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
        eventfd_ctx_put(p->eventfd);
        list_del(&p->list);
        kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
        u64 _val;

        if (addr != p->addr)
                /* address must be precise for a hit */
                return false;

        if (!p->length)
                /* length = 0 means only look at the address, so always a hit */
                return true;

        if (len != p->length)
                /* address-range must be precise for a hit */
                return false;

        if (p->wildcard)
                /* all else equal, wildcard is always a hit */
                return true;

        /* otherwise, we have to actually compare the data */

        BUG_ON(!IS_ALIGNED((unsigned long)val, len));

        switch (len) {
        case 1:
                _val = *(u8 *)val;
                break;
        case 2:
                _val = *(u16 *)val;
                break;
        case 4:
                _val = *(u32 *)val;
                break;
        case 8:
                _val = *(u64 *)val;
                break;
        default:
                return false;
        }

        return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
                const void *val)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        if (!ioeventfd_in_range(p, addr, len, val))
                return -EOPNOTSUPP;

        eventfd_signal(p->eventfd, 1);
        return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
        struct _ioeventfd *p = to_ioeventfd(this);

        ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
        .write      = ioeventfd_write,
        .destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
        struct _ioeventfd *_p;

        list_for_each_entry(_p, &kvm->ioeventfds, list)
                if (_p->bus_idx == p->bus_idx &&
                    _p->addr == p->addr &&
                    (!_p->length || !p->length ||
                     (_p->length == p->length &&
                      (_p->wildcard || p->wildcard ||
                       _p->datamatch == p->datamatch))))
                        return true;

        return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
        if (flags & KVM_IOEVENTFD_FLAG_PIO)
                return KVM_PIO_BUS;
        if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
                return KVM_VIRTIO_CCW_NOTIFY_BUS;
        return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
                                enum kvm_bus bus_idx,
                                struct kvm_ioeventfd *args)
{
        struct eventfd_ctx *eventfd;
        struct _ioeventfd *p;
        int ret;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        p = kzalloc(sizeof(*p), GFP_KERNEL);
        if (!p) {
                ret = -ENOMEM;
                goto fail;
        }

        INIT_LIST_HEAD(&p->list);
        p->addr    = args->addr;
        p->bus_idx = bus_idx;
        p->length  = args->len;
        p->eventfd = eventfd;

        /* The datamatch feature is optional, otherwise this is a wildcard */
        if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
                p->datamatch = args->datamatch;
        else
                p->wildcard = true;

        mutex_lock(&kvm->slots_lock);

        /* Verify that there isn't a match already */
        if (ioeventfd_check_collision(kvm, p)) {
                ret = -EEXIST;
                goto unlock_fail;
        }

        kvm_iodevice_init(&p->dev, &ioeventfd_ops);

        ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
                                      &p->dev);
        if (ret < 0)
                goto unlock_fail;

        kvm->buses[bus_idx]->ioeventfd_count++;
        list_add_tail(&p->list, &kvm->ioeventfds);

        mutex_unlock(&kvm->slots_lock);

        return 0;

unlock_fail:
        mutex_unlock(&kvm->slots_lock);

fail:
        kfree(p);
        eventfd_ctx_put(eventfd);

        return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
                           struct kvm_ioeventfd *args)
{
        struct _ioeventfd        *p, *tmp;
        struct eventfd_ctx       *eventfd;
        int                       ret = -ENOENT;

        eventfd = eventfd_ctx_fdget(args->fd);
        if (IS_ERR(eventfd))
                return PTR_ERR(eventfd);

        mutex_lock(&kvm->slots_lock);

        list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
                bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

                if (p->bus_idx != bus_idx ||
                    p->eventfd != eventfd  ||
                    p->addr != args->addr  ||
                    p->length != args->len ||
                    p->wildcard != wildcard)
                        continue;

                if (!p->wildcard && p->datamatch != args->datamatch)
                        continue;

                kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
                if (kvm->buses[bus_idx])
                        kvm->buses[bus_idx]->ioeventfd_count--;
                ioeventfd_release(p);
                ret = 0;
                break;
        }

        mutex_unlock(&kvm->slots_lock);

        eventfd_ctx_put(eventfd);

        return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
        int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

        if (!args->len && bus_idx == KVM_MMIO_BUS)
                kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

        return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        enum kvm_bus              bus_idx;
        int ret;

        bus_idx = ioeventfd_bus_from_flags(args->flags);
        /* must be natural-word sized, or 0 to ignore length */
        switch (args->len) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
                break;
        default:
                return -EINVAL;
        }

        /* check for range overflow */
        if (args->addr + args->len < args->addr)
                return -EINVAL;

        /* check for extra flags that we don't understand */
        if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
                return -EINVAL;

        /* ioeventfd with no length can't be combined with DATAMATCH */
        if (!args->len &&
            args->flags & (KVM_IOEVENTFD_FLAG_PIO |
                           KVM_IOEVENTFD_FLAG_DATAMATCH))
                return -EINVAL;

        ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
        if (ret)
                goto fail;

        /* When length is ignored, MMIO is also put on a separate bus, for
         * faster lookups.
         */
        if (!args->len && bus_idx == KVM_MMIO_BUS) {
                ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
                if (ret < 0)
                        goto fast_fail;
        }

        return 0;

fast_fail:
        kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
        return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
        if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
                return kvm_deassign_ioeventfd(kvm, args);

        return kvm_assign_ioeventfd(kvm, args);
}