source: src/linux/universal/linux-4.4/drivers/hv/hv.c @ 31662

Last change on this file since 31662 was 31662, checked in by brainslayer, 3 months ago

use new squashfs in all kernels

File size: 15.4 KB
Line 
1/*
2 * Copyright (c) 2009, Microsoft Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Authors:
18 *   Haiyang Zhang <haiyangz@microsoft.com>
19 *   Hank Janssen  <hjanssen@microsoft.com>
20 *
21 */
22#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
23
24#include <linux/kernel.h>
25#include <linux/mm.h>
26#include <linux/slab.h>
27#include <linux/vmalloc.h>
28#include <linux/hyperv.h>
29#include <linux/version.h>
30#include <linux/interrupt.h>
31#include <linux/clockchips.h>
32#include <asm/hyperv.h>
33#include <asm/mshyperv.h>
34#include "hyperv_vmbus.h"
35
36/* The one and only */
37struct hv_context hv_context = {
38        .synic_initialized      = false,
39        .hypercall_page         = NULL,
40};
41
/* Clockevent frequency passed at registration: 10 MHz, i.e. a 100ns period. */
#define HV_TIMER_FREQUENCY	(10 * 1000 * 1000)
/* Largest delta (in ticks) accepted when programming the clockevent. */
#define HV_MAX_MAX_DELTA_TICKS	0xffffffff
/* Smallest delta (in ticks) accepted when programming the clockevent. */
#define HV_MIN_DELTA_TICKS	1
45
/*
 * Raw register values returned by the HVCPUID_VERSION cpuid leaf.  They are
 * filled in by query_hypervisor_info() and stay zero when that leaf is not
 * reported as available.
 */
unsigned int host_info_eax;
unsigned int host_info_ebx;
unsigned int host_info_ecx;
unsigned int host_info_edx;
53
54static int query_hypervisor_info(void)
55{
56        unsigned int eax;
57        unsigned int ebx;
58        unsigned int ecx;
59        unsigned int edx;
60        unsigned int max_leaf;
61        unsigned int op;
62
63        /*
64        * Its assumed that this is called after confirming that Viridian
65        * is present. Query id and revision.
66        */
67        eax = 0;
68        ebx = 0;
69        ecx = 0;
70        edx = 0;
71        op = HVCPUID_VENDOR_MAXFUNCTION;
72        cpuid(op, &eax, &ebx, &ecx, &edx);
73
74        max_leaf = eax;
75
76        if (max_leaf >= HVCPUID_VERSION) {
77                eax = 0;
78                ebx = 0;
79                ecx = 0;
80                edx = 0;
81                op = HVCPUID_VERSION;
82                cpuid(op, &eax, &ebx, &ecx, &edx);
83                host_info_eax = eax;
84                host_info_ebx = ebx;
85                host_info_ecx = ecx;
86                host_info_edx = edx;
87        }
88        return max_leaf;
89}
90
91/*
92 * do_hypercall- Invoke the specified hypercall
93 */
94static u64 do_hypercall(u64 control, void *input, void *output)
95{
96        u64 input_address = (input) ? virt_to_phys(input) : 0;
97        u64 output_address = (output) ? virt_to_phys(output) : 0;
98        void *hypercall_page = hv_context.hypercall_page;
99#ifdef CONFIG_X86_64
100        u64 hv_status = 0;
101
102        if (!hypercall_page)
103                return (u64)ULLONG_MAX;
104
105        __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8");
106        __asm__ __volatile__("call *%3" : "=a" (hv_status) :
107                             "c" (control), "d" (input_address),
108                             "m" (hypercall_page));
109
110        return hv_status;
111
112#else
113
114        u32 control_hi = control >> 32;
115        u32 control_lo = control & 0xFFFFFFFF;
116        u32 hv_status_hi = 1;
117        u32 hv_status_lo = 1;
118        u32 input_address_hi = input_address >> 32;
119        u32 input_address_lo = input_address & 0xFFFFFFFF;
120        u32 output_address_hi = output_address >> 32;
121        u32 output_address_lo = output_address & 0xFFFFFFFF;
122
123        if (!hypercall_page)
124                return (u64)ULLONG_MAX;
125
126        __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi),
127                              "=a"(hv_status_lo) : "d" (control_hi),
128                              "a" (control_lo), "b" (input_address_hi),
129                              "c" (input_address_lo), "D"(output_address_hi),
130                              "S"(output_address_lo), "m" (hypercall_page));
131
132        return hv_status_lo | ((u64)hv_status_hi << 32);
133#endif /* !x86_64 */
134}
135
136#ifdef CONFIG_X86_64
137static cycle_t read_hv_clock_tsc(struct clocksource *arg)
138{
139        cycle_t current_tick;
140        struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page;
141
142        if (tsc_pg->tsc_sequence != -1) {
143                /*
144                 * Use the tsc page to compute the value.
145                 */
146
147                while (1) {
148                        cycle_t tmp;
149                        u32 sequence = tsc_pg->tsc_sequence;
150                        u64 cur_tsc;
151                        u64 scale = tsc_pg->tsc_scale;
152                        s64 offset = tsc_pg->tsc_offset;
153
154                        rdtscll(cur_tsc);
155                        /* current_tick = ((cur_tsc *scale) >> 64) + offset */
156                        asm("mulq %3"
157                                : "=d" (current_tick), "=a" (tmp)
158                                : "a" (cur_tsc), "r" (scale));
159
160                        current_tick += offset;
161                        if (tsc_pg->tsc_sequence == sequence)
162                                return current_tick;
163
164                        if (tsc_pg->tsc_sequence != -1)
165                                continue;
166                        /*
167                         * Fallback using MSR method.
168                         */
169                        break;
170                }
171        }
172        rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
173        return current_tick;
174}
175
176static struct clocksource hyperv_cs_tsc = {
177                .name           = "hyperv_clocksource_tsc_page",
178                .rating         = 425,
179                .read           = read_hv_clock_tsc,
180                .mask           = CLOCKSOURCE_MASK(64),
181                .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
182};
183#endif
184
185
186/*
187 * hv_init - Main initialization routine.
188 *
189 * This routine must be called before any other routines in here are called
190 */
191int hv_init(void)
192{
193        int max_leaf;
194        union hv_x64_msr_hypercall_contents hypercall_msr;
195        union hv_x64_msr_hypercall_contents tsc_msr;
196        void *virtaddr = NULL;
197        void *va_tsc = NULL;
198
199        memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS);
200        memset(hv_context.synic_message_page, 0,
201               sizeof(void *) * NR_CPUS);
202        memset(hv_context.post_msg_page, 0,
203               sizeof(void *) * NR_CPUS);
204        memset(hv_context.vp_index, 0,
205               sizeof(int) * NR_CPUS);
206        memset(hv_context.event_dpc, 0,
207               sizeof(void *) * NR_CPUS);
208        memset(hv_context.clk_evt, 0,
209               sizeof(void *) * NR_CPUS);
210
211        max_leaf = query_hypervisor_info();
212
213        /*
214         * Write our OS ID.
215         */
216        hv_context.guestid = generate_guest_id(0, LINUX_VERSION_CODE, 0);
217        wrmsrl(HV_X64_MSR_GUEST_OS_ID, hv_context.guestid);
218
219        /* See if the hypercall page is already set */
220        rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
221
222        virtaddr = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
223
224        if (!virtaddr)
225                goto cleanup;
226
227        hypercall_msr.enable = 1;
228
229        hypercall_msr.guest_physical_address = vmalloc_to_pfn(virtaddr);
230        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
231
232        /* Confirm that hypercall page did get setup. */
233        hypercall_msr.as_uint64 = 0;
234        rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
235
236        if (!hypercall_msr.enable)
237                goto cleanup;
238
239        hv_context.hypercall_page = virtaddr;
240
241#ifdef CONFIG_X86_64
242        if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
243                va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
244                if (!va_tsc)
245                        goto cleanup;
246                hv_context.tsc_page = va_tsc;
247
248                rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
249
250                tsc_msr.enable = 1;
251                tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc);
252
253                wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
254                clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
255        }
256#endif
257        return 0;
258
259cleanup:
260        if (virtaddr) {
261                if (hypercall_msr.enable) {
262                        hypercall_msr.as_uint64 = 0;
263                        wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
264                }
265
266                vfree(virtaddr);
267        }
268
269        return -ENOTSUPP;
270}
271
272/*
273 * hv_cleanup - Cleanup routine.
274 *
275 * This routine is called normally during driver unloading or exiting.
276 */
277void hv_cleanup(void)
278{
279        union hv_x64_msr_hypercall_contents hypercall_msr;
280
281        /* Reset our OS id */
282        wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
283
284        if (hv_context.hypercall_page) {
285                hypercall_msr.as_uint64 = 0;
286                wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
287                vfree(hv_context.hypercall_page);
288                hv_context.hypercall_page = NULL;
289        }
290
291#ifdef CONFIG_X86_64
292        /*
293         * Cleanup the TSC page based CS.
294         */
295        if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
296                /*
297                 * Crash can happen in an interrupt context and unregistering
298                 * a clocksource is impossible and redundant in this case.
299                 */
300                if (!oops_in_progress) {
301                        clocksource_change_rating(&hyperv_cs_tsc, 10);
302                        clocksource_unregister(&hyperv_cs_tsc);
303                }
304
305                hypercall_msr.as_uint64 = 0;
306                wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
307                vfree(hv_context.tsc_page);
308                hv_context.tsc_page = NULL;
309        }
310#endif
311}
312
313/*
314 * hv_post_message - Post a message using the hypervisor message IPC.
315 *
316 * This involves a hypercall.
317 */
318int hv_post_message(union hv_connection_id connection_id,
319                  enum hv_message_type message_type,
320                  void *payload, size_t payload_size)
321{
322
323        struct hv_input_post_message *aligned_msg;
324        u16 status;
325
326        if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
327                return -EMSGSIZE;
328
329        aligned_msg = (struct hv_input_post_message *)
330                        hv_context.post_msg_page[get_cpu()];
331
332        aligned_msg->connectionid = connection_id;
333        aligned_msg->reserved = 0;
334        aligned_msg->message_type = message_type;
335        aligned_msg->payload_size = payload_size;
336        memcpy((void *)aligned_msg->payload, payload, payload_size);
337
338        status = do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL)
339                & 0xFFFF;
340
341        put_cpu();
342        return status;
343}
344
345
346/*
347 * hv_signal_event -
348 * Signal an event on the specified connection using the hypervisor event IPC.
349 *
350 * This involves a hypercall.
351 */
352u16 hv_signal_event(void *con_id)
353{
354        u16 status;
355
356        status = (do_hypercall(HVCALL_SIGNAL_EVENT, con_id, NULL) & 0xFFFF);
357
358        return status;
359}
360
361static int hv_ce_set_next_event(unsigned long delta,
362                                struct clock_event_device *evt)
363{
364        cycle_t current_tick;
365
366        WARN_ON(!clockevent_state_oneshot(evt));
367
368        rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
369        current_tick += delta;
370        wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick);
371        return 0;
372}
373
374static int hv_ce_shutdown(struct clock_event_device *evt)
375{
376        wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0);
377        wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0);
378
379        return 0;
380}
381
382static int hv_ce_set_oneshot(struct clock_event_device *evt)
383{
384        union hv_timer_config timer_cfg;
385
386        timer_cfg.enable = 1;
387        timer_cfg.auto_enable = 1;
388        timer_cfg.sintx = VMBUS_MESSAGE_SINT;
389        wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
390
391        return 0;
392}
393
394static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
395{
396        dev->name = "Hyper-V clockevent";
397        dev->features = CLOCK_EVT_FEAT_ONESHOT;
398        dev->cpumask = cpumask_of(cpu);
399        dev->rating = 1000;
400        /*
401         * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
402         * result in clockevents_config_and_register() taking additional
403         * references to the hv_vmbus module making it impossible to unload.
404         */
405
406        dev->set_state_shutdown = hv_ce_shutdown;
407        dev->set_state_oneshot = hv_ce_set_oneshot;
408        dev->set_next_event = hv_ce_set_next_event;
409}
410
411
412int hv_synic_alloc(void)
413{
414        size_t size = sizeof(struct tasklet_struct);
415        size_t ced_size = sizeof(struct clock_event_device);
416        int cpu;
417
418        hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids,
419                                         GFP_ATOMIC);
420        if (hv_context.hv_numa_map == NULL) {
421                pr_err("Unable to allocate NUMA map\n");
422                goto err;
423        }
424
425        for_each_present_cpu(cpu) {
426                hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC);
427                if (hv_context.event_dpc[cpu] == NULL) {
428                        pr_err("Unable to allocate event dpc\n");
429                        goto err;
430                }
431                tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu);
432
433                hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC);
434                if (hv_context.clk_evt[cpu] == NULL) {
435                        pr_err("Unable to allocate clock event device\n");
436                        goto err;
437                }
438
439                hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu);
440
441                hv_context.synic_message_page[cpu] =
442                        (void *)get_zeroed_page(GFP_ATOMIC);
443
444                if (hv_context.synic_message_page[cpu] == NULL) {
445                        pr_err("Unable to allocate SYNIC message page\n");
446                        goto err;
447                }
448
449                hv_context.synic_event_page[cpu] =
450                        (void *)get_zeroed_page(GFP_ATOMIC);
451
452                if (hv_context.synic_event_page[cpu] == NULL) {
453                        pr_err("Unable to allocate SYNIC event page\n");
454                        goto err;
455                }
456
457                hv_context.post_msg_page[cpu] =
458                        (void *)get_zeroed_page(GFP_ATOMIC);
459
460                if (hv_context.post_msg_page[cpu] == NULL) {
461                        pr_err("Unable to allocate post msg page\n");
462                        goto err;
463                }
464
465                INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
466        }
467
468        return 0;
469err:
470        return -ENOMEM;
471}
472
473static void hv_synic_free_cpu(int cpu)
474{
475        kfree(hv_context.event_dpc[cpu]);
476        kfree(hv_context.clk_evt[cpu]);
477        if (hv_context.synic_event_page[cpu])
478                free_page((unsigned long)hv_context.synic_event_page[cpu]);
479        if (hv_context.synic_message_page[cpu])
480                free_page((unsigned long)hv_context.synic_message_page[cpu]);
481        if (hv_context.post_msg_page[cpu])
482                free_page((unsigned long)hv_context.post_msg_page[cpu]);
483}
484
485void hv_synic_free(void)
486{
487        int cpu;
488
489        kfree(hv_context.hv_numa_map);
490        for_each_present_cpu(cpu)
491                hv_synic_free_cpu(cpu);
492}
493
494/*
495 * hv_synic_init - Initialize the Synthethic Interrupt Controller.
496 *
497 * If it is already initialized by another entity (ie x2v shim), we need to
498 * retrieve the initialized message and event pages.  Otherwise, we create and
499 * initialize the message and event pages.
500 */
501void hv_synic_init(void *arg)
502{
503        u64 version;
504        union hv_synic_simp simp;
505        union hv_synic_siefp siefp;
506        union hv_synic_sint shared_sint;
507        union hv_synic_scontrol sctrl;
508        u64 vp_index;
509
510        int cpu = smp_processor_id();
511
512        if (!hv_context.hypercall_page)
513                return;
514
515        /* Check the version */
516        rdmsrl(HV_X64_MSR_SVERSION, version);
517
518        /* Setup the Synic's message page */
519        rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
520        simp.simp_enabled = 1;
521        simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu])
522                >> PAGE_SHIFT;
523
524        wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
525
526        /* Setup the Synic's event page */
527        rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
528        siefp.siefp_enabled = 1;
529        siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu])
530                >> PAGE_SHIFT;
531
532        wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
533
534        /* Setup the shared SINT. */
535        rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
536
537        shared_sint.as_uint64 = 0;
538        shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR;
539        shared_sint.masked = false;
540        shared_sint.auto_eoi = true;
541
542        wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
543
544        /* Enable the global synic bit */
545        rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
546        sctrl.enable = 1;
547
548        wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
549
550        hv_context.synic_initialized = true;
551
552        /*
553         * Setup the mapping between Hyper-V's notion
554         * of cpuid and Linux' notion of cpuid.
555         * This array will be indexed using Linux cpuid.
556         */
557        rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
558        hv_context.vp_index[cpu] = (u32)vp_index;
559
560        /*
561         * Register the per-cpu clockevent source.
562         */
563        if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
564                clockevents_config_and_register(hv_context.clk_evt[cpu],
565                                                HV_TIMER_FREQUENCY,
566                                                HV_MIN_DELTA_TICKS,
567                                                HV_MAX_MAX_DELTA_TICKS);
568        return;
569}
570
571/*
572 * hv_synic_clockevents_cleanup - Cleanup clockevent devices
573 */
574void hv_synic_clockevents_cleanup(void)
575{
576        int cpu;
577
578        if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
579                return;
580
581        for_each_online_cpu(cpu)
582                clockevents_unbind_device(hv_context.clk_evt[cpu], cpu);
583}
584
585/*
586 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
587 */
588void hv_synic_cleanup(void *arg)
589{
590        union hv_synic_sint shared_sint;
591        union hv_synic_simp simp;
592        union hv_synic_siefp siefp;
593        union hv_synic_scontrol sctrl;
594        int cpu = smp_processor_id();
595
596        if (!hv_context.synic_initialized)
597                return;
598
599        /* Turn off clockevent device */
600        if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)
601                hv_ce_shutdown(hv_context.clk_evt[cpu]);
602
603        rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
604
605        shared_sint.masked = 1;
606
607        /* Need to correctly cleanup in the case of SMP!!! */
608        /* Disable the interrupt */
609        wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
610
611        rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
612        simp.simp_enabled = 0;
613        simp.base_simp_gpa = 0;
614
615        wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64);
616
617        rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
618        siefp.siefp_enabled = 0;
619        siefp.base_siefp_gpa = 0;
620
621        wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64);
622
623        /* Disable the global synic bit */
624        rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
625        sctrl.enable = 0;
626        wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64);
627}
Note: See TracBrowser for help on using the repository browser.