Colinux base Index: linux-2.6.33-source/CREDITS =================================================================== --- linux-2.6.33-source.orig/CREDITS +++ linux-2.6.33-source/CREDITS @@ -16,6 +16,13 @@ S: (ask for current address) S: Finland +N: Dan Aloni +E: da-x@colinux.org +W: http://www.colinux.org +D: Cooperative Linux +D: Various kernel patches +S: Israel + N: Dragos Acostachioaie E: dragos@iname.com W: http://www.arbornet.org/~dragos Index: linux-2.6.33-source/Makefile =================================================================== --- linux-2.6.33-source.orig/Makefile +++ linux-2.6.33-source/Makefile @@ -312,7 +312,11 @@ AS = $(CROSS_COMPILE)as LD = $(CROSS_COMPILE)ld +ifeq ($(GCCTRACE),Y) +CC = $(COLINUX_ROOT)/bin/tracewrapper.py $(CROSS_COMPILE)gcc +else CC = $(CROSS_COMPILE)gcc +endif CPP = $(CC) -E AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm Index: linux-2.6.33-source/arch/x86/Kconfig =================================================================== --- linux-2.6.33-source.orig/arch/x86/Kconfig +++ linux-2.6.33-source/arch/x86/Kconfig @@ -42,7 +42,7 @@ select HAVE_KVM select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select HAVE_GENERIC_DMA_COHERENT if X86_32 + select HAVE_GENERIC_DMA_COHERENT if (X86_32 && !COOPERATIVE) select HAVE_EFFICIENT_UNALIGNED_ACCESS select USER_STACKTRACE_SUPPORT select HAVE_DMA_API_DEBUG @@ -71,6 +71,7 @@ config GENERIC_CMOS_UPDATE def_bool y + depends on !COOPERATIVE config CLOCKSOURCE_WATCHDOG def_bool y @@ -96,12 +97,14 @@ config ZONE_DMA def_bool y + depends on HAS_DMA config SBUS bool config GENERIC_ISA_DMA def_bool y + depends on HAS_DMA config GENERIC_IOMAP def_bool y @@ -194,6 +197,8 @@ config GENERIC_HARDIRQS_NO__DO_IRQ def_bool y +# FIXME: Use generic_handle_irq() instead of __do_IRQ() + depends on !COOPERATIVE config GENERIC_IRQ_PROBE bool @@ -241,6 +246,7 @@ config SMP bool "Symmetric multi-processing support" + depends on !COOPERATIVE ---help--- This enables support for systems with more than one CPU. 
If you have a system with only one CPU, like most personal computers, say N. If @@ -592,6 +598,7 @@ config HPET_TIMER def_bool X86_64 prompt "HPET Timer Support" if X86_32 + depends on !COOPERATIVE ---help--- Use the IA-PC HPET (High Precision Event Timer) to manage time in preference to the PIT and RTC, if a HPET is @@ -617,6 +624,7 @@ config DMI default y bool "Enable DMI scanning" if EMBEDDED + depends on !COOPERATIVE ---help--- Enabled scanning of DMI to identify machine quirks. Say Y here unless you have verified that your setup is not @@ -780,6 +788,10 @@ to use it. If you say Y here even though your machine doesn't have an IO-APIC, then the kernel will still run with no slowdown at all. +config X86_UP_COPIC + bool 'Cooperative PIC (COPIC) support' + depends on COOPERATIVE + config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC @@ -894,6 +906,7 @@ config I8K tristate "Dell laptop support" + depends on !COOPERATIVE ---help--- This adds a driver to safely access the System Management Mode of the CPU on the Dell Inspiron 8000. The System Management Mode @@ -915,6 +928,7 @@ config X86_REBOOTFIXUPS bool "Enable X86 board specific fixups for reboot" depends on X86_32 + depends on !COOPERATIVE ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. This is only needed on @@ -994,6 +1008,7 @@ default HIGHMEM4G if !X86_NUMAQ default HIGHMEM64G if X86_NUMAQ depends on X86_32 + depends on !COOPERATIVE config NOHIGHMEM bool "off" @@ -1100,6 +1115,7 @@ config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G + depends on !COOPERATIVE ---help--- PAE is required for NX support, and furthermore enables larger swapspace support for non-overcommit purposes. 
It @@ -1230,6 +1246,7 @@ config ARCH_SPARSEMEM_ENABLE def_bool y depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD + depends on !COOPERATIVE select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 @@ -1336,6 +1353,7 @@ config MTRR bool default y + depends on !COOPERATIVE prompt "MTRR (Memory Type Range Register) support" if EMBEDDED ---help--- On Intel P6 family processors (Pentium Pro, Pentium II and later) @@ -1470,6 +1488,7 @@ config KEXEC bool "kexec system call" + depends on !COOPERATIVE ---help--- kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot @@ -1603,6 +1622,26 @@ automatically on SMP systems. ) Say N if you want to disable CPU hotplug. +config COOPERATIVE + bool 'Cooperative Mode' + default y + +config COLINUX_STATS + bool 'Cooperative Linux stats' + depends on COOPERATIVE + default y + help + OS switch counters readable in /proc/colinux/stats. + +# FIXME: IOMEM should be disabled, but it is needed by the keyboard and serial devices +#config NO_IOMEM +# depends on COOPERATIVE +# def_bool y + +config NO_DMA + def_bool y + depends on COOPERATIVE + config COMPAT_VDSO def_bool y prompt "Compat VDSO support" @@ -1676,6 +1715,7 @@ depends on NUMA menu "Power management and ACPI options" + depends on !COOPERATIVE config ARCH_HIBERNATION_HEADER def_bool y Index: linux-2.6.33-source/arch/x86/Kconfig.cpu =================================================================== --- linux-2.6.33-source.orig/arch/x86/Kconfig.cpu +++ linux-2.6.33-source/arch/x86/Kconfig.cpu @@ -392,7 +392,7 @@ config X86_TSC def_bool y - depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64 + depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || 
MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ && !COOPERATIVE) || X86_64 config X86_CMPXCHG64 def_bool y Index: linux-2.6.33-source/arch/x86/kernel/Makefile =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/Makefile +++ linux-2.6.33-source/arch/x86/kernel/Makefile @@ -2,7 +2,11 @@ # Makefile for the linux kernel. # -extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds +extra-y := head_$(BITS).o +ifndef CONFIG_COOPERATIVE +extra-y += head$(BITS).o head.o +endif +extra-y += init_task.o vmlinux.lds CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) @@ -31,17 +35,24 @@ obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time.o ioport.o ldt.o dumpstack.o -obj-y += setup.o x86_init.o i8259.o irqinit.o +obj-y += ldt.o dumpstack.o +obj-y += setup.o irqinit.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o +ifndef CONFIG_COOPERATIVE obj-$(CONFIG_X86_32) += probe_roms_32.o +endif obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o -obj-y += bootflag.o e820.o -obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o +ifndef CONFIG_COOPERATIVE +obj-y += bootflag.o +endif +obj-$(CONFIG_HAS_DMA) += pci-dma.o pci-nommu.o +obj-y += quirks.o i8237.o topology.o kdebugfs.o +obj-y += alternative.o hw_breakpoint.o +ifndef CONFIG_COOPERATIVE obj-y += tsc.o io_delay.o rtc.o +endif obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o @@ -57,11 +68,12 @@ obj-y += cpu/ obj-y += acpi/ obj-$(CONFIG_SFI) += sfi.o -obj-y += reboot.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o +ifndef CONFIG_COOPERATIVE 
obj-$(CONFIG_PCI) += early-quirks.o +endif apm-y := apm_32.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_SMP) += smp.o @@ -130,3 +142,19 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o obj-y += vsmp_64.o endif + +### +# coLinux specific files +ifdef CONFIG_COOPERATIVE +obj-y += cooperative.o +obj-y += timer_cooperative.o +obj-y += ioport_cooperative.o i8259_cooperative.o +obj-y += reboot_cooperative.o +obj-y += x86_init_cooperative.o +else +obj-y += i8253.o i8259.o +obj-y += time.o ioport.o +obj-y += e820.o +obj-y += reboot.o +obj-y += x86_init.o +endif Index: linux-2.6.33-source/arch/x86/kernel/cooperative.c =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/kernel/cooperative.c @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +CO_TRACE_STOP; + +#ifdef ENABLE_PASSAGE_HOLDING_CHECK +int co_passage_page_holding_count = 0; +#endif +bool co_host_fpu_saved = 0; +char co_host_fpu[0x200] __attribute__ ((__aligned__(16))); + +static void __init co_early_cpu_init(void) +{ + /* + * On the first switch to Linux we must set up a valid TR because + * the passage page code assumes such one exists. This is basically + * copied code from cpu_init(). + * + * P.S this is protected by CO_TRACE_STOP so that we don't + * have a monitor context switch. 
+ */ + struct task_struct *me; + struct tss_struct *t; + int cpu; + int i; + struct desc_struct *gdt; + + cpu = stack_smp_processor_id(); + t = &per_cpu(init_tss, cpu); + + me = current; + + write_cr4(mmu_cr4_features); + + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + + switch_to_new_gdt(cpu); + loadsegment(fs, 0); + + load_idt(&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); + + /* + * Delete NT + */ + __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); + + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. + */ + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); + set_tss_desc(cpu, t); + gdt = get_cpu_gdt_table(cpu); + gdt[GDT_ENTRY_TSS].b &= 0xfffffdff; + + load_TR_desc(); + load_LDT(&init_mm.context); + + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); +} + +/* + * 'co_start' is the first Linux code that runs in the + * coLinux kernel context. It receives %ecx which contains the + * address of the passage page. The passage page code sets %ecx + * to this value in its context restore part. + */ + +void __init co_start(void) +{ + co_early_cpu_init(); + co_start_kernel(); +} + +static void co_switch_wrapper_protected(void) +{ + kernel_fpu_begin(); + + if (co_host_fpu_saved) { + CO_FPU_RESTORE(co_host_fpu); + co_host_fpu_saved = 0; + } + + /* And switch... 
*/ + co_switch(); + + kernel_fpu_end(); +} + +void co_switch_wrapper(void) +{ + /* taken from irq.c: debugging check for stack overflow */ + long esp; + + __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); + if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { + printk("co_switch_wrapper: stack overflow: %ld\n", esp - sizeof(struct thread_info)); + co_terminate(CO_TERMINATE_STACK_OVERFLOW); + } + +#ifdef CONFIG_COLINUX_STATS + co_proc_counts.switches[co_passage_page->operation]++; +#endif + co_switch_wrapper_protected(); +} + +void co_debug(const char *fmt, ...) +{ +} + +#define MAX_TRACE_POINTS 1024 + +typedef struct { + unsigned char *code; + unsigned char original_byte; + int off; +} co_tracepoint_t; + +co_tracepoint_t tracepoints[MAX_TRACE_POINTS]; +static int active_tracepoints = 0; + +void co_kernel_breakpoint(struct pt_regs * regs) +{ + int i = 0; + unsigned char *code = (unsigned char *)regs->ip; + if (!code) + return; + + code--; + for (i=0; i < active_tracepoints; i++) { + if (tracepoints[i].code == code) { + co_debug("TRACEPOINT: %p", code); + break; + } + } + + if (i == active_tracepoints) { + /* Bad, we don't know this tracepoint */ + co_terminate(CO_TERMINATE_INVALID_OPERATION); + return; + } + + *tracepoints[i].code = tracepoints[i].original_byte; + regs->flags |= (1 << 8); /* Enable TF */ + regs->ip = (unsigned long)code; + tracepoints[i].off = 1; +} + +void co_kernel_set_breakpoints(void) +{ + int i; + + for (i=0; i < active_tracepoints; i++) + if (tracepoints[i].code && tracepoints[i].off) { + *tracepoints[i].code = 0xcc; + tracepoints[i].off = 0; + } +} + +int co_kernel_debug(struct pt_regs *regs, long error_code, unsigned int condition) +{ + /* if not a single step trap */ + if (!(condition & DR_STEP)) + return 0; + + /* if userspace */ + if (regs->cs & 3) + return 0; + + regs->flags &= ~(1 << 8); /* Disable TF */ + + co_kernel_set_breakpoints(); + + return 1; +} + +void co_kernel_tracepoint_add(unsigned char 
*code) +{ + if (active_tracepoints >= MAX_TRACE_POINTS) + return; + + tracepoints[active_tracepoints].code = code; + tracepoints[active_tracepoints].original_byte = *code; + tracepoints[active_tracepoints].off = 0; + active_tracepoints++; + *code = 0xcc; +} + +co_arch_info_t co_arch_info = { + .kernel_cs = __KERNEL_CS, + .kernel_ds = __KERNEL_DS, +}; + +CO_TRACE_CONTINUE; Index: linux-2.6.33-source/arch/x86/kernel/entry_32.S =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/entry_32.S +++ linux-2.6.33-source/arch/x86/kernel/entry_32.S @@ -864,7 +864,11 @@ SAVE_ALL TRACE_IRQS_OFF movl %esp,%eax +#ifdef CONFIG_COOPERATIVE + call proxy_interrupt_handler +#else call do_IRQ +#endif jmp ret_from_intr ENDPROC(common_interrupt) CFI_ENDPROC @@ -917,6 +921,9 @@ CFI_ADJUST_CFA_OFFSET 4 pushl $do_device_not_available CFI_ADJUST_CFA_OFFSET 4 +#if defined(CONFIG_COOPERATIVE) + DISABLE_INTERRUPTS(CLBR_NONE) # FIXME: Do we really need this? +#endif jmp error_code CFI_ENDPROC END(device_not_available) Index: linux-2.6.33-source/arch/x86/kernel/head_32.S =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/head_32.S +++ linux-2.6.33-source/arch/x86/kernel/head_32.S @@ -359,6 +359,7 @@ */ call setup_idt +ENTRY(co_arch_start_kernel) checkCPUtype: movl $-1,X86_CPUID # -1 for no CPUID initially @@ -567,6 +568,10 @@ #endif call dump_stack hlt_loop: +#ifdef CONFIG_COOPERATIVE + movl $2,%eax /* CO_TERMINATE_PANIC */ + call co_terminate +#endif hlt jmp hlt_loop @@ -607,7 +612,11 @@ __REFDATA .align 4 ENTRY(initial_code) +#ifdef CONFIG_COOPERATIVE + .long start_kernel +#else /* CONFIG_COOPERATIVE */ .long i386_start_kernel +#endif /* * BSS section @@ -655,7 +664,7 @@ .data ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-100 .long __BOOT_DS ready: .byte 0 Index: linux-2.6.33-source/arch/x86/kernel/setup.c 
=================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/setup.c +++ linux-2.6.33-source/arch/x86/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -299,8 +300,10 @@ static void __init reserve_brk(void) { +#ifndef CONFIG_COOPERATIVE if (_brk_end > _brk_start) reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); +#endif /* Mark brk area as locked down and no longer taking any new allocations */ @@ -309,6 +312,7 @@ #ifdef CONFIG_BLK_DEV_INITRD +#ifndef CONFIG_COOPERATIVE #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { @@ -371,9 +375,20 @@ ramdisk_image, ramdisk_image + ramdisk_size - 1, ramdisk_here, ramdisk_here + ramdisk_size - 1); } +#endif /* !CONFIG_COOPERATIVE */ static void __init reserve_initrd(void) { +#ifdef CONFIG_COOPERATIVE + if (co_boot_params.co_initrd != NULL) { + initrd_start = (unsigned long)co_boot_params.co_initrd; + initrd_end = (unsigned long)co_boot_params.co_initrd + co_boot_params.co_initrd_size; + printk(KERN_INFO "initrd enabled: 0x%lx-0x%lx size: 0x%08lx\n", + initrd_start, initrd_end, co_boot_params.co_initrd_size); + + reserve_bootmem(virt_to_phys(co_boot_params.co_initrd), co_boot_params.co_initrd_size, BOOTMEM_DEFAULT); + } +#else /* CONFIG_COOPERATIVE */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 ramdisk_end = ramdisk_image + ramdisk_size; @@ -410,6 +425,7 @@ relocate_initrd(); free_early(ramdisk_image, ramdisk_end); +#endif /* CONFIG_COOPERATIVE */ } #else static void __init reserve_initrd(void) @@ -417,6 +433,7 @@ } #endif /* CONFIG_BLK_DEV_INITRD */ +#ifndef CONFIG_COOPERATIVE static void __init parse_setup_data(void) { struct setup_data *data; @@ -482,6 +499,7 @@ early_iounmap(data, sizeof(*data)); } } +#endif /* !CONFIG_COOPERATIVE */ /* * --------- Crashkernel reservation ------------------------------ @@ -549,6 +567,7 @@ } 
#endif +#ifndef CONFIG_COOPERATIVE static struct resource standard_io_resources[] = { { .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, @@ -581,6 +600,7 @@ request_resource(&ioport_resource, &standard_io_resources[i]); } +#endif /* !CONFIG_COOPERATIVE */ /* * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by @@ -618,6 +638,7 @@ } #endif +#ifndef CONFIG_COOPERATIVE /* List of systems that have known low memory corruption BIOS problems */ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { #ifdef CONFIG_X86_RESERVE_LOW_64K @@ -677,6 +698,7 @@ #endif {} }; +#endif /* !CONFIG_COOPERATIVE */ /* * Determine if we were loaded by an EFI loader. If so, then we have also been @@ -751,10 +773,12 @@ x86_init.oem.arch_setup(); +#ifndef CONFIG_COOPERATIVE setup_memory_map(); parse_setup_data(); /* update the e820_saved too */ e820_reserve_setup_data(); +#endif /* !CONFIG_COOPERATIVE */ copy_edd(); @@ -772,6 +796,9 @@ bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_COOPERATIVE + strlcpy(boot_command_line, co_boot_params.co_boot_parameters, COMMAND_LINE_SIZE); +#endif #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); @@ -804,8 +831,10 @@ /* Must be before kernel pagetables are setup */ vmi_activate(); +#ifndef CONFIG_COOPERATIVE /* after early param, so could get panic from serial */ reserve_early_setup_data(); +#endif /* !CONFIG_COOPERATIVE */ if (acpi_mps_check()) { #ifdef CONFIG_X86_LOCAL_APIC @@ -814,16 +843,19 @@ setup_clear_cpu_cap(X86_FEATURE_APIC); } +#ifndef CONFIG_COOPERATIVE #ifdef CONFIG_PCI if (pci_early_dump_regs) early_dump_pci_devices(); #endif finish_e820_parsing(); +#endif /* !CONFIG_COOPERATIVE */ if (efi_enabled) efi_init(); +#ifndef CONFIG_COOPERATIVE dmi_scan_machine(); dmi_check_system(bad_bios_dmi_table); @@ -835,13 +867,16 @@ 
init_hypervisor_platform(); x86_init.resources.probe_roms(); +#endif /* !CONFIG_COOPERATIVE */ /* after parse_early_param, so could debug it */ insert_resource(&iomem_resource, &code_resource); insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - +#ifdef CONFIG_COOPERATIVE + max_pfn = co_boot_params.co_memory_size >> PAGE_SHIFT; +#else /* CONFIG_COOPERATIVE */ #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, @@ -866,6 +901,7 @@ mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) max_pfn = e820_end_of_ram_pfn(); +#endif /* CONFIG_COOPERATIVE */ #ifdef CONFIG_X86_32 /* max_low_pfn get updated here */ @@ -933,8 +969,6 @@ init_ohci1394_dma_on_all_controllers(); #endif - reserve_initrd(); - reserve_crashkernel(); vsmp_init(); @@ -961,6 +995,7 @@ #endif initmem_init(0, max_pfn, acpi, k8); + reserve_initrd(); #ifdef CONFIG_X86_64 /* @@ -1018,15 +1053,21 @@ kvm_guest_init(); +#ifndef CONFIG_COOPERATIVE e820_reserve_resources(); e820_mark_nosave_regions(max_low_pfn); +#endif /* !CONFIG_COOPERATIVE */ x86_init.resources.reserve_resources(); +#ifndef CONFIG_COOPERATIVE e820_setup_gap(); +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_VT -#if defined(CONFIG_VGA_CONSOLE) +#ifdef CONFIG_COOPERATIVE_CONSOLE + conswitchp = &colinux_con; +#elif defined(CONFIG_VGA_CONSOLE) if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) conswitchp = &vga_con; #elif defined(CONFIG_DUMMY_CONSOLE) @@ -1040,6 +1081,7 @@ #ifdef CONFIG_X86_32 +#ifndef CONFIG_COOPERATIVE static struct resource video_ram_resource = { .name = "Video RAM area", .start = 0xa0000, @@ -1052,5 +1094,6 @@ request_resource(&iomem_resource, &video_ram_resource); reserve_standard_io_resources(); } +#endif /* !CONFIG_COOPERATIVE */ #endif /* CONFIG_X86_32 */ Index: linux-2.6.33-source/arch/x86/kernel/traps.c =================================================================== --- 
linux-2.6.33-source.orig/arch/x86/kernel/traps.c +++ linux-2.6.33-source/arch/x86/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -56,6 +57,7 @@ #include #include +#include #ifdef CONFIG_X86_64 #include @@ -172,6 +174,10 @@ return; kernel_trap: + if (cooperative_mode_enabled() && trapnr == 3) { + co_kernel_breakpoint(regs); + return; + } if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; @@ -308,6 +314,7 @@ die("general protection fault", regs, error_code); } +#ifndef CONFIG_COOPERATIVE static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { @@ -431,6 +438,7 @@ reassert_nmi(); #endif } +#endif /* !CONFIG_COOPERATIVE */ dotraplinkage notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) @@ -439,8 +447,10 @@ inc_irq_stat(__nmi_count); +#ifndef CONFIG_COOPERATIVE if (!ignore_nmis) default_do_nmi(regs); +#endif /* !CONFIG_COOPERATIVE */ nmi_exit(); } @@ -558,6 +568,10 @@ /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; + if (cooperative_mode_enabled() && + co_kernel_debug(regs, error_code, dr6)) + return; + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, SIGTRAP) == NOTIFY_STOP) return; @@ -833,6 +847,13 @@ clts(); /* Allow maths ops (or we recurse) */ +#ifdef CONFIG_COOPERATIVE + if (!co_host_fpu_saved) { + CO_FPU_SAVE(co_host_fpu); + co_host_fpu_saved = 1; + } +#endif /* CONFIG_COOPERATIVE */ + __math_state_restore(); } EXPORT_SYMBOL_GPL(math_state_restore); Index: linux-2.6.33-source/arch/x86/mm/fault.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/mm/fault.c +++ linux-2.6.33-source/arch/x86/mm/fault.c @@ -317,7 +317,7 @@ goto out; pte = pte_offset_kernel(pmd, address); - printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte)); + printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)CO_P_TO_PP(pte_val(*pte))); out: 
printk("\n"); } Index: linux-2.6.33-source/arch/x86/mm/init_32.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/mm/init_32.c +++ linux-2.6.33-source/arch/x86/mm/init_32.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -54,10 +55,15 @@ unsigned long highstart_pfn, highend_pfn; +#ifndef CONFIG_COOPERATIVE static noinline int do_test_wp_bit(void); +#endif bool __read_mostly __vmalloc_start_set = false; +/* colinux start_va */ +static long co_start_va = 0; + static __init void *alloc_low_page(void) { unsigned long pfn = e820_table_end++; @@ -121,7 +127,7 @@ page_table = (pte_t *)alloc_low_page(); paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(page_table)) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } @@ -223,6 +229,7 @@ } } +#ifndef CONFIG_COOPERATIVE static inline int is_kernel_text(unsigned long addr) { if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) @@ -370,6 +377,7 @@ } return 0; } +#endif /* !CONFIG_COOPERATIVE */ pte_t *kmap_pte; pgprot_t kmap_prot; @@ -471,6 +479,7 @@ void __init native_pagetable_setup_start(pgd_t *base) { +#ifndef CONFIG_COOPERATIVE unsigned long pfn, va; pgd_t *pgd; pud_t *pud; @@ -499,6 +508,7 @@ pte_clear(NULL, va, pte); } paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); +#endif /* CONFIG_COOPERATIVE */ } void __init native_pagetable_setup_done(pgd_t *base) @@ -717,7 +727,18 @@ num_physpages = highend_pfn; high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; #else +#ifdef CONFIG_COOPERATIVE + /* Allocate boot memory from host */ + max_low_pfn = max_pfn = co_boot_params.co_memory_size >> PAGE_SHIFT; + min_low_pfn = PFN_UP(__pa((unsigned long)&_end)) + 0x10; + co_start_va = (unsigned long)__va(min_low_pfn << PAGE_SHIFT); + co_alloc_pages(co_start_va, 0x20); + + /* Add single region without 
check via e820_find_active_region */ + add_active_range(0, 0, max_low_pfn); +#else /* CONFIG_COOPERATIVE */ e820_register_active_regions(0, 0, max_low_pfn); +#endif /* CONFIG_COOPERATIVE */ sparse_memory_present_with_active_regions(0); num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; @@ -738,8 +759,10 @@ { unsigned long max_zone_pfns[MAX_NR_ZONES]; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); +#ifndef CONFIG_COOPERATIVE max_zone_pfns[ZONE_DMA] = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; +#endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; @@ -748,6 +771,7 @@ free_area_init_nodes(max_zone_pfns); } +#ifndef CONFIG_COOPERATIVE static unsigned long __init setup_node_bootmem(int nodeid, unsigned long start_pfn, unsigned long end_pfn, @@ -768,25 +792,40 @@ return bootmap + bootmap_size; } +#endif /* !CONFIG_COOPERATIVE */ void __init setup_bootmem_allocator(void) { - int nodeid; - unsigned long bootmap_size, bootmap; /* * Initialize the boot-time allocator (with low memory only): */ +#ifdef CONFIG_COOPERATIVE + unsigned long bootmap_size; + + bootmap_size = init_bootmem(min_low_pfn, max_low_pfn); + { + unsigned long bootmem_end = co_start_va + bootmap_size + (0x10 << PAGE_SHIFT); + unsigned long physical_end = __PAGE_OFFSET + (max_low_pfn << PAGE_SHIFT); + + free_bootmem(__pa(bootmem_end), physical_end - bootmem_end); + } +#else /* CONFIG_COOPERATIVE */ + int nodeid; + unsigned long bootmap_size, bootmap; + bootmap_size = bootmem_bootmap_pages(max_low_pfn)<> 10, @@ -911,9 +955,19 @@ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, (LAST_PKMAP*PAGE_SIZE) >> 10, #endif +#ifdef CONFIG_COOPERATIVE + CO_VPTR_BASE_START, CO_VPTR_BASE_END, + (CO_VPTR_BASE_END - CO_VPTR_BASE_START) >> 20, +#endif VMALLOC_START, VMALLOC_END, - (VMALLOC_END - VMALLOC_START) >> 20, + (VMALLOC_END - VMALLOC_START) >> 20); + + printk(KERN_INFO + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" 
+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", (unsigned long)__va(0), (unsigned long)high_memory, ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, @@ -946,11 +1000,19 @@ BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); BUG_ON(VMALLOC_END > PKMAP_BASE); #endif +#ifdef CONFIG_COOPERATIVE + BUG_ON(CO_VPTR_BASE_END > FIXADDR_START); + BUG_ON(VMALLOC_END > CO_VPTR_BASE_START); + if (VMALLOC_START > VMALLOC_END) + panic("LOWMEM overlaps vmalloc. Decrease total memory with 'mem=...'!"); +#endif BUG_ON(VMALLOC_START >= VMALLOC_END); BUG_ON((unsigned long)high_memory > VMALLOC_START); +#ifndef CONFIG_COOPERATIVE if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); +#endif save_pg_dir(); zap_low_mappings(true); @@ -972,6 +1034,7 @@ * This function cannot be __init, since exceptions don't work in that * section. Put this after the callers, so that it cannot be inlined. */ +#ifndef CONFIG_COOPERATIVE static noinline int do_test_wp_bit(void) { char tmp_reg; @@ -991,6 +1054,7 @@ return flag; } +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; Index: linux-2.6.33-source/arch/x86/mm/init.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/mm/init.c +++ linux-2.6.33-source/arch/x86/mm/init.c @@ -28,6 +28,7 @@ #endif ; +#ifndef CONFIG_COOPERATIVE static void __init find_early_table_space(unsigned long end, int use_pse, int use_gbpages) { @@ -86,6 +87,7 @@ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); } +#endif /* !CONFIG_COOPERATIVE */ struct map_range { unsigned long start; @@ -146,6 +148,7 @@ use_gbpages = direct_gbpages; #endif +#ifndef CONFIG_COOPERATIVE /* Enable PSE if available */ if (cpu_has_pse) set_in_cr4(X86_CR4_PSE); @@ -160,6 +163,7 @@ page_size_mask |= 1 << PG_LEVEL_1G; if 
(use_pse) page_size_mask |= 1 << PG_LEVEL_2M; +#endif /* !CONFIG_COOPERATIVE */ memset(mr, 0, sizeof(mr)); nr_range = 0; @@ -256,6 +260,9 @@ (mr[i].page_size_mask & (1<> PAGE_SHIFT; } @@ -349,16 +357,20 @@ * create a kernel page fault: */ #ifdef CONFIG_DEBUG_PAGEALLOC +#ifndef CONFIG_COOPERATIVE printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", begin, PAGE_ALIGN(end)); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +#endif /* !CONFIG_COOPERATIVE */ #else /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that * writeable first. */ +#ifndef CONFIG_COOPERATIVE set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); +#endif /* !CONFIG_COOPERATIVE */ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); Index: linux-2.6.33-source/arch/x86/mm/pageattr.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/mm/pageattr.c +++ linux-2.6.33-source/arch/x86/mm/pageattr.c @@ -1122,6 +1122,8 @@ } EXPORT_SYMBOL(set_memory_nx); +// FIXME: Make it usable. 
Flags must change on host too +#ifndef CONFIG_COOPERATIVE int set_memory_ro(unsigned long addr, int numpages) { return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); @@ -1138,6 +1140,7 @@ { return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); } +#endif /* !CONFIG_COOPERATIVE */ int set_memory_4k(unsigned long addr, int numpages) { @@ -1234,6 +1237,7 @@ } EXPORT_SYMBOL(set_pages_nx); +#ifndef CONFIG_COOPERATIVE int set_pages_ro(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); @@ -1247,6 +1251,7 @@ return set_memory_rw(addr, numpages); } +#endif /* !CONFIG_COOPERATIVE */ #ifdef CONFIG_DEBUG_PAGEALLOC Index: linux-2.6.33-source/arch/x86/mm/ioremap.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/mm/ioremap.c +++ linux-2.6.33-source/arch/x86/mm/ioremap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -26,8 +27,10 @@ int page_is_ram(unsigned long pagenr) { +#ifndef CONFIG_COOPERATIVE resource_size_t addr, end; int i; +#endif /* * A special case is the first 4Kb of memory; @@ -45,6 +48,7 @@ pagenr < (BIOS_END >> PAGE_SHIFT)) return 0; +#ifndef CONFIG_COOPERATIVE for (i = 0; i < e820.nr_map; i++) { /* * Not usable memory: @@ -58,6 +62,7 @@ if ((pagenr >= addr) && (pagenr < end)) return 1; } +#endif /* !CONFIG_COOPERATIVE */ return 0; } @@ -87,6 +92,7 @@ return err; } +#ifndef CONFIG_COOPERATIVE /* * Remap an arbitrary physical address space into the kernel virtual * address space. 
Needed when the kernel wants to access high addresses @@ -216,6 +222,7 @@ free_memtype(phys_addr, phys_addr + size); return NULL; } +#endif /* !CONFIG_COOPERATIVE */ /** * ioremap_nocache - map bus memory into CPU space @@ -240,6 +247,11 @@ */ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap_nocache %zu:%lu\n", phys_addr, size); + return NULL; +#else /* CONFIG_COOPERATIVE */ + /* * Ideally, this should be: * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS; @@ -251,6 +263,7 @@ return __ioremap_caller(phys_addr, size, val, __builtin_return_address(0)); +#endif /* CONFIG_COOPERATIVE */ } EXPORT_SYMBOL(ioremap_nocache); @@ -266,26 +279,41 @@ */ void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap_wc %zu:%lu\n", phys_addr, size); + return NULL; +#else /* CONFIG_COOPERATIVE */ if (pat_enabled) return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, __builtin_return_address(0)); else return ioremap_nocache(phys_addr, size); +#endif /* CONFIG_COOPERATIVE */ } EXPORT_SYMBOL(ioremap_wc); void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap_cache %zu:%lu\n", phys_addr, size); + return NULL; +#else /* CONFIG_COOPERATIVE */ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB, __builtin_return_address(0)); +#endif /* CONFIG_COOPERATIVE */ } EXPORT_SYMBOL(ioremap_cache); void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, unsigned long prot_val) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap_prot %zu:%lu\n", phys_addr, size); + return NULL; +#else /* CONFIG_COOPERATIVE */ return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), __builtin_return_address(0)); +#endif /* CONFIG_COOPERATIVE */ } EXPORT_SYMBOL(ioremap_prot); Index: linux-2.6.33-source/arch/x86/include/asm/bug.h 
=================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/bug.h +++ linux-2.6.33-source/arch/x86/include/asm/bug.h @@ -4,6 +4,10 @@ #ifdef CONFIG_BUG #define HAVE_ARCH_BUG +#ifdef CONFIG_COOPERATIVE /* guest build: BUG() hands line and file to co_terminate_bug() instead of using the native definitions in the #else branch */ +extern void co_terminate_bug(int code, int line, const char *file); +#define BUG() do { co_terminate_bug(0, __LINE__, __FILE__); } while(0) +#else /* CONFIG_COOPERATIVE */ #ifdef CONFIG_DEBUG_BUGVERBOSE #ifdef CONFIG_X86_32 @@ -34,6 +38,7 @@ #endif #endif /* !CONFIG_BUG */ +#endif /* CONFIG_COOPERATIVE */ #include #endif /* _ASM_X86_BUG_H */ Index: linux-2.6.33-source/arch/x86/include/asm/cooperative.h =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/include/asm/cooperative.h @@ -0,0 +1,205 @@ +/* + * linux/include/asm/cooperative.h + * + * Copyright (C) 2004 Dan Aloni + * + * This file defines the lower level interfaces between the Cooperative Linux + * kernel and the host OS driver. It's for both external inclusion from the + * host OS driver and internal inclusion in the kernel sources. 
+ */ + +#ifndef __LINUX_ASM_COOPERATIVE_H__ +#define __LINUX_ASM_COOPERATIVE_H__ + +typedef struct { /* packed: 16-bit size immediately followed by the table pointer */ + unsigned short size; + struct x86_idt_entry *table; +} __attribute__((packed)) x86_idt_t; + +typedef struct { /* packed: 16-bit limit immediately followed by the base pointer */ + unsigned short limit; + struct x86_dt_entry *base; +} __attribute__((packed)) x86_gdt_t; + +typedef struct { + unsigned char border2[0x4]; /* offset 0; each CO_ARCH_STATE_STACK_* string below is the byte offset of the field declared just above it (packed layout, 4-byte unsigned long) */ + + unsigned long cs; + #define CO_ARCH_STATE_STACK_CS "0x04" + + unsigned long ds; + #define CO_ARCH_STATE_STACK_DS "0x08" + + unsigned long es; + #define CO_ARCH_STATE_STACK_ES "0x0C" + + unsigned long cr3; + #define CO_ARCH_STATE_STACK_CR3 "0x10" + + unsigned long cr4; + #define CO_ARCH_STATE_STACK_CR4 "0x14" + + unsigned long cr2; + #define CO_ARCH_STATE_STACK_CR2 "0x18" + + unsigned long cr0; + #define CO_ARCH_STATE_STACK_CR0 "0x1C" + + x86_gdt_t gdt; /* 6 bytes, hence the next offset is 0x26 */ + #define CO_ARCH_STATE_STACK_GDT "0x20" + + unsigned long fs; + #define CO_ARCH_STATE_STACK_FS "0x26" + + unsigned long gs; + #define CO_ARCH_STATE_STACK_GS "0x2A" + + unsigned short ldt; + #define CO_ARCH_STATE_STACK_LDT "0x2E" + + x86_idt_t idt; /* 6 bytes, hence the next offset is 0x36 */ + #define CO_ARCH_STATE_STACK_IDT "0x30" + + unsigned short tr; + #define CO_ARCH_STATE_STACK_TR "0x36" + + unsigned long return_eip; + #define CO_ARCH_STATE_STACK_RETURN_EIP "0x38" + + unsigned long flags; + #define CO_ARCH_STATE_STACK_FLAGS "0x3C" + + unsigned long esp; + #define CO_ARCH_STATE_STACK_ESP "0x40" + + unsigned long ss; + #define CO_ARCH_STATE_STACK_SS "0x44" + + unsigned long dr0; + #define CO_ARCH_STATE_STACK_DR0 "0x48" + + unsigned long dr1; + #define CO_ARCH_STATE_STACK_DR1 "0x4C" + + unsigned long dr2; + #define CO_ARCH_STATE_STACK_DR2 "0x50" + + unsigned long dr3; + #define CO_ARCH_STATE_STACK_DR3 "0x54" + + unsigned long dr6; + #define CO_ARCH_STATE_STACK_DR6 "0x58" + + unsigned long dr7; + #define CO_ARCH_STATE_STACK_DR7 "0x5C" + + union { /* temp_cr3 and other_map share offset 0x60 */ + unsigned long temp_cr3; + unsigned long other_map; + } __attribute__((packed)); + #define CO_ARCH_STATE_STACK_TEMP_CR3 "0x60" + #define 
CO_ARCH_STATE_STACK_OTHERMAP "0x60" + + unsigned long relocate_eip; + #define CO_ARCH_STATE_STACK_RELOCATE_EIP "0x64" + + unsigned long pad1; + #define CO_ARCH_STATE_STACK_RELOCATE_EIP_AFTER "0x68" + + unsigned long va; + #define CO_ARCH_STATE_STACK_VA "0x6C" + + unsigned long sysenter_cs; + #define CO_ARCH_STATE_SYSENTER_CS "0x70" + + unsigned long sysenter_esp; + #define CO_ARCH_STATE_SYSENTER_ESP "0x74" + + unsigned long sysenter_eip; + #define CO_ARCH_STATE_SYSENTER_EIP "0x78" +} __attribute__((packed)) co_arch_state_stack_t; /* last field sits at 0x78, so the packed struct is 0x7C bytes with a 4-byte unsigned long */ + +#define CO_MAX_PARAM_SIZE 0x400 + +typedef struct co_arch_passage_page_normal_address_space { + unsigned long pgd[0x400]; + unsigned long pte[2][0x400]; +} co_arch_passage_page_normal_address_space_t; + +typedef struct co_arch_passage_page_pae_address_space { + unsigned long long main[0x200]; + unsigned long long pgd[2][0x200]; + unsigned long long pte[2][0x200]; +} co_arch_passage_page_pae_address_space_t; + +typedef struct co_arch_passage_page { + union { + struct { + union { + struct { + union { + unsigned long self_physical_address; + unsigned long temp_pgd_physical; + } __attribute__((packed)); + unsigned long dr0; + unsigned long dr1; + unsigned long dr2; + unsigned long dr3; + unsigned long dr6; + unsigned long dr7; + unsigned char code[0x260]; /* called through co_switcher_t by co_passage_page_func_low() in linux/cooperative.h */ + } __attribute__((packed)); + unsigned char pad[0x280]; /* Be careful! see NOTE below; fixes this union at 0x280 bytes */ + } __attribute__((packed)); + + /* Machine states */ + + /* + * NOTE: *_state fields must be aligned at 16 bytes boundary since + * the fxsave/fxrstor instructions expect an aligned argument. 
+ */ + + co_arch_state_stack_t host_state; + co_arch_state_stack_t linuxvm_state; + + /* Control parameters: an operation code followed by a variable-length argument area */ + unsigned long operation; + unsigned long params[]; + } __attribute__((packed)); + unsigned char first_page[0x1000]; /* pads this union out to 0x1000 bytes */ + }; + + /* page tables for passage address spaces */ + union { + co_arch_passage_page_normal_address_space_t guest_normal; + co_arch_passage_page_normal_address_space_t temp_space; + } __attribute__((packed)); + union { + co_arch_passage_page_normal_address_space_t host_normal; + co_arch_passage_page_pae_address_space_t host_pae; + } __attribute__((packed)); +} co_arch_passage_page_t; + +/* + * Address space layout: + */ + +#define CO_VPTR_BASE (0xffc00000UL) +#define CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP (CO_VPTR_BASE - 0x1000000UL) /* 0xfec00000 */ +#define CO_VPTR_PSEUDO_RAM_PAGE_TABLES (CO_VPTR_BASE - 0x1100000UL) /* 0xfeb00000 */ +#define CO_VPTR_PASSAGE_PAGE (CO_VPTR_BASE - 0x1101000UL) /* 0xfeaff000 */ +#define CO_VPTR_IO_AREA_SIZE (0x10000UL) +#define CO_VPTR_IO_AREA_START (CO_VPTR_BASE - 0x1200000UL) /* 0xfea00000 */ +#define CO_VPTR_SELF_MAP (CO_VPTR_BASE - 0x1400000UL) /* 0xfe800000 */ + +#define CO_VPTR_BASE_START CO_VPTR_SELF_MAP +#define CO_VPTR_BASE_END CO_VPTR_BASE + +#define CO_LOWMEMORY_MAX_MB 984 + +typedef struct { + unsigned long kernel_cs; + unsigned long kernel_ds; +} __attribute__((packed)) co_arch_info_t; + +#endif Index: linux-2.6.33-source/arch/x86/include/asm/cooperative_internal.h =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/include/asm/cooperative_internal.h @@ -0,0 +1,53 @@ +/* + * linux/include/asm/cooperative_internal.h + * + * Copyright (C) 2004 Dan Aloni + */ + +#ifndef __LINUX_ASM_COOPERATIVE_INTERNAL_H__ +#define __LINUX_ASM_COOPERATIVE_INTERNAL_H__ + +#include + +#ifdef CONFIG_COOPERATIVE + +extern bool co_host_fpu_saved; +extern char co_host_fpu[0x200]; + +extern void co_kernel_breakpoint(struct pt_regs * regs); +extern int co_kernel_debug(struct pt_regs * regs, long error_code, unsigned int condition); + +#define 
CO_FPU_SAVE(x) \ +do \ +{ \ + if (cpu_has_fxsr) \ + asm("fxsave " #x " ; fnclex"); \ + else \ + asm("fnsave " #x " ; fwait"); \ +} \ +while (0) + +#define CO_FPU_RESTORE(x) \ +do \ +{ \ + if (cpu_has_fxsr) \ + asm("fxrstor " #x); \ + else \ + asm("frstor " #x); \ +} \ +while (0) + +#else /* !CONFIG_COOPERATIVE: no-op stubs, so callers need no #ifdef */ + +static inline void co_kernel_breakpoint(struct pt_regs * regs) +{ +} + +static inline int co_kernel_debug(struct pt_regs * regs, long error_code, unsigned int condition) +{ + return 0; +} + +#endif /* CONFIG_COOPERATIVE */ + +#endif /* __LINUX_ASM_COOPERATIVE_INTERNAL_H__ */ Index: linux-2.6.33-source/arch/x86/include/asm/dma.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/dma.h +++ linux-2.6.33-source/arch/x86/include/asm/dma.h @@ -287,6 +287,7 @@ * * Assumes DMA flip-flop is clear. */ +#ifndef CONFIG_COOPERATIVE /* get_dma_residue() is compiled out for the guest */ static inline int get_dma_residue(unsigned int dmanr) { unsigned int io_port; @@ -301,6 +302,7 @@ return (dmanr <= 3) ? count : (count << 1); } +#endif /* These are in kernel/dma.c: */ Index: linux-2.6.33-source/arch/x86/include/asm/irq_vectors.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/irq_vectors.h +++ linux-2.6.33-source/arch/x86/include/asm/irq_vectors.h @@ -67,6 +67,18 @@ #define IRQ14_VECTOR (IRQ0_VECTOR + 14) #define IRQ15_VECTOR (IRQ0_VECTOR + 15) +#ifdef CONFIG_COOPERATIVE /* fixed IRQ numbers; the timer and device-message paths in kernel/cooperative.c raise them through __do_IRQ() */ +#define TIMER_IRQ 0 +#define KEYBOARD_IRQ 1 +#define SERIAL_IRQ 3 +#define SOUND_IRQ 5 +#define POWER_IRQ 9 +#define NETWORK_IRQ 10 +#define SCSI_IRQ 11 +#define MOUSE_IRQ 12 +#define BLOCKDEV_IRQ 15 +#endif + /* * Special IRQ vectors used by the SMP architecture, 0xf0-0xff * @@ -157,7 +169,9 @@ #define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) -#ifdef CONFIG_X86_IO_APIC +#ifdef CONFIG_COOPERATIVE +# define NR_IRQS (NR_VECTORS - FIRST_EXTERNAL_VECTOR) +#elif defined(CONFIG_X86_IO_APIC) # ifdef CONFIG_SPARSE_IRQ # define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT 
? \ Index: linux-2.6.33-source/arch/x86/include/asm/mc146818rtc.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/mc146818rtc.h +++ linux-2.6.33-source/arch/x86/include/asm/mc146818rtc.h @@ -87,6 +87,8 @@ #define current_lock_cmos_reg() 0 #endif +#ifndef CONFIG_COOPERATIVE + /* * The yet supported machines all access the RTC index register via * an ISA port access but the way to access the date register differs ... @@ -99,6 +101,11 @@ extern int mach_set_rtc_mmss(unsigned long nowtime); extern unsigned long mach_get_cmos_time(void); +#else +#define CMOS_READ(addr) (0) +#define CMOS_WRITE(val, addr) do {} while(0) +#endif + #define RTC_IRQ 8 #endif /* _ASM_X86_MC146818RTC_H */ Index: linux-2.6.33-source/arch/x86/include/asm/page.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/page.h +++ linux-2.6.33-source/arch/x86/include/asm/page.h @@ -14,6 +14,7 @@ #endif /* CONFIG_X86_64 */ #ifndef __ASSEMBLY__ +#include struct page; @@ -53,6 +54,20 @@ extern bool __virt_addr_valid(unsigned long kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) +#ifdef CONFIG_COOPERATIVE /* frame/address translation through the two lookup tables mapped at CO_VPTR_PSEUDO_RAM_PAGE_TABLES and CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP; "PP" presumably means pseudo-physical and "P" physical - confirm; every macro argument is parenthesized so expression arguments expand safely */ +#define CO_PA(pfn) (((unsigned long *)CO_VPTR_PSEUDO_RAM_PAGE_TABLES)[pfn]) +#define CO_VA_PFN(pa) (((unsigned long *)CO_VPTR_PHYSICAL_TO_PSEUDO_PFN_MAP)[((pa) >> PAGE_SHIFT)]) +#define CO_PFN_PP_TO_P(pfn) (CO_PA(pfn) >> PAGE_SHIFT) +#define CO_PFN_P_TO_PP(pfn) (CO_VA_PFN((pfn) << PAGE_SHIFT)) +#define CO_PP_TO_P(pa) ((CO_PFN_PP_TO_P((pa) >> PAGE_SHIFT) << PAGE_SHIFT) | ((pa) & ~PAGE_MASK)) +#define CO_P_TO_PP(pa) ((CO_PFN_P_TO_PP((pa) >> PAGE_SHIFT) << PAGE_SHIFT) | ((pa) & ~PAGE_MASK)) +#else /* !CONFIG_COOPERATIVE: identity mappings */ +#define CO_PFN_P_TO_PP(pfn) (pfn) +#define CO_PFN_PP_TO_P(pfn) (pfn) +#define CO_PP_TO_P(pa) (pa) +#define CO_P_TO_PP(pa) (pa) +#endif + #endif /* __ASSEMBLY__ */ #include Index: linux-2.6.33-source/arch/x86/include/asm/pgtable-2level.h 
=================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/pgtable-2level.h +++ linux-2.6.33-source/arch/x86/include/asm/pgtable-2level.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PGTABLE_2LEVEL_H #define _ASM_X86_PGTABLE_2LEVEL_H +#include + #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) #define pgd_ERROR(e) \ Index: linux-2.6.33-source/arch/x86/include/asm/pgtable.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/pgtable.h +++ linux-2.6.33-source/arch/x86/include/asm/pgtable.h @@ -124,7 +124,7 @@ static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; + return CO_PFN_P_TO_PP((pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT); /* the PTE holds the "P" frame; callers get the "PP" pfn (identity without CONFIG_COOPERATIVE, see asm/page.h) */ } static inline unsigned long pmd_pfn(pmd_t pmd) @@ -230,13 +230,13 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | + return __pte(((phys_addr_t)CO_PFN_PP_TO_P(page_nr) << PAGE_SHIFT) | /* inverse of pte_pfn(): convert the "PP" pfn back before building the entry */ massage_pgprot(pgprot)); } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | + return __pmd(((phys_addr_t)CO_PFN_PP_TO_P(page_nr) << PAGE_SHIFT) | /* same conversion as pfn_pte() */ massage_pgprot(pgprot)); } @@ -341,14 +341,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) { - return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); + return (unsigned long)__va(CO_P_TO_PP(pmd_val(pmd)) & PTE_PFN_MASK); /* translate the address stored in the pmd before handing it to __va() */ } /* * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page(CO_P_TO_PP(pmd_val(pmd)) >> PAGE_SHIFT) /* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] Index: linux-2.6.33-source/arch/x86/include/asm/processor.h 
=================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/processor.h +++ linux-2.6.33-source/arch/x86/include/asm/processor.h @@ -243,6 +243,7 @@ } __attribute__((packed)) ____cacheline_aligned; #endif +#ifdef __KERNEL__ /* * IO-bitmap sizes: */ @@ -677,6 +678,7 @@ return eax; } +#endif /* __KERNEL__ */ static inline unsigned int cpuid_ebx(unsigned int op) { Index: linux-2.6.33-source/include/linux/cooperative.h =================================================================== --- /dev/null +++ linux-2.6.33-source/include/linux/cooperative.h @@ -0,0 +1,399 @@ +/* + * linux/include/linux/cooperative.h + * + * Copyright (C) 2004 Dan Aloni + * + * This file defines the interfaces between the Cooperative Linux kernel + * and the host OS driver. It's for both external inclusion from the + * host OS driver and internal inclusion in the kernel sources. + */ + +#ifndef __LINUX_COOPERATIVE_H__ +#define __LINUX_COOPERATIVE_H__ + +#ifdef __KERNEL__ +#ifndef CO_KERNEL /* not predefined by the includer: this is the Linux guest kernel build */ +#define CO_COLINUX_KERNEL +#define CO_KERNEL +#endif +#endif + +#include + +#define CO_LINUX_API_VERSION 14 + +#pragma pack(0) + +#define CO_BOOTPARAM_STRING_LENGTH 0x100 + +typedef enum { + CO_OPERATION_EMPTY=0, + CO_OPERATION_START, + CO_OPERATION_IDLE, + CO_OPERATION_TERMINATE, + CO_OPERATION_MESSAGE_TO_MONITOR, + CO_OPERATION_MESSAGE_FROM_MONITOR, + CO_OPERATION_FORWARD_INTERRUPT, + CO_OPERATION_DEVICE, + CO_OPERATION_GET_TIME, + CO_OPERATION_DEBUG_LINE, + CO_OPERATION_GET_HIGH_PREC_TIME, + CO_OPERATION_TRACE_POINT, + CO_OPERATION_FREE_PAGES, + CO_OPERATION_ALLOC_PAGES, + CO_OPERATION_PRINTK_unused, + CO_OPERATION_GETPP, + CO_OPERATION_MAX /* Must always be the last entry; it sizes co_proc_counts_t.switches[] */ +} co_operation_t; + +#define CO_MODULE_MAX_CONET 16 +#define CO_MODULE_MAX_COBD 32 +#define CO_MODULE_MAX_COFS 32 +#define CO_MODULE_MAX_SERIAL 32 +#define CO_MODULE_MAX_COSCSI 32 +#define CO_MODULE_MAX_COVIDEO 2 +#define CO_MODULE_MAX_COAUDIO 2 + +typedef enum { + CO_MODULE_LINUX, + 
CO_MODULE_MONITOR, + CO_MODULE_DAEMON, + CO_MODULE_IDLE, + CO_MODULE_KERNEL_SWITCH, + CO_MODULE_USER_SWITCH, + CO_MODULE_CONSOLE, + CO_MODULE_PRINTK, + + CO_MODULE_CONET0, /* each device class reserves a run of consecutive ids, from its ...0 entry to its ..._END entry, sized by the matching CO_MODULE_MAX_ constant */ + CO_MODULE_CONET_END=CO_MODULE_CONET0+CO_MODULE_MAX_CONET-1, + + CO_MODULE_COBD0, + CO_MODULE_COBD_END=CO_MODULE_COBD0+CO_MODULE_MAX_COBD-1, + + CO_MODULE_COFS0, + CO_MODULE_COFS_END=CO_MODULE_COFS0+CO_MODULE_MAX_COFS-1, + + CO_MODULE_SERIAL0, + CO_MODULE_SERIAL_END=CO_MODULE_SERIAL0+CO_MODULE_MAX_SERIAL-1, + + CO_MODULE_COSCSI0, + CO_MODULE_COSCSI_END=CO_MODULE_COSCSI0+CO_MODULE_MAX_COSCSI-1, + + CO_MODULE_COVIDEO0, + CO_MODULE_COVIDEO_END=CO_MODULE_COVIDEO0+CO_MODULE_MAX_COVIDEO-1, + + CO_MODULE_COAUDIO0, + CO_MODULE_COAUDIO_END=CO_MODULE_COAUDIO0+CO_MODULE_MAX_COAUDIO-1, + CO_MODULES_MAX, /* one past the last valid id; used as the bound in the BUG_ON range checks of kernel/cooperative.c */ +} co_module_t; + +typedef enum { + CO_PRIORITY_DISCARDABLE=0, + CO_PRIORITY_IMPORTANT, +} co_priority_t; + +typedef enum { + CO_MESSAGE_TYPE_STRING=0, + CO_MESSAGE_TYPE_OTHER=1, +} co_message_type_t; + +typedef struct { + co_module_t from; + co_module_t to; + co_priority_t priority; + co_message_type_t type; + unsigned long size; /* number of payload bytes in data[] */ + unsigned char data[0]; +} __attribute__((packed)) co_message_t; + +typedef enum { + CO_DEVICE_BLOCK=0, + CO_DEVICE_CONSOLE, + CO_DEVICE_KEYBOARD, + CO_DEVICE_NETWORK, + CO_DEVICE_TIMER, + CO_DEVICE_POWER, + CO_DEVICE_SERIAL, + CO_DEVICE_FILESYSTEM, + + CO_DEVICE_MOUSE, + CO_DEVICE_SCSI, + CO_DEVICE_VIDEO, + CO_DEVICE_AUDIO, + CO_DEVICE_PCI, + + CO_DEVICES_TOTAL, /* number of device ids; sizes the per-device incoming queues in kernel/cooperative.c */ +} co_device_t; + +typedef enum { + CO_KBD_SCANCODE_RAW=0, + CO_KBD_SCANCODE_ASCII +} mode_data_from_keyboard_t; + +typedef struct { + unsigned char code; + mode_data_from_keyboard_t mode; +} __attribute__((packed)) co_scan_code_t; + +#define CO_MOUSE_MAX_X 2048 +#define CO_MOUSE_MAX_Y 2048 + +typedef struct { + unsigned btns; + unsigned abs_x; + unsigned abs_y; + int rel_z; +} __attribute__((packed)) co_mouse_data_t; + +typedef enum { + CO_LINUX_MESSAGE_POWER_ALT_CTRL_DEL=0, + 
CO_LINUX_MESSAGE_POWER_OFF, +} co_linux_message_power_type_t; + +typedef struct { + co_linux_message_power_type_t type; +} __attribute__((packed)) co_linux_message_power_t; + +typedef struct { + unsigned long tick_count; +} __attribute__((packed)) co_linux_message_idle_t; + +typedef struct { + co_device_t device; + unsigned int unit; + unsigned long size; + char data[]; +} __attribute__((packed)) co_linux_message_t; + +typedef enum { + CO_TERMINATE_END=0, + CO_TERMINATE_REBOOT, + CO_TERMINATE_POWEROFF, + CO_TERMINATE_PANIC, + CO_TERMINATE_HALT, + CO_TERMINATE_FORCED_OFF, + CO_TERMINATE_FORCED_END, + CO_TERMINATE_INVALID_OPERATION, + CO_TERMINATE_STACK_OVERFLOW, + CO_TERMINATE_BUG, + CO_TERMINATE_VMXE, +} co_termination_reason_t; + +#ifdef CO_KERNEL + +#ifndef asmlinkage +#define asmlinkage __attribute__((regparm(0))) +#endif + +typedef void asmlinkage (*co_switcher_t)(co_arch_passage_page_t *page, + void *from, + void *to); + +#define co_passage_page_func_low(_from_,_to_) \ + (((co_switcher_t)(co_passage_page->code)) \ + (co_passage_page, \ + &_from_.border2, \ + &_to_.border2)) + +# ifdef CO_COLINUX_KERNEL +# define co_passage_page ((co_arch_passage_page_t *)(CO_VPTR_PASSAGE_PAGE)) +# define co_current (co_passage_page->linuxvm_state) +# define co_other (co_passage_page->host_state) +# else +# define co_passage_page (cmon->passage_page) +# define co_other (co_passage_page->linuxvm_state) +# define co_current (co_passage_page->host_state) +# endif + +# define co_switch() co_passage_page_func_low(co_current, co_other) + +#endif /* CO_KERNEL */ + +/* + * Defines operations on various virtual devices. 
+ */ + +typedef enum { + CO_OPERATION_CONSOLE_STARTUP=0, + CO_OPERATION_CONSOLE_INIT=1, + CO_OPERATION_CONSOLE_DEINIT, + CO_OPERATION_CONSOLE_CLEAR, + CO_OPERATION_CONSOLE_PUTC, + CO_OPERATION_CONSOLE_PUTCS, + CO_OPERATION_CONSOLE_CURSOR_DRAW, + CO_OPERATION_CONSOLE_CURSOR_ERASE, + CO_OPERATION_CONSOLE_CURSOR_MOVE, + CO_OPERATION_CONSOLE_SCROLL_UP, + CO_OPERATION_CONSOLE_SCROLL_DOWN, + CO_OPERATION_CONSOLE_BMOVE, + CO_OPERATION_CONSOLE_SWITCH, + CO_OPERATION_CONSOLE_BLANK, + CO_OPERATION_CONSOLE_FONT_OP, + CO_OPERATION_CONSOLE_SET_PALETTE, + CO_OPERATION_CONSOLE_SCROLLDELTA, + CO_OPERATION_CONSOLE_SET_ORIGIN, + CO_OPERATION_CONSOLE_SAVE_SCREEN, + CO_OPERATION_CONSOLE_INVERT_REGION, + CO_OPERATION_CONSOLE_CONFIG, + CO_OPERATION_CONSOLE_INIT_SCROLLBUFFER, +} co_operation_console_t; + + +typedef char co_console_code; +typedef unsigned short co_console_character; +typedef unsigned short co_console_unit; + +typedef struct { + co_console_unit x; + co_console_unit y; + co_console_unit height; +} __attribute__((packed)) co_cursor_pos_t; + +typedef struct { + co_operation_console_t type; /* presumably selects which member of the union below is meaningful - confirm against the console driver */ + union { + struct { + co_console_unit top; + co_console_unit bottom; + co_console_unit lines; + co_console_character charattr; + } scroll; + struct { + co_console_unit y; + co_console_unit x; + co_console_unit count; + co_console_character data[]; + } putcs; + struct { + co_console_unit x; + co_console_unit y; + co_console_character charattr; + } putc; + struct { + co_console_unit top; + co_console_unit left; + co_console_unit bottom; + co_console_unit right; + co_console_character charattr; + } clear; + struct { + co_console_unit y; + co_console_unit x; + co_console_unit count; + } invert; + struct { + co_console_unit row; + co_console_unit column; + co_console_unit top; + co_console_unit left; + co_console_unit bottom; + co_console_unit right; + } bmove; + struct { + co_console_unit rows; + co_console_unit cols; + co_console_unit attr; + } config; + co_cursor_pos_t cursor; + }; +} 
__attribute__((packed)) co_console_message_t; + +typedef struct { + unsigned long messages_waiting; /* set to 1 by co_send_message_save() when the guest queues a message; reset to 0 in co_callback() */ + unsigned char buffer[]; +} co_io_buffer_t; + +typedef struct { + unsigned long index; + unsigned long flags; + unsigned long func; + unsigned long pid; +} __attribute__((packed)) co_trace_point_info_t; + +typedef enum { + CO_BLOCK_OPEN=0, + CO_BLOCK_STAT, + CO_BLOCK_READ, + CO_BLOCK_WRITE, + CO_BLOCK_CLOSE, + CO_BLOCK_GET_ALIAS, +} co_block_request_type_t; + +typedef enum { + CO_BLOCK_REQUEST_RETCODE_OK=0, + CO_BLOCK_REQUEST_RETCODE_ERROR=-1, +} co_block_request_retcode_t; + +typedef enum { + CO_NETWORK_GET_MAC=0, +} co_network_request_type_t; + +#ifdef CO_KERNEL +/* If we are compiling kernel code (Linux or Host Driver) */ +# ifdef CO_COLINUX_KERNEL +/* Inside Linux, vm_ptr_t is considered a valid pointer in its virtual address space */ +typedef void *vm_ptr_t; +# else +/* But inside the host, the type is not considered a pointer in the host's own address space */ +typedef unsigned long vm_ptr_t; +# endif + +typedef struct { + co_block_request_type_t type; + long rc; + union { + struct { + unsigned long long offset; + unsigned long long size; + unsigned long long disk_size; + vm_ptr_t address; + void * irq_request; + int async; + }; + struct { + char alias[20]; + }; + }; +} __attribute__((packed)) co_block_request_t; + +typedef struct { + void * irq_request; + int uptodate; +} __attribute__((packed)) co_block_intr_t; + +typedef struct { + co_network_request_type_t type; + unsigned int unit; + char mac_address[6]; + char _pad[2]; /* pads the 6-byte MAC address to 8 bytes */ + int result; +} __attribute__((packed)) co_network_request_t; + +#endif /* CO_KERNEL */ + +typedef struct { + unsigned long api_version; + unsigned long compiler_major; + unsigned long compiler_minor; + unsigned long compiler_abi; +} __attribute__((packed)) co_info_t; + +typedef struct { + unsigned long co_core_end; + unsigned long co_memory_size; + void *co_initrd; + unsigned long co_initrd_size; + unsigned long co_cpu_khz; + unsigned long 
filler[5]; // compatible old api: empty 5,6,7,8,9 + char co_boot_parameters[CO_BOOTPARAM_STRING_LENGTH]; // params[10] +} __attribute__((packed)) co_boot_params_t; + +#ifndef COLINUX_TRACE +#define CO_TRACE_STOP +#define CO_TRACE_CONTINUE +#endif + +#pragma pack() + +#endif Index: linux-2.6.33-source/include/linux/cooperative_internal.h =================================================================== --- /dev/null +++ linux-2.6.33-source/include/linux/cooperative_internal.h @@ -0,0 +1,143 @@ +/* + * linux/include/linux/cooperative_internal.h + * + * Copyright (C) 2004 Dan Aloni + * + * This header gathers the functions and variables used in Cooperative Mode + * when CONFIG_COOPERATIVE is defined. + */ +#ifndef __LINUX_COOPERATIVE_LINUX_H__ +#define __LINUX_COOPERATIVE_LINUX_H__ + +#include +#include +#include + +#ifdef CONFIG_COOPERATIVE + +#define ENABLE_PASSAGE_HOLDING_CHECK /* turns on the co_passage_page_holding_count bookkeeping and the BUG_ON in co_passage_page_assert_valid() */ + +typedef struct { + struct list_head node; + co_message_t msg; /* kept last: co_message_t ends in a variable-length data[] */ +} co_message_node_t; + +extern co_boot_params_t co_boot_params; +#ifdef ENABLE_PASSAGE_HOLDING_CHECK +extern int co_passage_page_holding_count; +#endif + +#ifdef CONFIG_COLINUX_STATS +typedef struct co_proc_counts { + unsigned long switches[CO_OPERATION_MAX]; /* one counter per co_operation_t value */ +} co_proc_counts_t; + +extern co_proc_counts_t co_proc_counts; +#endif + +#define co_io_buffer ((co_io_buffer_t *)CO_VPTR_IO_AREA_START) +#define cooperative_mode_enabled() 1 + +extern void co_debug(const char *fmt, ...) 
+ __attribute__ ((format (printf, 1, 2))); +extern void co_printk(const char *line, int size); + +extern void co_switch_wrapper(void); +extern void co_callback(struct pt_regs *regs); +extern void co_idle_processor(void); +NORET_TYPE void co_terminate(co_termination_reason_t reason) ATTRIB_NORET; +NORET_TYPE void co_terminate_panic(const char *text, int len) ATTRIB_NORET; +NORET_TYPE void co_terminate_bug(int code, int line, const char *file) ATTRIB_NORET; +extern void co_free_pages(unsigned long vaddr, int pages); +extern int co_alloc_pages(unsigned long vaddr, int pages); /* returns 0 on success, -ENOMEM on failure */ +extern void co_start_kernel(void); +extern void co_arch_start_kernel(void); + +extern void co_send_message(co_module_t from, + co_module_t to, + co_priority_t priority, + co_message_type_t type, + unsigned long size, + const char *data); + +extern int co_get_message(co_message_node_t **message, co_device_t device); /* returns 1 and stores the unlinked node in *message, or 0 if nothing is queued; release the node with co_free_message() */ +static inline void co_free_message(co_message_node_t *message) +{ + kfree(message); +} + +extern void *co_map_buffer(void *, int); + +static inline void co_passage_page_ref_up(void) +{ +#ifdef ENABLE_PASSAGE_HOLDING_CHECK + co_passage_page_holding_count++; +#endif +} + +static inline void co_passage_page_ref_down(void) +{ +#ifdef ENABLE_PASSAGE_HOLDING_CHECK + co_passage_page_holding_count--; +#endif +} + +static inline int co_passage_page_held(void) /* current holding count, or 0 when the check is compiled out */ +{ +#ifdef ENABLE_PASSAGE_HOLDING_CHECK + return co_passage_page_holding_count; +#else + return 0; +#endif +} + +static inline void co_passage_page_acquire(unsigned long *flags) /* saves and disables local interrupts, then bumps the holding count */ +{ + local_irq_save(*flags); + co_passage_page_ref_up(); +} + +static inline void co_passage_page_release(unsigned long flags) /* reverse of co_passage_page_acquire(): drop the count, then restore interrupts */ +{ + co_passage_page_ref_down(); + local_irq_restore(flags); +} + +#ifdef ENABLE_PASSAGE_HOLDING_CHECK +#define co_passage_page_assert_valid() do { \ + BUG_ON(co_passage_page_held()); \ +} while (0) +#else +#define co_passage_page_assert_valid() /* nothing */ +#endif + +static inline co_message_t *co_send_message_save(unsigned long 
*flags) +{ + co_passage_page_assert_valid(); + co_passage_page_acquire(flags); + + if (co_io_buffer->messages_waiting) { /* a previous message is still pending: undo the acquire and report failure */ + co_passage_page_release(*flags); + return NULL; + } + + co_passage_page->operation = CO_OPERATION_MESSAGE_TO_MONITOR; + co_io_buffer->messages_waiting = 1; + return ((co_message_t *)co_io_buffer->buffer); /* on success the passage page stays acquired until co_send_message_restore() */ +} + +static inline void co_send_message_restore(unsigned long flags) /* performs the switch for a message prepared by co_send_message_save(), then releases the passage page */ +{ + co_switch_wrapper(); + co_passage_page_release(flags); +} + +#else /* !CONFIG_COOPERATIVE */ + +#define co_printk(line, size) do {} while (0) +#define co_terminate(reason) do {} while (0) +#define cooperative_mode_enabled() 0 + +#endif + +#endif Index: linux-2.6.33-source/kernel/Makefile =================================================================== --- linux-2.6.33-source.orig/kernel/Makefile +++ linux-2.6.33-source/kernel/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_TINY_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o +obj-$(CONFIG_COOPERATIVE) += cooperative.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o Index: linux-2.6.33-source/kernel/cooperative.c =================================================================== --- /dev/null +++ linux-2.6.33-source/kernel/cooperative.c @@ -0,0 +1,442 @@ +/* + * linux/kernel/cooperative.c + * + * Cooperative mode (coLinux) support routines. + * + * Dan Aloni , 2003-2004 (C). 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CO_TRACE_STOP; + +co_boot_params_t co_boot_params; + +typedef struct { + struct list_head list; + int num_messages; +} co_message_queue_t; + +int co_messages_active = 0; /* set to 1 by initcall_message_queues() once the queues exist */ +co_message_queue_t co_outgoing_messages; +co_message_queue_t co_incoming_messages; +co_message_queue_t *co_incoming_queued_messages; /* array of CO_DEVICES_TOTAL queues, indexed by co_device_t */ + +#ifdef CONFIG_COLINUX_STATS +co_proc_counts_t co_proc_counts; +#endif + +void __init co_start_kernel(void) /* copies the boot parameters out of the passage page, then continues in co_arch_start_kernel() */ +{ + memcpy(&co_boot_params, co_passage_page->params, sizeof(co_boot_params)); + co_arch_start_kernel(); +} + +void co_send_message(co_module_t from, /* builds one co_message_t in the shared I/O buffer and switches to deliver it */ + co_module_t to, + co_priority_t priority, + co_message_type_t type, + unsigned long size, + const char *data) +{ + unsigned long flags; + co_message_t *buffer; + + BUG_ON((sizeof(co_message_t) + size) > CO_VPTR_IO_AREA_SIZE - sizeof(co_io_buffer_t)); + + buffer = co_send_message_save(&flags); + if (!buffer) /* another message is still pending in the I/O buffer: this one is dropped */ + return; + + buffer->from = from; + buffer->to = to; + buffer->priority = priority; + buffer->type = type; + buffer->size = size; + memcpy(buffer->data, data, size); + + co_send_message_restore(flags); +} + +static void co_message_add_to_incoming(co_message_t *message, unsigned long size) /* size is the header plus payload length, as computed by the caller in co_callback() */ +{ + co_message_node_t *message_copy; + + message_copy = kmalloc(size + sizeof(co_message_node_t) - sizeof(co_message_t), + GFP_ATOMIC); + if (!message_copy) /* allocation failed: the message is dropped without notice */ + return; + + memcpy(&message_copy->msg, message, size); + list_add_tail(&message_copy->node, &co_incoming_messages.list); +} + +static void co_handle_jiffies(long count) /* accounts for count timer ticks: whole seconds are added to xtime directly, the remainder is delivered as TIMER_IRQ interrupts */ +{ + if (count > HZ) { + long secs = count / HZ; + + /* 'warp_clock' for long distances */ + write_seqlock(&xtime_lock); + xtime.tv_sec += secs; + count -= (secs * HZ); + update_xtime_cache(0); + write_sequnlock(&xtime_lock); + clock_was_set(); + } + + while (count > 0) { + irq_enter(); + __do_IRQ(TIMER_IRQ); + irq_exit(); + count--; + } +} + +/* called with disabled interrupts */ +static void 
co_handle_incoming_message(co_message_node_t *node_message) +{ + co_linux_message_t *message; + co_message_queue_t *queue; + int irq; + + message = (co_linux_message_t *)&node_message->msg.data; + switch (message->device) { /* map the target device to the IRQ that is raised below */ + case CO_DEVICE_POWER: irq = POWER_IRQ; break; + case CO_DEVICE_KEYBOARD: irq = KEYBOARD_IRQ; break; +#ifdef CONFIG_CONET_COOPERATIVE + case CO_DEVICE_NETWORK: irq = NETWORK_IRQ; break; +#endif +#ifdef CONFIG_SERIAL_COOPERATIVE + case CO_DEVICE_SERIAL: irq = SERIAL_IRQ; break; +#endif + case CO_DEVICE_SCSI: irq = SCSI_IRQ; break; + case CO_DEVICE_MOUSE: irq = MOUSE_IRQ; break; + case CO_DEVICE_BLOCK: irq = BLOCKDEV_IRQ; break; + default: /* in-range device without an IRQ mapping here: the message is freed and dropped */ + BUG_ON((unsigned long)message->device >= (unsigned long)CO_DEVICES_TOTAL); + co_free_message(node_message); + return; + } + + /* Add to the head of the per-device queue; co_get_message() takes from the tail, so delivery is FIFO */ + queue = &co_incoming_queued_messages[message->device]; + list_add(&node_message->node, &queue->list); + queue->num_messages++; + + irq_enter(); + __do_IRQ(irq); + irq_exit(); +} + +static void co_handle_incoming_messages(void) +{ + if (!co_messages_active) + return; + + /* + * Pop a message from the incoming queue. + */ + while (!list_empty(&co_incoming_messages.list)) { + co_message_node_t *message; + + message = list_entry(co_incoming_messages.list.next, + co_message_node_t, node); + BUG_ON((unsigned long)message->msg.from >= (unsigned long)CO_MODULES_MAX); + BUG_ON((unsigned long)message->msg.to >= (unsigned long)CO_MODULES_MAX); + list_del(&message->node); + + /* + * Let the interrupt routine of the arch dependent code + * handle the message, and be responsible for freeing it. 
+ */ + co_handle_incoming_message(message); + } +} + +void co_callback(struct pt_regs *regs) /* handles a CO_OPERATION_MESSAGE_FROM_MONITOR result left in the passage page: copies the delivered messages, then injects timer ticks and device IRQs; must run with interrupts disabled and drops one passage-page reference on every path */ +{ + long io_size; + unsigned long new_jiffies; + struct pt_regs null_regs; + + BUG_ON(!irqs_disabled()); + if (co_passage_page->operation != CO_OPERATION_MESSAGE_FROM_MONITOR) { + co_passage_page_ref_down(); + return; + } + +#ifdef CONFIG_COLINUX_STATS + co_proc_counts.switches[CO_OPERATION_MESSAGE_FROM_MONITOR]++; +#endif + io_size = co_passage_page->params[0]; + new_jiffies = co_passage_page->params[1]; + + if (co_messages_active && io_size > 0 && io_size <= CO_VPTR_IO_AREA_SIZE) { + static unsigned char temp_storage[CO_VPTR_IO_AREA_SIZE]; + unsigned char *io_buffer = temp_storage; + unsigned char *io_buffer_end = &temp_storage[io_size]; + + /* Copy into temp storage first, because kmalloc calls into the host to map pages */ + memcpy(temp_storage, co_io_buffer->buffer, io_size); + co_io_buffer->messages_waiting = 0; + co_passage_page_ref_down(); + + while (io_buffer < io_buffer_end) { /* NOTE(review): message->size is taken from the buffer and is not checked against io_buffer_end before the message is copied */ + co_message_t *message = (co_message_t *)io_buffer; + co_linux_message_t *linux_message = (co_linux_message_t *)message->data; + unsigned long size = message->size + sizeof(*message); + + BUG_ON((unsigned long)message->from >= (unsigned long)CO_MODULES_MAX); + BUG_ON((unsigned long)message->to >= (unsigned long)CO_MODULES_MAX); + BUG_ON((unsigned long)linux_message->device >= (unsigned long)CO_DEVICES_TOTAL); + + co_message_add_to_incoming(message, size); + io_buffer += size; + } + } else { + co_io_buffer->messages_waiting = 0; + co_passage_page_ref_down(); + } + + memset (&null_regs, 0, sizeof(null_regs)); + + /* regs is only supplied when called from proxy_interrupt_handler(); its cs is kept because user_mode() needs it */ + if (regs) + null_regs.cs = regs->cs; + set_irq_regs(&null_regs); + + co_handle_jiffies(new_jiffies); + co_handle_incoming_messages(); +} + +void co_idle_processor(void) /* requests CO_OPERATION_IDLE from the other side, then processes whatever it left via co_callback(NULL) */ +{ + co_passage_page_assert_valid(); + local_irq_disable(); + co_passage_page_ref_up(); + co_passage_page->operation = CO_OPERATION_IDLE; + 
co_switch_wrapper(); + co_callback(NULL); + local_irq_enable(); +} + +void co_printk(const char *line, int size) +{ + unsigned long flags; + co_message_t *co_message; + + co_message = co_send_message_save(&flags); + if (co_message) { + co_message->from = CO_MODULE_LINUX; + co_message->to = CO_MODULE_PRINTK; + co_message->priority = CO_PRIORITY_DISCARDABLE; + co_message->type = CO_MESSAGE_TYPE_STRING; + if (size > 200) + size = 200; + co_message->size = size + 1; + memcpy(co_message->data, line, size); + co_message->data[size] = '\0'; + co_send_message_restore(flags); + } +} + +NORET_TYPE void co_terminate_panic(const char *text, int len) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_TERMINATE; + co_passage_page->params[0] = CO_TERMINATE_PANIC; + co_passage_page->params[1] = 0; + co_passage_page->params[2] = 0; + co_passage_page->params[3] = len; + memcpy((char *)&co_passage_page->params[4], text, len+1); + co_switch_wrapper(); + while(1); +} + +NORET_TYPE void co_terminate_bug(int code, int line, const char *file) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_TERMINATE; + co_passage_page->params[0] = CO_TERMINATE_BUG; + co_passage_page->params[1] = code; + co_passage_page->params[2] = line; + co_passage_page->params[3] = strlen(file); + strcpy((char *)&co_passage_page->params[4], file); + co_switch_wrapper(); + while(1); +} +NORET_TYPE EXPORT_SYMBOL(co_terminate_bug); + +void co_terminate(co_termination_reason_t reason) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_TERMINATE; + co_passage_page->params[0] = reason; + co_passage_page->params[3] = 0; /* len */ + co_switch_wrapper(); + while(1); +} +EXPORT_SYMBOL(co_terminate); + +int co_get_message(co_message_node_t **message, co_device_t device) +{ + co_message_queue_t *queue; + co_message_node_t *node; + unsigned long flags; + + if 
(!co_messages_active) + return 0; + + local_irq_save(flags); + queue = &co_incoming_queued_messages[device]; + if (list_empty(&queue->list)) { + local_irq_restore(flags); + return 0; + } + + node = list_entry(queue->list.prev, co_message_node_t, node); + list_del(&node->node); + queue->num_messages--; + local_irq_restore(flags); + + *message = node; + return 1; +} + +co_info_t co_info = { + .api_version = CO_LINUX_API_VERSION, + .compiler_major = __GNUC__, + .compiler_minor = __GNUC_MINOR__, + .compiler_abi = __GXX_ABI_VERSION, +}; + +static int __init initcall_message_queues(void) +{ + int queue_index; + + INIT_LIST_HEAD(&co_outgoing_messages.list); + INIT_LIST_HEAD(&co_incoming_messages.list); + + co_incoming_queued_messages = + kmalloc(sizeof(co_message_queue_t) * CO_DEVICES_TOTAL, GFP_KERNEL); + if (!co_incoming_queued_messages) + panic("unable to allocate message queues\n"); + + for (queue_index=0; queue_index < CO_DEVICES_TOTAL; queue_index++) { + co_message_queue_t *queue = &co_incoming_queued_messages[queue_index]; + queue->num_messages = 0; + INIT_LIST_HEAD(&queue->list); + } + + co_messages_active = 1; + + return 0; +} + + +void co_free_pages(unsigned long vaddr, int pages) +{ + unsigned long flags; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_FREE_PAGES; + co_passage_page->params[0] = vaddr; + co_passage_page->params[1] = pages; + co_switch_wrapper(); + co_passage_page_release(flags); +} + +int co_alloc_pages(unsigned long vaddr, int size) +{ + unsigned long flags; + long result; + + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_ALLOC_PAGES; + co_passage_page->params[0] = vaddr; + co_passage_page->params[1] = size; + co_switch_wrapper(); + result = (long)co_passage_page->params[4]; + co_passage_page_release(flags); + + if (result < 0) + return -ENOMEM; + + return 0; +} + +__initcall(initcall_message_queues); + +#ifdef CONFIG_COLINUX_STATS +static int co_guest_read_status(char *page, char 
**start, off_t off, int count, int *eof, void *clear) +{ + co_proc_counts_t hold; + int len; + + local_irq_disable(); + hold = co_proc_counts; + if (clear) + memset(&co_proc_counts, 0, sizeof(co_proc_counts)); + local_irq_enable(); + + len = sprintf(page, "idle:\t%lu\n" + "msgto:\t%lu\n" + "mfrom:\t%lu\n" + "intr:\t%lu\n" + "dev:\t%lu\n" + "time:\t%lu\n" + "hpt:\t%lu\n" + "free:\t%lu\n" + "alloc:\t%lu\n" + "getpp:\t%lu\n", + hold.switches[CO_OPERATION_IDLE], + hold.switches[CO_OPERATION_MESSAGE_TO_MONITOR], + hold.switches[CO_OPERATION_MESSAGE_FROM_MONITOR], + hold.switches[CO_OPERATION_FORWARD_INTERRUPT], + hold.switches[CO_OPERATION_DEVICE], + hold.switches[CO_OPERATION_GET_TIME], + hold.switches[CO_OPERATION_GET_HIGH_PREC_TIME], + hold.switches[CO_OPERATION_FREE_PAGES], + hold.switches[CO_OPERATION_ALLOC_PAGES], + hold.switches[CO_OPERATION_GETPP]); + + return len; +} + +static __init int co_create_proc_stats(void) +{ + struct proc_dir_entry *co_guest_dir; + + co_guest_dir = proc_mkdir("colinux", NULL); + if(co_guest_dir) { + create_proc_read_entry("stats", + 0444, co_guest_dir, + co_guest_read_status, NULL); + create_proc_read_entry("stats_clear", + 0444, co_guest_dir, + co_guest_read_status, (void*)1); + } + + return 0; +} + +__initcall(co_create_proc_stats); +#endif /* CONFIG_COLINUX_STATS */ + +CO_TRACE_CONTINUE; Index: linux-2.6.33-source/kernel/panic.c =================================================================== --- linux-2.6.33-source.orig/kernel/panic.c +++ linux-2.6.33-source/kernel/panic.c @@ -23,6 +23,7 @@ #include #include #include +#include int panic_on_oops; static unsigned long tainted_mask; @@ -68,7 +69,7 @@ bust_spinlocks(1); va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); + i = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); #ifdef CONFIG_DEBUG_BUGVERBOSE @@ -84,6 +85,9 @@ kmsg_dump(KMSG_DUMP_PANIC); + if (cooperative_mode_enabled()) + co_terminate_panic(buf,
i); + /* * Note smp_send_stop is the usual smp shutdown function, which * unfortunately means it may not be hardened to work in a panic Index: linux-2.6.33-source/kernel/printk.c =================================================================== --- linux-2.6.33-source.orig/kernel/printk.c +++ linux-2.6.33-source/kernel/printk.c @@ -51,6 +51,8 @@ { } +#include + #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) /* printk's without a loglevel use this.. */ @@ -740,6 +742,8 @@ } } + co_printk(printk_buf, printed_len); + /* * Copy the output into log_buf. If the caller didn't provide * appropriate log level tags, we insert them here Index: linux-2.6.33-source/mm/bootmem.c =================================================================== --- linux-2.6.33-source.orig/mm/bootmem.c +++ linux-2.6.33-source/mm/bootmem.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -544,6 +545,21 @@ region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) + start_off); + + if (cooperative_mode_enabled()) { + unsigned long alloc_address = (unsigned long)region; + unsigned long alloc_size = size; + + alloc_size += (alloc_address & (~PAGE_MASK)); + alloc_address &= PAGE_MASK; + alloc_size = (alloc_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + if (co_alloc_pages(alloc_address, alloc_size)) { + free_bootmem((unsigned long)region, size); + return NULL; + } + } + memset(region, 0, size); /* * The min_count is set to 0 so that bootmem allocated blocks Index: linux-2.6.33-source/mm/page_alloc.c =================================================================== --- linux-2.6.33-source.orig/mm/page_alloc.c +++ linux-2.6.33-source/mm/page_alloc.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -338,6 +339,34 @@ return bad; } +#ifdef CONFIG_COOPERATIVE +static int co_persistent_alloc_pages(unsigned long address, int size) +{ + int result, retries_left; + + for (retries_left = 10; retries_left > 0; retries_left--) { + result = co_alloc_pages(address, 
size); + if (result) { + unsigned long cache_size; + /* + * Whoops, we have allocated too much of the + * host OS's memory, time to free some cache. + */ + cache_size = global_page_state(NR_FILE_PAGES)-total_swapcache_pages; + cache_size /= 2; + if (cache_size < size*2) + cache_size = size*2; + shrink_all_memory(cache_size); + } else { + return 0; + } + } + + WARN_ON(result != 0); + return result; +} +#endif /* CONFIG_COOPERATIVE */ + static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; @@ -453,6 +482,11 @@ { unsigned long page_idx; +#ifdef CONFIG_COOPERATIVE + co_free_pages((unsigned long)page_address(page), 1 << order); + ClearPageCoHostMapped(page); +#endif /* CONFIG_COOPERATIVE */ + if (unlikely(PageCompound(page))) if (unlikely(destroy_compound_page(page, order))) return; @@ -692,6 +726,12 @@ return 1; } +#ifdef CONFIG_COOPERATIVE + if (!TestSetPageCoHostMapped(page)) + if (co_persistent_alloc_pages((unsigned long)page_address(page), 1 << order)) + return 1; +#endif /* CONFIG_COOPERATIVE */ + set_page_private(page, 0); set_page_refcounted(page); @@ -1237,6 +1277,7 @@ VM_BUG_ON(bad_range(zone, page)); if (prep_new_page(page, order, gfp_flags)) goto again; + return page; failed: @@ -1918,7 +1959,7 @@ dump_stack(); show_mem(); } - return page; + return NULL; got_pg: if (kmemcheck_enabled) kmemcheck_pagealloc_alloc(page, order, gfp_mask); Index: linux-2.6.33-source/mm/vmscan.c =================================================================== --- linux-2.6.33-source.orig/mm/vmscan.c +++ linux-2.6.33-source/mm/vmscan.c @@ -2332,7 +2332,7 @@ return nr; } -#ifdef CONFIG_HIBERNATION +#if defined(CONFIG_HIBERNATION) || defined(CONFIG_COOPERATIVE) /* * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of * freed pages. 
Index: linux-2.6.33-source/drivers/char/mem.c =================================================================== --- linux-2.6.33-source.orig/drivers/char/mem.c +++ linux-2.6.33-source/drivers/char/mem.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,9 @@ ssize_t read, sz; char *ptr; + if (cooperative_mode_enabled()) + return -ENOMEM; + if (!valid_phys_addr_range(p, count)) return -EFAULT; read = 0; @@ -188,6 +192,9 @@ unsigned long copied; void *ptr; + if (cooperative_mode_enabled()) + return -ENOMEM; + if (!valid_phys_addr_range(p, count)) return -EFAULT; @@ -298,6 +305,9 @@ { size_t size = vma->vm_end - vma->vm_start; + if (cooperative_mode_enabled()) + return -EFAULT; + if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; @@ -333,6 +343,9 @@ { unsigned long pfn; + if (cooperative_mode_enabled()) + return -EFAULT; + /* Turn a kernel-virtual address into a physical page frame */ pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT; @@ -397,6 +410,9 @@ char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */ int err = 0; + if (cooperative_mode_enabled()) + return -ENOMEM; + read = 0; if (p < (unsigned long) high_memory) { low_count = count; @@ -527,6 +543,9 @@ char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */ int err = 0; + if (cooperative_mode_enabled()) + return -ENOMEM; + if (p < (unsigned long) high_memory) { unsigned long to_write = min_t(unsigned long, count, (unsigned long)high_memory - p); Index: linux-2.6.33-source/arch/x86/kernel/cpu/bugs.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/cpu/bugs.c +++ linux-2.6.33-source/arch/x86/kernel/cpu/bugs.c @@ -25,6 +25,8 @@ __setup("no-hlt", no_halt); +// Ignore, if have no emulation compiled in +#ifdef CONFIG_MATH_EMULATION static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; @@ -33,6 +35,7 @@ } __setup("no387", no_387); +#endif static 
double __initdata x = 4195835.0; static double __initdata y = 3145727.0; Index: linux-2.6.33-source/arch/x86/kernel/reboot_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/kernel/reboot_cooperative.c @@ -0,0 +1,133 @@ +/* + * linux/arch/x86/kernel/reboot_cooperative.c + */ + +#include +#include +#include +#include +#include +#include + +/* sys_reboot needs this dummy. */ +void (*pm_power_off)(void); + +/* + * This is a hack to make ctrl_alt_del work as a poweroff, so the OS can be + * notified, unmount and sync everything. + */ +static int co_powering_off; + +static int co_powerd(void *__unused) +{ + extern char * envp_init[]; + static char *const argv_shutdown[]= { "/sbin/shutdown", "-h", "now", NULL }; + static char *const argv_halt[]= { "/sbin/halt", NULL }; + + if (kernel_execve(argv_shutdown[0], argv_shutdown, envp_init) >= 0) + return 0; + printk(KERN_INFO "%s spawn failed\n", argv_shutdown[0]); + + if (kernel_execve(argv_halt[0], argv_halt, envp_init) >= 0) + return 0; + printk(KERN_INFO "%s spawn failed\n", argv_halt[0]); + + printk(KERN_INFO "Fallback into reboot and power off\n"); + co_powering_off++; + ctrl_alt_del(); + + return 0; +} + +static void deferred_shutdown(struct work_struct *dummy) +{ + kthread_run(co_powerd, NULL, "copowerd"); +} + +/* + * This function will call from interrupt context. 
+ */ +static void co_shedule_shutdown(void) +{ + static DECLARE_WORK(shutdown_work, deferred_shutdown); + + schedule_work(&shutdown_work); +} + + +static void co_machine_restart(void) +{ + co_terminate(CO_TERMINATE_REBOOT); +} + +void machine_power_off(void) +{ + co_terminate(CO_TERMINATE_POWEROFF); +} + +void machine_shutdown(void) +{ + co_terminate(CO_TERMINATE_HALT); +} + +void machine_emergency_restart(void) +{ + co_machine_restart(); +} + +void machine_restart(char *cmd) +{ + if (co_powering_off) + machine_power_off(); + else + co_machine_restart(); +} + +void machine_halt(void) +{ + co_terminate(CO_TERMINATE_HALT); +} + +static irqreturn_t power_interrupt(int irq, void *dev_id) +{ + co_message_node_t *node_message; + + if (co_get_message(&node_message, CO_DEVICE_POWER)) { + co_linux_message_t *message; + co_linux_message_power_t *type; + + message = (co_linux_message_t *)&node_message->msg.data; + type = (co_linux_message_power_t *)message->data; + switch (type->type) { + case CO_LINUX_MESSAGE_POWER_ALT_CTRL_DEL: + ctrl_alt_del(); + break; + case CO_LINUX_MESSAGE_POWER_SHUTDOWN: + co_shedule_shutdown(); + break; + case CO_LINUX_MESSAGE_POWER_OFF: + machine_power_off(); + break; + default: + printk(KERN_ERR "power interrupt: buggy type %d\n", type->type); + } + co_free_message(node_message); + } + + return IRQ_HANDLED; +} + +static int __init co_power_init(void) +{ + int rc; + + rc = request_irq(POWER_IRQ, &power_interrupt, IRQF_SAMPLE_RANDOM, "power", NULL); + if (rc) { + printk(KERN_ERR "POWER: unable to get irq %d\n", POWER_IRQ); + return rc; + } + + return 0; +} + +__initcall(co_power_init); Index: linux-2.6.33-source/arch/x86/kernel/i8259_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/kernel/i8259_cooperative.c @@ -0,0 +1,103 @@ +/* + * linux/arch/x86/kernel/i8259_cooperative.c + */ + +#include +#include +#include + +#include +#include + +CO_TRACE_STOP; + +/* Function must
have the same prototype as do_IRQ() */ +unsigned int /*__irq_entry*/ proxy_interrupt_handler(struct pt_regs *regs) +{ + /* high bit used in ret_from_ code */ + unsigned vector = ~regs->orig_ax; + + co_passage_page_assert_valid(); + co_passage_page_ref_up(); + co_passage_page->operation = CO_OPERATION_FORWARD_INTERRUPT; + co_passage_page->params[0] = vector; /* IRQ vector */ + co_passage_page->host_state.flags &= ~(1 << 9); /* Turn IF off */ + co_switch_wrapper(); + co_callback(regs); + return 1; +} + +CO_TRACE_CONTINUE; + +/* + * Not like you have any other choice other than using + * COPIC in Cooperative mode. + */ + +static void end_COPIC_irq(unsigned int irq) +{ +} + +static void mask_and_ack_COPIC(unsigned int irq) +{ +} + +static unsigned int startup_COPIC_irq(unsigned int irq) +{ + return 0; +} + +#define shutdown_COPIC_irq disable_COPIC_irq + +static void disable_COPIC_irq(unsigned int irq) +{ +} + +static void enable_COPIC_irq(unsigned int irq) +{ +} + +static struct irq_chip co_pic_irq_chip = { + .name = "COPIC", + .startup = startup_COPIC_irq, + .shutdown = shutdown_COPIC_irq, + .enable = enable_COPIC_irq, + .disable = disable_COPIC_irq, + .mask_ack = mask_and_ack_COPIC, + .end = end_COPIC_irq, + .set_affinity = NULL +}; + +void __init init_ISA_irqs (void) +{ + int i; + + printk("Setting proxy interrupt vectors (0..%d)\n", NR_IRQS-1); + for (i = 0; i < NR_IRQS; i++) { + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].action = NULL; + irq_desc[i].depth = 1; + + set_irq_chip_and_handler_name(i, &co_pic_irq_chip, handle_simple_irq, "CO"); + } + +} + +void __init init_IRQ(void) +{ + int i; + + /* all the set up before the call gates are initialised */ + init_ISA_irqs(); + + /* + * Cover the whole vector space, no vector can escape us. 
+ */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + if (i >= NR_IRQS) + break; + if (vector != SYSCALL_VECTOR) + set_intr_gate(vector, interrupt[i]); + } +} Index: linux-2.6.33-source/arch/x86/kernel/ioport_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/kernel/ioport_cooperative.c @@ -0,0 +1,15 @@ +/* + * linux/arch/x86/kernel/ioport_cooperative.c + */ + +#include + +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + return -EPERM; +} + +asmlinkage long sys_iopl(unsigned long unused) +{ + return -EPERM; +} Index: linux-2.6.33-source/lib/iomap.c =================================================================== --- linux-2.6.33-source.orig/lib/iomap.c +++ linux-2.6.33-source/lib/iomap.c @@ -58,6 +58,7 @@ bad_io_access(port, #is_pio ); \ } while (0) +#ifndef CONFIG_COOPERATIVE #ifndef pio_read16be #define pio_read16be(port) swab16(inw(port)) #define pio_read32be(port) swab32(inl(port)) @@ -223,6 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); +#endif /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) Index: linux-2.6.33-source/drivers/char/Kconfig =================================================================== --- linux-2.6.33-source.orig/drivers/char/Kconfig +++ linux-2.6.33-source/drivers/char/Kconfig @@ -1100,6 +1100,7 @@ config DEVPORT bool depends on !M68K + depends on !COOPERATIVE depends on ISA || PCI default y Index: linux-2.6.33-source/drivers/crypto/Kconfig =================================================================== --- linux-2.6.33-source.orig/drivers/crypto/Kconfig +++ linux-2.6.33-source/drivers/crypto/Kconfig @@ -52,6 +52,7 @@ config CRYPTO_DEV_GEODE tristate "Support for the Geode LX AES engine" depends on X86_32 && PCI + depends on !COOPERATIVE 
select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER help Index: linux-2.6.33-source/drivers/usb/Kconfig =================================================================== --- linux-2.6.33-source.orig/drivers/usb/Kconfig +++ linux-2.6.33-source/drivers/usb/Kconfig @@ -17,6 +17,7 @@ # NOTE: SL-811 option should be board-specific ... config USB_ARCH_HAS_HCD boolean + depends on !COOPERATIVE default y if USB_ARCH_HAS_OHCI default y if USB_ARCH_HAS_EHCI default y if PCMCIA && !M32R # sl811_cs @@ -27,6 +28,7 @@ # many non-PCI SOC chips embed OHCI config USB_ARCH_HAS_OHCI boolean + depends on !COOPERATIVE # ARM: default y if SA1111 default y if ARCH_OMAP @@ -55,6 +57,7 @@ # some non-PCI hcds implement EHCI config USB_ARCH_HAS_EHCI boolean + depends on !COOPERATIVE default y if PPC_83xx default y if SOC_AU1200 default y if ARCH_IXP4XX Index: linux-2.6.33-source/arch/x86/mm/comap.c =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/mm/comap.c @@ -0,0 +1,152 @@ + +/* + * This source code is a part of coLinux source package. + * + * Copyright (C) 2008 Steve Shoecraft + * + * The code is licensed under the GPL. See the COPYING file in + * the root directory.
+ * + */ + +#include +#include +#include +#include + +#include +#include + +/* + * Ask the host for the physical pages backing 'host_buffer'. + * Returns a kmalloc'ed array with one entry per page (the caller must + * kfree it), or 0 on failure. + */ +static unsigned long *get_pp(void *host_buffer, int size) { + unsigned long *pp, flags; + int npages,pp_size,rc; + + /* Get # of pages */ + npages = size >> PAGE_SHIFT; + if ((npages * PAGE_SIZE) < size) npages++; + + /* Alloc mem for phys pages */ + pp_size = npages * sizeof(unsigned long); + pp = kmalloc(pp_size, GFP_KERNEL); + if (!pp) { + printk(KERN_ERR "co_map_buffer: error allocating memory for physical pages!\n"); + return 0; + } + + /* Request physical pages from the host */ + co_passage_page_assert_valid(); + co_passage_page_acquire(&flags); + co_passage_page->operation = CO_OPERATION_GETPP; + co_passage_page->params[0] = (unsigned long) pp; + co_passage_page->params[1] = (unsigned long) host_buffer; + co_passage_page->params[2] = size; + co_switch_wrapper(); + rc = co_passage_page->params[0]; + co_passage_page_release(flags); + if (rc) { + printk(KERN_ERR "co_map_buffer: error getting physical pages from host!\n"); + kfree(pp); + return 0; + } + + return pp; +} + +#define VM_OPTS (VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_PFNMAP | VM_RAND_READ) + +/* + * Map a host buffer of 'size' bytes into guest kernel virtual memory: + * fetch its physical pages via get_pp(), reserve a vm area and write + * the page table entries under init_mm.pgd by hand. + * Returns the mapped address, or 0 on failure. + */ +void *co_map_buffer(void *host_buffer, int size) { + unsigned long addr, *pp, *pp_base, *pd, *pt, pa; + struct vm_struct *area; + void *buffer; + int i,npages,pd_index,pt_index; + + /* Get the physical pages for the host buffer */ + pp = get_pp(host_buffer, size); + if (!pp) return 0; + pp_base = pp; /* pp is advanced while mapping; keep the base for kfree() */ + + /* Get an area of virtual memory */ + area = get_vm_area(size, VM_OPTS); + if (!area) { + printk(KERN_ERR "co_map_buffer: unable to get vm area!\n"); + kfree(pp_base); + return 0; + } + buffer = area->addr; + + /* Round up like get_pp() so a trailing partial page is mapped too */ + npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + addr = (unsigned long) buffer; + pd = (unsigned long *) init_mm.pgd; + while(npages) { + /* Get pt */ + pd_index = pgd_index(addr); + if (pd[pd_index] != 0) { + pa = pd[pd_index] & PAGE_MASK; + pt = __va(CO_P_TO_PP(pa)); + } else { + pt = (unsigned long *) kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!pt) { + printk(KERN_ERR "co_map_buffer: unable to alloc new pt entry!\n"); + /* NOTE: the vm area and entries already written are left in place */ + kfree(pp_base); + return 0; + } + memset(pt, 0, PAGE_SIZE); + pa =
CO_PP_TO_P(__pa(pt)) | _PAGE_TABLE; + pd[pd_index] = pa; + } + + /* Fill pt */ + pt_index = pte_index(addr); + for(i=pt_index; i < 1024; i++) { + if (!npages) break; + BUG_ON(pt[i] != 0); + pt[i] = *pp | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED; + pp++; + addr += PAGE_SIZE; + npages--; + } + } + + kfree(pp_base); + return buffer; +} + +#if 0 +void dump_pt(void *buf, int size) { + unsigned long addr, *pd, *pt; + int x, pd_index, pt_index, npages; + + npages = size >> PAGE_SHIFT; + + addr = (unsigned long) buf; + pd = (unsigned long *) __va(read_cr3()); + printk(KERN_INFO "pd: %p\n", pd); + while(npages) { + pd_index = pgd_index(addr); + printk(KERN_INFO "pd[%04d]: %lx\n", pd_index, pd[pd_index]); + BUG_ON(pd[pd_index] == 0); + pt = __va(CO_P_TO_PP(pd[pd_index] & PAGE_MASK)); + pt_index = pte_index(addr); + for(x=pt_index; x < 1024; x++) { + if (!npages) break; + printk(KERN_INFO "%04d: pt[%04d]: %08lx\n", npages, x, pt[x]); + addr += PAGE_SIZE; + npages--; + } + } +} +#endif Index: linux-2.6.33-source/arch/x86/kernel/vmlinux.lds.S =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/vmlinux.lds.S +++ linux-2.6.33-source/arch/x86/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ #endif .
= ALIGN(8); _stext = .; + _kernel_start = .; /* coLinux kernel entry */ TEXT_TEXT SCHED_TEXT LOCK_TEXT Index: linux-2.6.33-source/arch/x86/include/asm/fixmap.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/fixmap.h +++ linux-2.6.33-source/arch/x86/include/asm/fixmap.h @@ -25,6 +25,7 @@ #else #include #endif +#include /* * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall Index: linux-2.6.33-source/arch/x86/include/asm/io_32.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/io_32.h +++ linux-2.6.33-source/arch/x86/include/asm/io_32.h @@ -102,10 +102,18 @@ #endif /* __KERNEL__ */ +#ifdef CONFIG_COOPERATIVE +static inline void native_io_delay(void) +{ + asm volatile("jmp 1f; 1: jmp 1f; 1:" : : : "memory"); +} +static inline void io_delay_init(void) {} +#else extern void native_io_delay(void); extern int io_delay_type; extern void io_delay_init(void); +#endif #if defined(CONFIG_PARAVIRT) #include Index: linux-2.6.33-source/arch/x86/include/asm/mmzone_32.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/mmzone_32.h +++ linux-2.6.33-source/arch/x86/include/asm/mmzone_32.h @@ -7,6 +7,7 @@ #define _ASM_X86_MMZONE_32_H #include +#include #ifdef CONFIG_NUMA extern struct pglist_data *node_data[]; Index: linux-2.6.33-source/arch/x86/include/asm/pgtable_32.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/pgtable_32.h +++ linux-2.6.33-source/arch/x86/include/asm/pgtable_32.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include Index: linux-2.6.33-source/arch/x86/include/asm/pgtable_32_types.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/pgtable_32_types.h +++ 
linux-2.6.33-source/arch/x86/include/asm/pgtable_32_types.h @@ -42,6 +42,8 @@ #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +#elif defined(CONFIG_COOPERATIVE) +# define VMALLOC_END (CO_VPTR_BASE_START - 2 * PAGE_SIZE) #else # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif Index: linux-2.6.33-source/kernel/sysctl.c =================================================================== --- linux-2.6.33-source.orig/kernel/sysctl.c +++ linux-2.6.33-source/kernel/sysctl.c @@ -105,7 +105,7 @@ static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; -#ifdef CONFIG_PRINTK +#if defined(CONFIG_PRINTK) && !defined(CONFIG_COOPERATIVE) static int ten_thousand = 10000; #endif @@ -675,6 +675,7 @@ .mode = 0644, .proc_handler = proc_dointvec, }, +#ifndef CONFIG_COOPERATIVE { .procname = "printk_delay", .data = &printk_delay_msec, @@ -684,6 +685,7 @@ .extra1 = &zero, .extra2 = &ten_thousand, }, +#endif /* !CONFIG_COOPERATIVE */ #endif { .procname = "ngroups_max", @@ -744,6 +746,7 @@ .mode = 0644, .proc_handler = proc_dointvec, }, +#ifndef CONFIG_COOPERATIVE { .procname = "io_delay_type", .data = &io_delay_type, @@ -751,6 +754,7 @@ .mode = 0644, .proc_handler = proc_dointvec, }, +#endif /* !CONFIG_COOPERATIVE */ #endif #if defined(CONFIG_MMU) { Index: linux-2.6.33-source/arch/x86/include/asm/system.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/system.h +++ linux-2.6.33-source/arch/x86/include/asm/system.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -309,8 +310,22 @@ #define write_cr0(x) (native_write_cr0(x)) #define read_cr2() (native_read_cr2()) #define write_cr2(x) (native_write_cr2(x)) +#ifdef CONFIG_COOPERATIVE + +static inline unsigned long read_cr3(void) +{ + return CO_P_TO_PP(native_read_cr3()); +} + +static inline void write_cr3(unsigned long val) +{ + native_write_cr3(CO_PP_TO_P(val)); +} + +#else 
#define read_cr3() (native_read_cr3()) #define write_cr3(x) (native_write_cr3(x)) +#endif #define read_cr4() (native_read_cr4()) #define read_cr4_safe() (native_read_cr4_safe()) #define write_cr4(x) (native_write_cr4(x)) Index: linux-2.6.33-source/arch/x86/power/hibernate_32.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/power/hibernate_32.c +++ linux-2.6.33-source/arch/x86/power/hibernate_32.c @@ -65,7 +65,7 @@ if (!page_table) return NULL; - set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(page_table)) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); Index: linux-2.6.33-source/arch/x86/kernel/i8237.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/i8237.c +++ linux-2.6.33-source/arch/x86/kernel/i8237.c @@ -23,6 +23,7 @@ static int i8237A_resume(struct sys_device *dev) { +#ifndef CONFIG_COOPERATIVE unsigned long flags; int i; @@ -41,6 +42,7 @@ enable_dma(4); release_dma_lock(flags); +#endif return 0; } Index: linux-2.6.33-source/arch/x86/mm/Makefile =================================================================== --- linux-2.6.33-source.orig/arch/x86/mm/Makefile +++ linux-2.6.33-source/arch/x86/mm/Makefile @@ -25,4 +25,6 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o +obj-$(CONFIG_COOPERATIVE) += comap.o + obj-$(CONFIG_MEMTEST) += memtest.o Index: linux-2.6.33-source/arch/x86/include/asm/pgalloc.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/pgalloc.h +++ linux-2.6.33-source/arch/x86/include/asm/pgalloc.h @@ -4,6 +4,7 @@ #include #include /* for struct page */ #include +#include static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } @@ -63,7 +64,7 @@ pmd_t *pmd, pte_t *pte) { paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT); - set_pmd(pmd, 
__pmd(__pa(pte) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(CO_PP_TO_P(__pa(pte)) | _PAGE_TABLE)); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, @@ -72,7 +73,7 @@ unsigned long pfn = page_to_pfn(pte); paravirt_alloc_pte(mm, pfn); - set_pmd(pmd, __pmd(((pteval_t)pfn << PAGE_SHIFT) | _PAGE_TABLE)); + set_pmd(pmd, __pmd(((pteval_t)CO_PFN_PP_TO_P(pfn) << PAGE_SHIFT) | _PAGE_TABLE)); } #define pmd_pgtable(pmd) pmd_page(pmd) Index: linux-2.6.33-source/arch/x86/kernel/process.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/process.c +++ linux-2.6.33-source/arch/x86/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -357,6 +358,12 @@ } #endif +#ifdef CONFIG_COOPERATIVE +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ + pm_idle = co_idle_processor; +} +#else /* CONFIG_COOPERATIVE */ /* * We use this if we don't have any better * idle routine.. @@ -682,6 +689,7 @@ return 0; } early_param("idle", idle_setup); +#endif /* CONFIG_COOPERATIVE */ unsigned long arch_align_stack(unsigned long sp) { Index: linux-2.6.33-source/arch/x86/kvm/Kconfig =================================================================== --- linux-2.6.33-source.orig/arch/x86/kvm/Kconfig +++ linux-2.6.33-source/arch/x86/kvm/Kconfig @@ -6,7 +6,7 @@ menuconfig VIRTUALIZATION bool "Virtualization" - depends on HAVE_KVM || X86 + depends on (HAVE_KVM || X86) && !COOPERATIVE default y ---help--- Say Y here to get to see options for using your Linux host to run other Index: linux-2.6.33-source/arch/x86/include/asm/io.h =================================================================== --- linux-2.6.33-source.orig/arch/x86/include/asm/io.h +++ linux-2.6.33-source/arch/x86/include/asm/io.h @@ -167,7 +167,12 @@ */ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { +#ifdef CONFIG_COOPERATIVE + panic("ioremap %llu:%lu\n", (unsigned long long)offset,
size); + return NULL; +#else return ioremap_nocache(offset, size); +#endif } extern void iounmap(volatile void __iomem *addr); Index: linux-2.6.33-source/arch/x86/kernel/irqinit.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/irqinit.c +++ linux-2.6.33-source/arch/x86/kernel/irqinit.c @@ -43,6 +43,7 @@ * (these are usually mapped into the 0x30-0xff vector range) */ +#ifndef CONFIG_COOPERATIVE #ifdef CONFIG_X86_32 /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 @@ -82,6 +83,7 @@ .handler = no_action, .name = "cascade", }; +#endif /* !CONFIG_COOPERATIVE */ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, @@ -116,6 +118,7 @@ return 0; } +#ifndef CONFIG_COOPERATIVE void __init init_ISA_irqs(void) { int i; @@ -249,3 +252,4 @@ irq_ctx_init(smp_processor_id()); #endif } +#endif /* !CONFIG_COOPERATIVE */ Index: linux-2.6.33-source/arch/x86/Makefile =================================================================== --- linux-2.6.33-source.orig/arch/x86/Makefile +++ linux-2.6.33-source/arch/x86/Makefile @@ -116,8 +116,10 @@ # Kernel objects head-y := arch/x86/kernel/head_$(BITS).o +ifndef CONFIG_COOPERATIVE head-y += arch/x86/kernel/head$(BITS).o head-y += arch/x86/kernel/head.o +endif head-y += arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ Index: linux-2.6.33-source/arch/x86/kernel/x86_init_cooperative.c =================================================================== --- /dev/null +++ linux-2.6.33-source/arch/x86/kernel/x86_init_cooperative.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2009 Thomas Gleixner + * + * For licencing details see kernel-base/COPYING + */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void __cpuinit x86_init_noop(void) { } +void __init x86_init_uint_noop(unsigned int unused) { } +void __init x86_init_pgd_noop(pgd_t *unused) { } +int __init 
iommu_init_noop(void) { return 0; } +void iommu_shutdown_noop(void) { } + +static void __init reserve_standard_io_resources_noop(void) { } +static unsigned long mach_get_cmos_time_noop(void) { return 0; } +static int mach_set_rtc_mmss_noop(unsigned long unused) { return 0; } +static void __init hpet_time_init_noop(void) { } +static void __init native_init_IRQ_noop(void) { } +static char *__init default_machine_specific_memory_setup_noop(void) { return "CO-dummy"; } +static unsigned long native_calibrate_tsc_noop(void) { return 0; } + +/* + * The platform setup functions are preset for coLinux: a mix of no-op stubs + * and the default functions for standard PC hardware. + */ +struct x86_init_ops x86_init __initdata = { + + .resources = { + .probe_roms = x86_init_noop, + .reserve_resources = reserve_standard_io_resources_noop, + .memory_setup = default_machine_specific_memory_setup_noop, + }, + + .mpparse = { + .mpc_record = x86_init_uint_noop, + .setup_ioapic_ids = x86_init_noop, + .mpc_apic_id = default_mpc_apic_id, + .smp_read_mpc_oem = default_smp_read_mpc_oem, + .mpc_oem_bus_info = default_mpc_oem_bus_info, + .find_smp_config = default_find_smp_config, + .get_smp_config = default_get_smp_config, + }, + + .irqs = { + .pre_vector_init = init_ISA_irqs, + .intr_init = native_init_IRQ_noop, + .trap_init = x86_init_noop, + }, + + .oem = { + .arch_setup = x86_init_noop, + .banner = default_banner, + }, + + .paging = { + .pagetable_setup_start = native_pagetable_setup_start, + .pagetable_setup_done = native_pagetable_setup_done, + }, + + .timers = { + .setup_percpu_clockev = setup_boot_APIC_clock, + .tsc_pre_init = x86_init_noop, + .timer_init = hpet_time_init_noop, + }, + + .iommu = { + .iommu_init = iommu_init_noop, + }, +}; + +struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { + .setup_percpu_clockev = setup_secondary_APIC_clock, +}; + +struct x86_platform_ops x86_platform = { + .calibrate_tsc = native_calibrate_tsc_noop, + .get_wallclock = mach_get_cmos_time_noop, + .set_wallclock = 
mach_set_rtc_mmss_noop, + .iommu_shutdown = iommu_shutdown_noop, + .is_untracked_pat_range = is_ISA_range, +}; Index: linux-2.6.33-source/arch/x86/kernel/cpu/Makefile =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/cpu/Makefile +++ linux-2.6.33-source/arch/x86/kernel/cpu/Makefile @@ -14,7 +14,10 @@ obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o sched.o +ifndef CONFIG_COOPERATIVE +obj-y += vmware.o hypervisor.o +endif +obj-y += sched.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o Index: linux-2.6.33-source/arch/x86/kernel/cpu/common.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/cpu/common.c +++ linux-2.6.33-source/arch/x86/kernel/cpu/common.c @@ -818,7 +818,9 @@ detect_ht(c); #endif +#ifndef CONFIG_COOPERATIVE init_hypervisor(c); +#endif /* * Clear/Set all flags overriden by options, need do it @@ -864,7 +866,9 @@ void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); +#ifndef CONFIG_COOPERATIVE init_c1e_mask(); +#endif #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); Index: linux-2.6.33-source/include/linux/page-flags.h =================================================================== --- linux-2.6.33-source.orig/include/linux/page-flags.h +++ linux-2.6.33-source/include/linux/page-flags.h @@ -110,6 +110,11 @@ #endif __NR_PAGEFLAGS, +/* coLinux flag; must not be cleared by PAGE_FLAGS_CHECK_AT_PREP */ +#ifdef CONFIG_COOPERATIVE + PG_co_host_mapped, /* Page is mapped on coLinux host */ +#endif + /* Filesystems */ PG_checked = PG_owner_priv_1, @@ -284,6 +289,11 @@ u64 stable_page_flags(struct page *page); +#ifdef CONFIG_COOPERATIVE +CLEARPAGEFLAG(CoHostMapped, co_host_mapped) +TESTSETFLAG(CoHostMapped, co_host_mapped) +#endif + static inline int PageUptodate(struct page *page) { int ret = 
test_bit(PG_uptodate, &(page)->flags); Index: linux-2.6.33-source/arch/x86/kernel/cpu/intel.c =================================================================== --- linux-2.6.33-source.orig/arch/x86/kernel/cpu/intel.c +++ linux-2.6.33-source/arch/x86/kernel/cpu/intel.c @@ -91,7 +91,9 @@ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); +#ifndef CONFIG_COOPERATIVE if (!check_tsc_unstable()) +#endif sched_clock_stable = 1; } Index: linux-2.6.33-source/scripts/mkmakefile =================================================================== --- linux-2.6.33-source.orig/scripts/mkmakefile +++ linux-2.6.33-source/scripts/mkmakefile @@ -44,7 +44,10 @@ Makefile:; -\$(all) %/: all +\$(all): all + @: + +%/: all @: EOF